# If you don't want to step through the notebook cell by cell, the last cell contains all of the code combined into a single cell

# The complete data set with election data from 2000-2016 is called "election_data" 

# The approval rating data (Sep-Nov) for each election from 2004-2016 is stored in "gwbush_kerry", "obama_mccain", "obama_romney", and "trump_clinton"


In [41]:
import pandas as pd

In [58]:
'''This is the reading in and data manipulation for the voter turnout data'''

# Read only the first two columns of the turnout file and give the rate
# column a shorter name.
voter_turnout = pd.read_csv('U.S. VEP Turnout 1789-Present - Statistics.csv', usecols=(0, 1))
voter_turnout.rename(
    columns={'United States Presidential VEP Turnout Rate': 'VEP Turnout Rate'},
    inplace=True,
)

# Remove the rows labelled 0-52 with a single drop() call instead of issuing
# one drop() per row from a Python loop.
# NOTE(review): presumably these are the elections before 2000 -- confirm
# against the CSV.
voter_turnout.drop(index=range(53), inplace=True)

In [59]:
'''This is the reading in and data manipulation for the presidential candidate data: 
It includes the candidates and their party affiliation from 1984-2016'''

presidential_candidate_data = pd.read_csv('presidential_data.csv')

# Remove the rows labelled 0-3 with a single drop() call rather than one
# drop() per row.
presidential_candidate_data.drop(index=range(4), inplace=True)

# Spell out the party codes stored in the columns at positions 2 and 4,
# a whole column at a time instead of cell by cell.
# 'R' becomes 'Republican'; every other value (a missing value included)
# becomes 'Democrat', exactly as the original if/else did.
for party_column in presidential_candidate_data.columns[[2, 4]]:
    is_republican = presidential_candidate_data[party_column] == 'R'
    presidential_candidate_data[party_column] = is_republican.map(
        {True: 'Republican', False: 'Democrat'}
    )

In [60]:
'''This is the reading in and data manipulation for the campaign spending data: 
It includes the candidates and their spending from 1984-2016'''

spending_data = pd.read_csv('campaign-spending-2016.csv')

# Remove the rows labelled 0-3 with a single drop() call rather than one
# drop() per row.
spending_data.drop(index=range(4), inplace=True)

# The source columns are labelled in units of $1 million. Scale each one to
# plain dollars, then rename it so the header matches the new unit.
spending_renames = {
    'Winning Candidate Spend ($1mil)': 'Winning Candidate Spending ($)',
    'Losing Candidate Spend ($1mil)': 'Losing Candidate Spending ($)',
}
for millions_column in spending_renames:
    spending_data[millions_column] = spending_data[millions_column] * 1000000
spending_data.rename(columns=spending_renames, inplace=True)

In [61]:
'''Merging presidential data and spending data'''

# No join key is given, so the merge uses every column name the two tables
# have in common. The result is then narrowed to the columns below, in this
# order.
candidate_info = pd.merge(presidential_candidate_data, spending_data).loc[:, [
    'Year',
    'Winning Candidate',
    'Winning Candidate Party',
    'Winning Candidate Spending ($)',
    'Losing Candidate',
    'Losing Candidate Party',
    'Losing Candidate Spending ($)',
]]

In [62]:
'''Merging the candidate info and the voter turnout data'''

# No join key is given, so the merge uses every column name the two tables
# have in common.
election_data = pd.merge(candidate_info, voter_turnout)

In [63]:
'''This is how the complete dataset looks'''

# Evaluating the bare name as the cell's final expression shows the merged
# table (one row per election, 2000-2016) as the cell output.
election_data

Unnamed: 0,Year,Winning Candidate,Winning Candidate Party,Winning Candidate Spending ($),Losing Candidate,Losing Candidate Party,Losing Candidate Spending ($),VEP Turnout Rate
0,2000,George W. Bush,Republican,186500000.0,Al Gore,Democrat,120300000.0,54.2
1,2004,George W. Bush,Republican,355000000.0,John Kerry,Democrat,332700000.0,60.1
2,2008,Barack Obama,Democrat,760400000.0,John McCain,Republican,239700000.0,61.6
3,2012,Barack Obama,Democrat,737100000.0,Mitt Romney,Republican,458700000.0,58.6
4,2016,Donald Trump,Republican,239000000.0,Hillary Clinton,Democrat,450600000.0,60.1


In [64]:
'''This is the reading in and data manipulation for approval rating data'''


def _load_poll_data(csv_path, columns):
    '''Read one poll CSV and return its rows in reversed order.

    Parameters:
        csv_path: path of the CSV file to read.
        columns: positional indices of the columns to keep (handed to
            ``pd.read_csv`` as ``usecols``).

    Returns:
        A DataFrame holding every row of the file except the first one,
        in reverse file order, with a fresh 0-based index.
    '''
    polls = pd.read_csv(csv_path, usecols=columns)
    # Skip the file's first row, flip what is left, and renumber the index.
    # NOTE(review): the first row is presumably a summary line rather than
    # an individual poll -- confirm against the CSVs.
    return polls.iloc[1:].iloc[::-1].reset_index(drop=True)


# NOTE(review): this file name lacks the leading 'g' of the variable name --
# confirm it matches the file on disk.
gwbush_kerry = _load_poll_data('wbush_kerry.csv', (0, 1, 3, 4))
obama_mccain = _load_poll_data('obama_mccain.csv', (0, 1, 4, 5))
obama_romney = _load_poll_data('obama_romney.csv', (0, 1, 4, 5))
trump_clinton = _load_poll_data('trump_clinton.csv', (0, 1, 4, 5))



gwbush_kerry #This is the 2004 approval rating data (George W. Bush vs John Kerry)
obama_mccain #This is the 2008 approval rating data (Barack Obama vs John McCain)
obama_romney #This is the 2012 approval rating data (Barack Obama vs Mitt Romney)
trump_clinton #This is the 2016 approval rating data (Donald Trump vs Hillary Clinton)

Unnamed: 0,Poll,Date,Hillary Clinton,Donald Trump
0,IBD/TIPP,8/26 - 9/1,44.0,43.0
1,NBC News/SM,8/29 - 9/4,48.0,42.0
2,CNN/ORC,9/1 - 9/4,48.0,49.0
3,Reuters/Ipsos,9/1 - 9/5,40.0,38.0
4,Economist/YouGov,9/4 - 9/6,44.0,42.0
...,...,...,...,...
94,LA Times/USC Tracking,11/1 - 11/7,44.0,47.0
95,Economist/YouGov,11/4 - 11/7,49.0,45.0
96,IBD/TIPP Tracking,11/4 - 11/7,43.0,42.0
97,Bloomberg,11/4 - 11/6,46.0,43.0


In [75]:
'''This is the cell with all of the code from above'''

import pandas as pd

# Voter turnout: keep the first two columns and shorten the rate column name.
voter_turnout = pd.read_csv('U.S. VEP Turnout 1789-Present - Statistics.csv', usecols=(0, 1))
voter_turnout.rename(
    columns={'United States Presidential VEP Turnout Rate': 'VEP Turnout Rate'},
    inplace=True,
)
# Remove the rows labelled 0-52 in one call instead of one drop() per row.
# NOTE(review): presumably these are the elections before 2000 -- confirm
# against the CSV.
voter_turnout.drop(index=range(53), inplace=True)
    
# Presidential candidates: remove the rows labelled 0-3 in one call.
presidential_candidate_data = pd.read_csv('presidential_data.csv')
presidential_candidate_data.drop(index=range(4), inplace=True)
# Spell out the party codes in the columns at positions 2 and 4, a whole
# column at a time. 'R' becomes 'Republican'; every other value (a missing
# value included) becomes 'Democrat', exactly as the original if/else did.
for party_column in presidential_candidate_data.columns[[2, 4]]:
    is_republican = presidential_candidate_data[party_column] == 'R'
    presidential_candidate_data[party_column] = is_republican.map(
        {True: 'Republican', False: 'Democrat'}
    )
        
# Campaign spending: remove the rows labelled 0-3 in one call.
spending_data = pd.read_csv('campaign-spending-2016.csv')
spending_data.drop(index=range(4), inplace=True)
# The source columns are labelled in units of $1 million. Scale each one to
# plain dollars, then rename it so the header matches the new unit.
spending_renames = {
    'Winning Candidate Spend ($1mil)': 'Winning Candidate Spending ($)',
    'Losing Candidate Spend ($1mil)': 'Losing Candidate Spending ($)',
}
for millions_column in spending_renames:
    spending_data[millions_column] = spending_data[millions_column] * 1000000
spending_data.rename(columns=spending_renames, inplace=True)

# Join candidates with spending on the column names they share (no explicit
# key is given), then keep the columns below in this order.
candidate_info = pd.merge(presidential_candidate_data, spending_data).loc[:, [
    'Year',
    'Winning Candidate',
    'Winning Candidate Party',
    'Winning Candidate Spending ($)',
    'Losing Candidate',
    'Losing Candidate Party',
    'Losing Candidate Spending ($)',
]]

# Attach the turnout figures; the join uses the column names both tables share.
election_data = pd.merge(candidate_info, voter_turnout)

def _load_poll_data(csv_path, columns):
    '''Read one poll CSV and return its rows in reversed order.

    Parameters:
        csv_path: path of the CSV file to read.
        columns: positional indices of the columns to keep (handed to
            ``pd.read_csv`` as ``usecols``).

    Returns:
        A DataFrame holding every row of the file except the first one,
        in reverse file order, with a fresh 0-based index.
    '''
    polls = pd.read_csv(csv_path, usecols=columns)
    # Skip the file's first row, flip what is left, and renumber the index.
    # NOTE(review): the first row is presumably a summary line rather than
    # an individual poll -- confirm against the CSVs.
    return polls.iloc[1:].iloc[::-1].reset_index(drop=True)


# NOTE(review): this file name lacks the leading 'g' of the variable name --
# confirm it matches the file on disk.
gwbush_kerry = _load_poll_data('wbush_kerry.csv', (0, 1, 3, 4))
obama_mccain = _load_poll_data('obama_mccain.csv', (0, 1, 4, 5))
obama_romney = _load_poll_data('obama_romney.csv', (0, 1, 4, 5))
trump_clinton = _load_poll_data('trump_clinton.csv', (0, 1, 4, 5))