This is a processing script that aggregates <a href="https://electionlab.mit.edu/data">MIT's Election Data</a> for United States presidential elections at the state and county levels.  I use this data for teaching an Analysis in GIS course at Virginia Tech.

In [1]:
import pandas as pd
import numpy as np

# County Election Data

In [2]:
# Load the county-level presidential returns. FIPS is read as text so the
# codes can be left-padded with zeros below.
mit_data = pd.read_csv('original_data/countypres_2000-2016.csv',dtype={'FIPS':str})
# Drop rows that have no FIPS code. The explicit copy makes the filtered
# frame independent of the original, so the column assignment below does not
# trigger pandas' SettingWithCopyWarning.
mit_data = mit_data[mit_data['FIPS'].notnull()].copy()
# Pad every code to five characters.
mit_data['FIPS'] = mit_data['FIPS'].str.zfill(5)

In [3]:
# Republican ('gop') and Democratic ('dem') nominee for each election year.
# The names are compared against the `candidate` column of the county data,
# so the spelling must match that file.
presidential_candidates = {
    election_year: {'gop': gop_name, 'dem': dem_name}
    for election_year, gop_name, dem_name in [
        (2000, 'George W. Bush', 'Al Gore'),
        (2004, 'George W. Bush', 'John Kerry'),
        (2008, 'John McCain', 'Barack Obama'),
        (2012, 'Mitt Romney', 'Barack Obama'),
        (2016, 'Donald Trump', 'Hillary Clinton'),
    ]
}

In [4]:
# Build one output row per county FIPS code, then append each election
# year's vote counts and percentages as additional columns.
output_df = pd.DataFrame()
output_df['FIPS'] = mit_data['FIPS'].unique()

years = np.sort(list(presidential_candidates.keys()))

for election_year in years:
    # Rows and nominee names for this election. The year is turned into a
    # string because it is embedded in the output column names.
    year_rows = mit_data[mit_data['year'] == election_year]
    nominees = presidential_candidates[election_year]
    tag = str(election_year)

    # Output column names for this election.
    gop_votes_col = 'gop_{}_votes'.format(tag)
    dem_votes_col = 'dem_{}_votes'.format(tag)
    total_col = 'totalvotes_{}'.format(tag)
    gop_prc_col = 'gop_{}_prc'.format(tag)
    dem_prc_col = 'dem_{}_prc'.format(tag)

    # Republican nominee: votes per county.
    is_gop = year_rows.candidate == nominees['gop']
    gop_part = year_rows.loc[is_gop, ['FIPS', 'candidatevotes']].rename(
        columns={'candidatevotes': gop_votes_col}
    )

    # Democratic nominee: votes per county, plus the total-votes column,
    # which is taken from these rows only.
    is_dem = year_rows.candidate == nominees['dem']
    dem_part = year_rows.loc[is_dem, ['FIPS', 'candidatevotes', 'totalvotes']].rename(
        columns={'candidatevotes': dem_votes_col, 'totalvotes': total_col}
    )

    # Left joins keep every county even if a nominee has no row for it.
    output_df = output_df.merge(gop_part, on='FIPS', how='left').merge(
        dem_part, on='FIPS', how='left'
    )

    # Vote shares in percent, rounded to two decimals, and the GOP-minus-Dem
    # margin computed from the rounded shares.
    total_votes = output_df[total_col]
    output_df[gop_prc_col] = np.round(100 * output_df[gop_votes_col] / total_votes, decimals=2)
    output_df[dem_prc_col] = np.round(100 * output_df[dem_votes_col] / total_votes, decimals=2)
    output_df['gop_minus_dem_prc_' + tag] = output_df[gop_prc_col] - output_df[dem_prc_col]

# Write the county table without the index; floats are written with two decimals.
output_df.to_csv(
    'county_election_data_2000-2016.csv',
    index=False,
    float_format='%.2f',
)

# State Election Data

In [11]:
# Load the state-level presidential returns. state_fips is read as text so
# the codes can be left-padded with zeros below.
mit_data = pd.read_csv('original_data/1976-2016-president.csv',dtype={'state_fips':str})
# Drop rows that have no state FIPS code. The explicit copy makes the
# filtered frame independent of the original, so the column assignment below
# does not trigger pandas' SettingWithCopyWarning.
mit_data = mit_data[mit_data['state_fips'].notnull()].copy()
# Pad every code to two characters.
mit_data['state_fips'] = mit_data['state_fips'].str.zfill(2)

In [12]:
# Republican ('gop') and Democratic ('dem') nominee for each election year,
# written "Last, First". The names are compared against the `candidate`
# column of the state data, so the spelling must match that file.
presidential_candidates = {
    election_year: {'gop': gop_name, 'dem': dem_name}
    for election_year, gop_name, dem_name in [
        (1976, 'Ford, Gerald', 'Carter, Jimmy'),
        (1980, 'Reagan, Ronald', 'Carter, Jimmy'),
        (1984, 'Reagan, Ronald', 'Mondale, Walter'),
        (1988, 'Bush, George H.W.', 'Dukakis, Michael'),
        (1992, 'Bush, George H.W.', 'Clinton, Bill'),
        (1996, 'Dole, Robert', 'Clinton, Bill'),
        (2000, 'Bush, George W.', 'Gore, Al'),
        (2004, 'Bush, George W.', 'Kerry, John'),
        (2008, 'McCain, John', 'Obama, Barack H.'),
        (2012, 'Romney, Mitt', 'Obama, Barack H.'),
        (2016, 'Trump, Donald J.', 'Clinton, Hillary'),
    ]
}

In [13]:
# Seed the output with one row per state (name, postal code, FIPS); each
# election year's vote counts and percentages are joined on as new columns.
output_df = mit_data.loc[:,['state','state_po','state_fips']]
output_df = output_df.drop_duplicates()

years = np.sort(list(presidential_candidates.keys()))

for year in years:
    # Pull this year as a dataframe, pull this year's candidates, and
    # convert year to a string, since it will now be used to name fields
    df=mit_data[mit_data['year']==year]
    candidates = presidential_candidates[year]
    year = str(year)

    # Republican nominee: a candidate can have several rows in one state,
    # so the candidate's votes are summed per state.
    gop = df.candidate == candidates['gop']
    gop = df.loc[gop,['state_po','candidatevotes']]
    gop = gop.groupby('state_po').sum()
    gop = gop.rename(columns={'candidatevotes':'gop' + '_' + year + '_votes'})

    # Democratic nominee: sum the candidate's votes per state, but do NOT sum
    # totalvotes. NOTE(review): totalvotes looks like the statewide total
    # repeated on every row (confirm against the MIT codebook); summing it
    # over a nominee's rows would multiply the denominator and deflate both
    # parties' percentages. Taking the per-state maximum keeps one copy.
    dem = df.candidate == candidates['dem']
    dem = df.loc[dem,['state_po','candidatevotes','totalvotes']]
    dem = dem.groupby('state_po').agg({'candidatevotes':'sum','totalvotes':'max'})
    dem = dem.rename(columns={'candidatevotes':'dem' + '_' + year + '_votes',
                              'totalvotes':'totalvotes' + '_' + year})

    # Write this information to the output dataframe and calculate some fields.
    # gop/dem are indexed by state_po; merge matches `on` against that index
    # level. Left joins keep every state even if a nominee has no row for it.
    output_df = output_df.merge(gop,on='state_po',how='left')
    output_df = output_df.merge(dem,on='state_po',how='left')
    output_df['gop_' + year + '_prc'] = np.round(100 * output_df['gop_' + year + '_votes'] / output_df['totalvotes_' + year],decimals=2)
    output_df['dem_' + year + '_prc'] = np.round(100 * output_df['dem_' + year + '_votes'] / output_df['totalvotes_' + year],decimals=2)
    # Margin is computed from the already-rounded percentages.
    output_df['gop_minus_dem_prc_' + year] = output_df['gop_' + year + '_prc'] - output_df['dem_' + year + '_prc']
   


In [14]:
# Write the state table without the index; floats are written with two decimals.
output_df.to_csv(
    'state_election_data_1976-2016.csv',
    index=False,
    float_format='%.2f',
)