# Election Results

In this script, I analyze the outcome of the 2020 US Presidential Elections on a county level. The data used can be found [here](https://github.com/tonmcg/US_County_Level_Election_Results_08-20).

In [17]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import statsmodels.formula.api as sm

First we take the data from 2020 and 2016 and combine them into one data set. This will help us to later analyze the differences between 2016 and 2020.

In [18]:
# careful, many county names appear multiple times, e.g. in different states
df20 = pd.read_csv('../data_raw/US_County_Level_Election_Results_08-20-master/2020_US_County_Level_Presidential_Results.csv')
df16 = pd.read_csv('../data_raw/US_County_Level_Election_Results_08-20-master/2016_US_County_Level_Presidential_Results.csv')
df20.columns = ['state','fips','county','2020votes_gop','2020votes_dem','2020total_votes','2020diff','2020per_gop','2020per_dem','2020per_point_diff']
df16.columns = ['ind','2016votes_dem','2016votes_gop','2016total_votes','2016per_dem','2016per_gop','2016diff','2016per_point_diff','state','county','fips']
df20

Unnamed: 0,state,fips,county,2020votes_gop,2020votes_dem,2020total_votes,2020diff,2020per_gop,2020per_dem,2020per_point_diff
0,Alabama,1001,Autauga County,19838,7503,27770,12335,0.714368,0.270184,0.444184
1,Alabama,1003,Baldwin County,83544,24578,109679,58966,0.761714,0.224090,0.537623
2,Alabama,1005,Barbour County,5622,4816,10518,806,0.534512,0.457882,0.076631
3,Alabama,1007,Bibb County,7525,1986,9595,5539,0.784263,0.206983,0.577280
4,Alabama,1009,Blount County,24711,2640,27588,22071,0.895716,0.095694,0.800022
...,...,...,...,...,...,...,...,...,...,...
3147,Wyoming,56037,Sweetwater County,12229,3823,16603,8406,0.736554,0.230260,0.506294
3148,Wyoming,56039,Teton County,4341,9848,14677,-5507,0.295769,0.670982,-0.375213
3149,Wyoming,56041,Uinta County,7496,1591,9402,5905,0.797277,0.169219,0.628058
3150,Wyoming,56043,Washakie County,3245,651,4012,2594,0.808824,0.162263,0.646560


In [19]:
df16 = df16.drop(['ind','state','county'],axis=1)
df = pd.merge(df20, df16,how='left', on=['fips'])
df

Unnamed: 0,state,fips,county,2020votes_gop,2020votes_dem,2020total_votes,2020diff,2020per_gop,2020per_dem,2020per_point_diff,2016votes_dem,2016votes_gop,2016total_votes,2016per_dem,2016per_gop,2016diff,2016per_point_diff
0,Alabama,1001,Autauga County,19838,7503,27770,12335,0.714368,0.270184,0.444184,5908.0,18110.0,24661.0,0.239569,0.734358,12202,49.48%
1,Alabama,1003,Baldwin County,83544,24578,109679,58966,0.761714,0.224090,0.537623,18409.0,72780.0,94090.0,0.195653,0.773515,54371,57.79%
2,Alabama,1005,Barbour County,5622,4816,10518,806,0.534512,0.457882,0.076631,4848.0,5431.0,10390.0,0.466603,0.522714,583,5.61%
3,Alabama,1007,Bibb County,7525,1986,9595,5539,0.784263,0.206983,0.577280,1874.0,6733.0,8748.0,0.214220,0.769662,4859,55.54%
4,Alabama,1009,Blount County,24711,2640,27588,22071,0.895716,0.095694,0.800022,2150.0,22808.0,25384.0,0.084699,0.898519,20658,81.38%
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3147,Wyoming,56037,Sweetwater County,12229,3823,16603,8406,0.736554,0.230260,0.506294,3233.0,12153.0,16661.0,0.194046,0.729428,8920,53.54%
3148,Wyoming,56039,Teton County,4341,9848,14677,-5507,0.295769,0.670982,-0.375213,7313.0,3920.0,12176.0,0.600608,0.321945,3393,27.87%
3149,Wyoming,56041,Uinta County,7496,1591,9402,5905,0.797277,0.169219,0.628058,1202.0,6154.0,8053.0,0.149261,0.764187,4952,61.49%
3150,Wyoming,56043,Washakie County,3245,651,4012,2594,0.808824,0.162263,0.646560,532.0,2911.0,3715.0,0.143203,0.783580,2379,64.04%


In [20]:
df['change'] = np.nan
for i in range(len(df)):
    df['change'][i] = (df['2020per_dem'][i] - df['2016per_dem'][i]) * 100

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['change'][i] = (df['2020per_dem'][i] - df['2016per_dem'][i]) * 100


In [21]:
# look at the greates increases for democrats
df.sort_values(by=['change'], inplace=True, ascending=False)
df.head()

Unnamed: 0,state,fips,county,2020votes_gop,2020votes_dem,2020total_votes,2020diff,2020per_gop,2020per_dem,2020per_point_diff,2016votes_dem,2016votes_gop,2016total_votes,2016per_dem,2016per_gop,2016diff,2016per_point_diff,change
2811,Utah,49049,Utah County,192812,76033,284480,116779,0.67777,0.26727,0.4105,24579.0,89755.0,174942.0,0.140498,0.513056,65176,37.26%,12.677211
2792,Utah,49011,Davis County,104135,57411,169895,46724,0.612937,0.33792,0.275017,22575.0,46985.0,104661.0,0.215696,0.448926,24410,23.32%,12.222409
2796,Utah,49019,Grand County,2248,2806,5185,-558,0.433558,0.541176,-0.107618,1932.0,1934.0,4456.0,0.433573,0.434022,2,0.04%,10.760376
2804,Utah,49035,Salt Lake County,230174,289906,541175,-59732,0.425323,0.535697,-0.110375,154831.0,117901.0,361347.0,0.428483,0.326282,36930,10.22%,10.721445
2789,Utah,49005,Cache County,38032,16650,57571,21382,0.66061,0.289208,0.371402,6705.0,16643.0,35879.0,0.186878,0.463865,9938,27.70%,10.232999


In [22]:
# look at the greates increases for republicans
df.sort_values(by=['change'], inplace=True)
df.head()

Unnamed: 0,state,fips,county,2020votes_gop,2020votes_dem,2020total_votes,2020diff,2020per_gop,2020per_dem,2020per_point_diff,2016votes_dem,2016votes_gop,2016total_votes,2016per_dem,2016per_gop,2016diff,2016per_point_diff,change
2746,Texas,48427,Starr County,8247,9123,17525,-876,0.470585,0.520571,-0.049986,9246.0,2218.0,11691.0,0.790865,0.189719,7028,60.11%,-27.029415
2694,Texas,48323,Maverick County,6881,8332,15346,-1451,0.44839,0.542943,-0.094552,10397.0,2816.0,13588.0,0.76516,0.207242,7581,55.79%,-22.221765
2663,Texas,48261,Kenedy County,127,65,194,62,0.654639,0.335052,0.319588,99.0,84.0,186.0,0.532258,0.451613,15,8.06%,-19.720652
2785,Texas,48505,Zapata County,2033,1826,3874,207,0.524781,0.471347,0.053433,2056.0,1028.0,3134.0,0.656031,0.328015,1028,32.80%,-18.468319
2656,Texas,48247,Jim Hogg County,833,1197,2036,-364,0.409136,0.587917,-0.178782,1635.0,430.0,2119.0,0.77159,0.202926,1205,56.87%,-18.367289


# Covid-19 Data

The county-level Covid-19 data was used from [Johns Hopkins University](https://github.com/CSSEGISandData/COVID-19).

In [23]:
df_cov = pd.read_csv('../data_raw/COVID-19-master/csse_covid_19_data/csse_covid_19_daily_reports/11-01-2020.csv')
df_cov = df_cov[df_cov['Country_Region'].str.contains('US')].reset_index(drop=True)
# convert fips to int without deleting rows with unavailable data
for i in range(len(df_cov)):
    if df_cov['FIPS'][i] != np.nan:
        df_cov['FIPS'][i] = df_cov['FIPS'][i].astype(int)
    else:
        continue
df_cov.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cov['FIPS'][i] = df_cov['FIPS'][i].astype(int)


Unnamed: 0,FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key,Incidence_Rate,Case-Fatality_Ratio
0,1001.0,Autauga,Alabama,US,2020-11-02 05:25:04,32.539527,-86.644082,2182,31,0,2142.0,"Autauga, Alabama, US",3889.455691,1.426599
1,1003.0,Baldwin,Alabama,US,2020-11-02 05:25:04,30.72775,-87.722071,6974,71,0,6895.0,"Baldwin, Alabama, US",3120.492398,1.019236
2,1005.0,Barbour,Alabama,US,2020-11-02 05:25:04,31.868263,-85.387129,1062,9,0,1052.0,"Barbour, Alabama, US",4297.982662,0.848256
3,1007.0,Bibb,Alabama,US,2020-11-02 05:25:04,32.996421,-87.125115,883,15,0,863.0,"Bibb, Alabama, US",3920.693043,1.708428
4,1009.0,Blount,Alabama,US,2020-11-02 05:25:04,33.982109,-86.567906,2128,25,0,2070.0,"Blount, Alabama, US",3622.937779,1.193317


Choose important columns and merge them with the county-level election data from 2020.

In [24]:
df_cov = df_cov[['FIPS','Confirmed','Deaths']]
df_cov.columns = ['fips','incidences','deaths']
df = pd.merge(df, df_cov,how='left', on=['fips'])
df.sort_values(by=['fips'], inplace=False)
df.head()

Unnamed: 0,state,fips,county,2020votes_gop,2020votes_dem,2020total_votes,2020diff,2020per_gop,2020per_dem,2020per_point_diff,2016votes_dem,2016votes_gop,2016total_votes,2016per_dem,2016per_gop,2016diff,2016per_point_diff,change,incidences,deaths
0,Texas,48427,Starr County,8247,9123,17525,-876,0.470585,0.520571,-0.049986,9246.0,2218.0,11691.0,0.790865,0.189719,7028,60.11%,-27.029415,3622.0,186.0
1,Texas,48323,Maverick County,6881,8332,15346,-1451,0.44839,0.542943,-0.094552,10397.0,2816.0,13588.0,0.76516,0.207242,7581,55.79%,-22.221765,4322.0,151.0
2,Texas,48261,Kenedy County,127,65,194,62,0.654639,0.335052,0.319588,99.0,84.0,186.0,0.532258,0.451613,15,8.06%,-19.720652,11.0,2.0
3,Texas,48505,Zapata County,2033,1826,3874,207,0.524781,0.471347,0.053433,2056.0,1028.0,3134.0,0.656031,0.328015,1028,32.80%,-18.468319,360.0,9.0
4,Texas,48247,Jim Hogg County,833,1197,2036,-364,0.409136,0.587917,-0.178782,1635.0,430.0,2119.0,0.77159,0.202926,1205,56.87%,-18.367289,185.0,6.0


In [25]:
# save data set
df.to_csv('../data_clean/results_covid_county.csv')

Now we can use this data set to run regressions.

In [26]:
ols1 = sm.ols(formula="change ~ incidences", data=df).fit()
print(ols1.summary())

                            OLS Regression Results                            
Dep. Variable:                 change   R-squared:                       0.004
Model:                            OLS   Adj. R-squared:                  0.004
Method:                 Least Squares   F-statistic:                     11.85
Date:                Wed, 19 May 2021   Prob (F-statistic):           0.000584
Time:                        15:49:54   Log-Likelihood:                -7729.5
No. Observations:                3087   AIC:                         1.546e+04
Df Residuals:                    3085   BIC:                         1.548e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      1.4957      0.055     27.084      0.0

In [27]:
ols2 = sm.ols(formula="change ~ deaths", data=df).fit()
print(ols2.summary())

                            OLS Regression Results                            
Dep. Variable:                 change   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                 -0.000
Method:                 Least Squares   F-statistic:                  0.002922
Date:                Wed, 19 May 2021   Prob (F-statistic):              0.957
Time:                        15:49:56   Log-Likelihood:                -7735.4
No. Observations:                3087   AIC:                         1.547e+04
Df Residuals:                    3085   BIC:                         1.549e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      1.5451      0.055     28.289      0.0

In [28]:
df[['change','incidences','deaths']].describe()

Unnamed: 0,change,incidences,deaths
count,3111.0,3088.0,3088.0
mean,1.56856,2950.038536,74.278497
std,2.974568,10821.671132,343.387244
min,-27.029415,0.0,0.0
25%,-0.128371,253.75,3.0
50%,1.479335,671.0,12.0
75%,3.271285,1853.0,37.0
max,12.677211,309197.0,7404.0


In [29]:
df.sort_values(by=['incidences'], inplace=True, ascending=False)
df[df['incidences'] >= 100000]

Unnamed: 0,state,fips,county,2020votes_gop,2020votes_dem,2020total_votes,2020diff,2020per_gop,2020per_dem,2020per_point_diff,2016votes_dem,2016votes_gop,2016total_votes,2016per_dem,2016per_gop,2016diff,2016per_point_diff,change,incidences,deaths
652,California,6037,Los Angeles County,1145530,3028885,4264365,-1883355,0.268629,0.710278,-0.44165,1893770.0,620285.0,2652072.0,0.714072,0.233887,1273485,48.02%,-0.379378,309197.0,7074.0
763,Illinois,17031,Cook County,558269,1725973,2325405,-1167704,0.240074,0.742225,-0.502151,1528582.0,440213.0,2055215.0,0.743758,0.214193,1088369,52.96%,-0.153302,193102.0,5500.0
17,Florida,12086,Miami-Dade County,532833,617864,1156816,-85031,0.460603,0.534107,-0.073504,623006.0,333666.0,978670.0,0.636584,0.340938,289340,29.56%,-10.247693,186809.0,3662.0
1690,Texas,48201,Harris County,700630,918193,1640818,-217563,0.427,0.559595,-0.132594,706471.0,544960.0,1302887.0,0.542235,0.418271,161511,12.40%,1.735961,162306.0,2811.0
2687,Arizona,4013,Maricopa County,995665,1040774,2069475,-45109,0.48112,0.502917,-0.021797,549040.0,590465.0,1201934.0,0.456797,0.491262,41425,3.45%,4.611979,159781.0,3606.0
2528,Texas,48113,Dallas County,307076,598576,919504,-291500,0.333958,0.650977,-0.317019,458845.0,261865.0,750649.0,0.611264,0.348851,196980,26.24%,3.971266,103392.0,1369.0


# Extra

## Comparison to 2018

We want to compare the election results of 2020 to the House and Senate elections in 2018, in order to separate the effect of Covid from other issues as well as possible. Nevertheless, many other factors are still in play. This data comes from the [MIT Election Data and Science Lab](https://github.com/MEDSL/2018-elections-official).

### Senate

In [7]:
df18 = pd.read_csv('../data_raw/2018-elections-official-master/senate_overall_2018.csv')
a = df18['state'].unique()

Since these elections are held in districts and not by county, I look at them on a State-level.

In [8]:
# calculate percentage of votes for dem in each state
df18['2018per_dem'] = np.nan
for i in range(len(df18)):
    if df18['party'][i] == 'democrat':
        df18['2018per_dem'][i] = df18['candidatevotes'][i] / df18['totalvotes'][i]
    else:
        continue

# drop all the non-dem rows 
df18 = df18.dropna().reset_index()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df18['2018per_dem'][i] = df18['candidatevotes'][i] / df18['totalvotes'][i]


In [9]:
# correct for states with multiple dem candidates
for i in range(1,len(df18)):
    if df18['state'][i] == df18['state'][i-1]:
        df18['2018per_dem'][i] = df18['2018per_dem'][i] + df18['2018per_dem'][i-1]
    else:
        continue

df18 = df18.drop(df18.index[[1,12]])
df18 = df18.reset_index()
df18 = df18[['state','totalvotes','2018per_dem']]
df18.columns = ['state','2018total_votes_sen','2018per_dem_sen']
#df18

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df18['2018per_dem'][i] = df18['2018per_dem'][i] + df18['2018per_dem'][i-1]


In [10]:
# some states might not have had any dem candidates
b = []
for i in range(len(df18)):
    b.append(df18['state'][i])

for i in a:
    if i in b:
        continue
    else:
        print(i)

Minnesota
North Dakota
Vermont


In [11]:
# lets insert these states
m = {'state': 'Minnesota','2018total_votes_sen': 5184235,'2018per_dem_sen':0}
n = {'state': 'North Dakota','2018total_votes_sen': 326138,'2018per_dem_sen':0}
o = {'state': 'Vermont','2018total_votes_sen': 272624,'2018per_dem_sen':0}
df18 = df18.append([m,n,o],ignore_index=True)
df18.sort_values(by=['state'], inplace=True)
df18 = df18.reset_index(drop=True)
df18

Unnamed: 0,state,2018total_votes_sen,2018per_dem_sen
0,Arizona,2384308,0.499558
1,California,11113364,1.0
2,Connecticut,1386840,0.567971
3,Delaware,362592,0.599531
4,Florida,8190005,0.499325
5,Hawaii,388351,0.711511
6,Indiana,2282565,0.448422
7,Maine,634409,0.104456
8,Maryland,2299889,0.648559
9,Massachusetts,2707090,0.603368


### House

In [12]:
hor = pd.read_csv('../data_raw/2018-elections-official-master/district_overall_2018.csv')
states = hor['state'].unique()

In [13]:
hor_state = pd.DataFrame() # to store the values

for i in states:
    temp = hor[hor['state'].str.contains(i)].reset_index() # select the entries by state
    c = [] # for the total votes
    
    # calculate total votes only once per district
    c.append(temp['totalvotes'][0])

    for j in range(1,len(temp)): 
        if temp['district'].nunique() == 1:
            c.append(temp['totalvotes'][0])
        else:
            if temp['district'][j] == temp['district'][j-1]:
                continue
            else:
                c.append(temp['totalvotes'][j])
    
    # calculate the amount of votes for dem candidates
    dem = temp.dropna(subset=['party']).reset_index(drop=True)
    dem = dem[dem['party'].str.contains('democrat')]
        
    tot = sum(c)
    per = sum(dem['candidatevotes']) / tot
    
    new = {'state': i,'2018total_votes_hor': tot,'2018per_dem_hor': per}
    hor_state = hor_state.append(new,ignore_index=True)

In [14]:
hor_state.sort_values(by=['state'], inplace=True)
hor_state = hor_state.reset_index(drop=True)
hor_state = hor_state.reindex(columns=['state','2018total_votes_hor','2018per_dem_hor'])
hor_state

Unnamed: 0,state,2018total_votes_hor,2018per_dem_hor
0,Alabama,1659895.0,0.347792
1,Alaska,846498.0,0.15499
2,Arizona,2341270.0,0.503655
3,Arkansas,889298.0,0.351938
4,California,24369044.0,0.657428
5,Colorado,2513546.0,0.534389
6,Connecticut,1379808.0,0.586061
7,Delaware,705474.0,0.32227
8,Florida,28085904.0,0.235508
9,Georgia,5144171.0,0.352723


In [16]:
df18 = pd.merge(hor_state, df18, how='left', on=['state'])
df18

Unnamed: 0,state,2018total_votes_hor,2018per_dem_hor,2018total_votes_sen,2018per_dem_sen
0,Alabama,1659895.0,0.347792,,
1,Alaska,846498.0,0.15499,,
2,Arizona,2341270.0,0.503655,2384308.0,0.499558
3,Arkansas,889298.0,0.351938,,
4,California,24369044.0,0.657428,11113364.0,1.0
5,Colorado,2513546.0,0.534389,,
6,Connecticut,1379808.0,0.586061,1386840.0,0.567971
7,Delaware,705474.0,0.32227,362592.0,0.599531
8,Florida,28085904.0,0.235508,8190005.0,0.499325
9,Georgia,5144171.0,0.352723,,


Now that we have the State-level election data from 2018 (Senate and House of Representatives), I want to merge the data set with the State-level data of the 2020 Presidential Election.

In [17]:
data = pd.DataFrame() # to store the values
states = df['state'].unique()

for i in states:
    temp = df[df['state'].str.contains(i)].reset_index() # select the entries by state
        
    tot20 = sum(temp['2020total_votes'])
    per20 = sum(temp['2020votes_dem']) / tot20
    tot16 = sum(temp['2016total_votes'])
    per16 = sum(temp['2016votes_dem']) / tot16
    
    new = {'state': i,'2020total_votes': tot20,'2020per_dem': per20,'2016total_votes': tot16,'2016per_dem': per16 }
    data = data.append(new,ignore_index=True)
    
data

Unnamed: 0,2016per_dem,2016total_votes,2020per_dem,2020total_votes,state
0,0.434428,8903237.0,0.46479,11315056.0,Texas
1,0.341624,1108615.0,0.347751,1219069.0,Arkansas
2,0.477881,9386750.0,0.478615,11067456.0,Florida
3,0.615885,11954317.0,0.634839,17500881.0,California
4,0.348981,2484691.0,0.374514,3053851.0,Tennessee
5,0.467024,4629471.0,0.485862,5524804.0,North Carolina
6,0.588103,7046175.0,0.608677,8616861.0,New York
7,0.380127,2775098.0,0.414088,3025962.0,Missouri
8,0.407528,2084444.0,0.434301,2513329.0,South Carolina
9,0.435085,5325395.0,0.452393,5922202.0,Ohio


In [18]:
data = pd.merge(data, df18, how='left', on=['state'])
data = data.reindex(columns=['state','2020total_votes','2020per_dem','2018total_votes_hor','2018per_dem_hor','2018total_votes_sen','2018per_dem_sen','2016total_votes','2016per_dem'])

In [19]:
data['change_total_1620'] = np.nan
data['change_dem_1820'] = np.nan
data['change_dem_1620'] = np.nan

for i in range(len(data)):
    data['change_total_1620'][i] = data['2020total_votes'][i] - data['2016total_votes'][i]
    data['change_dem_1820'][i] = data['2020per_dem'][i] - data['2018per_dem_hor'][i]
    data['change_dem_1620'][i] = data['2020per_dem'][i] - data['2016per_dem'][i]
    
data

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['change_total_1620'][i] = data['2020total_votes'][i] - data['2016total_votes'][i]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['change_dem_1820'][i] = data['2020per_dem'][i] - data['2018per_dem_hor'][i]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['change_dem_1620'][i] = data['2020per_dem'][i] - data['2016per_dem'][i]


Unnamed: 0,state,2020total_votes,2020per_dem,2018total_votes_hor,2018per_dem_hor,2018total_votes_sen,2018per_dem_sen,2016total_votes,2016per_dem,change_total_1620,change_dem_1820,change_dem_1620
0,Texas,11315056.0,0.46479,8202708.0,0.469693,8371655.0,0.483254,8903237.0,0.434428,2411819.0,-0.004903,0.030362
1,Arkansas,1219069.0,0.347751,889298.0,0.351938,,,1108615.0,0.341624,110454.0,-0.004188,0.006127
2,Florida,11067456.0,0.478615,28085904.0,0.235508,8190005.0,0.499325,9386750.0,0.477881,1680706.0,0.243106,0.000734
3,California,17500881.0,0.634839,24369044.0,0.657428,11113364.0,1.0,11954317.0,0.615885,5546564.0,-0.022588,0.018954
4,Tennessee,3053851.0,0.374514,2159825.0,0.391907,2243740.0,0.4392,2484691.0,0.348981,569160.0,-0.017392,0.025533
5,North Carolina,5524804.0,0.485862,3663326.0,0.483457,,,4629471.0,0.467024,895333.0,0.002405,0.018838
6,New York,8616861.0,0.608677,5948673.0,0.632169,6055151.0,0.620214,7046175.0,0.588103,1570686.0,-0.023492,0.020574
7,Missouri,3025962.0,0.414088,2418413.0,0.425059,2442289.0,0.455693,2775098.0,0.380127,250864.0,-0.010972,0.033961
8,South Carolina,2513329.0,0.434301,1709292.0,0.443657,,,2084444.0,0.407528,428885.0,-0.009356,0.026773
9,Ohio,5922202.0,0.452393,4406358.0,0.472654,4410898.0,0.534114,5325395.0,0.435085,596807.0,-0.020261,0.017308


In [20]:
data.to_csv('../data_clean/ele_results_state.csv')