# USA Presidential Election data analysis

The objective of this analysis is to gain insights in to the US presidential elections over the years

Data source: https://electionlab.mit.edu/data
https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/42MVDX

### Import libraries

In [41]:
import pandas as pd
import numpy as np

### Read data

In [42]:
df = pd.read_csv("1976-2020-president.csv")

In [43]:
df.head()

Unnamed: 0,year,state,state_po,state_fips,state_cen,state_ic,office,candidate,party_detailed,writein,candidatevotes,totalvotes,version,notes,party_simplified
0,1976,ALABAMA,AL,1,63,41,US PRESIDENT,"CARTER, JIMMY",DEMOCRAT,False,659170,1182850,20210113,,DEMOCRAT
1,1976,ALABAMA,AL,1,63,41,US PRESIDENT,"FORD, GERALD",REPUBLICAN,False,504070,1182850,20210113,,REPUBLICAN
2,1976,ALABAMA,AL,1,63,41,US PRESIDENT,"MADDOX, LESTER",AMERICAN INDEPENDENT PARTY,False,9198,1182850,20210113,,OTHER
3,1976,ALABAMA,AL,1,63,41,US PRESIDENT,"BUBAR, BENJAMIN """"BEN""""",PROHIBITION,False,6669,1182850,20210113,,OTHER
4,1976,ALABAMA,AL,1,63,41,US PRESIDENT,"HALL, GUS",COMMUNIST PARTY USE,False,1954,1182850,20210113,,OTHER


### Initial exploration

In [44]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4287 entries, 0 to 4286
Data columns (total 15 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   year              4287 non-null   int64  
 1   state             4287 non-null   object 
 2   state_po          4287 non-null   object 
 3   state_fips        4287 non-null   int64  
 4   state_cen         4287 non-null   int64  
 5   state_ic          4287 non-null   int64  
 6   office            4287 non-null   object 
 7   candidate         4000 non-null   object 
 8   party_detailed    3831 non-null   object 
 9   writein           4284 non-null   object 
 10  candidatevotes    4287 non-null   int64  
 11  totalvotes        4287 non-null   int64  
 12  version           4287 non-null   int64  
 13  notes             0 non-null      float64
 14  party_simplified  4287 non-null   object 
dtypes: float64(1), int64(7), object(7)
memory usage: 502.5+ KB


In [45]:
df.describe()

Unnamed: 0,year,state_fips,state_cen,state_ic,candidatevotes,totalvotes,version,notes
count,4287.0,4287.0,4287.0,4287.0,4287.0,4287.0,4287.0,0.0
mean,1999.080942,28.616982,53.665034,39.75484,311907.6,2366924.0,20210113.0,
std,14.220014,15.616459,26.029189,22.772216,764801.1,2465008.0,0.0,
min,1976.0,1.0,11.0,1.0,0.0,123574.0,20210113.0,
25%,1988.0,16.0,33.0,22.0,1177.0,652274.0,20210113.0,
50%,2000.0,28.0,53.0,42.0,7499.0,1569180.0,20210113.0,
75%,2012.0,41.0,81.0,61.0,199241.5,3033118.0,20210113.0,
max,2020.0,56.0,95.0,82.0,11110250.0,17500880.0,20210113.0,


In [46]:
us_df = df[(df['party_detailed']== 'DEMOCRAT') | (df['party_detailed'] == 'REPUBLICAN')]
us_df.head()

Unnamed: 0,year,state,state_po,state_fips,state_cen,state_ic,office,candidate,party_detailed,writein,candidatevotes,totalvotes,version,notes,party_simplified
0,1976,ALABAMA,AL,1,63,41,US PRESIDENT,"CARTER, JIMMY",DEMOCRAT,False,659170,1182850,20210113,,DEMOCRAT
1,1976,ALABAMA,AL,1,63,41,US PRESIDENT,"FORD, GERALD",REPUBLICAN,False,504070,1182850,20210113,,REPUBLICAN
7,1976,ALASKA,AK,2,94,81,US PRESIDENT,"FORD, GERALD",REPUBLICAN,False,71555,123574,20210113,,REPUBLICAN
8,1976,ALASKA,AK,2,94,81,US PRESIDENT,"CARTER, JIMMY",DEMOCRAT,False,44058,123574,20210113,,DEMOCRAT
11,1976,ARIZONA,AZ,4,86,61,US PRESIDENT,"FORD, GERALD",REPUBLICAN,False,418642,742719,20210113,,REPUBLICAN


In [47]:
us_df.aggregate('candidate')

0             CARTER, JIMMY
1              FORD, GERALD
7              FORD, GERALD
8             CARTER, JIMMY
11             FORD, GERALD
               ...         
4264       TRUMP, DONALD J.
4267    BIDEN, JOSEPH R. JR
4268       TRUMP, DONALD J.
4280    BIDEN, JOSEPH R. JR
4281       TRUMP, DONALD J.
Name: candidate, Length: 1225, dtype: object

In [48]:
us_df.groupby(['candidate','year','party_detailed']).sum()#.agg('candidatevotes')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,state_fips,state_cen,state_ic,candidatevotes,totalvotes,version,notes
candidate,year,party_detailed,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
"BIDEN, JOSEPH R. JR",2020,DEMOCRAT,1477,2796,2085,81268908,158528503,1030715763,0.0
"BUSH, GEORGE H.W.",1988,REPUBLICAN,1477,2796,2085,48642640,91586825,1030715763,0.0
"BUSH, GEORGE H.W.",1992,REPUBLICAN,1477,2796,2085,38798913,104599780,1030715763,0.0
"BUSH, GEORGE W.",2000,REPUBLICAN,1477,2796,2085,50311372,105593982,1030715763,0.0
"BUSH, GEORGE W.",2004,REPUBLICAN,1477,2796,2085,61872711,122349450,1030715763,0.0
"CARTER, JIMMY",1976,DEMOCRAT,1477,2796,2085,40680446,81601344,1030715763,0.0
"CARTER, JIMMY",1980,DEMOCRAT,1477,2796,2085,35480948,86496851,1030715763,0.0
"CLINTON, BILL",1992,DEMOCRAT,1477,2796,2085,44856747,104599780,1030715763,0.0
"CLINTON, BILL",1996,DEMOCRAT,1477,2796,2085,47295351,96389818,1030715763,0.0
"CLINTON, HILLARY",2016,DEMOCRAT,1501,2848,2137,65677246,139568633,1050925876,0.0


In [49]:
us_total_votes= us_df.groupby(['candidate','year','party_detailed'],as_index=False).sum()
print(us_total_votes)

              candidate  year party_detailed  state_fips  state_cen  state_ic  \
0   BIDEN, JOSEPH R. JR  2020       DEMOCRAT        1477       2796      2085   
1     BUSH, GEORGE H.W.  1988     REPUBLICAN        1477       2796      2085   
2     BUSH, GEORGE H.W.  1992     REPUBLICAN        1477       2796      2085   
3       BUSH, GEORGE W.  2000     REPUBLICAN        1477       2796      2085   
4       BUSH, GEORGE W.  2004     REPUBLICAN        1477       2796      2085   
5         CARTER, JIMMY  1976       DEMOCRAT        1477       2796      2085   
6         CARTER, JIMMY  1980       DEMOCRAT        1477       2796      2085   
7         CLINTON, BILL  1992       DEMOCRAT        1477       2796      2085   
8         CLINTON, BILL  1996       DEMOCRAT        1477       2796      2085   
9      CLINTON, HILLARY  2016       DEMOCRAT        1501       2848      2137   
10         DOLE, ROBERT  1996     REPUBLICAN        1477       2796      2085   
11     DUKAKIS, MICHAEL  198

In [50]:
us_total_votes['percentage']= (us_total_votes['candidatevotes']/us_total_votes['totalvotes'])*100
print(us_total_votes)

              candidate  year party_detailed  state_fips  state_cen  state_ic  \
0   BIDEN, JOSEPH R. JR  2020       DEMOCRAT        1477       2796      2085   
1     BUSH, GEORGE H.W.  1988     REPUBLICAN        1477       2796      2085   
2     BUSH, GEORGE H.W.  1992     REPUBLICAN        1477       2796      2085   
3       BUSH, GEORGE W.  2000     REPUBLICAN        1477       2796      2085   
4       BUSH, GEORGE W.  2004     REPUBLICAN        1477       2796      2085   
5         CARTER, JIMMY  1976       DEMOCRAT        1477       2796      2085   
6         CARTER, JIMMY  1980       DEMOCRAT        1477       2796      2085   
7         CLINTON, BILL  1992       DEMOCRAT        1477       2796      2085   
8         CLINTON, BILL  1996       DEMOCRAT        1477       2796      2085   
9      CLINTON, HILLARY  2016       DEMOCRAT        1501       2848      2137   
10         DOLE, ROBERT  1996     REPUBLICAN        1477       2796      2085   
11     DUKAKIS, MICHAEL  198

In [51]:
us_final= us_total_votes[["candidate","party_detailed","year","percentage"]]
print(us_final)

              candidate party_detailed  year  percentage
0   BIDEN, JOSEPH R. JR       DEMOCRAT  2020   51.264540
1     BUSH, GEORGE H.W.     REPUBLICAN  1988   53.110958
2     BUSH, GEORGE H.W.     REPUBLICAN  1992   37.092729
3       BUSH, GEORGE W.     REPUBLICAN  2000   47.646060
4       BUSH, GEORGE W.     REPUBLICAN  2004   50.570486
5         CARTER, JIMMY       DEMOCRAT  1976   49.852667
6         CARTER, JIMMY       DEMOCRAT  1980   41.019930
7         CLINTON, BILL       DEMOCRAT  1992   42.884170
8         CLINTON, BILL       DEMOCRAT  1996   49.066750
9      CLINTON, HILLARY       DEMOCRAT  2016   47.057311
10         DOLE, ROBERT     REPUBLICAN  1996   40.464541
11     DUKAKIS, MICHAEL       DEMOCRAT  1988   45.548777
12         FORD, GERALD     REPUBLICAN  1976   47.635114
13             GORE, AL       DEMOCRAT  2000   48.143251
14          KERRY, JOHN       DEMOCRAT  2004   48.066457
15         MCCAIN, JOHN     REPUBLICAN  2008   45.361569
16      MONDALE, WALTER       D

In [53]:
us_final.sort_values(['year','candidate'])

Unnamed: 0,candidate,party_detailed,year,percentage
5,"CARTER, JIMMY",DEMOCRAT,1976,49.852667
12,"FORD, GERALD",REPUBLICAN,1976,47.635114
6,"CARTER, JIMMY",DEMOCRAT,1980,41.01993
20,"REAGAN, RONALD",REPUBLICAN,1980,50.455755
16,"MONDALE, WALTER",DEMOCRAT,1984,40.418617
21,"REAGAN, RONALD",REPUBLICAN,1984,58.46086
1,"BUSH, GEORGE H.W.",REPUBLICAN,1988,53.110958
11,"DUKAKIS, MICHAEL",DEMOCRAT,1988,45.548777
2,"BUSH, GEORGE H.W.",REPUBLICAN,1992,37.092729
7,"CLINTON, BILL",DEMOCRAT,1992,42.88417


In [54]:
us_final_with_votes= us_total_votes[["candidate","party_detailed","year","candidatevotes","totalvotes","percentage"]]

              candidate party_detailed  year  candidatevotes  totalvotes  \
0   BIDEN, JOSEPH R. JR       DEMOCRAT  2020        81268908   158528503   
1     BUSH, GEORGE H.W.     REPUBLICAN  1988        48642640    91586825   
2     BUSH, GEORGE H.W.     REPUBLICAN  1992        38798913   104599780   
3       BUSH, GEORGE W.     REPUBLICAN  2000        50311372   105593982   
4       BUSH, GEORGE W.     REPUBLICAN  2004        61872711   122349450   
5         CARTER, JIMMY       DEMOCRAT  1976        40680446    81601344   
6         CARTER, JIMMY       DEMOCRAT  1980        35480948    86496851   
7         CLINTON, BILL       DEMOCRAT  1992        44856747   104599780   
8         CLINTON, BILL       DEMOCRAT  1996        47295351    96389818   
9      CLINTON, HILLARY       DEMOCRAT  2016        65677246   139568633   
10         DOLE, ROBERT     REPUBLICAN  1996        39003697    96389818   
11     DUKAKIS, MICHAEL       DEMOCRAT  1988        41716679    91586825   
12         F

In [55]:
us_final_with_votes.sort_values(['year','candidate'])

Unnamed: 0,candidate,party_detailed,year,candidatevotes,totalvotes,percentage
5,"CARTER, JIMMY",DEMOCRAT,1976,40680446,81601344,49.852667
12,"FORD, GERALD",REPUBLICAN,1976,38870893,81601344,47.635114
6,"CARTER, JIMMY",DEMOCRAT,1980,35480948,86496851,41.01993
20,"REAGAN, RONALD",REPUBLICAN,1980,43642639,86496851,50.455755
16,"MONDALE, WALTER",DEMOCRAT,1984,37449813,92654861,40.418617
21,"REAGAN, RONALD",REPUBLICAN,1984,54166829,92654861,58.46086
1,"BUSH, GEORGE H.W.",REPUBLICAN,1988,48642640,91586825,53.110958
11,"DUKAKIS, MICHAEL",DEMOCRAT,1988,41716679,91586825,45.548777
2,"BUSH, GEORGE H.W.",REPUBLICAN,1992,38798913,104599780,37.092729
7,"CLINTON, BILL",DEMOCRAT,1992,44856747,104599780,42.88417
