# USA Presidential Election data analysis

The objective of this analysis is to gain insight into U.S. presidential election results over the years.

Data sources:

- MIT Election Data and Science Lab: https://electionlab.mit.edu/data
- Harvard Dataverse dataset: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/42MVDX

### Import libraries

In [1]:
import pandas as pd
import numpy as np

### Read data

In [2]:
# Presidential returns CSV, resolved relative to the working directory.
DATA_FILE = "1976-2020-president.csv"
df = pd.read_csv(DATA_FILE)

In [3]:
# Preview the first five rows to confirm the columns parsed as expected.
df.head(n=5)

Unnamed: 0,year,state,state_po,state_fips,state_cen,state_ic,office,candidate,party_detailed,writein,candidatevotes,totalvotes,version,notes,party_simplified
0,1976,ALABAMA,AL,1,63,41,US PRESIDENT,"CARTER, JIMMY",DEMOCRAT,False,659170,1182850,20210113,,DEMOCRAT
1,1976,ALABAMA,AL,1,63,41,US PRESIDENT,"FORD, GERALD",REPUBLICAN,False,504070,1182850,20210113,,REPUBLICAN
2,1976,ALABAMA,AL,1,63,41,US PRESIDENT,"MADDOX, LESTER",AMERICAN INDEPENDENT PARTY,False,9198,1182850,20210113,,OTHER
3,1976,ALABAMA,AL,1,63,41,US PRESIDENT,"BUBAR, BENJAMIN """"BEN""""",PROHIBITION,False,6669,1182850,20210113,,OTHER
4,1976,ALABAMA,AL,1,63,41,US PRESIDENT,"HALL, GUS",COMMUNIST PARTY USE,False,1954,1182850,20210113,,OTHER


### Initial exploration

In [4]:
# Column dtypes and non-null counts. In the output below, `candidate`,
# `party_detailed` and `writein` have missing values, and `notes` is entirely empty.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4287 entries, 0 to 4286
Data columns (total 15 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   year              4287 non-null   int64  
 1   state             4287 non-null   object 
 2   state_po          4287 non-null   object 
 3   state_fips        4287 non-null   int64  
 4   state_cen         4287 non-null   int64  
 5   state_ic          4287 non-null   int64  
 6   office            4287 non-null   object 
 7   candidate         4000 non-null   object 
 8   party_detailed    3831 non-null   object 
 9   writein           4284 non-null   object 
 10  candidatevotes    4287 non-null   int64  
 11  totalvotes        4287 non-null   int64  
 12  version           4287 non-null   int64  
 13  notes             0 non-null      float64
 14  party_simplified  4287 non-null   object 
dtypes: float64(1), int64(7), object(7)
memory usage: 502.5+ KB


In [5]:
# Summary statistics for the numeric columns only (describe()'s default).
# In the output below, `version` is constant (std 0.0) and `notes` has no values.
df.describe()

Unnamed: 0,year,state_fips,state_cen,state_ic,candidatevotes,totalvotes,version,notes
count,4287.0,4287.0,4287.0,4287.0,4287.0,4287.0,4287.0,0.0
mean,1999.080942,28.616982,53.665034,39.75484,311907.6,2366924.0,20210113.0,
std,14.220014,15.616459,26.029189,22.772216,764801.1,2465008.0,0.0,
min,1976.0,1.0,11.0,1.0,0.0,123574.0,20210113.0,
25%,1988.0,16.0,33.0,22.0,1177.0,652274.0,20210113.0,
50%,2000.0,28.0,53.0,42.0,7499.0,1569180.0,20210113.0,
75%,2012.0,41.0,81.0,61.0,199241.5,3033118.0,20210113.0,
max,2020.0,56.0,95.0,82.0,11110250.0,17500880.0,20210113.0,


In [6]:
# Keep only the two major parties. `isin` states the membership test once
# instead of OR-ing two equality masks; `.copy()` makes `us_df` an independent
# frame so later column assignments do not raise SettingWithCopyWarning.
# NOTE(review): this filters on `party_detailed`; rows whose detailed label is a
# state-specific variant of a major party would be excluded — confirm whether
# `party_simplified` is the intended column.
MAJOR_PARTIES = ['DEMOCRAT', 'REPUBLICAN']
us_df = df[df['party_detailed'].isin(MAJOR_PARTIES)].copy()
us_df.head()

Unnamed: 0,year,state,state_po,state_fips,state_cen,state_ic,office,candidate,party_detailed,writein,candidatevotes,totalvotes,version,notes,party_simplified
0,1976,ALABAMA,AL,1,63,41,US PRESIDENT,"CARTER, JIMMY",DEMOCRAT,False,659170,1182850,20210113,,DEMOCRAT
1,1976,ALABAMA,AL,1,63,41,US PRESIDENT,"FORD, GERALD",REPUBLICAN,False,504070,1182850,20210113,,REPUBLICAN
7,1976,ALASKA,AK,2,94,81,US PRESIDENT,"FORD, GERALD",REPUBLICAN,False,71555,123574,20210113,,REPUBLICAN
8,1976,ALASKA,AK,2,94,81,US PRESIDENT,"CARTER, JIMMY",DEMOCRAT,False,44058,123574,20210113,,DEMOCRAT
11,1976,ARIZONA,AZ,4,86,61,US PRESIDENT,"FORD, GERALD",REPUBLICAN,False,418642,742719,20210113,,REPUBLICAN


In [7]:
# Show the candidate column of the major-party rows. Plain column selection
# replaces `aggregate('candidate')`, which performed no aggregation: its
# displayed result is just this column, row for row.
us_df['candidate']

0             CARTER, JIMMY
1              FORD, GERALD
7              FORD, GERALD
8             CARTER, JIMMY
11             FORD, GERALD
               ...         
4264       TRUMP, DONALD J.
4267    BIDEN, JOSEPH R. JR
4268       TRUMP, DONALD J.
4280    BIDEN, JOSEPH R. JR
4281       TRUMP, DONALD J.
Name: candidate, Length: 1225, dtype: object

In [8]:
# Total the numeric columns for each (candidate, year, state) combination.
# `numeric_only=True` is explicit because the default differs between pandas
# versions: without it, pandas >= 2.0 also tries to sum the text columns.
# NOTE(review): identifier columns (state_fips, state_cen, state_ic, version)
# are summed as well, so they are only meaningful when a group has one row;
# selecting ['candidatevotes'] before .sum() may be what is wanted.
us_df.groupby(['candidate', 'year', 'state']).sum(numeric_only=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,state_fips,state_cen,state_ic,candidatevotes,totalvotes,version,notes
candidate,year,state,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
"BIDEN, JOSEPH R. JR",2020,ALABAMA,1,63,41,849624,2323282,20210113,0.0
"BIDEN, JOSEPH R. JR",2020,ALASKA,2,94,81,153778,359530,20210113,0.0
"BIDEN, JOSEPH R. JR",2020,ARIZONA,4,86,61,1672143,3387326,20210113,0.0
"BIDEN, JOSEPH R. JR",2020,ARKANSAS,5,71,42,423932,1219069,20210113,0.0
"BIDEN, JOSEPH R. JR",2020,CALIFORNIA,6,93,71,11110250,17500881,20210113,0.0
...,...,...,...,...,...,...,...,...,...
"TRUMP, DONALD J.",2020,VIRGINIA,51,54,40,1962430,4460524,20210113,0.0
"TRUMP, DONALD J.",2020,WASHINGTON,53,91,73,1584651,4087631,20210113,0.0
"TRUMP, DONALD J.",2020,WEST VIRGINIA,54,55,56,545382,794652,20210113,0.0
"TRUMP, DONALD J.",2020,WISCONSIN,55,35,25,1610184,3298041,20210113,0.0


In [None]:
us_