# Clean Data for Presidential Party Wins by Election
This notebook reads a CSV exported from a Wikipedia table, cleans it, and writes the result to a new CSV for use in Tableau.

In [1]:
import pandas as pd
import numpy as np

# Show every row and every column when a DataFrame is displayed
# (None removes pandas' default truncation limit for that option).
for option_name in ('display.max_rows', 'display.max_columns'):
    pd.set_option(option_name, None)

### Import CSV File
I used https://wikitable2csv.ggor.de/ to convert the table of presidential wins by election from https://en.wikipedia.org/wiki/List_of_United_States_presidential_elections_by_popular_vote_margin into a CSV file.

In [2]:
# Path to the raw CSV exported from the Wikipedia table.
input_file = '../raw_data/pres_wins_by_election.csv'

# Load the file as-is; cleaning happens in the cells that follow.
df = pd.read_csv(filepath_or_buffer=input_file)

# Preview the first rows of the raw table.
df.head()

Unnamed: 0,Election,Election.1,Winner & party,Winner & party.1,Electoral College,Electoral College.1,Popular vote,Popular vote.1,Popular vote.2,Popular vote.3,Runner-up & party,Runner-up & party.1,Turnout[4]
0,,,,,Votes,%,%,Margin,Votes,Margin,,,
1,1.0,1788–89,George Washington,Ind.,69/69,100.00%,100.00%,100.00%,43782,43782,No candidate,None[Note 1],11.6%
2,2.0,1792,George Washington,Ind.,132/132,100.00%,100.00%,100.00%,28579,28579,No candidate,None[Note 1],6.3%
3,3.0,1796,John Adams,Fed.,71/138,51.45%,53.45%,6.90%,35726,4611,Thomas Jefferson,D.-R.[Note 2],20.1%
4,4.0,1800,Thomas Jefferson,D.-R.,73/138,52.90%,61.43%,22.86%,41330,15378,Aaron Burr,D.-R.[Note 3],32.3%


### Remove Unnecessary Rows and Columns
Drop the leftover sub-header row (the first row of the raw table) and keep only the columns containing the election year, the winner, and the winner's party.

In [3]:
# Skip the first row (sub-header text) and keep the three columns at
# positions 1-3: election year label, winner, winner's party.
kept = df.iloc[1:, 1:4]
kept.columns = ['Year', 'President', 'Party']

# Order by the year label and renumber the rows from 0.
kept = kept.sort_values(by='Year')
df = kept.reset_index(drop=True)

df.head()

Unnamed: 0,Year,President,Party
0,1788–89,George Washington,Ind.
1,1792,George Washington,Ind.
2,1796,John Adams,Fed.
3,1800,Thomas Jefferson,D.-R.
4,1804,Thomas Jefferson,D.-R.


### Convert Year Column to Numbers

In [4]:
# The first election is recorded as the range '1788–89' (with an en dash,
# U+2013); collapse it to the single year 1789.
# Select the row by its value rather than by position 0, so that a change in
# row order or row set upstream cannot cause a different election to be
# overwritten with 1789.
first_election_label = '1788\u201389'
df.loc[df['Year'] == first_election_label, 'Year'] = '1789'

# Convert the year labels to numbers; errors='coerce' turns any label that
# still cannot be parsed into NaN instead of raising.
df['Year'] = pd.to_numeric(df['Year'], errors='coerce')
df.head()

Unnamed: 0,Year,President,Party
0,1789,George Washington,Ind.
1,1792,George Washington,Ind.
2,1796,John Adams,Fed.
3,1800,Thomas Jefferson,D.-R.
4,1804,Thomas Jefferson,D.-R.


### Convert Party Values to D, R, and O

In [5]:
# Work out which rows are Democratic / Republican wins from the original
# labels before the column is overwritten.
is_dem = df['Party'] == 'Dem.'
is_rep = df['Party'] == 'Rep.'

# Default every row to 'O' (other), then mark the two major parties.
df['Party'] = 'O'
df.loc[is_dem, 'Party'] = 'D'
df.loc[is_rep, 'Party'] = 'R'

# Sanity check: list the distinct codes now present in the column.
print(df['Party'].unique())
df.head()

['O' 'D' 'R']


Unnamed: 0,Year,President,Party
0,1789,George Washington,O
1,1792,George Washington,O
2,1796,John Adams,O
3,1800,Thomas Jefferson,O
4,1804,Thomas Jefferson,O


### Add a Column Tracking Party Flips
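1 if the winning party switched from D to R or from R to D since the previous election; 0 otherwise (the party stayed the same, either election was won by an "O" party, or it is the first election and there is no previous winner).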

In [6]:
# Party code of the previous election's winner (missing for the first row).
prev_winners = df['Party'].shift(periods=1)

# Concatenate current + previous codes, e.g. 'D' + 'R' -> 'DR'.
winner_pairs = df['Party'].add(prev_winners)

# A flip is a direct D<->R change; every other pair (including the missing
# first-row pair) is recorded as 0.
is_flip = winner_pairs.isin(['DR', 'RD'])
df['Flip'] = is_flip.astype(int)

### Export DataFrame to CSV

In [7]:
# Destination for the cleaned table.
output_file = '../data/pres_wins_by_election.csv'

# Write the cleaned frame without the row index.
df.to_csv(path_or_buf=output_file, index=False)