# This notebook downloads and prepares all election data

In [1]:
import requests

import numpy as np
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile

import xlrd
import xlwt

In [2]:
#r = requests.get('https://transition.fec.gov/pubrec/fe2016/federalelections2016.xlsx')

#with open("../raw-data/federal-election-results/federalelections2016.xlsx",'wb') as f: 
  
    # Saving the received content as an .xlsx file in
    # binary format
  
    # write the contents of the response (r.content) 
    # to a new file in binary mode. 
 #   f.write(r.content) 

In [3]:
# Read the per-state House results sheet of the FEC 2016 workbook into a
# DataFrame, then preview the first rows of the raw sheet.
house_by_dist = pd.read_excel(
    "../raw-data/federal-election-results/federalelections2016.xlsx",
    sheet_name='2016 US House Results by State',
)
house_by_dist.head()

Unnamed: 0,1,STATE ABBREVIATION,STATE,D,FEC ID#,(I),CANDIDATE NAME (First),CANDIDATE NAME (Last),CANDIDATE NAME,TOTAL VOTES,...,RUNOFF VOTES,RUNOFF %,GENERAL VOTES,GENERAL %,GE RUNOFF ELECTION VOTES (LA),GE RUNOFF ELECTION % (LA),"COMBINED GE PARTY TOTALS (CT, NY, SC)","COMBINED % (CT, NY, SC)",GE WINNER INDICATOR,FOOTNOTES
0,2,,,,,,,,,,...,,,,,,,,,,
1,3,AL,Alabama,,,,,,,,...,,,,,,,,,,
2,4,AL,Alabama,1.0,H4AL01123,(I),Bradley,Byrne,"Byrne, Bradley",,...,,,208083.0,0.963825,,,,,W,
3,5,AL,Alabama,1.0,H6AL01060,,Dean,Young,"Young, Dean",,...,,,,,,,,,,
4,6,AL,Alabama,1.0,,,,,,Party Votes:,...,,,,,,,,,,


In [4]:
# Give the columns used later in the notebook identifier-friendly names.
# The first sheet column is labelled with the integer 1, and the source header
# for general votes carries a trailing space ('GENERAL VOTES ').
column_renames = {
    1: 'row_excel',
    'TOTAL VOTES': 'TOTAL_VOTES',
    'GENERAL VOTES ': 'GENERAL_VOTES',
    'FEC ID#': 'FEC_ID',
    'STATE ABBREVIATION': 'STATE_ABBREVIATION',
}
house_by_dist = house_by_dist.rename(columns=column_renames)

# Show the resulting column names.
house_by_dist.columns.tolist()

['row_excel',
 'STATE_ABBREVIATION',
 'STATE',
 'D',
 'FEC_ID',
 '(I)',
 'CANDIDATE NAME (First)',
 'CANDIDATE NAME (Last)',
 'CANDIDATE NAME',
 'TOTAL_VOTES',
 'PARTY',
 'PRIMARY VOTES',
 'PRIMARY %',
 'RUNOFF VOTES',
 'RUNOFF %',
 'GENERAL_VOTES',
 'GENERAL %',
 'GE RUNOFF ELECTION VOTES (LA)',
 'GE RUNOFF ELECTION % (LA)',
 'COMBINED GE PARTY TOTALS (CT, NY, SC)',
 'COMBINED % (CT, NY, SC)',
 'GE WINNER INDICATOR',
 'FOOTNOTES']

In [5]:
# Correct district numbers that are wrong in the spreadsheet. Both pairs were
# double checked against the official PDF:
# https://transition.fec.gov/pubrec/fe2016/federalelections2016.pdf
#   - page 165: FEC IDs H6SC05137 and H6SC05145 belong to district 05
#   - page 175: FEC IDs H8UT03089 and H6UT03166 belong to district 03
house_by_dist.loc[house_by_dist.FEC_ID.isin(["H6SC05137", "H6SC05145"]), 'D'] = "05"
house_by_dist.loc[house_by_dist.FEC_ID.isin(["H8UT03089", "H6UT03166"]), 'D'] = "03"

# Some districts are listed twice, once for the full term and once for an
# unexpired term (district 01: one in HI and one in KY; district 02: one in PA).
# Keep the full-term race under the plain district number.
# TODO: look into what is best for the unexpired-term races; they are dropped for now.
house_by_dist.loc[house_by_dist.D == "01 - FULL TERM", 'D'] = "01"
house_by_dist.loc[house_by_dist.D == "02 - FULL TERM", 'D'] = "02"

# All label fixes above run BEFORE filtering, and the filtered result is copied,
# so no later assignment writes into a slice of the original frame (which would
# raise pandas' SettingWithCopyWarning). Rows with a missing D are kept, exactly
# as with the previous `!=` comparisons.
unexpired_term_labels = [
    "01 - UNEXPIRED TERM",
    "1 - UNEXPIRED TERM",
    "02 - UNEXPIRED TERM",
]
house_by_dist = house_by_dist[~house_by_dist.D.isin(unexpired_term_labels)].copy()

# Line used to check the result:
#house_by_dist.loc[house_by_dist.row_excel > 3736].head(100)

In [6]:
# Build a district key of the form "<STATE>_<district>" (e.g. "AL_01").
# D is stringified element-wise first, then the combined key is stripped of
# surrounding whitespace.
house_by_dist["D_ID"] = (
    house_by_dist.STATE_ABBREVIATION + "_" + house_by_dist.D.map(str)
).str.strip()

In [7]:
# Keep only the columns the rest of the notebook works with.
house_by_dist = house_by_dist.loc[:, [
    'row_excel',
    "STATE_ABBREVIATION",
    'D_ID',
    'PARTY',
    'TOTAL_VOTES',
    'GENERAL_VOTES',
    "D",
]]

house_by_dist.head()

Unnamed: 0,row_excel,STATE_ABBREVIATION,D_ID,PARTY,TOTAL_VOTES,GENERAL_VOTES,D
0,2,,,,,,
1,3,AL,AL_nan,,,,
2,4,AL,AL_01,R,,208083.0,1.0
3,5,AL,AL_01,R,,,1.0
4,6,AL,AL_01,R,Party Votes:,,1.0


In [8]:
# List the distinct values found in the state abbreviation column.
print(house_by_dist["STATE_ABBREVIATION"].unique())

[nan 'AL' 'AK' 'AS' 'AZ' 'AR' 'CA' 'CO' 'CT' 'DE' 'DC' 'FL' 'GA' 'GU' 'HI'
 'ID' 'IL' 'IN' 'IA' 'KS' 'KY' 'LA' 'ME' 'MD' 'MA' 'MI' 'MN' 'MS' 'MO'
 'MT' 'NE' 'NV' 'NH' 'NJ' 'NM' 'NY' 'NC' 'ND' 'MP' 'OH' 'OK' 'OR' 'PA'
 'PR' 'RI' 'SC' 'SD' 'TN' 'TX' 'UT' 'VT' 'VA' 'VI' 'WA' 'WV' 'WI' 'WY']


In [9]:
# Collect the rows that have no state abbreviation and preview them.
state_nan = house_by_dist.loc[house_by_dist["STATE_ABBREVIATION"].isna()]
state_nan.head()
# Swap in state_nan.head(500) to inspect more of these rows.

Unnamed: 0,row_excel,STATE_ABBREVIATION,D_ID,PARTY,TOTAL_VOTES,GENERAL_VOTES,D
0,2,,,,,,
42,44,,,,,,
45,47,,,,,,
64,66,,,,,,
67,69,,,,,,


In [10]:
# Discard rows without a state abbreviation, reporting the frame shape before
# and after so the effect of the filter is visible.
print("Shape before dropping missing state abb: ", house_by_dist.shape)
house_by_dist = house_by_dist[house_by_dist["STATE_ABBREVIATION"].notna()]
print("Shape after dropping missing state abb: ", house_by_dist.shape)

Shape before dropping missing state abb:  (4109, 7)
Shape after dropping missing state abb:  (4047, 7)


In [11]:
# List the distinct values found in the district column.
print(house_by_dist["D"].unique())

[nan '01' '02' '03' '04' '05' '06' '07' 'H' '00' '08' '09' '10' '11' '12'
 '13' '14' '15' '16' '17' '18' '19' '20' '21' '22' '23' '24' '25' '26'
 '27' '28' '29' '30' '31' '32' '33' '34' '35' '36' '37' '38' '39' '40'
 '41' '42' '43' '44' '45' '46' '47' '48' '49' '50' '51' '52' '53' '04 ' 10
 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27]


In [12]:
# Collect the rows that have no district value and preview them.
D_nan = house_by_dist.loc[house_by_dist["D"].isna()]
D_nan.head()
# Swap in D_nan.head(500) to inspect more of these rows.

Unnamed: 0,row_excel,STATE_ABBREVIATION,D_ID,PARTY,TOTAL_VOTES,GENERAL_VOTES,D
1,3,AL,AL_nan,,,,
7,9,AL,AL_nan,,,,
15,17,AL,AL_nan,,,,
22,24,AL,AL_nan,,,,
28,30,AL,AL_nan,,,,


In [13]:
# Discard rows without a district value, reporting the frame shape before and
# after the filter.
print("Shape before dropping missing district: ", house_by_dist.shape)
house_by_dist = house_by_dist[house_by_dist["D"].notna()]
print("Shape after dropping missing district: ", house_by_dist.shape)

Shape before dropping missing district:  (4047, 7)
Shape after dropping missing district:  (3949, 7)


In [14]:
# District value "H" marks the row holding a state's total votes
# ("Total State Votes:"). Collect those rows and preview them.
D_H = house_by_dist.loc[house_by_dist["D"].eq("H")]
D_H.head()
# Swap in D_H.head(500) to inspect more of these rows.

Unnamed: 0,row_excel,STATE_ABBREVIATION,D_ID,PARTY,TOTAL_VOTES,GENERAL_VOTES,D
43,45,AL,AL_H,,Total State Votes:,1889685,H
65,67,AK,AK_H,,Total State Votes:,308198,H
74,76,AS,AS_H,,Total State Votes:,11834,H
178,180,AZ,AZ_H,,Total State Votes:,2412064,H
202,204,AR,AR_H,,Total State Votes:,1068577,H


In [15]:
# Remove the state-wide total rows (district value "H"), reporting the frame
# shape before and after the filter.
print("Shape before dropping state wide total voter number row: ", house_by_dist.shape)
house_by_dist = house_by_dist.loc[house_by_dist.D.ne("H")]
print("Shape after dropping state wide total voter number row: ", house_by_dist.shape)

Shape before dropping state wide total voter number row:  (3949, 7)
Shape after dropping state wide total voter number row:  (3893, 7)


In [16]:
# Drop D; from now on only D_ID, which is unique across states, is used.
house_by_dist = house_by_dist.drop(columns=['D'])


In [17]:
# Another kind of aggregate row: per-party totals, marked "Party Votes:" in
# TOTAL_VOTES. Party aggregates were only applicable in primary elections.
votes_party_agg = house_by_dist.loc[house_by_dist["TOTAL_VOTES"].eq("Party Votes:")]
votes_party_agg.head()
# Swap in votes_party_agg.head(500) to inspect more of these rows.

Unnamed: 0,row_excel,STATE_ABBREVIATION,D_ID,PARTY,TOTAL_VOTES,GENERAL_VOTES
4,6,AL,AL_01,R,Party Votes:,
11,13,AL,AL_02,R,Party Votes:,
18,20,AL,AL_03,R,Party Votes:,
25,27,AL,AL_04,R,Party Votes:,
51,53,AK,AK_00,R,Party Votes:,


In [18]:
# Remove the "Party Votes:" aggregate rows, reporting the frame shape before
# and after the filter.
print("Shape before dropping party primary total voter number row: ", house_by_dist.shape)
house_by_dist = house_by_dist.loc[house_by_dist.TOTAL_VOTES.ne("Party Votes:")]
print("Shape after dropping party primary total voter number row: ", house_by_dist.shape)

Shape before dropping party primary total voter number row:  (3893, 6)
Shape after dropping party primary total voter number row:  (3276, 6)


In [19]:
# Another kind of aggregate row: "District Votes:" rows, which show the total
# number of votes in that district (held in GENERAL_VOTES).
votes_dist_agg = house_by_dist.loc[house_by_dist["TOTAL_VOTES"].eq("District Votes:")]
votes_dist_agg.head()
# Swap in votes_dist_agg.head(500) to inspect more of these rows.

Unnamed: 0,row_excel,STATE_ABBREVIATION,D_ID,PARTY,TOTAL_VOTES,GENERAL_VOTES
6,8,AL,AL_01,,District Votes:,215893
14,16,AL,AL_02,,District Votes:,276584
21,23,AL,AL_03,,District Votes:,287104
27,29,AL,AL_04,,District Votes:,239444
32,34,AL,AL_05,,District Votes:,308326


In [20]:
# Remove the "District Votes:" aggregate rows, reporting the frame shape before
# and after the filter.
print("Shape before dropping district total voter number row: ", house_by_dist.shape)
house_by_dist = house_by_dist.loc[house_by_dist.TOTAL_VOTES.ne("District Votes:")]
print("Shape afer dropping district total voter number row: ", house_by_dist.shape)

Shape before dropping district total voter number row:  (3276, 6)
Shape afer dropping district total voter number row:  (2837, 6)


In [21]:
# TOTAL_VOTES was only needed to spot the aggregate rows; drop it.
house_by_dist = house_by_dist.drop(columns=['TOTAL_VOTES'])

In [22]:
# Reduce the "District Votes:" rows to a two-column lookup of D_ID and the
# district total, which those rows carry in GENERAL_VOTES.
#
# rename() is reassigned instead of called with inplace=True: votes_dist_agg
# was produced by boolean-filtering house_by_dist, and modifying such a slice
# in place triggers pandas' SettingWithCopyWarning. Renaming before selecting
# also keeps the cell safe to re-run (a second rename is simply a no-op).
votes_dist_agg = votes_dist_agg.rename(columns={'GENERAL_VOTES': 'DISTRICT_VOTES'})
votes_dist_agg = votes_dist_agg[['D_ID', 'DISTRICT_VOTES']]
votes_dist_agg.head()

Unnamed: 0,D_ID,DISTRICT_VOTES
6,AL_01,215893
14,AL_02,276584
21,AL_03,287104
27,AL_04,239444
32,AL_05,308326


In [23]:
# Attach each district's total (DISTRICT_VOTES) to its rows, matching on D_ID.
# This is an inner join (the pandas default), so rows whose D_ID has no entry
# in votes_dist_agg are dropped.
# NOTE(review): the recorded shapes go from 2837 rows before this cell to 2809
# after it - confirm those 28 rows are meant to be discarded.
house_by_dist = house_by_dist.merge(votes_dist_agg, how='inner', on='D_ID')

In [24]:
# List the distinct raw party labels.
print(house_by_dist["PARTY"].unique())

['R' 'W' 'D' 'LIB' 'NAF' 'IND' nan 'W(GRE)/GRE' 'W(LIB)' 'W(GRE)' 'W(D)'
 'NOP' 'GRE' 'W(R)/R' 'PAF' 'W(NOP)' 'LIB  ' 'WF' 'IP' 'R/W' 'DCG'
 'W(DCG)' 'W(R)' 'LBF' 'NPA' 'R   ' 'R  ' 'W(IP)' 'N' 'CON' 'NNE' 'R\xa0'
 'D\xa0' 'OTH' 'W(IND)' 'U' 'UST' 'W(D)/D' 'NLP' 'WC' 'W ' 'DFL' 'IDP'
 'LMN' 'REF' 'VPA' 'NPY' 'IAP' 'WDB' 'AO' 'MGW' 'RNN' 'PIP' 'FPR' 'EG'
 'WUA' 'NSA' 'WOP' 'NBP' 'FI' 'LMP' 'TED' 'WTP' 'CRV' 'WEP' 'R/TRP' 'BLM'
 'HBP' 'SID' 'TGP' 'PCC' 'UPJ' 'DNL' 'W(DNL)' 'D/IP' 'R/IP' 'IP/R' 'PRO'
 'D/PRO/WF/IP' 'R/CON' 'PG' 'W(D)/W' 'AM' 'UN' 'D/R' 'LBU' 'W(PRO)' 'INP'
 'WRN' 'TC' 'W(WG)' 'W(CON)']


In [25]:
# Rows without a party are, at this point, expected to be just empty rows;
# collect them and preview a few to confirm.
PARTY_nan = house_by_dist.loc[house_by_dist["PARTY"].isna()]
PARTY_nan.head()
# Swap in PARTY_nan.head(500) to inspect more of these rows.

Unnamed: 0,row_excel,STATE_ABBREVIATION,D_ID,PARTY,GENERAL_VOTES,DISTRICT_VOTES
41,77,AS,AS_00,,,11834
113,215,CA,CA_01,,,314036
119,222,CA,CA_02,,,330766
123,227,CA,CA_03,,,256966
127,232,CA,CA_04,,,350978


In [26]:
# Discard rows without a party, reporting the frame shape before and after
# the filter.
print("Shape before dopping missing party: ", house_by_dist.shape)
house_by_dist = house_by_dist[house_by_dist["PARTY"].notna()]
print("Shape after dopping missing party: ", house_by_dist.shape)

Shape before dopping missing party:  (2809, 6)
Shape after dopping missing party:  (2391, 6)


In [27]:
# Strip surrounding whitespace from the party labels (e.g. " R" -> "R") so that
# variants of the same label are not treated as different parties, then seed
# PARTY_VALID with the stripped label. PARTY_VALID is the column the following
# cells overwrite when standardizing or discarding party names.
# assign() evaluates its arguments in order, so PARTY_VALID sees the stripped PARTY.
house_by_dist = house_by_dist.assign(
    PARTY=lambda frame: frame.PARTY.str.strip(),
    PARTY_VALID=lambda frame: frame.PARTY,
)



In [28]:
# Standardize valid party abbreviations: raw labels that differ from the party
# code only by a typo, formatting, or an unambiguous write-in marker are mapped
# to the plain party code in PARTY_VALID. PARTY itself is left untouched.
# NOTE(review): the "\xa0" entries and the "W(NPP)" / "W(PPD)" entries do not
# appear among the printed categories at this point - confirm whether they can
# still match anything.
PARTY_STANDARDIZATION = {
    "W(GRE)": "GRE",
    "W(GRE)/GRE": "GRE",
    "W(LIB)": "LIB",
    "W(D)": "D",
    "W(D)/D": "D",
    "D\xa0": "D",
    "W(R)": "R",
    "W(R)/R": "R",
    "R\xa0": "R",
    "W(DNL)": "DNL",
    "W(PRO)": "PRO",
    "W(WG)": "WG",
    "W(IP)": "IP",
    "W(DCG)": "DCG",
    "W(CON)": "CON",
    "W(NPP)": "NPP",
    "W(PPD)": "PPD",
}

for raw_label, standard_label in PARTY_STANDARDIZATION.items():
    house_by_dist.loc[house_by_dist.PARTY == raw_label, 'PARTY_VALID'] = standard_label

# Print categories after standardization
print(sorted(house_by_dist.PARTY_VALID.unique()))

['AM', 'AO', 'BLM', 'CON', 'CRV', 'D', 'D/IP', 'D/PRO/WF/IP', 'D/R', 'DCG', 'DFL', 'DNL', 'EG', 'FI', 'FPR', 'GRE', 'HBP', 'IAP', 'IDP', 'IND', 'INP', 'IP', 'IP/R', 'LBF', 'LBU', 'LIB', 'LMN', 'LMP', 'MGW', 'N', 'NAF', 'NBP', 'NLP', 'NNE', 'NOP', 'NPA', 'NPY', 'NSA', 'OTH', 'PAF', 'PCC', 'PG', 'PIP', 'PRO', 'R', 'R/CON', 'R/IP', 'R/TRP', 'R/W', 'REF', 'RNN', 'SID', 'TC', 'TED', 'TGP', 'U', 'UN', 'UPJ', 'UST', 'VPA', 'W', 'W(D)/W', 'W(IND)', 'W(NOP)', 'WC', 'WDB', 'WEP', 'WF', 'WG', 'WOP', 'WRN', 'WTP', 'WUA']


In [29]:
# Replace labels that do not name a valid party with "no-party" in PARTY_VALID.
# The match is made on the raw PARTY column, which is left untouched.
NO_PARTY_LABELS = [
    # No party affiliation
    "NOP", "W(NOP)", "NAF", "NNE", "NPA", "NPY",
    # Other
    "OTH",
    # Independent
    "IND", "INP", "W(IND)",
    # Ambiguous: more than one party listed
    "D/IP", "D/PRO/WF/IP", "D/R", "IP/R", "R/CON", "R/IP", "R/TRP",
    # Unenrolled
    "U", "UN",
    # Unspecified write-ins
    "W",
    # Ambiguous write-ins
    "W(D)/W", "R/W",
]

house_by_dist.loc[house_by_dist.PARTY.isin(NO_PARTY_LABELS), 'PARTY_VALID'] = "no-party"

# Print categories after removing invalid party names
print(sorted(house_by_dist.PARTY_VALID.unique()))

['AM', 'AO', 'BLM', 'CON', 'CRV', 'D', 'DCG', 'DFL', 'DNL', 'EG', 'FI', 'FPR', 'GRE', 'HBP', 'IAP', 'IDP', 'IP', 'LBF', 'LBU', 'LIB', 'LMN', 'LMP', 'MGW', 'N', 'NBP', 'NLP', 'NSA', 'PAF', 'PCC', 'PG', 'PIP', 'PRO', 'R', 'REF', 'RNN', 'SID', 'TC', 'TED', 'TGP', 'UPJ', 'UST', 'VPA', 'WC', 'WDB', 'WEP', 'WF', 'WG', 'WOP', 'WRN', 'WTP', 'WUA', 'no-party']


In [30]:
# Collect the rows that have no general-election vote count and preview them.
votes_nan = house_by_dist.loc[house_by_dist["GENERAL_VOTES"].isna()]
votes_nan.head()
# Swap in votes_nan.head(500) to inspect more of these rows.

Unnamed: 0,row_excel,STATE_ABBREVIATION,D_ID,PARTY,GENERAL_VOTES,DISTRICT_VOTES,PARTY_VALID
1,5,AL,AL_01,R,,215893,R
4,11,AL,AL_02,R,,276584,R
5,12,AL,AL_02,R,,276584,R
9,19,AL,AL_03,R,,287104,R
13,26,AL,AL_04,R,,239444,R


In [31]:
# Drop observations with no general votes, i.e. candidates that were
# eliminated in the primaries. The frame shape is reported before and after.

print("Shape before dopping missing vote number: ", house_by_dist.shape)
house_by_dist = house_by_dist[house_by_dist["GENERAL_VOTES"].notna()]
print("Shape after dopping missing vote number: ", house_by_dist.shape)

Shape before dopping missing vote number:  (2391, 7)
Shape after dopping missing vote number:  (1441, 7)


In [32]:
# Check sum: add up the candidates' GENERAL_VOTES per district so the result
# can be compared against the district total taken from the spreadsheet.
control_sum_series = house_by_dist.groupby('D_ID')['GENERAL_VOTES'].sum()

# Two-column frame (D_ID, DISTRICT_VOTES_ctrl) built from the per-district sums.
control_sum = control_sum_series.rename('DISTRICT_VOTES_ctrl').reset_index()

# Attach the control sum to every row of its district.
house_by_dist = house_by_dist.merge(control_sum, on='D_ID')

In [33]:
# Change the D_ID below to explore the data district by district.
house_by_dist[house_by_dist["D_ID"].eq("AR_02")].head(100)

Unnamed: 0,row_excel,STATE_ABBREVIATION,D_ID,PARTY,GENERAL_VOTES,DISTRICT_VOTES,PARTY_VALID,DISTRICT_VOTES_ctrl
56,188,AR,AR_02,R,176472,302464,R,302464
57,191,AR,AR_02,D,111347,302464,D,302464
58,192,AR,AR_02,LIB,14342,302464,LIB,302464
59,193,AR,AR_02,W,303,302464,no-party,302464


In [34]:
# List the observations whose district total differs from the control sum.
control_mismatch = house_by_dist["DISTRICT_VOTES"].ne(house_by_dist["DISTRICT_VOTES_ctrl"])
house_by_dist.loc[control_mismatch].head(100)

Unnamed: 0,row_excel,STATE_ABBREVIATION,D_ID,PARTY,GENERAL_VOTES,DISTRICT_VOTES,PARTY_VALID,DISTRICT_VOTES_ctrl
295,833,FL,FL_24,D,Unopposed,,D,Unopposed
1041,2894,OK,OK_01,R,Unopposed,,R,Unopposed


In [35]:
# The control sum and the Excel row number are no longer needed; drop both.
house_by_dist = house_by_dist.drop(columns=['DISTRICT_VOTES_ctrl', 'row_excel'])
house_by_dist.head()

Unnamed: 0,STATE_ABBREVIATION,D_ID,PARTY,GENERAL_VOTES,DISTRICT_VOTES,PARTY_VALID
0,AL,AL_01,R,208083,215893,R
1,AL,AL_01,W,7810,215893,no-party
2,AL,AL_02,R,134886,276584,R
3,AL,AL_02,D,112089,276584,D
4,AL,AL_02,W,29609,276584,no-party


In [36]:
# Number of distinct districts left in the cleaned data.
print(len(house_by_dist["D_ID"].unique()))

439


In [37]:
# Rename columns to their final names and put them in their final order.
final_column_names = {
    'STATE_ABBREVIATION': 'STATE',
    'GENERAL_VOTES': 'VOTES',
    'DISTRICT_VOTES': 'ALL_VOTES_DISTRICT',
}
final_column_order = ['STATE', 'D_ID', 'PARTY', 'PARTY_VALID', 'VOTES', 'ALL_VOTES_DISTRICT']

house_by_dist = house_by_dist.rename(columns=final_column_names).loc[:, final_column_order]

# Save the cleaned data.
# NOTE(review): to_csv also writes the DataFrame index as an unnamed first
# column by default - confirm downstream readers expect that.
house_by_dist.to_csv('../cleaned-data/house-elections-cleaned.csv')