## Part 1: Importing the Voting Data 

In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [2]:
#Import the voting data and make sure to include the column names in their proper place
pres2004 = pd.read_csv('2004_pres.csv', header=2)
pres2008 = pd.read_csv('2008_pres.csv', header=2)
pres2012 = pd.read_csv('2012_pres.csv', header=2)
pres2016 = pd.read_csv('2016_pres.csv', header=2)
nat = pd.read_csv('2004_2016_nat.csv', header=2)
print(pres2004.shape)
print(pres2008.shape)
print(pres2012.shape)
print(pres2016.shape)
print(nat.shape)

(3169, 29)
(3168, 29)
(3169, 29)
(3171, 29)
(234, 30)


## Part 2A: Cleaning Up the Voting Data 

In [3]:
#Concatenate the dataframes together as pres and nat, respectively, and compare their columns
pres = pd.concat([pres2004,pres2008,pres2012,pres2016])
print(list(pres.columns))
print()
print(list(nat.columns))

['Office', 'State', 'RaceDate', 'CensusPop', 'Area', 'RedistrictedDate', 'TotalVotes', 'RepVotes', 'RepCandidate', 'RepStatus', 'DemVotes', 'DemCandidate', 'DemStatus', 'ThirdParty', 'ThirdVotes', 'ThirdCandidate', 'ThirdStatus', 'OtherVotes', 'PluralityVotes', 'PluralityParty', 'RepVotesTotalPercent', 'DemVotesTotalPercent', 'ThirdVotesTotalPercent', 'OtherVotesTotalPercent', 'RepVotesMajorPercent', 'DemVotesMajorPercent', 'RaceNotes', 'TitleNotes', 'OtherNotes']

['Office', 'RaceDate', 'Area', 'ElectoralRepVotes', 'ElectoralDemVotes', 'ElectoralOtherVotes', 'TotalVotes', 'RepVotes', 'RepCandidate', 'RepStatus', 'DemVotes', 'DemCandidate', 'DemStatus', 'ThirdParty', 'ThirdVotes', 'ThirdCandidate', 'ThirdStatus', 'OtherVotes', 'OtherDetail', 'PluralityVotes', 'PluralityParty', 'RepVotesTotalPercent', 'DemVotesTotalPercent', 'ThirdVotesTotalPercent', 'OtherVotesTotalPercent', 'RepVotesMajorPercent', 'DemVotesMajorPercent', 'RaceNotes', 'PopularRaceNotes', 'ElectoralRaceNotes']


In [4]:
nat.head()

Unnamed: 0,Office,RaceDate,Area,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes,TotalVotes,RepVotes,RepCandidate,RepStatus,...,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,OtherVotesTotalPercent,RepVotesMajorPercent,DemVotesMajorPercent,RaceNotes,PopularRaceNotes,ElectoralRaceNotes
0,President,20161108,Alabama,9,0,0,2123372,1318255,"Trump, Donald J.",Challenger,...,R,62.08,34.36,2.09,1.46,64.37,35.63,,,
1,President,20161108,Alaska,3,0,0,318608,163387,"Trump, Donald J.",Challenger,...,R,51.28,36.55,5.88,6.29,58.39,41.61,,,
2,President,20161108,Arizona,11,0,0,2573165,1252401,"Trump, Donald J.",Challenger,...,R,48.67,45.13,4.13,2.07,51.89,48.11,,,
3,President,20161108,Arkansas,6,0,0,1130676,684872,"Trump, Donald J.",Challenger,...,R,60.57,33.65,2.65,3.13,64.29,35.71,,,
4,President,20161108,California,0,55,0,14181595,4483810,"Trump, Donald J.",Challenger,...,D,31.62,61.73,3.37,3.28,33.87,66.13,,,


In [5]:
#Slice the RaceDate, Area, ElectoralRepVotes, ElectoralDemVotes, and ElectoralOtherVotes in nat as part of a new 
#dataframe that will be referenced to later
elec = nat.iloc[:,[1,2,3,4,5]]
list(elec.Area.unique())

['Alabama',
 'Alaska',
 'Arizona',
 'Arkansas',
 'California',
 'Colorado',
 'Connecticut',
 'Delaware',
 'District of Columbia',
 'Florida',
 'Georgia',
 'Hawaii',
 'Idaho',
 'Illinois',
 'Indiana',
 'Iowa',
 'Maine',
 'Massachusetts',
 'Michigan',
 'Kansas',
 'Kentucky',
 'Louisiana',
 'Maryland',
 'Minnesota',
 'Mississippi',
 'Missouri',
 'Montana',
 'Nebraska',
 'Nevada',
 'New Hampshire',
 'New Jersey',
 'New Mexico',
 'New York',
 'North Carolina',
 'North Dakota',
 'Ohio',
 'Oklahoma',
 'Oregon',
 'Pennsylvania',
 'Rhode Island',
 'South Carolina',
 'South Dakota',
 'Tennessee',
 'Texas',
 'Utah',
 'Vermont',
 'Virginia',
 'Washington',
 'West Virginia',
 'Wisconsin',
 'Wyoming',
 nan,
 'ElectoralDemVotesAll',
 '227',
 'Area',
 '332',
 '365',
 '251']

In [6]:
#List all of the states (including the District of Columbia) and slice elec to include only those under the Area
#column
states = ['Alabama','Alaska','Arizona','Arkansas','California',
          'Colorado','Connecticut','Delaware','District of Columbia','Florida',
          'Georgia','Hawaii','Idaho','Illinois','Indiana',
          'Iowa','Kansas','Kentucky','Louisiana','Maine',
          'Maryland','Massachusetts','Michigan','Minnesota','Mississippi',
          'Missouri','Montana','Nebraska','Nevada','New Hampshire',
          'New Jersey','New Mexico','New York','North Carolina','North Dakota',
          'Ohio','Oklahoma','Oregon','Pennsylvania','Rhode Island',
          'South Carolina','South Dakota','Tennessee','Texas','Utah',
          'Vermont','Virginia','Washington','West Virginia','Wisconsin','Wyoming']
elec = elec.loc[(elec['Area'].isin(states))]
elec.head()

Unnamed: 0,RaceDate,Area,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes
0,20161108,Alabama,9,0,0
1,20161108,Alaska,3,0,0
2,20161108,Arizona,11,0,0
3,20161108,Arkansas,6,0,0
4,20161108,California,0,55,0


In [7]:
#For simplicity purposes, drop the RaceDate column and replace it with a Year one.  Then sort elec in chronological 
#and alphabetical order, and as a convenience reset its index numbers
elec['RaceDate'] = pd.to_datetime(elec['RaceDate'], format='%Y%m%d')
elec['Year'] = elec['RaceDate'].dt.year
elec = elec.drop(['RaceDate'], axis=1)
elec = elec.sort_values(by=['Year','Area'])
elec = elec.set_index('Year')
elec = elec.reset_index(drop=False)
elec.head()

Unnamed: 0,Year,Area,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes
0,2004,Alabama,9,0,0
1,2004,Alaska,3,0,0
2,2004,Arizona,10,0,0
3,2004,Arkansas,6,0,0
4,2004,California,0,55,0


In [8]:
elec.dtypes

Year                    int64
Area                   object
ElectoralRepVotes      object
ElectoralDemVotes      object
ElectoralOtherVotes    object
dtype: object

In [9]:
#Convert the last three columns into integers
elec.iloc[:,2:] = elec.iloc[:,2:].astype(int)
elec.head()

Unnamed: 0,Year,Area,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes
0,2004,Alabama,9,0,0
1,2004,Alaska,3,0,0
2,2004,Arizona,10,0,0
3,2004,Arkansas,6,0,0
4,2004,California,0,55,0


## Part 2B: Fixing Alaska and DC in the Voting Data 

In [10]:
#Check the unique values included for Alaska and the District of Columbia
ak = pres.loc[(pres.State == 'Alaska')]
print(list(ak.Area.unique()))
print()
dc = pres.loc[(pres.State == 'District of Columbia')]
print(list(dc.Area.unique()))

['ABSENTEE', 'Election District 1', 'Election District 10', 'Election District 11', 'Election District 12', 'Election District 13', 'Election District 14', 'Election District 15', 'Election District 16', 'Election District 17', 'Election District 18', 'Election District 19', 'Election District 2', 'Election District 20', 'Election District 21', 'Election District 22', 'Election District 23', 'Election District 24', 'Election District 25', 'Election District 26', 'Election District 27', 'Election District 28', 'Election District 29', 'Election District 3', 'Election District 30', 'Election District 31', 'Election District 32', 'Election District 33', 'Election District 34', 'Election District 35', 'Election District 36', 'Election District 37', 'Election District 38', 'Election District 39', 'Election District 4', 'Election District 40', 'Election District 5', 'Election District 6', 'Election District 7', 'Election District 8', 'Election District 9', 'FEDERAL ABSENTEE']

['Federal Ballo

In [11]:
#Since the above two states are missing or do not have counties, slice nat to include only these.  Similar as 
#before, replace the RaceDate column with a Year one, and drop the columns listed below
nat = nat.loc[(nat.Area == 'Alaska') | (nat.Area == 'District of Columbia')]
nat['RaceDate'] = pd.to_datetime(nat['RaceDate'], format='%Y%m%d')
nat['Year'] = nat['RaceDate'].dt.year
nat = nat.drop(['Office','RaceDate','ElectoralRepVotes', 'ElectoralDemVotes', 'ElectoralOtherVotes','RepStatus',
                'DemStatus','ThirdParty','ThirdCandidate','ThirdStatus','RepVotesMajorPercent', 
                'DemVotesMajorPercent','OtherDetail','RaceNotes','PopularRaceNotes','ElectoralRaceNotes'], axis=1)
nat.head()

Unnamed: 0,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,OtherVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,OtherVotesTotalPercent,Year
1,Alaska,318608,163387,"Trump, Donald J.",116454,"Clinton, Hillary Rodham",18725.0,20042,46933,R,51.28,36.55,5.88,6.29,2016
8,District of Columbia,311268,12723,"Trump, Donald J.",282830,"Clinton, Hillary Rodham",4906.0,10809,270107,D,4.09,90.86,1.58,3.47,2016
61,Alaska,300495,164676,"Romney, W. Mitt",122640,"Obama, Barack H.",7392.0,5787,42036,R,54.8,40.81,2.46,1.93,2012
68,District of Columbia,293764,21381,"Romney, W. Mitt",267070,"Obama, Barack H.",2083.0,3230,245689,D,7.28,90.91,0.71,1.1,2012
121,Alaska,326197,193841,"McCain, John S. III",123594,"Obama, Barack H.",,8762,70247,R,59.42,37.89,0.0,2.69,2008


In [12]:
#Like with elec, sort nat in chronological and alphabetical order and reset its index numbers.  At the same time, add
#a State column that for now is the same as the Area one, and rearrange the order of nat's column in order to read 
#the data more easily
nat = nat.sort_values(by=['Year','Area'])
nat = nat.set_index('Year')
nat = nat.reset_index(drop=False)
nat['State'] = nat['Area']
nat = nat[['Year','State','Area','TotalVotes','RepVotes','RepCandidate','DemVotes','DemCandidate','ThirdVotes',
           'OtherVotes','PluralityVotes','PluralityParty','RepVotesTotalPercent','DemVotesTotalPercent',
           'ThirdVotesTotalPercent','OtherVotesTotalPercent']]
nat.head()

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,OtherVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,OtherVotesTotalPercent
0,2004,Alaska,Alaska,312598,190889,"Bush, George W.",111025,"Kerry, John F.",,10684,79864,R,61.07,35.52,0.0,3.42
1,2004,District of Columbia,District of Columbia,227586,21256,"Bush, George W.",202970,"Kerry, John F.",,3360,181714,D,9.34,89.18,0.0,1.48
2,2008,Alaska,Alaska,326197,193841,"McCain, John S. III",123594,"Obama, Barack H.",,8762,70247,R,59.42,37.89,0.0,2.69
3,2008,District of Columbia,District of Columbia,265853,17367,"McCain, John S. III",245800,"Obama, Barack H.",,2686,228433,D,6.53,92.46,0.0,1.01
4,2012,Alaska,Alaska,300495,164676,"Romney, W. Mitt",122640,"Obama, Barack H.",7392.0,5787,42036,R,54.8,40.81,2.46,1.93


In [13]:
#Slice pres to include only the states under its Area column.  After replacing the RaceDate column with a Year one,
#drop the columns listed below.  Similarly as earlier, sort pres in chronological and alphabetical order and reset its 
#index numbers
pres = pres.loc[(pres['State'].isin(states))]
pres['RaceDate'] = pd.to_datetime(pres['RaceDate'], format='%Y%m%d')
pres['Year'] = pres['RaceDate'].dt.year
pres = pres.drop(['Office','RaceDate','CensusPop','RedistrictedDate','RepStatus','DemStatus','ThirdParty',
                  'ThirdCandidate','ThirdStatus','RepVotesMajorPercent', 'DemVotesMajorPercent','RaceNotes',
                  'TitleNotes','OtherNotes'], axis=1)
pres = pres.sort_values(by=['Year','State'])
pres = pres.set_index('Year')
pres = pres.reset_index(drop=False)
pres.head()

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,OtherVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,OtherVotesTotalPercent
0,2004,Alabama,AUTAUGA,20081,15196,"Bush, George W.",4758,"Kerry, John F.",,127,10438,R,75.67,23.69,0.0,0.63
1,2004,Alabama,BALDWIN,69320,52971,"Bush, George W.",15599,"Kerry, John F.",,750,37372,R,76.42,22.5,0.0,1.08
2,2004,Alabama,BARBOUR,10777,5899,"Bush, George W.",4832,"Kerry, John F.",,46,1067,R,54.74,44.84,0.0,0.43
3,2004,Alabama,BIBB,7600,5472,"Bush, George W.",2089,"Kerry, John F.",,39,3383,R,72.0,27.49,0.0,0.51
4,2004,Alabama,BLOUNT,21504,17386,"Bush, George W.",3938,"Kerry, John F.",,180,13448,R,80.85,18.31,0.0,0.84


In [14]:
#This is a check to ensure that pres and nat have the same columns
print(pres.shape)
print()
print(list(pres.columns))
print()
print(nat.shape)
print()
print(list(nat.columns))

(12665, 16)

['Year', 'State', 'Area', 'TotalVotes', 'RepVotes', 'RepCandidate', 'DemVotes', 'DemCandidate', 'ThirdVotes', 'OtherVotes', 'PluralityVotes', 'PluralityParty', 'RepVotesTotalPercent', 'DemVotesTotalPercent', 'ThirdVotesTotalPercent', 'OtherVotesTotalPercent']

(8, 16)

['Year', 'State', 'Area', 'TotalVotes', 'RepVotes', 'RepCandidate', 'DemVotes', 'DemCandidate', 'ThirdVotes', 'OtherVotes', 'PluralityVotes', 'PluralityParty', 'RepVotesTotalPercent', 'DemVotesTotalPercent', 'ThirdVotesTotalPercent', 'OtherVotesTotalPercent']


In [15]:
#List the individual counties included under the Area column in pres
print(list(pres.Area.unique()))

['AUTAUGA', 'BALDWIN', 'BARBOUR', 'BIBB', 'BLOUNT', 'BULLOCK', 'BUTLER', 'CALHOUN', 'CHAMBERS', 'CHEROKEE', 'CHILTON', 'CHOCTAW', 'CLARKE', 'CLAY', 'CLEBURNE', 'COFFEE', 'COLBERT', 'CONECUH', 'COOSA', 'COVINGTON', 'CRENSHAW', 'CULLMAN', 'DALE', 'DALLAS', 'DEKALB', 'ELMORE', 'ESCAMBIA', 'ETOWAH', 'FAYETTE', 'FRANKLIN', 'GENEVA', 'GREENE', 'HALE', 'HENRY', 'HOUSTON', 'JACKSON', 'JEFFERSON', 'LAMAR', 'LAUDERDALE', 'LAWRENCE', 'LEE', 'LIMESTONE', 'LOWNDES', 'MACON', 'MADISON', 'MARENGO', 'MARION', 'MARSHALL', 'MOBILE', 'MONROE', 'MONTGOMERY', 'MORGAN', 'PERRY', 'PICKENS', 'PIKE', 'RANDOLPH', 'RUSSELL', 'SHELBY', 'ST. CLAIR', 'SUMTER', 'TALLADEGA', 'TALLAPOOSA', 'TUSCALOOSA', 'WALKER', 'WASHINGTON', 'WILCOX', 'WINSTON', 'ABSENTEE', 'Election District 1', 'Election District 10', 'Election District 11', 'Election District 12', 'Election District 13', 'Election District 14', 'Election District 15', 'Election District 16', 'Election District 17', 'Election District 18', 'Election District 19', 

In [16]:
#Temporarily drop Alaska, the District of Columbia, and Hawaii from pres, and remove any other odd values from that
#column
pres = pres.loc[(pres.State != 'Alaska') & (pres.State != 'District of Columbia')]
pres = pres.loc[(~pres.Area.str.contains('ABSENTEE')) & (~pres.Area.str.contains('At Large')) &
                (~pres.Area.str.contains('District')) & (~pres.Area.str.contains('Federal Absentee')) &
                (~pres.Area.str.contains('Special Absentee')) & (~pres.Area.str.contains('OVERSEAS VOTE')) &
                (~pres.Area.str.contains('Votes Not Reported by County'))]
pres.head()

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,OtherVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,OtherVotesTotalPercent
0,2004,Alabama,AUTAUGA,20081,15196,"Bush, George W.",4758,"Kerry, John F.",,127,10438,R,75.67,23.69,0.0,0.63
1,2004,Alabama,BALDWIN,69320,52971,"Bush, George W.",15599,"Kerry, John F.",,750,37372,R,76.42,22.5,0.0,1.08
2,2004,Alabama,BARBOUR,10777,5899,"Bush, George W.",4832,"Kerry, John F.",,46,1067,R,54.74,44.84,0.0,0.43
3,2004,Alabama,BIBB,7600,5472,"Bush, George W.",2089,"Kerry, John F.",,39,3383,R,72.0,27.49,0.0,0.51
4,2004,Alabama,BLOUNT,21504,17386,"Bush, George W.",3938,"Kerry, John F.",,180,13448,R,80.85,18.31,0.0,0.84


In [17]:
#Concatenate pres and nat as part of a new dataframe, and confirm that the Area column is capitalized properly.  
#After that, sort vote in chronological and alphabetical order, and confirm there are no null values in it
vote = pd.concat([pres,nat])
vote['Area'] = vote['Area'].str.title()
vote = vote.sort_values(by=['Year','State','Area'])
vote = vote.fillna(0)
vote.head()

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,OtherVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,OtherVotesTotalPercent
0,2004,Alabama,Autauga,20081,15196,"Bush, George W.",4758,"Kerry, John F.",0,127,10438,R,75.67,23.69,0,0.63
1,2004,Alabama,Baldwin,69320,52971,"Bush, George W.",15599,"Kerry, John F.",0,750,37372,R,76.42,22.5,0,1.08
2,2004,Alabama,Barbour,10777,5899,"Bush, George W.",4832,"Kerry, John F.",0,46,1067,R,54.74,44.84,0,0.43
3,2004,Alabama,Bibb,7600,5472,"Bush, George W.",2089,"Kerry, John F.",0,39,3383,R,72.0,27.49,0,0.51
4,2004,Alabama,Blount,21504,17386,"Bush, George W.",3938,"Kerry, John F.",0,180,13448,R,80.85,18.31,0,0.84


## Part 2C: Continuing the Cleanup Process for the Voting Data 

In [18]:
#Check the data types included in vote
vote.dtypes

Year                       int64
State                     object
Area                      object
TotalVotes                object
RepVotes                  object
RepCandidate              object
DemVotes                  object
DemCandidate              object
ThirdVotes                object
OtherVotes                object
PluralityVotes            object
PluralityParty            object
RepVotesTotalPercent      object
DemVotesTotalPercent      object
ThirdVotesTotalPercent    object
OtherVotesTotalPercent    object
dtype: object

In [19]:
#Remove the commas from the six columns spelled out below before converting them into integers.  Then conver the last
#few columns indicating some percent value into floats
vote['TotalVotes'] = vote['TotalVotes'].replace(',','', regex=True)
vote['RepVotes'] = vote['RepVotes'].replace(',','', regex=True)
vote['DemVotes'] = vote['DemVotes'].replace(',','', regex=True)
vote['ThirdVotes'] = vote['ThirdVotes'].replace(',','', regex=True)
vote['OtherVotes'] = vote['OtherVotes'].replace(',','', regex=True)
vote['PluralityVotes'] = vote['PluralityVotes'].replace(',','', regex=True)
vote.iloc[:,[3,4,6,8,9,10]] = vote.iloc[:,[3,4,6,8,9,10]].astype(int)
vote.iloc[:,[12,13,14,15]] = vote.iloc[:,[12,13,14,15]].astype(float)
vote.tail()

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,OtherVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,OtherVotesTotalPercent
12660,2016,Wyoming,Sweetwater,17130,12154,"Trump, Donald J.",3231,"Clinton, Hillary Rodham",928,817,8923,R,70.95,18.86,5.42,4.77
12661,2016,Wyoming,Teton,12627,3921,"Trump, Donald J.",7314,"Clinton, Hillary Rodham",701,691,3393,D,31.05,57.92,5.55,5.47
12662,2016,Wyoming,Uinta,8470,6154,"Trump, Donald J.",1202,"Clinton, Hillary Rodham",472,642,4952,R,72.66,14.19,5.57,7.58
12663,2016,Wyoming,Washakie,3814,2911,"Trump, Donald J.",532,"Clinton, Hillary Rodham",194,177,2379,R,76.32,13.95,5.09,4.64
12664,2016,Wyoming,Weston,3526,3033,"Trump, Donald J.",299,"Clinton, Hillary Rodham",108,86,2734,R,86.02,8.48,3.06,2.44


In [20]:
#To avoid redundancy, update the ThirdVotes and ThirdVotesTotalPercent columns to include the figures under the
#OtherVotes and OtherVotesTotalPercent ones, respectively, and drop the latter two columns once done.  For simplicity
#purposes, reset vote's index numbers as well
vote['ThirdVotes'] = vote['ThirdVotes']+vote['OtherVotes']
vote['ThirdVotesTotalPercent'] = vote['ThirdVotesTotalPercent']+vote['OtherVotesTotalPercent']
vote = vote.drop(['OtherVotes','OtherVotesTotalPercent'], axis=1)
vote = vote.reset_index(drop=True)
vote.tail()

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
12456,2016,Wyoming,Sweetwater,17130,12154,"Trump, Donald J.",3231,"Clinton, Hillary Rodham",1745,8923,R,70.95,18.86,10.19
12457,2016,Wyoming,Teton,12627,3921,"Trump, Donald J.",7314,"Clinton, Hillary Rodham",1392,3393,D,31.05,57.92,11.02
12458,2016,Wyoming,Uinta,8470,6154,"Trump, Donald J.",1202,"Clinton, Hillary Rodham",1114,4952,R,72.66,14.19,13.15
12459,2016,Wyoming,Washakie,3814,2911,"Trump, Donald J.",532,"Clinton, Hillary Rodham",371,2379,R,76.32,13.95,9.73
12460,2016,Wyoming,Weston,3526,3033,"Trump, Donald J.",299,"Clinton, Hillary Rodham",194,2734,R,86.02,8.48,5.5


In [21]:
#Update the Area column to include the corresponding state abbreviations as strings to avoid confusion between states 
#that could share a county name (except Alaska and the District of Columbia)
vote['Area'] = np.where((vote['State'] == 'Alabama'), vote['Area'] + str(', AL'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Arizona'), vote['Area'] + str(', AZ'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Arkansas'), vote['Area'] + str(', AR'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'California'), vote['Area'] + str(', CA'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Colorado'), vote['Area'] + str(', CO'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Connecticut'), vote['Area'] + str(', CT'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Delaware'), vote['Area'] + str(', DE'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'District of Columbia'), 'District of Columbia', vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Florida'), vote['Area'] + str(', FL'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Georgia'), vote['Area'] + str(', GA'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Hawaii'), vote['Area'] + str(', HI'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Idaho'), vote['Area'] + str(', ID'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Illinois'), vote['Area'] + str(', IL'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Indiana'), vote['Area'] + str(', IN'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Iowa'), vote['Area'] + str(', IA'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Kansas'), vote['Area'] + str(', KS'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Kentucky'), vote['Area'] + str(', KY'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Louisiana'), vote['Area'] + str(', LA'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Maine'), vote['Area'] + str(', ME'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Maryland'), vote['Area'] + str(', MD'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Massachusetts'), vote['Area'] + str(', MA'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Michigan'), vote['Area'] + str(', MI'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Minnesota'), vote['Area'] + str(', MN'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Mississippi'), vote['Area'] + str(', MS'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Missouri'), vote['Area'] + str(', MO'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Montana'), vote['Area'] + str(', MT'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Nebraska'), vote['Area'] + str(', NE'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Nevada'), vote['Area'] + str(', NV'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'New Hampshire'), vote['Area'] + str(', NH'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'New Jersey'), vote['Area'] + str(', NJ'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'New Mexico'), vote['Area'] + str(', NM'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'New York'), vote['Area'] + str(', NY'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'North Carolina'), vote['Area'] + str(', NC'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'North Dakota'), vote['Area'] + str(', ND'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Ohio'), vote['Area'] + str(', OH'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Oklahoma'), vote['Area'] + str(', OK'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Oregon'), vote['Area'] + str(', OR'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Pennsylvania'), vote['Area'] + str(', PA'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Rhode Island'), vote['Area'] + str(', RI'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'South Carolina'), vote['Area'] + str(', SC'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'South Dakota'), vote['Area'] + str(', SD'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Tennessee'), vote['Area'] + str(', TN'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Texas'), vote['Area'] + str(', TX'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Utah'), vote['Area'] + str(', UT'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Vermont'), vote['Area'] + str(', VT'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Virginia'), vote['Area'] + str(', VA'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Washington'), vote['Area'] + str(', WA'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'West Virginia'), vote['Area'] + str(', WV'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Wisconsin'), vote['Area'] + str(', WI'), vote['Area'])
vote['Area'] = np.where((vote['State'] == 'Wyoming'), vote['Area'] + str(', WY'), vote['Area'])
vote.head()

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
0,2004,Alabama,"Autauga, AL",20081,15196,"Bush, George W.",4758,"Kerry, John F.",127,10438,R,75.67,23.69,0.63
1,2004,Alabama,"Baldwin, AL",69320,52971,"Bush, George W.",15599,"Kerry, John F.",750,37372,R,76.42,22.5,1.08
2,2004,Alabama,"Barbour, AL",10777,5899,"Bush, George W.",4832,"Kerry, John F.",46,1067,R,54.74,44.84,0.43
3,2004,Alabama,"Bibb, AL",7600,5472,"Bush, George W.",2089,"Kerry, John F.",39,3383,R,72.0,27.49,0.51
4,2004,Alabama,"Blount, AL",21504,17386,"Bush, George W.",3938,"Kerry, John F.",180,13448,R,80.85,18.31,0.84


In [22]:
vote.loc[(vote.State == 'District of Columbia')]

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
291,2004,District of Columbia,District of Columbia,227586,21256,"Bush, George W.",202970,"Kerry, John F.",3360,181714,D,9.34,89.18,1.48
3406,2008,District of Columbia,District of Columbia,265853,17367,"McCain, John S. III",245800,"Obama, Barack H.",2686,228433,D,6.53,92.46,1.01
6521,2012,District of Columbia,District of Columbia,293764,21381,"Romney, W. Mitt",267070,"Obama, Barack H.",5313,245689,D,7.28,90.91,1.81
9636,2016,District of Columbia,District of Columbia,311268,12723,"Trump, Donald J.",282830,"Clinton, Hillary Rodham",15715,270107,D,4.09,90.86,5.05


In [23]:
vote.tail()

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
12456,2016,Wyoming,"Sweetwater, WY",17130,12154,"Trump, Donald J.",3231,"Clinton, Hillary Rodham",1745,8923,R,70.95,18.86,10.19
12457,2016,Wyoming,"Teton, WY",12627,3921,"Trump, Donald J.",7314,"Clinton, Hillary Rodham",1392,3393,D,31.05,57.92,11.02
12458,2016,Wyoming,"Uinta, WY",8470,6154,"Trump, Donald J.",1202,"Clinton, Hillary Rodham",1114,4952,R,72.66,14.19,13.15
12459,2016,Wyoming,"Washakie, WY",3814,2911,"Trump, Donald J.",532,"Clinton, Hillary Rodham",371,2379,R,76.32,13.95,9.73
12460,2016,Wyoming,"Weston, WY",3526,3033,"Trump, Donald J.",299,"Clinton, Hillary Rodham",194,2734,R,86.02,8.48,5.5


## Part 3: Importing the Income and GDP Data 

In [24]:
#Import the income and GDP data
inc = pd.read_csv('CAINC1__ALL_AREAS_1969_2019.csv')
gdp = pd.read_csv('CAGDP2__ALL_AREAS_2001_2019.csv')
print(inc.shape)
print()
print(gdp.shape)
print()
inc.head()

(9598, 59)

(107988, 27)



Unnamed: 0,GeoFIPS,GeoName,Region,TableName,LineCode,IndustryClassification,Description,Unit,1969,1970,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,"""00000""",United States,,CAINC1,1.0,...,Personal income (thousands of dollars),Thousands of dollars,791229000,855525000,...,12541995000,13315478000,13998383000,14175503000,14982715000,15717140000,16151881000,16937582000,17839255000,18542262000
1,"""00000""",United States,,CAINC1,2.0,...,Population (persons) 1/,Number of persons,201298000,203798722,...,309321666,311556874,313830990,315993715,318301008,320635163,322941311,324985539,326687501,328239523
2,"""00000""",United States,,CAINC1,3.0,...,Per capita personal income (dollars) 2/,Dollars,3931,4198,...,40547,42739,44605,44860,47071,49019,50015,52118,54606,56490
3,"""01000""",Alabama,5.0,CAINC1,1.0,...,Personal income (thousands of dollars),Thousands of dollars,9737715,10628318,...,161516561,167942757,172804075,174415420,180457683,188711443,192281933,199999756,208752683,216449038
4,"""01000""",Alabama,5.0,CAINC1,2.0,...,Population (persons) 1/,Number of persons,3440000,3449846,...,4785437,4799069,4815588,4830081,4841799,4852347,4863525,4874486,4887681,4903185


In [25]:
gdp.head()

Unnamed: 0,GeoFIPS,GeoName,Region,TableName,LineCode,IndustryClassification,Description,Unit,2001,2002,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,"""00000""",United States *,,CAGDP2,1.0,...,All industry total,Thousands of dollars,10581822000,10936418000,...,14992052000,15542582000,16197007000,16784851000,17527258000,18238301000,18745075000,19542980000,20611861000,21433226000
1,"""00000""",United States *,,CAGDP2,2.0,...,Private industries,Thousands of dollars,9188887000,9462020000,...,12884089000,13405520000,14037519000,14572341000,15255889000,15898859000,16360179000,17094245000,18062184000,18793750000
2,"""00000""",United States *,,CAGDP2,3.0,11,"Agriculture, forestry, fishing and hunting",Thousands of dollars,99836000,95629000,...,146299000,180945000,179573000,215601000,201003000,182283000,166571000,176625000,178569000,175373000
3,"""00000""",United States *,,CAGDP2,6.0,21,"Mining, quarrying, and oil and gas extraction",Thousands of dollars,123924000,112417000,...,305838000,356315000,358798000,386519000,416375000,261774000,218027000,274035000,330819000,309469000
4,"""00000""",United States *,,CAGDP2,10.0,22,Utilities,Thousands of dollars,181332000,177618000,...,278837000,287459000,279651000,286340000,298076000,299232000,301745000,310092000,321829000,335264000


## Part 4A: Cleaning Up the Income Data 

In [26]:
#Keep only the years 2004 through 2016, and remove the aggregate region names
inc = inc.iloc[:,[0,1,6,7,43,44,45,46,47,48,49,50,51,52,53,54,55]]
inc = inc.loc[(inc.GeoName != 'United States') & (inc.GeoName != 'New England') & (inc.GeoName != 'Mideast') & 
              (inc.GeoName != 'Great Lakes') & (inc.GeoName != 'Plains') & (inc.GeoName != 'Southeast') & 
              (inc.GeoName != 'Southwest') & (inc.GeoName != 'Rocky Mountain') & (inc.GeoName != 'Far West')]
inc.head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
3,"""01000""",Alabama,Personal income (thousands of dollars),Thousands of dollars,128739879,136327338,145175833,152327005,157366824,155145273,161516561,167942757,172804075,174415420,180457683,188711443,192281933
4,"""01000""",Alabama,Population (persons) 1/,Number of persons,4530729,4569805,4628981,4672840,4718206,4757938,4785437,4799069,4815588,4830081,4841799,4852347,4863525
5,"""01000""",Alabama,Per capita personal income (dollars) 2/,Dollars,28415,29832,31362,32598,33353,32608,33752,34995,35884,36110,37271,38891,39536
6,"""01001""","Autauga, AL",Personal income (thousands of dollars),Thousands of dollars,1332117,1429633,1528773,1649284,1750849,1764260,1826597,1896347,1927064,1944878,2019288,2131416,2203281
7,"""01001""","Autauga, AL",Population (persons) 1/,Number of persons,48366,49676,51328,52405,53277,54135,54773,55227,54954,54727,54893,54864,55243


In [27]:
inc.tail()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
9569,"""56045""","Weston, WY",Per capita personal income (dollars) 2/,Dollars,27437.0,30659.0,35301.0,36014.0,39137.0,37365.0,37919.0,40285.0,42231.0,43512.0,46588.0,46719.0,41990.0
9594,Note: See the included footnote file.,,,,,,,,,,,,,,,,
9595,CAINC1: Personal Income Summary: Personal Inco...,,,,,,,,,,,,,,,,
9596,"Last updated: November 17, 2020-- new statisti...",,,,,,,,,,,,,,,,
9597,Source: U.S. Department of Commerce / Bureau o...,,,,,,,,,,,,,,,,


In [28]:
#Drop the unneeded rows from inc
inc = inc.iloc[:9567]
inc = inc.fillna(0)
inc.tail()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
9565,"""56043""","Washakie, WY",Population (persons) 1/,Number of persons,7960,8022,7979,8169,8229,8423,8530,8449,8409,8413,8273,8278,8165
9566,"""56043""","Washakie, WY",Per capita personal income (dollars) 2/,Dollars,33957,34581,37109,38143,38985,36780,36343,41591,41085,41206,44406,44286,43615
9567,"""56045""","Weston, WY",Personal income (thousands of dollars),Thousands of dollars,182343,202163,237117,253287,279163,271494,272941,287716,298872,310503,332544,336749,303169
9568,"""56045""","Weston, WY",Population (persons) 1/,Number of persons,6646,6594,6717,7033,7133,7266,7198,7142,7077,7136,7138,7208,7220
9569,"""56045""","Weston, WY",Per capita personal income (dollars) 2/,Dollars,27437,30659,35301,36014,39137,37365,37919,40285,42231,43512,46588,46719,41990


In [29]:
#Using None as a default value, add a new State column and  add the corresponding name depending on the abbreviation 
#included alongside the county for each row
inc['State'] = 'None'
inc['State'] = np.where((inc['GeoName'] == 'Alabama') | (inc['GeoName'].str.contains('AL')), 
                        'Alabama', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Alaska') | (inc['GeoName'].str.contains('AK')), 
                        'Alaska', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Arizona') | (inc['GeoName'].str.contains('AZ')), 
                        'Arizona', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Arkansas') | (inc['GeoName'].str.contains('AR')), 
                        'Arkansas', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'California') | (inc['GeoName'].str.contains('CA')), 
                        'California', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Colorado') | (inc['GeoName'].str.contains('CO')), 
                        'Colorado', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Connecticut') | (inc['GeoName'].str.contains('CT')), 
                        'Connecticut', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Delaware') | (inc['GeoName'].str.contains('DE')), 
                        'Delaware', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'District of Columbia') | (inc['GeoName'].str.contains('DC')), 
                        'District of Columbia', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Florida') | (inc['GeoName'].str.contains('FL')), 
                        'Florida', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Georgia') | (inc['GeoName'].str.contains('GA')), 
                        'Georgia', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Hawaii') | (inc['GeoName'].str.contains('HI')),
                        'Hawaii', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Idaho') | (inc['GeoName'].str.contains('ID')), 
                        'Idaho', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Illinois') | (inc['GeoName'].str.contains('IL')), 
                        'Illinois', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Indiana') | (inc['GeoName'].str.contains('IN')), 
                        'Indiana', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Iowa') | (inc['GeoName'].str.contains('IA')), 
                        'Iowa', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Kansas') | (inc['GeoName'].str.contains('KS')), 
                        'Kansas', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Kentucky') | (inc['GeoName'].str.contains('KY')), 
                        'Kentucky', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Louisiana') | (inc['GeoName'].str.contains('LA')), 
                        'Louisiana', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Maine') | (inc['GeoName'].str.contains('ME')), 
                        'Maine', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Maryland') | (inc['GeoName'].str.contains('MD')), 
                        'Maryland', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Massachusetts') | (inc['GeoName'].str.contains('MA')), 
                        'Massachusetts', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Michigan') | (inc['GeoName'].str.contains('MI')), 
                        'Michigan', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Minnesota') | (inc['GeoName'].str.contains('MN')), 
                        'Minnesota', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Mississippi') | (inc['GeoName'].str.contains('MS')), 
                        'Mississippi', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Missouri') | (inc['GeoName'].str.contains('MO')), 
                        'Missouri', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Montana') | (inc['GeoName'].str.contains('MT')), 
                        'Montana', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Nebraska') | (inc['GeoName'].str.contains('NE')), 
                        'Nebraska', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Nevada') | (inc['GeoName'].str.contains('NV')), 
                        'Nevada', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'New Hampshire') | (inc['GeoName'].str.contains('NH')), 
                        'New Hampshire', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'New Jersey') | (inc['GeoName'].str.contains('NJ')), 
                        'New Jersey', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'New Mexico') | (inc['GeoName'].str.contains('NM')), 
                        'New Mexico', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'New York') | (inc['GeoName'].str.contains('NY')), 
                        'New York', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'North Carolina') | (inc['GeoName'].str.contains('NC')),
                        'North Carolina', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'North Dakota') | (inc['GeoName'].str.contains('ND')), 
                        'North Dakota', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Ohio') | (inc['GeoName'].str.contains('OH')), 
                        'Ohio', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Oklahoma') | (inc['GeoName'].str.contains('OK')), 
                        'Oklahoma', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Oregon') | (inc['GeoName'].str.contains('OR')), 
                        'Oregon', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Pennsylvania') | (inc['GeoName'].str.contains('PA')), 
                        'Pennsylvania', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Rhode Island') | (inc['GeoName'].str.contains('RI')), 
                        'Rhode Island', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'South Carolina') | (inc['GeoName'].str.contains('SC')),
                        'South Carolina', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'South Dakota') | (inc['GeoName'].str.contains('SD')), 
                        'South Dakota', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Tennessee') | (inc['GeoName'].str.contains('TN')), 
                        'Tennessee', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Texas') | (inc['GeoName'].str.contains('TX')), 
                        'Texas', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Utah') | (inc['GeoName'].str.contains('UT')),
                        'Utah', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Vermont') | (inc['GeoName'].str.contains('VT')), 
                        'Vermont', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Virginia') | (inc['GeoName'].str.contains('VA')), 
                        'Virginia', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Washington') | (inc['GeoName'].str.contains('WA')), 
                        'Washington', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'West Virginia') | (inc['GeoName'].str.contains('WV')), 
                        'West Virginia', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Wisconsin') | (inc['GeoName'].str.contains('WI')), 
                        'Wisconsin', inc['State'])
inc['State'] = np.where((inc['GeoName'] == 'Wyoming') | (inc['GeoName'].str.contains('WY')), 
                        'Wyoming', inc['State'])
inc.tail()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
9565,"""56043""","Washakie, WY",Population (persons) 1/,Number of persons,7960,8022,7979,8169,8229,8423,8530,8449,8409,8413,8273,8278,8165,Wyoming
9566,"""56043""","Washakie, WY",Per capita personal income (dollars) 2/,Dollars,33957,34581,37109,38143,38985,36780,36343,41591,41085,41206,44406,44286,43615,Wyoming
9567,"""56045""","Weston, WY",Personal income (thousands of dollars),Thousands of dollars,182343,202163,237117,253287,279163,271494,272941,287716,298872,310503,332544,336749,303169,Wyoming
9568,"""56045""","Weston, WY",Population (persons) 1/,Number of persons,6646,6594,6717,7033,7133,7266,7198,7142,7077,7136,7138,7208,7220,Wyoming
9569,"""56045""","Weston, WY",Per capita personal income (dollars) 2/,Dollars,27437,30659,35301,36014,39137,37365,37919,40285,42231,43512,46588,46719,41990,Wyoming


## Part 4B: Fixing Alaska and DC in the Income Data 

In [30]:
#Recall from the voting dataset that Alaska and the District of Columbia are missing or do not have counties, so these
#states must be sliced in inc to include only their aggregate figures
ak_dc = inc.loc[(inc.GeoName == 'Alaska') | (inc.GeoName == 'District of Columbia')]
ak_dc

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
207,"""02000""",Alaska,Personal income (thousands of dollars),Thousands of dollars,24338346,25956877,27726673,29838859,32825068,32896441,35293829,37827809,39169240,38859491,41110392,42505840,41745626,Alaska
208,"""02000""",Alaska,Population (persons) 1/,Number of persons,659286,666946,675302,680300,687455,698895,713910,722128,730443,737068,736283,737498,741456,Alaska
209,"""02000""",Alaska,Per capita personal income (dollars) 2/,Dollars,36916,38919,41058,43861,47749,47069,49437,52384,53624,52722,55835,57635,56302,Alaska
1056,"""11000""",District of Columbia,Personal income (thousands of dollars),Thousands of dollars,28501723,30163906,32001079,34480237,35462270,35798825,38481501,41786280,43426932,44189236,47259734,50963559,53375784,District of Columbia
1057,"""11000""",District of Columbia,Population (persons) 1/,Number of persons,567754,567136,570681,574404,580236,592228,605226,619800,634924,650581,662328,675400,685815,District of Columbia
1058,"""11000""",District of Columbia,Per capita personal income (dollars) 2/,Dollars,50201,53186,56075,60028,61117,60448,63582,67419,68397,67923,71354,75457,77828,District of Columbia


In [31]:
#Temporarily drop the above two states in inc, and add them back in with only their aggregate figures
inc = inc.loc[(inc.State != 'Alaska') & (inc.State != 'District of Columbia')]
inc = pd.concat([inc, ak_dc])
inc = inc.reset_index(drop=True)
inc = inc.sort_values(by=['State','GeoName'])
inc.head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
0,"""01000""",Alabama,Personal income (thousands of dollars),Thousands of dollars,128739879,136327338,145175833,152327005,157366824,155145273,161516561,167942757,172804075,174415420,180457683,188711443,192281933,Alabama
1,"""01000""",Alabama,Population (persons) 1/,Number of persons,4530729,4569805,4628981,4672840,4718206,4757938,4785437,4799069,4815588,4830081,4841799,4852347,4863525,Alabama
2,"""01000""",Alabama,Per capita personal income (dollars) 2/,Dollars,28415,29832,31362,32598,33353,32608,33752,34995,35884,36110,37271,38891,39536,Alabama
3,"""01001""","Autauga, AL",Personal income (thousands of dollars),Thousands of dollars,1332117,1429633,1528773,1649284,1750849,1764260,1826597,1896347,1927064,1944878,2019288,2131416,2203281,Alabama
4,"""01001""","Autauga, AL",Population (persons) 1/,Number of persons,48366,49676,51328,52405,53277,54135,54773,55227,54954,54727,54893,54864,55243,Alabama


## Part 4C: Cleaning Up the GDP Data 

In [32]:
gdp.head()

Unnamed: 0,GeoFIPS,GeoName,Region,TableName,LineCode,IndustryClassification,Description,Unit,2001,2002,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,"""00000""",United States *,,CAGDP2,1.0,...,All industry total,Thousands of dollars,10581822000,10936418000,...,14992052000,15542582000,16197007000,16784851000,17527258000,18238301000,18745075000,19542980000,20611861000,21433226000
1,"""00000""",United States *,,CAGDP2,2.0,...,Private industries,Thousands of dollars,9188887000,9462020000,...,12884089000,13405520000,14037519000,14572341000,15255889000,15898859000,16360179000,17094245000,18062184000,18793750000
2,"""00000""",United States *,,CAGDP2,3.0,11,"Agriculture, forestry, fishing and hunting",Thousands of dollars,99836000,95629000,...,146299000,180945000,179573000,215601000,201003000,182283000,166571000,176625000,178569000,175373000
3,"""00000""",United States *,,CAGDP2,6.0,21,"Mining, quarrying, and oil and gas extraction",Thousands of dollars,123924000,112417000,...,305838000,356315000,358798000,386519000,416375000,261774000,218027000,274035000,330819000,309469000
4,"""00000""",United States *,,CAGDP2,10.0,22,Utilities,Thousands of dollars,181332000,177618000,...,278837000,287459000,279651000,286340000,298076000,299232000,301745000,310092000,321829000,335264000


In [33]:
print(list(gdp.GeoName.unique()))

['United States *', 'Alabama', 'Autauga, AL', 'Baldwin, AL', 'Barbour, AL', 'Bibb, AL', 'Blount, AL', 'Bullock, AL', 'Butler, AL', 'Calhoun, AL', 'Chambers, AL', 'Cherokee, AL', 'Chilton, AL', 'Choctaw, AL', 'Clarke, AL', 'Clay, AL', 'Cleburne, AL', 'Coffee, AL', 'Colbert, AL', 'Conecuh, AL', 'Coosa, AL', 'Covington, AL', 'Crenshaw, AL', 'Cullman, AL', 'Dale, AL', 'Dallas, AL', 'DeKalb, AL', 'Elmore, AL', 'Escambia, AL', 'Etowah, AL', 'Fayette, AL', 'Franklin, AL', 'Geneva, AL', 'Greene, AL', 'Hale, AL', 'Henry, AL', 'Houston, AL', 'Jackson, AL', 'Jefferson, AL', 'Lamar, AL', 'Lauderdale, AL', 'Lawrence, AL', 'Lee, AL', 'Limestone, AL', 'Lowndes, AL', 'Macon, AL', 'Madison, AL', 'Marengo, AL', 'Marion, AL', 'Marshall, AL', 'Mobile, AL', 'Monroe, AL', 'Montgomery, AL', 'Morgan, AL', 'Perry, AL', 'Pickens, AL', 'Pike, AL', 'Randolph, AL', 'Russell, AL', 'St. Clair, AL', 'Shelby, AL', 'Sumter, AL', 'Talladega, AL', 'Tallapoosa, AL', 'Tuscaloosa, AL', 'Walker, AL', 'Washington, AL', 'Wilco

In [34]:
#Keep only the years 2004 through 2016, and remove the aggregate region names
gdp = gdp.iloc[:,[0,1,6,7,11,12,13,14,15,16,17,18,19,20,21,22,23]]
gdp = gdp.loc[(gdp.GeoName != 'United States *') & (gdp.GeoName != 'New England') & (gdp.GeoName != 'Mideast') & 
              (gdp.GeoName != 'Great Lakes') & (gdp.GeoName != 'Plains') & (gdp.GeoName != 'Southeast') & 
              (gdp.GeoName != 'Southwest') & (gdp.GeoName != 'Rocky Mountain') & (gdp.GeoName != 'Far West')]
gdp.head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
34,"""01000""",Alabama,All industry total,Thousands of dollars,146886707,156403303,164347837,170263853,172677248,169128512,175470140,181929880,186553871,192166511,195037686,200197466,204454687
35,"""01000""",Alabama,Private industries,Thousands of dollars,123086044,131222848,138032542,141818766,143193086,138168520,144017021,149921776,154601237,159979872,161985542,166301755,169903894
36,"""01000""",Alabama,"Agriculture, forestry, fishing and hunting",Thousands of dollars,2925332,2630358,2099692,1944287,1894939,1915954,1928328,1591025,1802170,3113430,2518194,2665473,2036652
37,"""01000""",Alabama,"Mining, quarrying, and oil and gas extraction",Thousands of dollars,1960544,2747053,2895305,3431976,3849019,2819762,3034450,2949459,2891834,2760583,2490266,1901441,1566222
38,"""01000""",Alabama,Utilities,Thousands of dollars,3913734,3826057,4377328,4573699,4718711,5052112,5582131,5820104,5688911,5679844,5857377,5994975,6202802


In [35]:
gdp.tail()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
107711,"""56045""","Weston, WY",Private services-providing industries 3/,Thousands of dollars,71201.0,74948.0,86633.0,96091.0,97774.0,100095.0,99232.0,98859.0,103509.0,111264.0,116289.0,128678.0,118463.0
107984,Note: See the included footnote file.,,,,,,,,,,,,,,,,
107985,CAGDP2: Gross domestic product (GDP) by county...,,,,,,,,,,,,,,,,
107986,"Last updated: December 9, 2020 -- new statisti...",,,,,,,,,,,,,,,,
107987,Source: U.S. Department of Commerce / Bureau o...,,,,,,,,,,,,,,,,


In [36]:
#Drop the unneeded rows from gdp
gdp = gdp.iloc[:107678]
gdp = gdp.fillna(0)
gdp.tail()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
107707,"""56045""","Weston, WY",Trade,Thousands of dollars,(D),(D),(D),(D),(D),20620,21630,21257,(D),(D),(D),(D),(D)
107708,"""56045""","Weston, WY",Transportation and utilities,Thousands of dollars,16962,16438,18192,17752,17405,17731,15744,14171,15316,23139,26200,35935,26886
107709,"""56045""","Weston, WY",Manufacturing and information,Thousands of dollars,52449,(D),(D),181194,207459,44748,52469,77566,117411,(D),37493,61536,34212
107710,"""56045""","Weston, WY",Private goods-producing industries 2/,Thousands of dollars,88456,131783,172572,257146,295610,103829,110285,124638,162775,106166,106048,110295,73932
107711,"""56045""","Weston, WY",Private services-providing industries 3/,Thousands of dollars,71201,74948,86633,96091,97774,100095,99232,98859,103509,111264,116289,128678,118463


In [37]:
#Using None as a default value, add a new State column and for the next couple of cells, add the corresponding name 
#depending on the abbreviation included alongside the county for each row
gdp['State'] = 'None'
gdp['State'] = np.where((gdp['GeoName'] == 'Alabama') | (gdp['GeoName'].str.contains('AL')), 
                        'Alabama', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Alaska') | (gdp['GeoName'].str.contains('AK')), 
                        'Alaska', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Arizona') | (gdp['GeoName'].str.contains('AZ')), 
                        'Arizona', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Arkansas') | (gdp['GeoName'].str.contains('AR')), 
                        'Arkansas', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'California') | (gdp['GeoName'].str.contains('CA')), 
                        'California', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Colorado') | (gdp['GeoName'].str.contains('CO')), 
                        'Colorado', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Connecticut') | (gdp['GeoName'].str.contains('CT')), 
                        'Connecticut', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Delaware') | (gdp['GeoName'].str.contains('DE')), 
                        'Delaware', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'District of Columbia') | (gdp['GeoName'].str.contains('DC')), 
                        'District of Columbia', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Florida') | (gdp['GeoName'].str.contains('FL')), 
                        'Florida', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Georgia') | (gdp['GeoName'].str.contains('GA')), 
                        'Georgia', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Hawaii') | (gdp['GeoName'].str.contains('HI')),
                        'Hawaii', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Idaho') | (gdp['GeoName'].str.contains('ID')), 
                        'Idaho', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Illinois') | (gdp['GeoName'].str.contains('IL')), 
                        'Illinois', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Indiana') | (gdp['GeoName'].str.contains('IN')), 
                        'Indiana', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Iowa') | (gdp['GeoName'].str.contains('IA')), 
                        'Iowa', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Kansas') | (gdp['GeoName'].str.contains('KS')), 
                        'Kansas', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Kentucky') | (gdp['GeoName'].str.contains('KY')), 
                        'Kentucky', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Louisiana') | (gdp['GeoName'].str.contains('LA')), 
                        'Louisiana', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Maine') | (gdp['GeoName'].str.contains('ME')), 
                        'Maine', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Maryland') | (gdp['GeoName'].str.contains('MD')), 
                        'Maryland', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Massachusetts') | (gdp['GeoName'].str.contains('MA')), 
                        'Massachusetts', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Michigan') | (gdp['GeoName'].str.contains('MI')), 
                        'Michigan', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Minnesota') | (gdp['GeoName'].str.contains('MN')), 
                        'Minnesota', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Mississippi') | (gdp['GeoName'].str.contains('MS')), 
                        'Mississippi', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Missouri') | (gdp['GeoName'].str.contains('MO')), 
                        'Missouri', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Montana') | (gdp['GeoName'].str.contains('MT')), 
                        'Montana', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Nebraska') | (gdp['GeoName'].str.contains('NE')), 
                        'Nebraska', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Nevada') | (gdp['GeoName'].str.contains('NV')), 
                        'Nevada', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'New Hampshire') | (gdp['GeoName'].str.contains('NH')), 
                        'New Hampshire', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'New Jersey') | (gdp['GeoName'].str.contains('NJ')), 
                        'New Jersey', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'New Mexico') | (gdp['GeoName'].str.contains('NM')), 
                        'New Mexico', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'New York') | (gdp['GeoName'].str.contains('NY')), 
                        'New York', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'North Carolina') | (gdp['GeoName'].str.contains('NC')),
                        'North Carolina', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'North Dakota') | (gdp['GeoName'].str.contains('ND')), 
                        'North Dakota', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Ohio') | (gdp['GeoName'].str.contains('OH')), 
                        'Ohio', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Oklahoma') | (gdp['GeoName'].str.contains('OK')), 
                        'Oklahoma', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Oregon') | (gdp['GeoName'].str.contains('OR')), 
                        'Oregon', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Pennsylvania') | (gdp['GeoName'].str.contains('PA')), 
                        'Pennsylvania', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Rhode Island') | (gdp['GeoName'].str.contains('RI')), 
                        'Rhode Island', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'South Carolina') | (gdp['GeoName'].str.contains('SC')),
                        'South Carolina', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'South Dakota') | (gdp['GeoName'].str.contains('SD')), 
                        'South Dakota', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Tennessee') | (gdp['GeoName'].str.contains('TN')), 
                        'Tennessee', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Texas') | (gdp['GeoName'].str.contains('TX')), 
                        'Texas', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Utah') | (gdp['GeoName'].str.contains('UT')),
                        'Utah', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Vermont') | (gdp['GeoName'].str.contains('VT')), 
                        'Vermont', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Virginia') | (gdp['GeoName'].str.contains('VA')), 
                        'Virginia', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Washington') | (gdp['GeoName'].str.contains('WA')), 
                        'Washington', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'West Virginia') | (gdp['GeoName'].str.contains('WV')), 
                        'West Virginia', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Wisconsin') | (gdp['GeoName'].str.contains('WI')), 
                        'Wisconsin', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Wyoming') | (gdp['GeoName'].str.contains('WY')), 
                        'Wyoming', gdp['State'])
gdp.tail()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
107707,"""56045""","Weston, WY",Trade,Thousands of dollars,(D),(D),(D),(D),(D),20620,21630,21257,(D),(D),(D),(D),(D),Wyoming
107708,"""56045""","Weston, WY",Transportation and utilities,Thousands of dollars,16962,16438,18192,17752,17405,17731,15744,14171,15316,23139,26200,35935,26886,Wyoming
107709,"""56045""","Weston, WY",Manufacturing and information,Thousands of dollars,52449,(D),(D),181194,207459,44748,52469,77566,117411,(D),37493,61536,34212,Wyoming
107710,"""56045""","Weston, WY",Private goods-producing industries 2/,Thousands of dollars,88456,131783,172572,257146,295610,103829,110285,124638,162775,106166,106048,110295,73932,Wyoming
107711,"""56045""","Weston, WY",Private services-providing industries 3/,Thousands of dollars,71201,74948,86633,96091,97774,100095,99232,98859,103509,111264,116289,128678,118463,Wyoming


## Part 4D: Fixing Alaska and DC in the GDP Data 

In [38]:
#Slice Alaska and the District of Columbia aggregate figures from gdp, as each of these are missing or do not have 
#counties
ak_dc = gdp.loc[(gdp.GeoName == 'Alaska') | (gdp.GeoName == 'District of Columbia')]
ak_dc

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
2346,"""02000""",Alaska,All industry total,Thousands of dollars,34987855,39788685,44457105,49002645,54758992,49651626,52947740,56333919,57717546,56625253,55750566,50728090,49755553,Alaska
2347,"""02000""",Alaska,Private industries,Thousands of dollars,27869623,32240305,36542306,40770675,46201169,40604782,43621674,46498806,47473397,46536674,45411043,40369683,39149281,Alaska
2348,"""02000""",Alaska,"Agriculture, forestry, fishing and hunting",Thousands of dollars,304137,345175,370167,337411,373436,371882,459820,538152,497497,509128,494169,458510,400903,Alaska
2349,"""02000""",Alaska,"Mining, quarrying, and oil and gas extraction",Thousands of dollars,7247733,10398909,13820049,16463636,20374778,14420546,15688084,18197504,18212634,16288749,14184103,7737094,6464773,Alaska
2350,"""02000""",Alaska,Utilities,Thousands of dollars,446647,448554,506063,506074,541505,620199,660273,748559,741581,749537,779320,762007,778943,Alaska
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11283,"""11000""",District of Columbia,Trade,Thousands of dollars,1637070,1742671,1809353,1887755,1917765,1880686,2090967,2137964,2318750,2397986,2552123,2679896,2766385,District of Columbia
11284,"""11000""",District of Columbia,Transportation and utilities,Thousands of dollars,1192639,1295774,1414812,1531404,1672828,1822597,1887973,1755094,1638915,1588382,1617757,1751943,1737719,District of Columbia
11285,"""11000""",District of Columbia,Manufacturing and information,Thousands of dollars,6024356,6511964,6184874,6573438,6580806,6352699,6429183,6480297,5926945,6518600,6278942,6593971,6924120,District of Columbia
11286,"""11000""",District of Columbia,Private goods-producing industries 2/,Thousands of dollars,1394187,1360517,1318792,1316859,1344745,1247398,1199787,1376664,1589196,1672195,1685922,1699740,1698174,District of Columbia


In [39]:
#Temporarily drop the above two states in gdp, and add them back in with only their aggregate figures
gdp = gdp.loc[(gdp.State != 'Alaska') & (gdp.State != 'District of Columbia')]
gdp = pd.concat([gdp, ak_dc])
gdp = gdp.reset_index(drop=True)
gdp = gdp.sort_values(by=['State','GeoName'])
gdp.head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
0,"""01000""",Alabama,All industry total,Thousands of dollars,146886707,156403303,164347837,170263853,172677248,169128512,175470140,181929880,186553871,192166511,195037686,200197466,204454687,Alabama
1,"""01000""",Alabama,Private industries,Thousands of dollars,123086044,131222848,138032542,141818766,143193086,138168520,144017021,149921776,154601237,159979872,161985542,166301755,169903894,Alabama
2,"""01000""",Alabama,"Agriculture, forestry, fishing and hunting",Thousands of dollars,2925332,2630358,2099692,1944287,1894939,1915954,1928328,1591025,1802170,3113430,2518194,2665473,2036652,Alabama
3,"""01000""",Alabama,"Mining, quarrying, and oil and gas extraction",Thousands of dollars,1960544,2747053,2895305,3431976,3849019,2819762,3034450,2949459,2891834,2760583,2490266,1901441,1566222,Alabama
4,"""01000""",Alabama,Utilities,Thousands of dollars,3913734,3826057,4377328,4573699,4718711,5052112,5582131,5820104,5688911,5679844,5857377,5994975,6202802,Alabama


## Part 4E-1: Fixing Virginia in the Voting Data  

In [40]:
#When comparing the counties listed in the income, GDP, and voting datasets, notice how some states (Maryland,
#Missouri, Nevada, and Virginia) have independent cities that essentially function like counties.  However, some
#cities are listed alongside their adjacent counties in the income dataset (as shown by the plus signs), but not the
#voting one.  Taking this observation into account, under the voting dataset these counties must be updated to include
#their corresponding cities' figures.  Start off by slicing Virginia
va_i = inc.loc[(inc.State == 'Virginia')]
va_i = va_i.sort_values(by=['GeoName'])
print(list(va_i.GeoName.unique()))
print()
va_g = gdp.loc[(gdp.State == 'Virginia')]
va_g = va_g.sort_values(by=['GeoName'])
print(list(va_g.GeoName.unique()))
print()
va_v = vote.loc[(vote.State == 'Virginia')]
va_v = va_v.sort_values(by=['Area','Year'])
print(list(va_v.Area.unique()))

['Accomack, VA', 'Albemarle + Charlottesville, VA*', 'Alexandria (Independent City), VA', 'Alleghany + Covington, VA*', 'Amelia, VA', 'Amherst, VA', 'Appomattox, VA', 'Arlington, VA', 'Augusta, Staunton + Waynesboro, VA*', 'Bath, VA', 'Bedford, VA*', 'Bland, VA', 'Botetourt, VA', 'Brunswick, VA', 'Buchanan, VA', 'Buckingham, VA', 'Campbell + Lynchburg, VA*', 'Caroline, VA', 'Carroll + Galax, VA*', 'Charles City, VA', 'Charlotte, VA', 'Chesapeake (Independent City), VA', 'Chesterfield, VA', 'Clarke, VA', 'Craig, VA', 'Culpeper, VA', 'Cumberland, VA', 'Dickenson, VA', 'Dinwiddie, Colonial Heights + Petersburg, VA*', 'Essex, VA', 'Fairfax, Fairfax City + Falls Church, VA*', 'Fauquier, VA', 'Floyd, VA', 'Fluvanna, VA', 'Franklin, VA', 'Frederick + Winchester, VA*', 'Giles, VA', 'Gloucester, VA', 'Goochland, VA', 'Grayson, VA', 'Greene, VA', 'Greensville + Emporia, VA*', 'Halifax, VA', 'Hampton (Independent City), VA', 'Hanover, VA', 'Henrico, VA', 'Henry + Martinsville, VA*', 'Highland, VA

In [41]:
va_v.loc[(va_v.Area == 'Norfolk, VA') | (va_v.Area == 'Norfolk City, VA')]

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
12224,2016,Virginia,"Norfolk City, VA",83388,21552,"Trump, Donald J.",57023,"Clinton, Hillary Rodham",4813,35471,D,25.85,68.38,5.77
2878,2004,Virginia,"Norfolk, VA",70570,26401,"Bush, George W.",43518,"Kerry, John F.",651,17117,D,37.41,61.67,0.92
5993,2008,Virginia,"Norfolk, VA",88446,24814,"McCain, John S. III",62819,"Obama, Barack H.",813,38005,D,28.06,71.03,0.92
9108,2012,Virginia,"Norfolk, VA",87043,23147,"Romney, W. Mitt",62687,"Obama, Barack H.",1209,39540,D,26.59,72.02,1.39


In [42]:
#Before proceeding with the counties that list cities next to them, fix Norfolk's name for 2016 to maintain 
#consistency with the other years
va_v['Area'] = np.where((va_v['Area'] == 'Norfolk City, VA'), 'Norfolk, VA', va_v['Area'])
va_v = va_v.sort_values(by=['Year','Area'])
va_v.loc[(va_v.Area == 'Norfolk, VA') | (va_v.Area == 'Norfolk City, VA')]

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2878,2004,Virginia,"Norfolk, VA",70570,26401,"Bush, George W.",43518,"Kerry, John F.",651,17117,D,37.41,61.67,0.92
5993,2008,Virginia,"Norfolk, VA",88446,24814,"McCain, John S. III",62819,"Obama, Barack H.",813,38005,D,28.06,71.03,0.92
9108,2012,Virginia,"Norfolk, VA",87043,23147,"Romney, W. Mitt",62687,"Obama, Barack H.",1209,39540,D,26.59,72.02,1.39
12224,2016,Virginia,"Norfolk, VA",83388,21552,"Trump, Donald J.",57023,"Clinton, Hillary Rodham",4813,35471,D,25.85,68.38,5.77


In [43]:
va_v.loc[(va_v.Area == 'Albemarle, VA') | (va_v.Area == 'Charlottesville, VA')]

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2793,2004,Virginia,"Albemarle, VA",43726,21189,"Bush, George W.",22088,"Kerry, John F.",449,899,D,48.46,50.51,1.03
2816,2004,Virginia,"Charlottesville, VA",15450,4172,"Bush, George W.",11088,"Kerry, John F.",190,6916,D,27.0,71.77,1.23
5908,2008,Virginia,"Albemarle, VA",50984,20576,"McCain, John S. III",29792,"Obama, Barack H.",616,9216,D,40.36,58.43,1.21
5931,2008,Virginia,"Charlottesville, VA",20044,4078,"McCain, John S. III",15705,"Obama, Barack H.",261,11627,D,20.35,78.35,1.3
9023,2012,Virginia,"Albemarle, VA",53907,23297,"Romney, W. Mitt",29757,"Obama, Barack H.",853,6460,D,43.22,55.2,1.59
9046,2012,Virginia,"Charlottesville, VA",21797,4844,"Romney, W. Mitt",16510,"Obama, Barack H.",443,11666,D,22.22,75.74,2.04
12139,2016,Virginia,"Albemarle, VA",56726,19259,"Trump, Donald J.",33345,"Clinton, Hillary Rodham",4122,14086,D,33.95,58.78,7.27
12162,2016,Virginia,"Charlottesville, VA",22472,2960,"Trump, Donald J.",17901,"Clinton, Hillary Rodham",1611,14941,D,13.17,79.66,7.17


In [44]:
#Using the names listed under the GeoName column in inc, for the next couple of cells, slice the county and any cities
#listed alongside it
albemarle = va_v.loc[(va_v.Area == 'Albemarle, VA')]
charlottesville = va_v.loc[(va_v.Area == 'Charlottesville, VA')]
albemarle.head()

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2793,2004,Virginia,"Albemarle, VA",43726,21189,"Bush, George W.",22088,"Kerry, John F.",449,899,D,48.46,50.51,1.03
5908,2008,Virginia,"Albemarle, VA",50984,20576,"McCain, John S. III",29792,"Obama, Barack H.",616,9216,D,40.36,58.43,1.21
9023,2012,Virginia,"Albemarle, VA",53907,23297,"Romney, W. Mitt",29757,"Obama, Barack H.",853,6460,D,43.22,55.2,1.59
12139,2016,Virginia,"Albemarle, VA",56726,19259,"Trump, Donald J.",33345,"Clinton, Hillary Rodham",4122,14086,D,33.95,58.78,7.27


In [45]:
charlottesville.head()

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2816,2004,Virginia,"Charlottesville, VA",15450,4172,"Bush, George W.",11088,"Kerry, John F.",190,6916,D,27.0,71.77,1.23
5931,2008,Virginia,"Charlottesville, VA",20044,4078,"McCain, John S. III",15705,"Obama, Barack H.",261,11627,D,20.35,78.35,1.3
9046,2012,Virginia,"Charlottesville, VA",21797,4844,"Romney, W. Mitt",16510,"Obama, Barack H.",443,11666,D,22.22,75.74,2.04
12162,2016,Virginia,"Charlottesville, VA",22472,2960,"Trump, Donald J.",17901,"Clinton, Hillary Rodham",1611,14941,D,13.17,79.66,7.17


In [46]:
#Update the county's TotalVotes, RepVotes, DemVotes, and ThirdVotes columns to include its adjacent cities' figures.
#Once done, update its PluralityVotes column as the new absolute value difference between the RepVotes and DemVotes
#columns.  Similarly, update the last three columns to reflect the increased vote totals from the former four columns
albemarle['TotalVotes'] += charlottesville['TotalVotes'].values
albemarle['RepVotes'] += charlottesville['RepVotes'].values
albemarle['DemVotes'] += charlottesville['DemVotes'].values
albemarle['ThirdVotes'] += charlottesville['ThirdVotes'].values
albemarle['PluralityVotes'] = abs(albemarle['RepVotes'] - albemarle['DemVotes'])
albemarle['RepVotesTotalPercent'] = (albemarle['RepVotes']/albemarle['TotalVotes']*100).round(2)
albemarle['DemVotesTotalPercent'] = (albemarle['DemVotes']/albemarle['TotalVotes']*100).round(2)
albemarle['ThirdVotesTotalPercent'] = (albemarle['ThirdVotes']/albemarle['TotalVotes']*100).round(2)
albemarle.head()

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2793,2004,Virginia,"Albemarle, VA",59176,25361,"Bush, George W.",33176,"Kerry, John F.",639,7815,D,42.86,56.06,1.08
5908,2008,Virginia,"Albemarle, VA",71028,24654,"McCain, John S. III",45497,"Obama, Barack H.",877,20843,D,34.71,64.06,1.23
9023,2012,Virginia,"Albemarle, VA",75704,28141,"Romney, W. Mitt",46267,"Obama, Barack H.",1296,18126,D,37.17,61.12,1.71
12139,2016,Virginia,"Albemarle, VA",79198,22219,"Trump, Donald J.",51246,"Clinton, Hillary Rodham",5733,29027,D,28.06,64.71,7.24


In [47]:
va_v.loc[(va_v.Area == 'Alleghany, VA') | (va_v.Area == 'Covington, VA')]

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2795,2004,Virginia,"Alleghany, VA",7195,3962,"Bush, George W.",3203,"Kerry, John F.",30,759,R,55.07,44.52,0.42
2821,2004,Virginia,"Covington, VA",2301,1104,"Bush, George W.",1179,"Kerry, John F.",18,75,D,47.98,51.24,0.78
5910,2008,Virginia,"Alleghany, VA",7369,3715,"McCain, John S. III",3553,"Obama, Barack H.",101,162,R,50.41,48.22,1.37
5936,2008,Virginia,"Covington, VA",2354,1020,"McCain, John S. III",1304,"Obama, Barack H.",30,284,D,43.33,55.4,1.27
9025,2012,Virginia,"Alleghany, VA",7173,3595,"Romney, W. Mitt",3403,"Obama, Barack H.",175,192,R,50.12,47.44,2.44
9051,2012,Virginia,"Covington, VA",2330,975,"Romney, W. Mitt",1319,"Obama, Barack H.",36,344,D,41.85,56.61,1.55
12141,2016,Virginia,"Alleghany, VA",7325,4874,"Trump, Donald J.",2166,"Clinton, Hillary Rodham",285,2708,R,66.54,29.57,3.89
12167,2016,Virginia,"Covington, VA",2382,1349,"Trump, Donald J.",914,"Clinton, Hillary Rodham",119,435,R,56.63,38.37,4.99


In [48]:
alleghany = va_v.loc[(va_v.Area == 'Alleghany, VA')]
covington = va_v.loc[(va_v.Area == 'Covington, VA')]
alleghany['TotalVotes'] += covington['TotalVotes'].values
alleghany['RepVotes'] += covington['RepVotes'].values
alleghany['DemVotes'] += covington['DemVotes'].values
alleghany['ThirdVotes'] += covington['ThirdVotes'].values
alleghany['PluralityVotes'] = abs(alleghany['RepVotes'] - alleghany['DemVotes'])
alleghany['RepVotesTotalPercent'] = (alleghany['RepVotes']/alleghany['TotalVotes']*100).round(2)
alleghany['DemVotesTotalPercent'] = (alleghany['DemVotes']/alleghany['TotalVotes']*100).round(2)
alleghany['ThirdVotesTotalPercent'] = (alleghany['ThirdVotes']/alleghany['TotalVotes']*100).round(2)
alleghany

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2795,2004,Virginia,"Alleghany, VA",9496,5066,"Bush, George W.",4382,"Kerry, John F.",48,684,R,53.35,46.15,0.51
5910,2008,Virginia,"Alleghany, VA",9723,4735,"McCain, John S. III",4857,"Obama, Barack H.",131,122,R,48.7,49.95,1.35
9025,2012,Virginia,"Alleghany, VA",9503,4570,"Romney, W. Mitt",4722,"Obama, Barack H.",211,152,R,48.09,49.69,2.22
12141,2016,Virginia,"Alleghany, VA",9707,6223,"Trump, Donald J.",3080,"Clinton, Hillary Rodham",404,3143,R,64.11,31.73,4.16


In [49]:
augusta = va_v.loc[(va_v.Area == 'Augusta, VA')]
staunton = va_v.loc[(va_v.Area == 'Staunton, VA')]
waynesboro = va_v.loc[(va_v.Area == 'Waynesboro, VA')]
augusta['TotalVotes'] += staunton['TotalVotes'].values + waynesboro['TotalVotes'].values
augusta['RepVotes'] += staunton['RepVotes'].values + waynesboro['RepVotes'].values
augusta['DemVotes'] += staunton['DemVotes'].values + waynesboro['DemVotes'].values
augusta['ThirdVotes'] += staunton['ThirdVotes'].values + waynesboro['ThirdVotes'].values
augusta['PluralityVotes'] = abs(augusta['RepVotes'] - augusta['DemVotes'])
augusta['RepVotesTotalPercent'] = (augusta['RepVotes']/augusta['TotalVotes']*100).round(2)
augusta['DemVotesTotalPercent'] = (augusta['DemVotes']/augusta['TotalVotes']*100).round(2)
augusta['ThirdVotesTotalPercent'] = (augusta['ThirdVotes']/augusta['TotalVotes']*100).round(2)
augusta

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2800,2004,Virginia,"Augusta, VA",47296,32997,"Bush, George W.",13567,"Kerry, John F.",732,19430,R,69.77,28.69,1.55
5915,2008,Virginia,"Augusta, VA",53213,33265,"McCain, John S. III",19300,"Obama, Barack H.",648,13965,R,62.51,36.27,1.22
9030,2012,Virginia,"Augusta, VA",53673,33686,"Romney, W. Mitt",19019,"Obama, Barack H.",968,14667,R,62.76,35.43,1.8
12146,2016,Virginia,"Augusta, VA",56802,36097,"Trump, Donald J.",17274,"Clinton, Hillary Rodham",3431,18823,R,63.55,30.41,6.04


In [50]:
bedford = va_v.loc[(va_v.Area == 'Bedford, VA')]
bedford_c = va_v.loc[(va_v.Area == 'Bedford City, VA')]
bedford['TotalVotes'] += bedford_c['TotalVotes'].values
bedford['RepVotes'] += bedford_c['RepVotes'].values
bedford['DemVotes'] += bedford_c['DemVotes'].values
bedford['ThirdVotes'] += bedford_c['ThirdVotes'].values
bedford['PluralityVotes'] = abs(bedford['RepVotes'] - bedford['DemVotes'])
bedford['RepVotesTotalPercent'] = (bedford['RepVotes']/bedford['TotalVotes']*100).round(2)
bedford['DemVotesTotalPercent'] = (bedford['DemVotes']/bedford['TotalVotes']*100).round(2)
bedford['ThirdVotesTotalPercent'] = (bedford['ThirdVotes']/bedford['TotalVotes']*100).round(2)
bedford

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2802,2004,Virginia,"Bedford, VA",33946,23397,"Bush, George W.",10144,"Kerry, John F.",405,13253,R,68.92,29.88,1.19
5917,2008,Virginia,"Bedford, VA",38564,25917,"McCain, John S. III",12225,"Obama, Barack H.",422,13692,R,67.21,31.7,1.09
9032,2012,Virginia,"Bedford, VA",40230,28206,"Romney, W. Mitt",11434,"Obama, Barack H.",590,16772,R,70.11,28.42,1.47
12148,2016,Virginia,"Bedford, VA",42525,30659,"Trump, Donald J.",9768,"Clinton, Hillary Rodham",2098,20891,R,72.1,22.97,4.93


In [51]:
campbell = va_v.loc[(va_v.Area == 'Campbell, VA')]
lynchburg = va_v.loc[(va_v.Area == 'Lynchburg, VA')]
campbell['TotalVotes'] += lynchburg['TotalVotes'].values
campbell['RepVotes'] += lynchburg['RepVotes'].values
campbell['DemVotes'] += lynchburg['DemVotes'].values
campbell['ThirdVotes'] += lynchburg['ThirdVotes'].values
campbell['PluralityVotes'] = abs(campbell['RepVotes'] - campbell['DemVotes'])
campbell['RepVotesTotalPercent'] = (campbell['RepVotes']/campbell['TotalVotes']*100).round(2)
campbell['DemVotesTotalPercent'] = (campbell['DemVotes']/campbell['TotalVotes']*100).round(2)
campbell['ThirdVotesTotalPercent'] = (campbell['ThirdVotes']/campbell['TotalVotes']*100).round(2)
campbell

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2811,2004,Virginia,"Campbell, VA",49337,30291,"Bush, George W.",18589,"Kerry, John F.",457,11702,R,61.4,37.68,0.93
5926,2008,Virginia,"Campbell, VA",60155,35082,"McCain, John S. III",24360,"Obama, Barack H.",713,10722,R,58.32,40.5,1.19
9041,2012,Virginia,"Campbell, VA",62144,37501,"Romney, W. Mitt",23543,"Obama, Barack H.",1100,13958,R,60.35,37.88,1.77
12157,2016,Virginia,"Campbell, VA",63206,37533,"Trump, Donald J.",21456,"Clinton, Hillary Rodham",4217,16077,R,59.38,33.95,6.67


In [52]:
carroll = va_v.loc[(va_v.Area == 'Carroll, VA')]
galax = va_v.loc[(va_v.Area == 'Galax, VA')]
carroll['TotalVotes'] += galax['TotalVotes'].values
carroll['RepVotes'] += galax['RepVotes'].values
carroll['DemVotes'] += galax['DemVotes'].values
carroll['ThirdVotes'] += galax['ThirdVotes'].values
carroll['PluralityVotes'] = abs(carroll['RepVotes'] - carroll['DemVotes'])
carroll['RepVotesTotalPercent'] = (carroll['RepVotes']/carroll['TotalVotes']*100).round(2)
carroll['DemVotesTotalPercent'] = (carroll['DemVotes']/carroll['TotalVotes']*100).round(2)
carroll['ThirdVotesTotalPercent'] = (carroll['ThirdVotes']/carroll['TotalVotes']*100).round(2)
carroll

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2813,2004,Virginia,"Carroll, VA",14463,9509,"Bush, George W.",4875,"Kerry, John F.",79,4634,R,65.75,33.71,0.55
5928,2008,Virginia,"Carroll, VA",14981,9504,"McCain, John S. III",5161,"Obama, Barack H.",316,4343,R,63.44,34.45,2.11
9043,2012,Virginia,"Carroll, VA",15195,10068,"Romney, W. Mitt",4585,"Obama, Barack H.",542,5483,R,66.26,30.17,3.57
12159,2016,Virginia,"Carroll, VA",16044,12266,"Trump, Donald J.",3240,"Clinton, Hillary Rodham",538,9026,R,76.45,20.19,3.35


In [53]:
dinwiddie = va_v.loc[(va_v.Area == 'Dinwiddie, VA')]
colonial_heights = va_v.loc[(va_v.Area == 'Colonial Heights, VA')]
petersburg = va_v.loc[(va_v.Area == 'Petersburg, VA')]
dinwiddie['TotalVotes'] += colonial_heights['TotalVotes'].values + petersburg['TotalVotes'].values
dinwiddie['RepVotes'] += colonial_heights['RepVotes'].values + petersburg['RepVotes'].values
dinwiddie['DemVotes'] += colonial_heights['DemVotes'].values + petersburg['DemVotes'].values
dinwiddie['ThirdVotes'] += colonial_heights['ThirdVotes'].values + petersburg['ThirdVotes'].values
dinwiddie['PluralityVotes'] = abs(dinwiddie['RepVotes'] - dinwiddie['DemVotes'])
dinwiddie['RepVotesTotalPercent'] = (dinwiddie['RepVotes']/dinwiddie['TotalVotes']*100).round(2)
dinwiddie['DemVotesTotalPercent'] = (dinwiddie['DemVotes']/dinwiddie['TotalVotes']*100).round(2)
dinwiddie['ThirdVotesTotalPercent'] = (dinwiddie['ThirdVotes']/dinwiddie['TotalVotes']*100).round(2)
dinwiddie

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2827,2004,Virginia,"Dinwiddie, VA",31019,14560,"Bush, George W.",16312,"Kerry, John F.",147,1752,R,46.94,52.59,0.47
5942,2008,Virginia,"Dinwiddie, VA",37281,14270,"McCain, John S. III",22582,"Obama, Barack H.",429,8312,R,38.28,60.57,1.15
9057,2012,Virginia,"Dinwiddie, VA",38121,14343,"Romney, W. Mitt",23377,"Obama, Barack H.",401,9034,R,37.62,61.32,1.05
12173,2016,Virginia,"Dinwiddie, VA",35818,14579,"Trump, Donald J.",20153,"Clinton, Hillary Rodham",1086,5574,R,40.7,56.27,3.03


In [54]:
fairfax = va_v.loc[(va_v.Area == 'Fairfax, VA')]
fairfax_c = va_v.loc[(va_v.Area == 'Fairfax City, VA')]
falls_church = va_v.loc[(va_v.Area == 'Falls Church, VA')]
fairfax['TotalVotes'] += fairfax_c['TotalVotes'].values + falls_church['TotalVotes'].values
fairfax['RepVotes'] += fairfax_c['RepVotes'].values + falls_church['RepVotes'].values
fairfax['DemVotes'] += fairfax_c['DemVotes'].values + falls_church['DemVotes'].values
fairfax['ThirdVotes'] += fairfax_c['ThirdVotes'].values + falls_church['ThirdVotes'].values
fairfax['PluralityVotes'] = abs(fairfax['RepVotes'] - fairfax['DemVotes'])
fairfax['RepVotesTotalPercent'] = (fairfax['RepVotes']/fairfax['TotalVotes']*100).round(2)
fairfax['DemVotesTotalPercent'] = (fairfax['DemVotes']/fairfax['TotalVotes']*100).round(2)
fairfax['ThirdVotesTotalPercent'] = (fairfax['ThirdVotes']/fairfax['TotalVotes']*100).round(2)
fairfax

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2830,2004,Virginia,"Fairfax, VA",478023,219099,"Bush, George W.",255010,"Kerry, John F.",3914,35911,D,45.83,53.35,0.82
5945,2008,Virginia,"Fairfax, VA",534402,207655,"McCain, John S. III",321629,"Obama, Barack H.",5118,113974,D,38.86,60.18,0.96
9060,2012,Virginia,"Fairfax, VA",548192,213695,"Romney, W. Mitt",326939,"Obama, Barack H.",7558,113244,D,38.98,59.64,1.38
12176,2016,Virginia,"Fairfax, VA",570970,162736,"Trump, Donald J.",368319,"Clinton, Hillary Rodham",39915,205583,D,28.5,64.51,6.99


In [55]:
frederick = va_v.loc[(va_v.Area == 'Frederick, VA')]
winchester = va_v.loc[(va_v.Area == 'Winchester, VA')]
frederick['TotalVotes'] += winchester['TotalVotes'].values
frederick['RepVotes'] += winchester['RepVotes'].values
frederick['DemVotes'] += winchester['DemVotes'].values
frederick['ThirdVotes'] += winchester['ThirdVotes'].values
frederick['PluralityVotes'] = abs(frederick['RepVotes'] - frederick['DemVotes'])
frederick['RepVotesTotalPercent'] = (frederick['RepVotes']/frederick['TotalVotes']*100).round(2)
frederick['DemVotesTotalPercent'] = (frederick['DemVotes']/frederick['TotalVotes']*100).round(2)
frederick['ThirdVotesTotalPercent'] = (frederick['ThirdVotes']/frederick['TotalVotes']*100).round(2)
frederick

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2838,2004,Virginia,"Frederick, VA",37883,24669,"Bush, George W.",12820,"Kerry, John F.",394,11849,R,65.12,33.84,1.04
5953,2008,Virginia,"Frederick, VA",43738,24874,"McCain, John S. III",18229,"Obama, Barack H.",635,6645,R,56.87,41.68,1.45
9068,2012,Virginia,"Frederick, VA",46690,27804,"Romney, W. Mitt",17784,"Obama, Barack H.",1102,10020,R,59.55,38.09,2.36
12184,2016,Virginia,"Frederick, VA",51107,30873,"Trump, Donald J.",17096,"Clinton, Hillary Rodham",3138,13777,R,60.41,33.45,6.14


In [56]:
greensville = va_v.loc[(va_v.Area == 'Greensville, VA')]
emporia = va_v.loc[(va_v.Area == 'Emporia, VA')]
greensville['TotalVotes'] += emporia['TotalVotes'].values
greensville['RepVotes'] += emporia['RepVotes'].values
greensville['DemVotes'] += emporia['DemVotes'].values
greensville['ThirdVotes'] += emporia['ThirdVotes'].values
greensville['PluralityVotes'] = abs(greensville['RepVotes'] - greensville['DemVotes'])
greensville['RepVotesTotalPercent'] = (greensville['RepVotes']/greensville['TotalVotes']*100).round(2)
greensville['DemVotesTotalPercent'] = (greensville['DemVotes']/greensville['TotalVotes']*100).round(2)
greensville['ThirdVotesTotalPercent'] = (greensville['ThirdVotes']/greensville['TotalVotes']*100).round(2)
greensville

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2846,2004,Virginia,"Greensville, VA",6479,2702,"Bush, George W.",3761,"Kerry, John F.",16,1059,D,41.7,58.05,0.25
5961,2008,Virginia,"Greensville, VA",7504,2626,"McCain, John S. III",4824,"Obama, Barack H.",54,2198,D,34.99,64.29,0.72
9076,2012,Virginia,"Greensville, VA",7622,2652,"Romney, W. Mitt",4928,"Obama, Barack H.",42,2276,D,34.79,64.65,0.55
12192,2016,Virginia,"Greensville, VA",6730,2526,"Trump, Donald J.",4088,"Clinton, Hillary Rodham",116,1562,D,37.53,60.74,1.72


In [57]:
henry = va_v.loc[(va_v.Area == 'Henry, VA')]
martinsville = va_v.loc[(va_v.Area == 'Martinsville, VA')]
henry['TotalVotes'] += martinsville['TotalVotes'].values
henry['RepVotes'] += martinsville['RepVotes'].values
henry['DemVotes'] += martinsville['DemVotes'].values
henry['ThirdVotes'] += martinsville['ThirdVotes'].values
henry['PluralityVotes'] = abs(henry['RepVotes'] - henry['DemVotes'])
henry['RepVotesTotalPercent'] = (henry['RepVotes']/henry['TotalVotes']*100).round(2)
henry['DemVotesTotalPercent'] = (henry['DemVotes']/henry['TotalVotes']*100).round(2)
henry['ThirdVotesTotalPercent'] = (henry['ThirdVotes']/henry['TotalVotes']*100).round(2)
henry

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2852,2004,Virginia,"Henry, VA",29061,15896,"Bush, George W.",12887,"Kerry, John F.",278,3009,R,54.7,44.34,0.96
5967,2008,Virginia,"Henry, VA",31735,16069,"McCain, John S. III",15257,"Obama, Barack H.",409,812,R,50.63,48.08,1.29
9082,2012,Virginia,"Henry, VA",31247,16296,"Romney, W. Mitt",14172,"Obama, Barack H.",779,2124,R,52.15,45.35,2.49
12198,2016,Virginia,"Henry, VA",30013,17357,"Trump, Donald J.",11731,"Clinton, Hillary Rodham",925,5626,R,57.83,39.09,3.08


In [58]:
james_c = va_v.loc[(va_v.Area == 'James City, VA')]
williamsburg = va_v.loc[(va_v.Area == 'Williamsburg, VA')]
james_c['TotalVotes'] += williamsburg['TotalVotes'].values
james_c['RepVotes'] += williamsburg['RepVotes'].values
james_c['DemVotes'] += williamsburg['DemVotes'].values
james_c['ThirdVotes'] += williamsburg['ThirdVotes'].values
james_c['PluralityVotes'] = abs(james_c['RepVotes'] - james_c['DemVotes'])
james_c['RepVotesTotalPercent'] = (james_c['RepVotes']/james_c['TotalVotes']*100).round(2)
james_c['DemVotesTotalPercent'] = (james_c['DemVotes']/james_c['TotalVotes']*100).round(2)
james_c['ThirdVotesTotalPercent'] = (james_c['ThirdVotes']/james_c['TotalVotes']*100).round(2)
james_c

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2856,2004,Virginia,"James City, VA",35410,21013,"Bush, George W.",14150,"Kerry, John F.",247,6863,R,59.34,39.96,0.7
5971,2008,Virginia,"James City, VA",45390,23265,"McCain, John S. III",21680,"Obama, Barack H.",445,1585,R,51.26,47.76,0.98
9086,2012,Virginia,"James City, VA",48988,25525,"Romney, W. Mitt",22782,"Obama, Barack H.",681,2743,R,52.1,46.51,1.39
12202,2016,Virginia,"James City, VA",50799,23231,"Trump, Donald J.",24311,"Clinton, Hillary Rodham",3257,1080,R,45.73,47.86,6.41


In [59]:
montgomery = va_v.loc[(va_v.Area == 'Montgomery, VA')]
radford = va_v.loc[(va_v.Area == 'Radford, VA')]
montgomery['TotalVotes'] += radford['TotalVotes'].values
montgomery['RepVotes'] += radford['RepVotes'].values
montgomery['DemVotes'] += radford['DemVotes'].values
montgomery['ThirdVotes'] += radford['ThirdVotes'].values
montgomery['PluralityVotes'] = abs(montgomery['RepVotes'] - montgomery['DemVotes'])
montgomery['RepVotesTotalPercent'] = (montgomery['RepVotes']/montgomery['TotalVotes']*100).round(2)
montgomery['DemVotesTotalPercent'] = (montgomery['DemVotes']/montgomery['TotalVotes']*100).round(2)
montgomery['ThirdVotesTotalPercent'] = (montgomery['ThirdVotes']/montgomery['TotalVotes']*100).round(2)
montgomery

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2874,2004,Virginia,"Montgomery, VA",36360,19634,"Bush, George W.",16372,"Kerry, John F.",354,3262,R,54.0,45.03,0.97
5989,2008,Virginia,"Montgomery, VA",46082,21446,"McCain, John S. III",23961,"Obama, Barack H.",675,2515,D,46.54,52.0,1.46
9104,2012,Virginia,"Montgomery, VA",46408,22526,"Romney, W. Mitt",22635,"Obama, Barack H.",1247,109,R,48.54,48.77,2.69
12220,2016,Virginia,"Montgomery, VA",49116,22097,"Trump, Donald J.",22946,"Clinton, Hillary Rodham",4073,849,D,44.99,46.72,8.29


In [60]:
pittsylvania = va_v.loc[(va_v.Area == 'Pittsylvania, VA')]
danville = va_v.loc[(va_v.Area == 'Danville, VA')]
pittsylvania['TotalVotes'] += danville['TotalVotes'].values
pittsylvania['RepVotes'] += danville['RepVotes'].values
pittsylvania['DemVotes'] += danville['DemVotes'].values
pittsylvania['ThirdVotes'] += danville['ThirdVotes'].values
pittsylvania['PluralityVotes'] = abs(pittsylvania['RepVotes'] - pittsylvania['DemVotes'])
pittsylvania['RepVotesTotalPercent'] = (pittsylvania['RepVotes']/pittsylvania['TotalVotes']*100).round(2)
pittsylvania['DemVotesTotalPercent'] = (pittsylvania['DemVotes']/pittsylvania['TotalVotes']*100).round(2)
pittsylvania['ThirdVotesTotalPercent'] = (pittsylvania['ThirdVotes']/pittsylvania['TotalVotes']*100).round(2)
pittsylvania

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2887,2004,Virginia,"Pittsylvania, VA",46529,27072,"Bush, George W.",18710,"Kerry, John F.",747,8362,R,58.18,40.21,1.61
6002,2008,Virginia,"Pittsylvania, VA",51323,27091,"McCain, John S. III",23767,"Obama, Barack H.",465,3324,R,52.79,46.31,0.91
9117,2012,Virginia,"Pittsylvania, VA",50885,27026,"Romney, W. Mitt",23076,"Obama, Barack H.",783,3950,R,53.11,45.35,1.54
12233,2016,Virginia,"Pittsylvania, VA",50550,28857,"Trump, Donald J.",20258,"Clinton, Hillary Rodham",1435,8599,R,57.09,40.08,2.84


In [61]:
prince_george = va_v.loc[(va_v.Area == 'Prince George, VA')]
hopewell = va_v.loc[(va_v.Area == 'Hopewell, VA')]
prince_george['TotalVotes'] += hopewell['TotalVotes'].values
prince_george['RepVotes'] += hopewell['RepVotes'].values
prince_george['DemVotes'] += hopewell['DemVotes'].values
prince_george['ThirdVotes'] += hopewell['ThirdVotes'].values
prince_george['PluralityVotes'] = abs(prince_george['RepVotes'] - prince_george['DemVotes'])
prince_george['RepVotesTotalPercent'] = (prince_george['RepVotes']/prince_george['TotalVotes']*100).round(2)
prince_george['DemVotesTotalPercent'] = (prince_george['DemVotes']/prince_george['TotalVotes']*100).round(2)
prince_george['ThirdVotesTotalPercent'] = (prince_george['ThirdVotes']/prince_george['TotalVotes']*100).round(2)
prince_george

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2892,2004,Virginia,"Prince George, VA",21190,12382,"Bush, George W.",8639,"Kerry, John F.",169,3743,R,58.43,40.77,0.8
6007,2008,Virginia,"Prince George, VA",25530,12901,"McCain, John S. III",12415,"Obama, Barack H.",214,486,R,50.53,48.63,0.84
9122,2012,Virginia,"Prince George, VA",25077,12618,"Romney, W. Mitt",12170,"Obama, Barack H.",289,448,R,50.32,48.53,1.15
12238,2016,Virginia,"Prince George, VA",25192,13042,"Trump, Donald J.",11143,"Clinton, Hillary Rodham",1007,1899,R,51.77,44.23,4.0


In [62]:
prince_william = va_v.loc[(va_v.Area == 'Prince William, VA')]
manassas = va_v.loc[(va_v.Area == 'Manassas, VA')]
manassas_park = va_v.loc[(va_v.Area == 'Manassas Park, VA')]
prince_william['TotalVotes'] += manassas['TotalVotes'].values + manassas_park['TotalVotes'].values
prince_william['RepVotes'] += manassas['RepVotes'].values + manassas_park['RepVotes'].values
prince_william['DemVotes'] += manassas['DemVotes'].values + manassas_park['DemVotes'].values
prince_william['ThirdVotes'] += manassas['ThirdVotes'].values + manassas_park['ThirdVotes'].values
prince_william['PluralityVotes'] = abs(prince_william['RepVotes'] - prince_william['DemVotes'])
prince_william['RepVotesTotalPercent'] = (prince_william['RepVotes']/prince_william['TotalVotes']*100).round(2)
prince_william['DemVotesTotalPercent'] = (prince_william['DemVotes']/prince_william['TotalVotes']*100).round(2)
prince_william['ThirdVotesTotalPercent'] = (prince_william['ThirdVotes']/prince_william['TotalVotes']*100).round(2)
prince_william

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2893,2004,Virginia,"Prince William, VA",148298,78840,"Bush, George W.",68331,"Kerry, John F.",1127,10509,R,53.16,46.08,0.76
6008,2008,Virginia,"Prince William, VA",180213,75230,"McCain, John S. III",103416,"Obama, Barack H.",1567,28186,D,41.75,57.39,0.87
9123,2012,Virginia,"Prince William, VA",200051,82620,"Romney, W. Mitt",114688,"Obama, Barack H.",2743,32068,D,41.3,57.33,1.37
12239,2016,Virginia,"Prince William, VA",217181,79407,"Trump, Donald J.",124771,"Clinton, Hillary Rodham",13003,45364,D,36.56,57.45,5.99


In [63]:
roanoke = va_v.loc[(va_v.Area == 'Roanoke, VA')]
salem = va_v.loc[(va_v.Area == 'Salem, VA')]
roanoke['TotalVotes'] += salem['TotalVotes'].values
roanoke['RepVotes'] += salem['RepVotes'].values
roanoke['DemVotes'] += salem['DemVotes'].values
roanoke['ThirdVotes'] += salem['ThirdVotes'].values
roanoke['PluralityVotes'] = abs(roanoke['RepVotes'] - roanoke['DemVotes'])
roanoke['RepVotesTotalPercent'] = (roanoke['RepVotes']/roanoke['TotalVotes']*100).round(2)
roanoke['DemVotesTotalPercent'] = (roanoke['DemVotes']/roanoke['TotalVotes']*100).round(2)
roanoke['ThirdVotesTotalPercent'] = (roanoke['ThirdVotes']/roanoke['TotalVotes']*100).round(2)
roanoke

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2899,2004,Virginia,"Roanoke, VA",58457,37711,"Bush, George W.",20336,"Kerry, John F.",410,17375,R,64.51,34.79,0.7
6014,2008,Virginia,"Roanoke, VA",63381,37659,"McCain, John S. III",24976,"Obama, Barack H.",746,12683,R,59.42,39.41,1.18
9129,2012,Virginia,"Roanoke, VA",63535,38923,"Romney, W. Mitt",23471,"Obama, Barack H.",1141,15452,R,61.26,36.94,1.8
12245,2016,Virginia,"Roanoke, VA",63714,38634,"Trump, Donald J.",21402,"Clinton, Hillary Rodham",3678,17232,R,60.64,33.59,5.77


In [64]:
rockbridge = va_v.loc[(va_v.Area == 'Rockbridge, VA')]
buena_vista = va_v.loc[(va_v.Area == 'Buena Vista, VA')]
lexington = va_v.loc[(va_v.Area == 'Lexington, VA')]
rockbridge['TotalVotes'] += buena_vista['TotalVotes'].values + lexington['TotalVotes'].values
rockbridge['RepVotes'] += buena_vista['RepVotes'].values + lexington['RepVotes'].values
rockbridge['DemVotes'] += buena_vista['DemVotes'].values + lexington['DemVotes'].values
rockbridge['ThirdVotes'] += buena_vista['ThirdVotes'].values + lexington['ThirdVotes'].values
rockbridge['PluralityVotes'] = abs(rockbridge['RepVotes'] - rockbridge['DemVotes'])
rockbridge['RepVotesTotalPercent'] = (rockbridge['RepVotes']/rockbridge['TotalVotes']*100).round(2)
rockbridge['DemVotesTotalPercent'] = (rockbridge['DemVotes']/rockbridge['TotalVotes']*100).round(2)
rockbridge['ThirdVotesTotalPercent'] = (rockbridge['ThirdVotes']/rockbridge['TotalVotes']*100).round(2)
rockbridge

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2901,2004,Virginia,"Rockbridge, VA",13919,7811,"Bush, George W.",5903,"Kerry, John F.",205,1908,R,56.12,42.41,1.47
6016,2008,Virginia,"Rockbridge, VA",15097,7928,"McCain, John S. III",6998,"Obama, Barack H.",171,930,R,52.51,46.35,1.13
9131,2012,Virginia,"Rockbridge, VA",15390,8608,"Romney, W. Mitt",6493,"Obama, Barack H.",289,2115,R,55.93,42.19,1.88
12247,2016,Virginia,"Rockbridge, VA",15667,8876,"Trump, Donald J.",5715,"Clinton, Hillary Rodham",1076,3161,R,56.65,36.48,6.87


In [65]:
rockingham = va_v.loc[(va_v.Area == 'Rockingham, VA')]
harrisonburg = va_v.loc[(va_v.Area == 'Harrisonburg, VA')]
rockingham['TotalVotes'] += harrisonburg['TotalVotes'].values
rockingham['RepVotes'] += harrisonburg['RepVotes'].values
rockingham['DemVotes'] += harrisonburg['DemVotes'].values
rockingham['ThirdVotes'] += harrisonburg['ThirdVotes'].values
rockingham['PluralityVotes'] = abs(rockingham['RepVotes'] - rockingham['DemVotes'])
rockingham['RepVotesTotalPercent'] = (rockingham['RepVotes']/rockingham['TotalVotes']*100).round(2)
rockingham['DemVotesTotalPercent'] = (rockingham['DemVotes']/rockingham['TotalVotes']*100).round(2)
rockingham['ThirdVotesTotalPercent'] = (rockingham['ThirdVotes']/rockingham['TotalVotes']*100).round(2)
rockingham

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2902,2004,Virginia,"Rockingham, VA",40246,27902,"Bush, George W.",11999,"Kerry, John F.",345,15903,R,69.33,29.81,0.86
6017,2008,Virginia,"Rockingham, VA",48009,28516,"McCain, John S. III",18897,"Obama, Barack H.",596,9619,R,59.4,39.36,1.24
9132,2012,Virginia,"Rockingham, VA",50459,30751,"Romney, W. Mitt",18719,"Obama, Barack H.",989,12032,R,60.94,37.1,1.96
12248,2016,Virginia,"Rockingham, VA",55480,32252,"Trump, Donald J.",19578,"Clinton, Hillary Rodham",3650,12674,R,58.13,35.29,6.58


In [66]:
southampton = va_v.loc[(va_v.Area == 'Southampton, VA')]
franklin_c = va_v.loc[(va_v.Area == 'Franklin City, VA')]
southampton['TotalVotes'] += franklin_c['TotalVotes'].values
southampton['RepVotes'] += franklin_c['RepVotes'].values
southampton['DemVotes'] += franklin_c['DemVotes'].values
southampton['ThirdVotes'] += franklin_c['ThirdVotes'].values
southampton['PluralityVotes'] = abs(southampton['RepVotes'] - southampton['DemVotes'])
southampton['RepVotesTotalPercent'] = (southampton['RepVotes']/southampton['TotalVotes']*100).round(2)
southampton['DemVotesTotalPercent'] = (southampton['DemVotes']/southampton['TotalVotes']*100).round(2)
southampton['ThirdVotesTotalPercent'] = (southampton['ThirdVotes']/southampton['TotalVotes']*100).round(2)
southampton

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2908,2004,Virginia,"Southampton, VA",11028,5631,"Bush, George W.",5341,"Kerry, John F.",56,290,R,51.06,48.43,0.51
6023,2008,Virginia,"Southampton, VA",13494,6159,"McCain, John S. III",7221,"Obama, Barack H.",114,1062,R,45.64,53.51,0.84
9138,2012,Virginia,"Southampton, VA",13624,6229,"Romney, W. Mitt",7270,"Obama, Barack H.",125,1041,R,45.72,53.36,0.92
12254,2016,Virginia,"Southampton, VA",12948,6456,"Trump, Donald J.",6114,"Clinton, Hillary Rodham",378,342,R,49.86,47.22,2.92


In [67]:
spotsylvania = va_v.loc[(va_v.Area == 'Spotsylvania, VA')]
fredericksburg = va_v.loc[(va_v.Area == 'Fredericksburg, VA')]
spotsylvania['TotalVotes'] += fredericksburg['TotalVotes'].values
spotsylvania['RepVotes'] += fredericksburg['RepVotes'].values
spotsylvania['DemVotes'] += fredericksburg['DemVotes'].values
spotsylvania['ThirdVotes'] += fredericksburg['ThirdVotes'].values
spotsylvania['PluralityVotes'] = abs(spotsylvania['RepVotes'] - spotsylvania['DemVotes'])
spotsylvania['RepVotesTotalPercent'] = (spotsylvania['RepVotes']/spotsylvania['TotalVotes']*100).round(2)
spotsylvania['DemVotesTotalPercent'] = (spotsylvania['DemVotes']/spotsylvania['TotalVotes']*100).round(2)
spotsylvania['ThirdVotesTotalPercent'] = (spotsylvania['ThirdVotes']/spotsylvania['TotalVotes']*100).round(2)
spotsylvania

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2909,2004,Virginia,"Spotsylvania, VA",52987,31917,"Bush, George W.",20708,"Kerry, John F.",362,11209,R,60.24,39.08,0.68
6024,2008,Virginia,"Spotsylvania, VA",63746,32023,"McCain, John S. III",31052,"Obama, Barack H.",671,971,R,50.24,48.71,1.05
9139,2012,Virginia,"Spotsylvania, VA",69411,35904,"Romney, W. Mitt",32296,"Obama, Barack H.",1211,3608,R,51.73,46.53,1.74
12255,2016,Virginia,"Spotsylvania, VA",73826,38367,"Trump, Donald J.",30914,"Clinton, Hillary Rodham",4545,7453,R,51.97,41.87,6.16


In [68]:
washington = va_v.loc[(va_v.Area == 'Washington, VA')]
bristol = va_v.loc[(va_v.Area == 'Bristol, VA')]
washington['TotalVotes'] += bristol['TotalVotes'].values
washington['RepVotes'] += bristol['RepVotes'].values
washington['DemVotes'] += bristol['DemVotes'].values
washington['ThirdVotes'] += bristol['ThirdVotes'].values
washington['PluralityVotes'] = abs(washington['RepVotes'] - washington['DemVotes'])
washington['RepVotesTotalPercent'] = (washington['RepVotes']/washington['TotalVotes']*100).round(2)
washington['DemVotesTotalPercent'] = (washington['DemVotes']/washington['TotalVotes']*100).round(2)
washington['ThirdVotesTotalPercent'] = (washington['ThirdVotes']/washington['TotalVotes']*100).round(2)
washington

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2918,2004,Virginia,"Washington, VA",29238,19024,"Bush, George W.",9739,"Kerry, John F.",475,9285,R,65.07,33.31,1.62
6033,2008,Virginia,"Washington, VA",31859,20656,"McCain, John S. III",10728,"Obama, Barack H.",475,9928,R,64.84,33.67,1.49
9148,2012,Virginia,"Washington, VA",33019,22921,"Romney, W. Mitt",9568,"Obama, Barack H.",530,13353,R,69.42,28.98,1.61
12264,2016,Virginia,"Washington, VA",32879,24212,"Trump, Donald J.",7388,"Clinton, Hillary Rodham",1279,16824,R,73.64,22.47,3.89


In [69]:
wise = va_v.loc[(va_v.Area == 'Wise, VA')]
norton = va_v.loc[(va_v.Area == 'Norton, VA')]
wise['TotalVotes'] += norton['TotalVotes'].values
wise['RepVotes'] += norton['RepVotes'].values
wise['DemVotes'] += norton['DemVotes'].values
wise['ThirdVotes'] += norton['ThirdVotes'].values
wise['PluralityVotes'] = abs(wise['RepVotes'] - wise['DemVotes'])
wise['RepVotesTotalPercent'] = (wise['RepVotes']/wise['TotalVotes']*100).round(2)
wise['DemVotesTotalPercent'] = (wise['DemVotes']/wise['TotalVotes']*100).round(2)
wise['ThirdVotesTotalPercent'] = (wise['ThirdVotes']/wise['TotalVotes']*100).round(2)
wise

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2923,2004,Virginia,"Wise, VA",15816,9098,"Bush, George W.",6527,"Kerry, John F.",191,2571,R,57.52,41.27,1.21
6038,2008,Virginia,"Wise, VA",15650,9658,"McCain, John S. III",5738,"Obama, Barack H.",254,3920,R,61.71,36.66,1.62
9153,2012,Virginia,"Wise, VA",16510,11971,"Romney, W. Mitt",4326,"Obama, Barack H.",213,7645,R,72.51,26.2,1.29
12269,2016,Virginia,"Wise, VA",16623,13107,"Trump, Donald J.",3084,"Clinton, Hillary Rodham",432,10023,R,78.85,18.55,2.6


In [70]:
york = va_v.loc[(va_v.Area == 'York, VA')]
poquoson = va_v.loc[(va_v.Area == 'Poquoson, VA')]
york['TotalVotes'] += poquoson['TotalVotes'].values
york['RepVotes'] += poquoson['RepVotes'].values
york['DemVotes'] += poquoson['DemVotes'].values
york['ThirdVotes'] += poquoson['ThirdVotes'].values
york['PluralityVotes'] = abs(york['RepVotes'] - york['DemVotes'])
york['RepVotesTotalPercent'] = (york['RepVotes']/york['TotalVotes']*100).round(2)
york['DemVotesTotalPercent'] = (york['DemVotes']/york['TotalVotes']*100).round(2)
york['ThirdVotesTotalPercent'] = (york['ThirdVotes']/york['TotalVotes']*100).round(2)
york

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2925,2004,Virginia,"York, VA",36360,24400,"Bush, George W.",11700,"Kerry, John F.",260,12700,R,67.11,32.18,0.72
6040,2008,Virginia,"York, VA",40962,25062,"McCain, John S. III",15448,"Obama, Barack H.",452,9614,R,61.18,37.71,1.1
9155,2012,Virginia,"York, VA",41059,25516,"Romney, W. Mitt",14862,"Obama, Barack H.",681,10654,R,62.14,36.2,1.66
12271,2016,Virginia,"York, VA",41295,23929,"Trump, Donald J.",14600,"Clinton, Hillary Rodham",2766,9329,R,57.95,35.36,6.7


In [71]:
va_v.tail()

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
12267,2016,Virginia,"Williamsburg, VA",7626,1925,"Trump, Donald J.",5206,"Clinton, Hillary Rodham",495,3281,D,25.24,68.27,6.49
12268,2016,Virginia,"Winchester, VA",10667,4790,"Trump, Donald J.",5164,"Clinton, Hillary Rodham",713,374,D,44.9,48.41,6.68
12269,2016,Virginia,"Wise, VA",15163,12086,"Trump, Donald J.",2701,"Clinton, Hillary Rodham",376,9385,R,79.71,17.81,2.48
12270,2016,Virginia,"Wythe, VA",13343,10046,"Trump, Donald J.",2770,"Clinton, Hillary Rodham",527,7276,R,75.29,20.76,3.95
12271,2016,Virginia,"York, VA",34118,18837,"Trump, Donald J.",12999,"Clinton, Hillary Rodham",2282,5838,R,55.21,38.1,6.69


In [72]:
#Drop the old rows with counties that list cities alongside them
remove_va_v = ['Albemarle, VA', 'Charlottesville, VA',
               'Alleghany, VA', 'Covington, VA',
               'Augusta, VA', 'Staunton, VA', 'Waynesboro, VA',
               'Bedford, VA', 'Bedford City, VA',
               'Campbell, VA', 'Lynchburg, VA',
               'Carroll, VA', 'Galax, VA',
               'Dinwiddie, VA', 'Colonial Heights, VA', 'Petersburg, VA',
               'Fairfax, VA', 'Fairfax City, VA', 'Falls Church, VA',
               'Frederick, VA', 'Winchester, VA',
               'Greensville, VA', 'Emporia, VA',
               'Henry, VA', 'Martinsville, VA',
               'James City, VA', 'Williamsburg, VA',
               'Montgomery, VA', 'Radford, VA',
               'Pittsylvania, VA', 'Danville, VA',
               'Prince George, VA', 'Hopewell, VA',
               'Prince William, VA', 'Manassas, VA', 'Manassas Park, VA',
               'Roanoke, VA', 'Salem, VA',
               'Rockbridge, VA', 'Buena Vista, VA', 'Lexington, VA',
               'Rockingham, VA', 'Harrisonburg, VA',
               'Southampton, VA', 'Franklin City, VA',
               'Spotsylvania, VA', 'Fredericksburg, VA',
               'Washington, VA', 'Bristol, VA',
               'Wise, VA', 'Norton, VA',
               'York, VA', 'Poquoson, VA']
va_v = va_v.loc[~(va_v['Area'].isin(remove_va_v))]
va_v.tail()

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
12261,2016,Virginia,"Tazewell, VA",18566,15168,"Trump, Donald J.",2895,"Clinton, Hillary Rodham",503,12273,R,81.7,15.59,2.71
12262,2016,Virginia,"Virginia Beach, VA",203262,98224,"Trump, Donald J.",91032,"Clinton, Hillary Rodham",14006,7192,R,48.32,44.79,6.89
12263,2016,Virginia,"Warren, VA",17960,11773,"Trump, Donald J.",5169,"Clinton, Hillary Rodham",1018,6604,R,65.55,28.78,5.67
12266,2016,Virginia,"Westmoreland, VA",8574,4448,"Trump, Donald J.",3836,"Clinton, Hillary Rodham",290,612,R,51.88,44.74,3.38
12270,2016,Virginia,"Wythe, VA",13343,10046,"Trump, Donald J.",2770,"Clinton, Hillary Rodham",527,7276,R,75.29,20.76,3.95


In [73]:
#Add back the new rows for the counties that had been dropped just earlier
add_va_v = [albemarle, alleghany, augusta, bedford, campbell, 
            carroll, dinwiddie, fairfax, frederick, greensville, 
            henry, james_c, montgomery, pittsylvania, prince_george, 
            prince_william, roanoke, rockbridge, rockingham, southampton, 
            spotsylvania, washington, wise, york]
va_v = va_v.append(add_va_v)
va_v = va_v.sort_values(by=['Year','Area'])
va_v.tail()

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
12264,2016,Virginia,"Washington, VA",32879,24212,"Trump, Donald J.",7388,"Clinton, Hillary Rodham",1279,16824,R,73.64,22.47,3.89
12266,2016,Virginia,"Westmoreland, VA",8574,4448,"Trump, Donald J.",3836,"Clinton, Hillary Rodham",290,612,R,51.88,44.74,3.38
12269,2016,Virginia,"Wise, VA",16623,13107,"Trump, Donald J.",3084,"Clinton, Hillary Rodham",432,10023,R,78.85,18.55,2.6
12270,2016,Virginia,"Wythe, VA",13343,10046,"Trump, Donald J.",2770,"Clinton, Hillary Rodham",527,7276,R,75.29,20.76,3.95
12271,2016,Virginia,"York, VA",41295,23929,"Trump, Donald J.",14600,"Clinton, Hillary Rodham",2766,9329,R,57.95,35.36,6.7


In [74]:
#Drop the old Virginia data from vote and add in the new figures
vote = vote.loc[(vote.State != 'Virginia')]
vote = pd.concat([vote, va_v])
vote = vote.sort_values(by=['Year','State','Area'])
vote.loc[(vote.State == 'Virginia')].head()

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2792,2004,Virginia,"Accomack, VA",13356,7726,"Bush, George W.",5518,"Kerry, John F.",112,2208,R,57.85,41.31,0.84
2793,2004,Virginia,"Albemarle, VA",59176,25361,"Bush, George W.",33176,"Kerry, John F.",639,7815,D,42.86,56.06,1.08
2794,2004,Virginia,"Alexandria, VA",61515,19844,"Bush, George W.",41116,"Kerry, John F.",555,21272,D,32.26,66.84,0.9
2795,2004,Virginia,"Alleghany, VA",9496,5066,"Bush, George W.",4382,"Kerry, John F.",48,684,R,53.35,46.15,0.51
2796,2004,Virginia,"Amelia, VA",5397,3499,"Bush, George W.",1862,"Kerry, John F.",36,1637,R,64.83,34.5,0.67


## Part 4E-2: Fixing Virginia in the Income Data 

In [75]:
#Fix the abbreviations to be capitalized all the way and remove the unnecessary punctuation from inc. Then simplify 
#the city descriptions, and to maintain consistency with vote, drop any rows with Virginia under the GeoName column
va_i['GeoName'] = va_i['GeoName'].replace({'Va':'VA'}, regex=True)
va_i['GeoName'] = va_i['GeoName'].str.replace('*','')
va_i['GeoName'] = va_i['GeoName'].str.replace('(','')
va_i['GeoName'] = va_i['GeoName'].str.replace(')','')
va_i['GeoName'] = va_i['GeoName'].str.replace(' Independent City',' City')
va_i = va_i.loc[(va_i['GeoName'] != 'Virginia')]
va_i.head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
8502,"""51001""","Accomack, VA",Personal income (thousands of dollars),Thousands of dollars,921178,955300,979101,1033169,1089135,1092338,1114647,1131424,1185059,1191128,1231057,1278779,1289594,Virginia
8503,"""51001""","Accomack, VA",Population (persons) 1/,Number of persons,36310,35835,35192,34553,33970,33415,33148,33225,33268,32969,32971,32914,32871,Virginia
8504,"""51001""","Accomack, VA",Per capita personal income (dollars) 2/,Dollars,25370,26658,27822,29901,32062,32690,33626,34053,35622,36129,37338,38852,39232,Virginia
8748,"""51901""","Albemarle + Charlottesville, VA",Personal income (thousands of dollars),Thousands of dollars,5459814,5877106,6463224,6897809,7167152,6859572,7240429,7657164,8457472,8329280,8936944,9518529,10184984,Virginia
8749,"""51901""","Albemarle + Charlottesville, VA",Population (persons) 1/,Number of persons,129772,132273,134918,136547,139211,141125,142665,143974,146044,147282,149250,151418,153374,Virginia


In [76]:
#For counties that list cities next to them, for simplicity purposes drop the cities from their descriptions, as their
#figures have already been incorporated
va_i['GeoName'] = np.where((va_i['GeoName'] == 'Albemarle + Charlottesville, VA'),
                           'Albemarle, VA', va_i['GeoName'])
va_i['GeoName'] = np.where((va_i['GeoName'] == 'Alleghany + Covington, VA'),
                           'Alleghany, VA', va_i['GeoName'])
va_i['GeoName'] = np.where((va_i['GeoName'] == 'Augusta, Staunton + Waynesboro, VA'),
                           'Augusta, VA', va_i['GeoName'])
va_i['GeoName'] = np.where((va_i['GeoName'] == 'Campbell + Lynchburg, VA'),
                           'Campbell, VA', va_i['GeoName'])
va_i['GeoName'] = np.where((va_i['GeoName'] == 'Carroll + Galax, VA'),
                           'Carroll, VA', va_i['GeoName'])
va_i['GeoName'] = np.where((va_i['GeoName'] == 'Dinwiddie, Colonial Heights + Petersburg, VA'),
                           'Dinwiddie, VA', va_i['GeoName'])
va_i['GeoName'] = np.where((va_i['GeoName'] == 'Fairfax, Fairfax City + Falls Church, VA'),
                           'Fairfax, VA', va_i['GeoName'])
va_i['GeoName'] = np.where((va_i['GeoName'] == 'Frederick + Winchester, VA'),
                           'Frederick, VA', va_i['GeoName'])
va_i['GeoName'] = np.where((va_i['GeoName'] == 'Greensville + Emporia, VA'),
                           'Greensville, VA', va_i['GeoName'])
va_i['GeoName'] = np.where((va_i['GeoName'] == 'Henry + Martinsville, VA'),
                           'Henry, VA', va_i['GeoName'])
va_i['GeoName'] = np.where((va_i['GeoName'] == 'James City + Williamsburg, VA'),
                           'James City, VA', va_i['GeoName'])
va_i['GeoName'] = np.where((va_i['GeoName'] == 'Montgomery + Radford, VA'),
                           'Montgomery, VA', va_i['GeoName'])
va_i['GeoName'] = np.where((va_i['GeoName'] == 'Pittsylvania + Danville, VA'),
                           'Pittsylvania, VA', va_i['GeoName'])
va_i['GeoName'] = np.where((va_i['GeoName'] == 'Prince George + Hopewell, VA'),
                           'Prince George, VA', va_i['GeoName'])
va_i['GeoName'] = np.where((va_i['GeoName'] == 'Prince William, Manassas + Manassas Park, VA'), 
                           'Prince William, VA', va_i['GeoName'])
va_i['GeoName'] = np.where((va_i['GeoName'] == 'Roanoke + Salem, VA'),
                           'Roanoke, VA', va_i['GeoName'])
va_i['GeoName'] = np.where((va_i['GeoName'] == 'Rockbridge, Buena Vista + Lexington, VA'),
                           'Rockbridge, VA', va_i['GeoName'])
va_i['GeoName'] = np.where((va_i['GeoName'] == 'Rockingham + Harrisonburg, VA'),
                           'Rockingham, VA', va_i['GeoName'])
va_i['GeoName'] = np.where((va_i['GeoName'] == 'Southampton + Franklin, VA'),
                           'Southampton, VA', va_i['GeoName'])
va_i['GeoName'] = np.where((va_i['GeoName'] == 'Spotsylvania + Fredericksburg, VA'),
                           'Spotsylvania, VA', va_i['GeoName'])
va_i['GeoName'] = np.where((va_i['GeoName'] == 'Washington + Bristol, VA'),
                           'Washington, VA', va_i['GeoName'])
va_i['GeoName'] = np.where((va_i['GeoName'] == 'Wise + Norton, VA'),
                           'Wise, VA', va_i['GeoName'])
va_i['GeoName'] = np.where((va_i['GeoName'] == 'York + Poquoson, VA'),
                           'York, VA', va_i['GeoName'])
va_i.head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
8502,"""51001""","Accomack, VA",Personal income (thousands of dollars),Thousands of dollars,921178,955300,979101,1033169,1089135,1092338,1114647,1131424,1185059,1191128,1231057,1278779,1289594,Virginia
8503,"""51001""","Accomack, VA",Population (persons) 1/,Number of persons,36310,35835,35192,34553,33970,33415,33148,33225,33268,32969,32971,32914,32871,Virginia
8504,"""51001""","Accomack, VA",Per capita personal income (dollars) 2/,Dollars,25370,26658,27822,29901,32062,32690,33626,34053,35622,36129,37338,38852,39232,Virginia
8748,"""51901""","Albemarle, VA",Personal income (thousands of dollars),Thousands of dollars,5459814,5877106,6463224,6897809,7167152,6859572,7240429,7657164,8457472,8329280,8936944,9518529,10184984,Virginia
8749,"""51901""","Albemarle, VA",Population (persons) 1/,Number of persons,129772,132273,134918,136547,139211,141125,142665,143974,146044,147282,149250,151418,153374,Virginia


In [77]:
#Compare the lengths of the lists of counties, independent cities, and combinations for Virginia in both datasets
print(len(list(va_v.Area.unique())))
print()
print(len(list(va_i.GeoName.unique())))

105

105


In [78]:
list(va_v.Area.unique())

['Accomack, VA',
 'Albemarle, VA',
 'Alexandria, VA',
 'Alleghany, VA',
 'Amelia, VA',
 'Amherst, VA',
 'Appomattox, VA',
 'Arlington, VA',
 'Augusta, VA',
 'Bath, VA',
 'Bedford, VA',
 'Bland, VA',
 'Botetourt, VA',
 'Brunswick, VA',
 'Buchanan, VA',
 'Buckingham, VA',
 'Campbell, VA',
 'Caroline, VA',
 'Carroll, VA',
 'Charles City, VA',
 'Charlotte, VA',
 'Chesapeake, VA',
 'Chesterfield, VA',
 'Clarke, VA',
 'Craig, VA',
 'Culpeper, VA',
 'Cumberland, VA',
 'Dickenson, VA',
 'Dinwiddie, VA',
 'Essex, VA',
 'Fairfax, VA',
 'Fauquier, VA',
 'Floyd, VA',
 'Fluvanna, VA',
 'Franklin, VA',
 'Frederick, VA',
 'Giles, VA',
 'Gloucester, VA',
 'Goochland, VA',
 'Grayson, VA',
 'Greene, VA',
 'Greensville, VA',
 'Halifax, VA',
 'Hampton, VA',
 'Hanover, VA',
 'Henrico, VA',
 'Henry, VA',
 'Highland, VA',
 'Isle Of Wight, VA',
 'James City, VA',
 'King And Queen, VA',
 'King George, VA',
 'King William, VA',
 'Lancaster, VA',
 'Lee, VA',
 'Loudoun, VA',
 'Louisa, VA',
 'Lunenburg, VA',
 'Mad

In [79]:
list(va_i.GeoName.unique())

['Accomack, VA',
 'Albemarle, VA',
 'Alexandria City, VA',
 'Alleghany, VA',
 'Amelia, VA',
 'Amherst, VA',
 'Appomattox, VA',
 'Arlington, VA',
 'Augusta, VA',
 'Bath, VA',
 'Bedford, VA',
 'Bland, VA',
 'Botetourt, VA',
 'Brunswick, VA',
 'Buchanan, VA',
 'Buckingham, VA',
 'Campbell, VA',
 'Caroline, VA',
 'Carroll, VA',
 'Charles City, VA',
 'Charlotte, VA',
 'Chesapeake City, VA',
 'Chesterfield, VA',
 'Clarke, VA',
 'Craig, VA',
 'Culpeper, VA',
 'Cumberland, VA',
 'Dickenson, VA',
 'Dinwiddie, VA',
 'Essex, VA',
 'Fairfax, VA',
 'Fauquier, VA',
 'Floyd, VA',
 'Fluvanna, VA',
 'Franklin, VA',
 'Frederick, VA',
 'Giles, VA',
 'Gloucester, VA',
 'Goochland, VA',
 'Grayson, VA',
 'Greene, VA',
 'Greensville, VA',
 'Halifax, VA',
 'Hampton City, VA',
 'Hanover, VA',
 'Henrico, VA',
 'Henry, VA',
 'Highland, VA',
 'Isle of Wight, VA',
 'James City, VA',
 'King George, VA',
 'King William, VA',
 'King and Queen, VA',
 'Lancaster, VA',
 'Lee, VA',
 'Loudoun, VA',
 'Louisa, VA',
 'Lunenb

In [80]:
#To maintain consistency between the two datasets with respect to their names, drop City from any independent cities, 
#provided that they do not share a similarly named county (Richmond and Roanoke)
va_i['GeoName'] = np.where((va_i['GeoName'] == 'Alexandria City, VA'), 'Alexandria, VA', va_i['GeoName'])
va_i['GeoName'] = np.where((va_i['GeoName'] == 'Chesapeake City, VA'), 'Chesapeake, VA', va_i['GeoName']) 
va_i['GeoName'] = np.where((va_i['GeoName'] == 'Newport News City, VA'), 'Newport News, VA', va_i['GeoName']) 
va_i['GeoName'] = np.where((va_i['GeoName'] == 'Norfolk City, VA'), 'Norfolk, VA', va_i['GeoName']) 
va_i['GeoName'] = np.where((va_i['GeoName'] == 'Portsmouth City, VA'), 'Portsmouth, VA', va_i['GeoName']) 
va_i['GeoName'] = np.where((va_i['GeoName'] == 'Suffolk City, VA'), 'Suffolk, VA', va_i['GeoName']) 
va_i['GeoName'] = np.where((va_i['GeoName'] == 'Virginia Beach City, VA'), 'Virginia Beach, VA', va_i['GeoName'])

In [81]:
#Compare the lengths of the lists of individual counties and cities in Virginia in vote and inc
print(len(list(va_v.Area.unique())))
print()
print(len(list(va_i.GeoName.unique())))

105

105


In [82]:
#Test if any of the counties and cities are off in their spelling between vote and inc
a = list(va_v.Area.unique())
b = list(va_i.GeoName.unique())
list(set(b) - set(a))

['King and Queen, VA', 'Isle of Wight, VA', 'Hampton City, VA']

In [83]:
#Rename the counties and cities above to the same as in vote to maintain consistency
va_i['GeoName'] = np.where((va_i['GeoName'] == 'Hampton City, VA'), 'Hampton, VA', va_i['GeoName'])
va_i['GeoName'] = np.where((va_i['GeoName'] == 'Isle of Wight, VA'), 'Isle Of Wight, VA', va_i['GeoName'])
va_i['GeoName'] = np.where((va_i['GeoName'] == 'King and Queen, VA'), 'King And Queen, VA', va_i['GeoName'])
print(list(va_i['GeoName'].unique()))

['Accomack, VA', 'Albemarle, VA', 'Alexandria, VA', 'Alleghany, VA', 'Amelia, VA', 'Amherst, VA', 'Appomattox, VA', 'Arlington, VA', 'Augusta, VA', 'Bath, VA', 'Bedford, VA', 'Bland, VA', 'Botetourt, VA', 'Brunswick, VA', 'Buchanan, VA', 'Buckingham, VA', 'Campbell, VA', 'Caroline, VA', 'Carroll, VA', 'Charles City, VA', 'Charlotte, VA', 'Chesapeake, VA', 'Chesterfield, VA', 'Clarke, VA', 'Craig, VA', 'Culpeper, VA', 'Cumberland, VA', 'Dickenson, VA', 'Dinwiddie, VA', 'Essex, VA', 'Fairfax, VA', 'Fauquier, VA', 'Floyd, VA', 'Fluvanna, VA', 'Franklin, VA', 'Frederick, VA', 'Giles, VA', 'Gloucester, VA', 'Goochland, VA', 'Grayson, VA', 'Greene, VA', 'Greensville, VA', 'Halifax, VA', 'Hampton, VA', 'Hanover, VA', 'Henrico, VA', 'Henry, VA', 'Highland, VA', 'Isle Of Wight, VA', 'James City, VA', 'King George, VA', 'King William, VA', 'King And Queen, VA', 'Lancaster, VA', 'Lee, VA', 'Loudoun, VA', 'Louisa, VA', 'Lunenburg, VA', 'Madison, VA', 'Mathews, VA', 'Mecklenburg, VA', 'Middlesex, V

In [84]:
#Drop the old Virginia data from inc and add in the new figures
inc = inc.loc[(inc.State != 'Virginia')]
inc = pd.concat([inc, va_i])
inc = inc.sort_values(by=['State','GeoName'])
inc.loc[(inc.State == 'Virginia')].head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
8502,"""51001""","Accomack, VA",Personal income (thousands of dollars),Thousands of dollars,921178,955300,979101,1033169,1089135,1092338,1114647,1131424,1185059,1191128,1231057,1278779,1289594,Virginia
8503,"""51001""","Accomack, VA",Population (persons) 1/,Number of persons,36310,35835,35192,34553,33970,33415,33148,33225,33268,32969,32971,32914,32871,Virginia
8504,"""51001""","Accomack, VA",Per capita personal income (dollars) 2/,Dollars,25370,26658,27822,29901,32062,32690,33626,34053,35622,36129,37338,38852,39232,Virginia
8748,"""51901""","Albemarle, VA",Personal income (thousands of dollars),Thousands of dollars,5459814,5877106,6463224,6897809,7167152,6859572,7240429,7657164,8457472,8329280,8936944,9518529,10184984,Virginia
8749,"""51901""","Albemarle, VA",Population (persons) 1/,Number of persons,129772,132273,134918,136547,139211,141125,142665,143974,146044,147282,149250,151418,153374,Virginia


## Part 4E-3: Fixing Virginia in the GDP Data

In [85]:
#Fix the abbreviations to be capitalized all the way and remove the unnecessary punctuation from gdp. Then simplify 
#the city descriptions, and to maintain consistency with vote, drop any rows with Virginia under the GeoName column
va_g['GeoName'] = va_g['GeoName'].replace({'Va':'VA'}, regex=True)
va_g['GeoName'] = va_g['GeoName'].str.replace('*','')
va_g['GeoName'] = va_g['GeoName'].str.replace('(','')
va_g['GeoName'] = va_g['GeoName'].str.replace(')','')
va_g['GeoName'] = va_g['GeoName'].str.replace(' Independent City',' City')
va_g = va_g.loc[(va_g['GeoName'] != 'Virginia')]
va_g.head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
96356,"""51001""","Accomack, VA",All industry total,Thousands of dollars,1436773,1502080,1602004,1594285,1724978,2324058,2415250,2286572,2463244,2112316,1833399,1731347,1584757,Virginia
96375,"""51001""","Accomack, VA",Administrative and support and waste manage...,Thousands of dollars,18468,25893,27436,33860,28743,(D),33230,20363,19822,18440,15473,15417,16641,Virginia
96376,"""51001""","Accomack, VA","Educational services, health care, and socia...",Thousands of dollars,37359,41514,42334,41478,48257,49905,47775,46231,49976,(D),(D),(D),(D),Virginia
96377,"""51001""","Accomack, VA",Educational services,Thousands of dollars,(D),(D),(D),704,(D),611,(D),(D),(D),(D),(D),(D),(D),Virginia
96378,"""51001""","Accomack, VA",Health care and social assistance,Thousands of dollars,(D),(D),(D),40775,(D),49295,(D),(D),(D),(D),(D),(D),(D),Virginia


In [86]:
#For counties that list cities next to them, for simplicity purposes drop the cities from their descriptions, as their
#figures have already been incorporated
va_g['GeoName'] = np.where((va_g['GeoName'] == 'Albemarle + Charlottesville, VA'),
                           'Albemarle, VA', va_g['GeoName'])
va_g['GeoName'] = np.where((va_g['GeoName'] == 'Alleghany + Covington, VA'),
                           'Alleghany, VA', va_g['GeoName'])
va_g['GeoName'] = np.where((va_g['GeoName'] == 'Augusta, Staunton + Waynesboro, VA'),
                           'Augusta, VA', va_g['GeoName'])
va_g['GeoName'] = np.where((va_g['GeoName'] == 'Campbell + Lynchburg, VA'),
                           'Campbell, VA', va_g['GeoName'])
va_g['GeoName'] = np.where((va_g['GeoName'] == 'Carroll + Galax, VA'),
                           'Carroll, VA', va_g['GeoName'])
va_g['GeoName'] = np.where((va_g['GeoName'] == 'Dinwiddie, Colonial Heights + Petersburg, VA'),
                           'Dinwiddie, VA', va_g['GeoName'])
va_g['GeoName'] = np.where((va_g['GeoName'] == 'Fairfax, Fairfax City + Falls Church, VA'),
                           'Fairfax, VA', va_g['GeoName'])
va_g['GeoName'] = np.where((va_g['GeoName'] == 'Frederick + Winchester, VA'),
                           'Frederick, VA', va_g['GeoName'])
va_g['GeoName'] = np.where((va_g['GeoName'] == 'Greensville + Emporia, VA'),
                           'Greensville, VA', va_g['GeoName'])
va_g['GeoName'] = np.where((va_g['GeoName'] == 'Henry + Martinsville, VA'),
                           'Henry, VA', va_g['GeoName'])
va_g['GeoName'] = np.where((va_g['GeoName'] == 'James City + Williamsburg, VA'),
                           'James City, VA', va_g['GeoName'])
va_g['GeoName'] = np.where((va_g['GeoName'] == 'Montgomery + Radford, VA'),
                           'Montgomery, VA', va_g['GeoName'])
va_g['GeoName'] = np.where((va_g['GeoName'] == 'Pittsylvania + Danville, VA'),
                           'Pittsylvania, VA', va_g['GeoName'])
va_g['GeoName'] = np.where((va_g['GeoName'] == 'Prince George + Hopewell, VA'),
                           'Prince George, VA', va_g['GeoName'])
va_g['GeoName'] = np.where((va_g['GeoName'] == 'Prince William, Manassas + Manassas Park, VA'), 
                           'Prince William, VA', va_g['GeoName'])
va_g['GeoName'] = np.where((va_g['GeoName'] == 'Roanoke + Salem, VA'),
                           'Roanoke, VA', va_g['GeoName'])
va_g['GeoName'] = np.where((va_g['GeoName'] == 'Rockbridge, Buena Vista + Lexington, VA'),
                           'Rockbridge, VA', va_g['GeoName'])
va_g['GeoName'] = np.where((va_g['GeoName'] == 'Rockingham + Harrisonburg, VA'),
                           'Rockingham, VA', va_g['GeoName'])
va_g['GeoName'] = np.where((va_g['GeoName'] == 'Southampton + Franklin, VA'),
                           'Southampton, VA', va_g['GeoName'])
va_g['GeoName'] = np.where((va_g['GeoName'] == 'Spotsylvania + Fredericksburg, VA'),
                           'Spotsylvania, VA', va_g['GeoName'])
va_g['GeoName'] = np.where((va_g['GeoName'] == 'Washington + Bristol, VA'),
                           'Washington, VA', va_g['GeoName'])
va_g['GeoName'] = np.where((va_g['GeoName'] == 'Wise + Norton, VA'),
                           'Wise, VA', va_g['GeoName'])
va_g['GeoName'] = np.where((va_g['GeoName'] == 'York + Poquoson, VA'),
                           'York, VA', va_g['GeoName'])
va_g.head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
96356,"""51001""","Accomack, VA",All industry total,Thousands of dollars,1436773,1502080,1602004,1594285,1724978,2324058,2415250,2286572,2463244,2112316,1833399,1731347,1584757,Virginia
96375,"""51001""","Accomack, VA",Administrative and support and waste manage...,Thousands of dollars,18468,25893,27436,33860,28743,(D),33230,20363,19822,18440,15473,15417,16641,Virginia
96376,"""51001""","Accomack, VA","Educational services, health care, and socia...",Thousands of dollars,37359,41514,42334,41478,48257,49905,47775,46231,49976,(D),(D),(D),(D),Virginia
96377,"""51001""","Accomack, VA",Educational services,Thousands of dollars,(D),(D),(D),704,(D),611,(D),(D),(D),(D),(D),(D),(D),Virginia
96378,"""51001""","Accomack, VA",Health care and social assistance,Thousands of dollars,(D),(D),(D),40775,(D),49295,(D),(D),(D),(D),(D),(D),(D),Virginia


In [87]:
#Compare the lengths of the lists of counties, independent cities, and combinations for Virginia in vote and gdp
print(len(list(va_v.Area.unique())))
print()
print(len(list(va_g.GeoName.unique())))

105

105


In [88]:
#To maintain consistency between the two datasets with respect to their names, drop City from any independent cities, 
#provided that they do not share a similarly named county (Richmond and Roanoke)
va_g['GeoName'] = np.where((va_g['GeoName'] == 'Alexandria City, VA'), 'Alexandria, VA', va_g['GeoName'])
va_g['GeoName'] = np.where((va_g['GeoName'] == 'Chesapeake City, VA'), 'Chesapeake, VA', va_g['GeoName']) 
va_g['GeoName'] = np.where((va_g['GeoName'] == 'Newport News City, VA'), 'Newport News, VA', va_g['GeoName']) 
va_g['GeoName'] = np.where((va_g['GeoName'] == 'Norfolk City, VA'), 'Norfolk, VA', va_g['GeoName']) 
va_g['GeoName'] = np.where((va_g['GeoName'] == 'Portsmouth City, VA'), 'Portsmouth, VA', va_g['GeoName']) 
va_g['GeoName'] = np.where((va_g['GeoName'] == 'Suffolk City, VA'), 'Suffolk, VA', va_g['GeoName']) 
va_g['GeoName'] = np.where((va_g['GeoName'] == 'Virginia Beach City, VA'), 'Virginia Beach, VA', va_g['GeoName'])

In [89]:
#Test if any of the counties and cities are off in their spelling between vote and gdp
a = list(va_v.Area.unique())
b = list(va_g.GeoName.unique())
list(set(b) - set(a))

['King and Queen, VA', 'Isle of Wight, VA', 'Hampton City, VA']

In [90]:
#Rename the counties and cities above to the same as in vote to maintain consistency
va_g['GeoName'] = np.where((va_g['GeoName'] == 'Hampton City, VA'), 'Hampton, VA', va_g['GeoName'])
va_g['GeoName'] = np.where((va_g['GeoName'] == 'Isle of Wight, VA'), 'Isle Of Wight, VA', va_g['GeoName'])
va_g['GeoName'] = np.where((va_g['GeoName'] == 'King and Queen, VA'), 'King And Queen, VA', va_g['GeoName'])

In [91]:
#Drop the old Virginia data from gdp and add in the new figures
gdp = gdp.loc[(gdp.State != 'Virginia')]
gdp = pd.concat([gdp, va_g])
gdp = gdp.sort_values(by=['State','GeoName'])
gdp.loc[(gdp.State == 'Virginia')].head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
96356,"""51001""","Accomack, VA",All industry total,Thousands of dollars,1436773,1502080,1602004,1594285,1724978,2324058,2415250,2286572,2463244,2112316,1833399,1731347,1584757,Virginia
96375,"""51001""","Accomack, VA",Administrative and support and waste manage...,Thousands of dollars,18468,25893,27436,33860,28743,(D),33230,20363,19822,18440,15473,15417,16641,Virginia
96376,"""51001""","Accomack, VA","Educational services, health care, and socia...",Thousands of dollars,37359,41514,42334,41478,48257,49905,47775,46231,49976,(D),(D),(D),(D),Virginia
96377,"""51001""","Accomack, VA",Educational services,Thousands of dollars,(D),(D),(D),704,(D),611,(D),(D),(D),(D),(D),(D),(D),Virginia
96378,"""51001""","Accomack, VA",Health care and social assistance,Thousands of dollars,(D),(D),(D),40775,(D),49295,(D),(D),(D),(D),(D),(D),(D),Virginia


## Part 4F: Fixing Maryland 

In [92]:
#Slice Maryland from the datasets
md_i = inc.loc[(inc.State == 'Maryland')]
md_i = md_i.sort_values(by=['GeoName'])
print(list(md_i.GeoName.unique()))
print()
md_g = gdp.loc[(gdp.State == 'Maryland')]
md_g = md_g.sort_values(by=['GeoName'])
print(list(md_g.GeoName.unique()))
print()
md_v = vote.loc[(vote.State == 'Maryland')]
md_v = md_v.sort_values(by=['Area','Year'])
print(list(md_v.Area.unique()))

['Allegany, MD', 'Anne Arundel, MD', 'Baltimore (Independent City), MD', 'Baltimore, MD', 'Calvert, MD', 'Caroline, MD', 'Carroll, MD', 'Cecil, MD', 'Charles, MD', 'Dorchester, MD', 'Frederick, MD', 'Garrett, MD', 'Harford, MD', 'Howard, MD', 'Kent, MD', 'Maryland', 'Montgomery, MD', "Prince George's, MD", "Queen Anne's, MD", 'Somerset, MD', "St. Mary's, MD", 'Talbot, MD', 'Washington, MD', 'Wicomico, MD', 'Worcester, MD']

['Allegany, MD', 'Anne Arundel, MD', 'Baltimore (Independent City), MD', 'Baltimore, MD', 'Calvert, MD', 'Caroline, MD', 'Carroll, MD', 'Cecil, MD', 'Charles, MD', 'Dorchester, MD', 'Frederick, MD', 'Garrett, MD', 'Harford, MD', 'Howard, MD', 'Kent, MD', 'Maryland', 'Montgomery, MD', "Prince George's, MD", "Queen Anne's, MD", 'Somerset, MD', "St. Mary's, MD", 'Talbot, MD', 'Washington, MD', 'Wicomico, MD', 'Worcester, MD']

['Allegany, MD', 'Anne Arundel, MD', 'Baltimore City, MD', 'Baltimore, MD', 'Calvert, MD', 'Caroline, MD', 'Carroll, MD', 'Cecil, MD', 'Charles,

In [93]:
#Fix the abbreviations and simplify the city description for Baltimore.  To maintain consistency with vote, drop any 
#rows with Maryland under the GeoName column
md_i['GeoName'] = md_i['GeoName'].replace({'Md':'MD'}, regex=True)
md_i['GeoName'] = np.where((md_i['GeoName'] == 'Baltimore (Independent City), MD'), 
                           'Baltimore City, MD', md_i['GeoName'])
md_i = md_i.loc[(md_i['GeoName'] != 'Maryland')]
md_i.head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
3543,"""24001""","Allegany, MD",Personal income (thousands of dollars),Thousands of dollars,2175434,2101765,2045184,2074941,2233858,2306560,2398887,2479759,2509845,2538636,2614150,2677429,2746585,Maryland
3544,"""24001""","Allegany, MD",Population (persons) 1/,Number of persons,74408,73979,73980,74449,74638,75101,74965,74572,73947,73576,73020,72462,72053,Maryland
3545,"""24001""","Allegany, MD",Per capita personal income (dollars) 2/,Dollars,29237,28410,27645,27871,29929,30713,32000,33253,33941,34504,35800,36949,38119,Maryland
3546,"""24003""","Anne Arundel, MD",Personal income (thousands of dollars),Thousands of dollars,23501078,24673103,26100019,27136400,27952098,27643648,28572128,30407771,31467873,31823273,33131681,34594658,35658932,Maryland
3547,"""24003""","Anne Arundel, MD",Population (persons) 1/,Number of persons,513259,516171,517698,520503,525304,532395,539284,544803,550333,555417,559142,563027,567254,Maryland


In [94]:
#Seeing how some counties have apostrophes, this and the next cell are preliminary checks to see if other states have 
#similar counties
vote.loc[(vote['Area'].str.contains("'s") | (vote['Area'].str.contains("'S")))]

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent


In [95]:
inc.loc[(inc['GeoName'].str.contains("'s") | (inc['GeoName'].str.contains("'S")))]

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
3588,"""24033""","Prince George's, MD",Personal income (thousands of dollars),Thousands of dollars,29243042,30504376,31554188,32534016,33733444,34633481,35447497,37153669,37781264,38013099,39241009,40564696,41744817,Maryland
3589,"""24033""","Prince George's, MD",Population (persons) 1/,Number of persons,845950,853271,852097,849916,850167,856161,866419,874389,882490,890658,899686,906197,908501,Maryland
3590,"""24033""","Prince George's, MD",Per capita personal income (dollars) 2/,Dollars,34568,35750,37031,38279,39679,40452,40913,42491,42812,42680,43616,44764,45949,Maryland
3591,"""24035""","Queen Anne's, MD",Personal income (thousands of dollars),Thousands of dollars,1866460,1939029,2056016,2178675,2308583,2289687,2372077,2510475,2600589,2606200,2706621,2811726,2948304,Maryland
3592,"""24035""","Queen Anne's, MD",Population (persons) 1/,Number of persons,44411,44879,45716,46517,47063,47532,47806,48262,48497,48517,48782,48984,49063,Maryland
3593,"""24035""","Queen Anne's, MD",Per capita personal income (dollars) 2/,Dollars,42027,43206,44974,46836,49053,48171,49619,52018,53624,53717,55484,57401,60092,Maryland
3594,"""24037""","St. Mary's, MD",Personal income (thousands of dollars),Thousands of dollars,3427720,3661019,3927946,4207038,4484205,4684281,4961849,5266435,5364105,5372070,5559929,5798401,5971637,Maryland
3595,"""24037""","St. Mary's, MD",Population (persons) 1/,Number of persons,94900,96871,98849,100599,101921,103273,105761,107566,108800,109224,109913,110985,111750,Maryland
3596,"""24037""","St. Mary's, MD",Per capita personal income (dollars) 2/,Dollars,36119,37793,39737,41820,43997,45358,46916,48960,49302,49184,50585,52245,53437,Maryland


In [96]:
#The counties with apostrophes are written messily in inc, so these should be fixed up 
md_i['GeoName'] = np.where((md_i['GeoName'] == "Prince George'S, MD"), "Prince George's, MD", md_i['GeoName'])
md_i['GeoName'] = np.where((md_i['GeoName'] == "Queen Anne'S, MD"), "Queen Anne's, MD", md_i['GeoName'])
md_i['GeoName'] = np.where((md_i['GeoName'] == "St. Mary'S, MD"), "St. Mary's, MD", md_i['GeoName'])
md_i.loc[(md_i['GeoName'].str.contains("'s"))].head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
3588,"""24033""","Prince George's, MD",Personal income (thousands of dollars),Thousands of dollars,29243042,30504376,31554188,32534016,33733444,34633481,35447497,37153669,37781264,38013099,39241009,40564696,41744817,Maryland
3589,"""24033""","Prince George's, MD",Population (persons) 1/,Number of persons,845950,853271,852097,849916,850167,856161,866419,874389,882490,890658,899686,906197,908501,Maryland
3590,"""24033""","Prince George's, MD",Per capita personal income (dollars) 2/,Dollars,34568,35750,37031,38279,39679,40452,40913,42491,42812,42680,43616,44764,45949,Maryland
3592,"""24035""","Queen Anne's, MD",Population (persons) 1/,Number of persons,44411,44879,45716,46517,47063,47532,47806,48262,48497,48517,48782,48984,49063,Maryland
3593,"""24035""","Queen Anne's, MD",Per capita personal income (dollars) 2/,Dollars,42027,43206,44974,46836,49053,48171,49619,52018,53624,53717,55484,57401,60092,Maryland


In [97]:
#Rename the same counties in vote to maintain consistency with inc
md_v['Area'] = np.where((md_v['Area'] == "Prince Georges, MD"), "Prince George's, MD", md_v['Area'])
md_v['Area'] = np.where((md_v['Area'] == "Queen Annes, MD"), "Queen Anne's, MD", md_v['Area'])
md_v['Area'] = np.where((md_v['Area'] == "St. Marys, MD"), "St. Mary's, MD", md_v['Area'])
md_v.loc[(md_v['Area'].str.contains("'s"))].head()

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
1180,2004,Maryland,"Prince George's, MD",318474,55532,"Bush, George W.",260532,"Kerry, John F.",2410,205000,D,17.44,81.81,0.76
4295,2008,Maryland,"Prince George's, MD",374026,38833,"McCain, John S. III",332396,"Obama, Barack H.",2797,293563,D,10.38,88.87,0.75
7410,2012,Maryland,"Prince George's, MD",387744,35734,"Romney, W. Mitt",347938,"Obama, Barack H.",4072,312204,D,9.22,89.73,1.05
10526,2016,Maryland,"Prince George's, MD",390385,32811,"Trump, Donald J.",344049,"Clinton, Hillary Rodham",13525,311238,D,8.4,88.13,3.47
1181,2004,Maryland,"Queen Anne's, MD",21794,14489,"Bush, George W.",7070,"Kerry, John F.",235,7419,R,66.48,32.44,1.08


In [98]:
#Fix the abbreviations and simplify the city description for Baltimore.  To maintain consistency with vote, drop any 
#rows with Maryland under the GeoName column
md_g['GeoName'] = md_g['GeoName'].replace({'Md':'MD'}, regex=True)
md_g['GeoName'] = np.where((md_g['GeoName'] == 'Baltimore (Independent City), MD'), 
                           'Baltimore City, MD', md_g['GeoName'])
md_g = md_g.loc[(md_g['GeoName'] != 'Maryland')]
md_g.head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
40154,"""24001""","Allegany, MD",All industry total,Thousands of dollars,2153585,2244656,2455319,2540482,2550300,2598093,2795389,2848760,2824789,2877716,2910998,3037113,3132106,Maryland
40173,"""24001""","Allegany, MD",Administrative and support and waste manage...,Thousands of dollars,41014,46259,55232,61204,57172,60717,60436,61470,59358,57526,58991,59057,55627,Maryland
40174,"""24001""","Allegany, MD","Educational services, health care, and socia...",Thousands of dollars,306130,323068,343147,345725,364765,387870,409398,414966,415656,407522,415366,431148,451621,Maryland
40175,"""24001""","Allegany, MD",Educational services,Thousands of dollars,12971,12899,13581,14647,15852,18583,21168,22493,22684,20481,20248,17877,18166,Maryland
40176,"""24001""","Allegany, MD",Health care and social assistance,Thousands of dollars,293159,310169,329566,331078,348914,369287,388231,392473,392972,387041,395118,413271,433455,Maryland


In [99]:
#The counties with apostrophes are written messily in gdp, so these should be fixed up 
md_g['GeoName'] = np.where((md_g['GeoName'] == "Prince George'S, MD"), "Prince George's, MD", md_g['GeoName'])
md_g['GeoName'] = np.where((md_g['GeoName'] == "Queen Anne'S, MD"), "Queen Anne's, MD", md_g['GeoName'])
md_g['GeoName'] = np.where((md_g['GeoName'] == "St. Mary'S, MD"), "St. Mary's, MD", md_g['GeoName'])
md_g.loc[(md_g['GeoName'].str.contains("'s"))].head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
40682,"""24033""","Prince George's, MD",Management of companies and enterprises,Thousands of dollars,252038,268839,259890,249241,271718,238750,237569,186155,186980,213351,221904,207728,254578,Maryland
40683,"""24033""","Prince George's, MD",Administrative and support and waste manage...,Thousands of dollars,962713,1031024,1085907,1230512,1228309,1173794,1199537,1195707,1177881,1147464,1182212,1233504,1272411,Maryland
40684,"""24033""","Prince George's, MD","Educational services, health care, and socia...",Thousands of dollars,1560806,1613269,1724914,1787189,1928372,2081121,2108233,2185557,2315260,2387077,2532538,2638826,2789388,Maryland
40685,"""24033""","Prince George's, MD",Educational services,Thousands of dollars,190959,211076,225241,237563,254101,285400,273643,286579,313368,326924,361684,379115,409465,Maryland
40686,"""24033""","Prince George's, MD",Health care and social assistance,Thousands of dollars,1369847,1402193,1499673,1549625,1674271,1795721,1834591,1898979,2001892,2060153,2170854,2259711,2379923,Maryland


In [100]:
print(list(md_i.GeoName.unique()))
print()
print(list(md_v.Area.unique()))
print()
print(list(md_g.GeoName.unique()))

['Allegany, MD', 'Anne Arundel, MD', 'Baltimore City, MD', 'Baltimore, MD', 'Calvert, MD', 'Caroline, MD', 'Carroll, MD', 'Cecil, MD', 'Charles, MD', 'Dorchester, MD', 'Frederick, MD', 'Garrett, MD', 'Harford, MD', 'Howard, MD', 'Kent, MD', 'Montgomery, MD', "Prince George's, MD", "Queen Anne's, MD", 'Somerset, MD', "St. Mary's, MD", 'Talbot, MD', 'Washington, MD', 'Wicomico, MD', 'Worcester, MD']

['Allegany, MD', 'Anne Arundel, MD', 'Baltimore City, MD', 'Baltimore, MD', 'Calvert, MD', 'Caroline, MD', 'Carroll, MD', 'Cecil, MD', 'Charles, MD', 'Dorchester, MD', 'Frederick, MD', 'Garrett, MD', 'Harford, MD', 'Howard, MD', 'Kent, MD', 'Montgomery, MD', "Prince George's, MD", "Queen Anne's, MD", 'Somerset, MD', "St. Mary's, MD", 'Talbot, MD', 'Washington, MD', 'Wicomico, MD', 'Worcester, MD']

['Allegany, MD', 'Anne Arundel, MD', 'Baltimore City, MD', 'Baltimore, MD', 'Calvert, MD', 'Caroline, MD', 'Carroll, MD', 'Cecil, MD', 'Charles, MD', 'Dorchester, MD', 'Frederick, MD', 'Garrett, M

In [101]:
#Drop the old Maryland data from vote and add in the new figures
vote = vote.loc[(vote.State != 'Maryland')]
vote = pd.concat([vote, md_v])
vote = vote.sort_values(by=['Year','State','Area'])
vote.loc[(vote.State == 'Maryland')].head()

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
1164,2004,Maryland,"Allegany, MD",29855,18980,"Bush, George W.",10576,"Kerry, John F.",299,8404,R,63.57,35.42,1.0
1165,2004,Maryland,"Anne Arundel, MD",239667,133231,"Bush, George W.",103324,"Kerry, John F.",3112,29907,R,55.59,43.11,1.3
1167,2004,Maryland,"Baltimore City, MD",213563,36230,"Bush, George W.",175022,"Kerry, John F.",2311,138792,D,16.96,81.95,1.08
1166,2004,Maryland,"Baltimore, MD",353479,166051,"Bush, George W.",182474,"Kerry, John F.",4954,16423,D,46.98,51.62,1.4
1168,2004,Maryland,"Calvert, MD",39351,23017,"Bush, George W.",15967,"Kerry, John F.",367,7050,R,58.49,40.58,0.93


In [102]:
#Drop the old Maryland data from inc and add in the new figures
inc = inc.loc[(inc.State != 'Maryland')]
inc = pd.concat([inc, md_i])
inc = inc.sort_values(by=['State','GeoName'])
inc.loc[(inc.State == 'Maryland')].head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
3543,"""24001""","Allegany, MD",Personal income (thousands of dollars),Thousands of dollars,2175434,2101765,2045184,2074941,2233858,2306560,2398887,2479759,2509845,2538636,2614150,2677429,2746585,Maryland
3544,"""24001""","Allegany, MD",Population (persons) 1/,Number of persons,74408,73979,73980,74449,74638,75101,74965,74572,73947,73576,73020,72462,72053,Maryland
3545,"""24001""","Allegany, MD",Per capita personal income (dollars) 2/,Dollars,29237,28410,27645,27871,29929,30713,32000,33253,33941,34504,35800,36949,38119,Maryland
3546,"""24003""","Anne Arundel, MD",Personal income (thousands of dollars),Thousands of dollars,23501078,24673103,26100019,27136400,27952098,27643648,28572128,30407771,31467873,31823273,33131681,34594658,35658932,Maryland
3547,"""24003""","Anne Arundel, MD",Population (persons) 1/,Number of persons,513259,516171,517698,520503,525304,532395,539284,544803,550333,555417,559142,563027,567254,Maryland


In [103]:
#Drop the old Maryland data from gdp and add in the new figures
gdp = gdp.loc[(gdp.State != 'Maryland')]
gdp = pd.concat([gdp, md_g])
gdp = gdp.sort_values(by=['State','GeoName'])
gdp.loc[(gdp.State == 'Maryland')].head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
40154,"""24001""","Allegany, MD",All industry total,Thousands of dollars,2153585,2244656,2455319,2540482,2550300,2598093,2795389,2848760,2824789,2877716,2910998,3037113,3132106,Maryland
40173,"""24001""","Allegany, MD",Administrative and support and waste manage...,Thousands of dollars,41014,46259,55232,61204,57172,60717,60436,61470,59358,57526,58991,59057,55627,Maryland
40174,"""24001""","Allegany, MD","Educational services, health care, and socia...",Thousands of dollars,306130,323068,343147,345725,364765,387870,409398,414966,415656,407522,415366,431148,451621,Maryland
40175,"""24001""","Allegany, MD",Educational services,Thousands of dollars,12971,12899,13581,14647,15852,18583,21168,22493,22684,20481,20248,17877,18166,Maryland
40176,"""24001""","Allegany, MD",Health care and social assistance,Thousands of dollars,293159,310169,329566,331078,348914,369287,388231,392473,392972,387041,395118,413271,433455,Maryland


## Part 4G: Fixing Nevada 

In [104]:
#Slice Nevada from the datasets
nv_i = inc.loc[(inc.State == 'Nevada')]
nv_i = nv_i.sort_values(by=['GeoName'])
print(list(nv_i.GeoName.unique()))
print()
nv_g = gdp.loc[(gdp.State == 'Nevada')]
nv_g = nv_g.sort_values(by=['GeoName'])
print(list(nv_g.GeoName.unique()))
print()
nv_v = vote.loc[(vote.State == 'Nevada')]
nv_v = nv_v.sort_values(by=['Area','Year'])
print(list(nv_v.Area.unique()))

['Carson City (Independent City), NV', 'Churchill, NV', 'Clark, NV', 'Douglas, NV', 'Elko, NV', 'Esmeralda, NV', 'Eureka, NV', 'Humboldt, NV', 'Lander, NV', 'Lincoln, NV', 'Lyon, NV', 'Mineral, NV', 'Nevada', 'Nye, NV', 'Pershing, NV', 'Storey, NV', 'Washoe, NV', 'White Pine, NV']

['Carson City (Independent City), NV', 'Churchill, NV', 'Clark, NV', 'Douglas, NV', 'Elko, NV', 'Esmeralda, NV', 'Eureka, NV', 'Humboldt, NV', 'Lander, NV', 'Lincoln, NV', 'Lyon, NV', 'Mineral, NV', 'Nevada', 'Nye, NV', 'Pershing, NV', 'Storey, NV', 'Washoe, NV', 'White Pine, NV']

['Carson City, NV', 'Churchill, NV', 'Clark, NV', 'Douglas, NV', 'Elko, NV', 'Esmeralda, NV', 'Eureka, NV', 'Humboldt, NV', 'Lander, NV', 'Lincoln, NV', 'Lyon, NV', 'Mineral, NV', 'Nye, NV', 'Pershing, NV', 'Storey, NV', 'Washoe, NV', 'White Pine, NV']


In [105]:
#Fix the abbreviations and simplify the city description for Carson City.  To maintain consistency with vote, drop 
#any rows with Nevada under the GeoName column
nv_i['GeoName'] = nv_i['GeoName'].replace({'Nv':'NV'}, regex=True)
nv_i['GeoName'] = np.where((nv_i['GeoName'] == 'Carson City (Independent City), NV'), 
                           'Carson City, NV', nv_i['GeoName'])
nv_i = nv_i.loc[(nv_i['GeoName'] != 'Nevada')]
nv_i.head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
5277,"""32510""","Carson City, NV",Personal income (thousands of dollars),Thousands of dollars,1175447,1374248,1691496,1623157,1659389,2273150,2431986,2353165,2143871,2191065,2298527,2460841,2478381,Nevada
5278,"""32510""","Carson City, NV",Population (persons) 1/,Number of persons,55995,55982,55410,55288,55552,55401,54981,54676,54312,53726,54082,54068,54147,Nevada
5279,"""32510""","Carson City, NV",Per capita personal income (dollars) 2/,Dollars,20992,24548,30527,29358,29871,41031,44233,43038,39473,40782,42501,45514,45771,Nevada
5229,"""32001""","Churchill, NV",Personal income (thousands of dollars),Thousands of dollars,710639,757744,771769,784661,809594,791472,828714,864902,859930,870490,919979,957762,964215,Nevada
5230,"""32001""","Churchill, NV",Population (persons) 1/,Number of persons,24226,24593,24842,24961,25049,25067,24818,24593,24245,23927,23824,23957,23904,Nevada


In [106]:
#Fix the abbreviations and simplify the city description for Carson City.  To maintain consistency with vote, drop 
#any rows with Nevada under the GeoName column
nv_g['GeoName'] = nv_g['GeoName'].replace({'Nv':'NV'}, regex=True)
nv_g['GeoName'] = np.where((nv_g['GeoName'] == 'Carson City (Independent City), NV'), 
                           'Carson City, NV', nv_g['GeoName'])
nv_g = nv_g.loc[(nv_g['GeoName'] != 'Nevada')]
nv_g.head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
59806,"""32510""","Carson City, NV",All industry total,Thousands of dollars,2565152,2726707,2896381,3003026,2997110,3059712,3221659,3034855,3090326,3157090,3024832,3169718,3208608,Nevada
59825,"""32510""","Carson City, NV",Administrative and support and waste manage...,Thousands of dollars,56780,61051,56211,54317,52588,48556,53820,56662,59641,61185,64313,62542,60584,Nevada
59826,"""32510""","Carson City, NV","Educational services, health care, and socia...",Thousands of dollars,156788,168350,183046,211584,315247,334454,386721,429817,445951,423379,419358,379634,378301,Nevada
59827,"""32510""","Carson City, NV",Educational services,Thousands of dollars,3480,3945,17114,33068,46716,55620,91875,118557,131407,124081,104250,51092,26123,Nevada
59828,"""32510""","Carson City, NV",Health care and social assistance,Thousands of dollars,153308,164405,165932,178515,268531,278834,294845,311259,314544,299298,315108,328542,352177,Nevada


In [107]:
print(list(nv_i.GeoName.unique()))
print()
print(list(nv_v.Area.unique()))
print()
print(list(nv_g.GeoName.unique()))

['Carson City, NV', 'Churchill, NV', 'Clark, NV', 'Douglas, NV', 'Elko, NV', 'Esmeralda, NV', 'Eureka, NV', 'Humboldt, NV', 'Lander, NV', 'Lincoln, NV', 'Lyon, NV', 'Mineral, NV', 'Nye, NV', 'Pershing, NV', 'Storey, NV', 'Washoe, NV', 'White Pine, NV']

['Carson City, NV', 'Churchill, NV', 'Clark, NV', 'Douglas, NV', 'Elko, NV', 'Esmeralda, NV', 'Eureka, NV', 'Humboldt, NV', 'Lander, NV', 'Lincoln, NV', 'Lyon, NV', 'Mineral, NV', 'Nye, NV', 'Pershing, NV', 'Storey, NV', 'Washoe, NV', 'White Pine, NV']

['Carson City, NV', 'Churchill, NV', 'Clark, NV', 'Douglas, NV', 'Elko, NV', 'Esmeralda, NV', 'Eureka, NV', 'Humboldt, NV', 'Lander, NV', 'Lincoln, NV', 'Lyon, NV', 'Mineral, NV', 'Nye, NV', 'Pershing, NV', 'Storey, NV', 'Washoe, NV', 'White Pine, NV']


In [108]:
#Drop the old Nevada data from inc and add in the new figures
inc = inc.loc[(inc.State != 'Nevada')]
inc = pd.concat([inc, nv_i])
inc = inc.sort_values(by=['State','GeoName'])
inc.loc[(inc.State == 'Nevada')].head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
5277,"""32510""","Carson City, NV",Personal income (thousands of dollars),Thousands of dollars,1175447,1374248,1691496,1623157,1659389,2273150,2431986,2353165,2143871,2191065,2298527,2460841,2478381,Nevada
5278,"""32510""","Carson City, NV",Population (persons) 1/,Number of persons,55995,55982,55410,55288,55552,55401,54981,54676,54312,53726,54082,54068,54147,Nevada
5279,"""32510""","Carson City, NV",Per capita personal income (dollars) 2/,Dollars,20992,24548,30527,29358,29871,41031,44233,43038,39473,40782,42501,45514,45771,Nevada
5229,"""32001""","Churchill, NV",Personal income (thousands of dollars),Thousands of dollars,710639,757744,771769,784661,809594,791472,828714,864902,859930,870490,919979,957762,964215,Nevada
5230,"""32001""","Churchill, NV",Population (persons) 1/,Number of persons,24226,24593,24842,24961,25049,25067,24818,24593,24245,23927,23824,23957,23904,Nevada


In [109]:
#Drop the old Nevada data from gdp and add in the new figures
gdp = gdp.loc[(gdp.State != 'Nevada')]
gdp = pd.concat([gdp, nv_g])
gdp = gdp.sort_values(by=['State','GeoName'])
gdp.loc[(gdp.State == 'Nevada')].head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
59806,"""32510""","Carson City, NV",All industry total,Thousands of dollars,2565152,2726707,2896381,3003026,2997110,3059712,3221659,3034855,3090326,3157090,3024832,3169718,3208608,Nevada
59825,"""32510""","Carson City, NV",Administrative and support and waste manage...,Thousands of dollars,56780,61051,56211,54317,52588,48556,53820,56662,59641,61185,64313,62542,60584,Nevada
59826,"""32510""","Carson City, NV","Educational services, health care, and socia...",Thousands of dollars,156788,168350,183046,211584,315247,334454,386721,429817,445951,423379,419358,379634,378301,Nevada
59827,"""32510""","Carson City, NV",Educational services,Thousands of dollars,3480,3945,17114,33068,46716,55620,91875,118557,131407,124081,104250,51092,26123,Nevada
59828,"""32510""","Carson City, NV",Health care and social assistance,Thousands of dollars,153308,164405,165932,178515,268531,278834,294845,311259,314544,299298,315108,328542,352177,Nevada


## Part 4H: Fixing Missouri

In [110]:
#Slice Missouri from the datasets
mo_i = inc.loc[(inc.State == 'Missouri')]
mo_i = mo_i.sort_values(by=['GeoName'])
print(list(mo_i.GeoName.unique()))
print()
mo_g = gdp.loc[(gdp.State == 'Missouri')]
mo_g = mo_g.sort_values(by=['GeoName'])
print(list(mo_g.GeoName.unique()))
print()
mo_v = vote.loc[(vote.State == 'Missouri')]
mo_v = mo_v.sort_values(by=['Area','Year'])
print(list(mo_v.Area.unique()))

['Adair, MO', 'Andrew, MO', 'Atchison, MO', 'Audrain, MO', 'Barry, MO', 'Barton, MO', 'Bates, MO', 'Benton, MO', 'Bollinger, MO', 'Boone, MO', 'Buchanan, MO', 'Butler, MO', 'Caldwell, MO', 'Callaway, MO', 'Camden, MO', 'Cape Girardeau, MO', 'Carroll, MO', 'Carter, MO', 'Cass, MO', 'Cedar, MO', 'Chariton, MO', 'Christian, MO', 'Clark, MO', 'Clay, MO', 'Clinton, MO', 'Cole, MO', 'Cooper, MO', 'Crawford, MO', 'Dade, MO', 'Dallas, MO', 'Daviess, MO', 'DeKalb, MO', 'Dent, MO', 'Douglas, MO', 'Dunklin, MO', 'Franklin, MO', 'Gasconade, MO', 'Gentry, MO', 'Greene, MO', 'Grundy, MO', 'Harrison, MO', 'Henry, MO', 'Hickory, MO', 'Holt, MO', 'Howard, MO', 'Howell, MO', 'Iron, MO', 'Jackson, MO', 'Jasper, MO', 'Jefferson, MO', 'Johnson, MO', 'Knox, MO', 'Laclede, MO', 'Lafayette, MO', 'Lawrence, MO', 'Lewis, MO', 'Lincoln, MO', 'Linn, MO', 'Livingston, MO', 'Macon, MO', 'Madison, MO', 'Maries, MO', 'Marion, MO', 'McDonald, MO', 'Mercer, MO', 'Miller, MO', 'Mississippi, MO', 'Missouri', 'Moniteau, M

In [111]:
#Fix the abbreviations and simplify the city description for St. Louis City.  To maintain consistency with vote, 
#drop any rows with Missouri under the GeoName column
mo_i['GeoName'] = mo_i['GeoName'].replace({'Mo':'MO'}, regex=True)
mo_i['GeoName'] = np.where((mo_i['GeoName'] == 'St. Louis (Independent City), MO'), 
                           'St. Louis City, MO', mo_i['GeoName'])
mo_i = mo_i.loc[(mo_i['GeoName'] != 'Missouri')]
mo_i.head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
4428,"""29001""","Adair, MO",Personal income (thousands of dollars),Thousands of dollars,559348,576504,603828,632552,674714,711589,696616,730848,737002,753678,771041,796107,799495,Missouri
4429,"""29001""","Adair, MO",Population (persons) 1/,Number of persons,24988,24849,25007,24983,25399,25577,25625,25657,25675,25703,25521,25353,25244,Missouri
4430,"""29001""","Adair, MO",Per capita personal income (dollars) 2/,Dollars,22385,23200,24146,25319,26565,27821,27185,28485,28705,29323,30212,31401,31671,Missouri
4431,"""29003""","Andrew, MO",Personal income (thousands of dollars),Thousands of dollars,471079,478482,516393,559228,612255,615875,615615,645153,660175,683090,701118,711139,728983,Missouri
4432,"""29003""","Andrew, MO",Population (persons) 1/,Number of persons,16580,16511,16861,16834,16924,17036,17344,17199,17311,17305,17282,17314,17351,Missouri


In [112]:
#Compare the lengths of the lists of counties and independent cities for Missouri in mo_i and mo_v
print(len(list(mo_i.GeoName.unique())))
print()
print(len(list(mo_v.Area.unique())))

115

116


In [113]:
#Slice Jackson County starting and Kansas City from mo_v
jackson = mo_v.loc[(mo_v.Area == 'Jackson, MO')]
kansas_c = mo_v.loc[(mo_v.Area == 'Kansas City, MO')]
jackson

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
1501,2004,Missouri,"Jackson, MO",174570,94439,"Bush, George W.",79029,"Kerry, John F.",1102,15410,R,54.1,45.27,0.63
4616,2008,Missouri,"Jackson, MO",186047,92833,"McCain, John S. III",90722,"Obama, Barack H.",2492,2111,R,49.9,48.76,1.34
7731,2012,Missouri,"Jackson, MO",174764,93199,"Romney, W. Mitt",78283,"Obama, Barack H.",3282,14916,R,53.33,44.79,1.88
10847,2016,Missouri,"Jackson, MO",173275,91557,"Trump, Donald J.",71237,"Clinton, Hillary Rodham",10481,20320,R,52.84,41.11,6.05


In [114]:
kansas_c

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
1505,2004,Missouri,"Kansas City, MO",141423,36061,"Bush, George W.",104625,"Kerry, John F.",737,68564,D,25.5,73.98,0.52
4620,2008,Missouri,"Kansas City, MO",153219,31854,"McCain, John S. III",120102,"Obama, Barack H.",1263,88248,D,20.79,78.39,0.82
7735,2012,Missouri,"Kansas City, MO",136813,29509,"Romney, W. Mitt",105670,"Obama, Barack H.",1634,76161,D,21.57,77.24,1.19
10851,2016,Missouri,"Kansas City, MO",128601,24654,"Trump, Donald J.",97735,"Clinton, Hillary Rodham",6212,73081,D,19.17,76.0,4.83


In [115]:
#Similarly as with Virginia, for the purposes of convenience to avoid dropping votes, append Kansas City's data to
#Jackson County's under the latter's name
jackson['TotalVotes'] += kansas_c['TotalVotes'].values
jackson['RepVotes'] += kansas_c['RepVotes'].values
jackson['DemVotes'] += kansas_c['DemVotes'].values
jackson['ThirdVotes'] += kansas_c['ThirdVotes'].values
jackson['PluralityVotes'] = abs(jackson['RepVotes'] - jackson['DemVotes'])
jackson['RepVotesTotalPercent'] = (jackson['RepVotes']/jackson['TotalVotes']*100).round(2)
jackson['DemVotesTotalPercent'] = (jackson['DemVotes']/jackson['TotalVotes']*100).round(2)
jackson['ThirdVotesTotalPercent'] = (jackson['ThirdVotes']/jackson['TotalVotes']*100).round(2)
jackson

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
1501,2004,Missouri,"Jackson, MO",315993,130500,"Bush, George W.",183654,"Kerry, John F.",1839,53154,R,41.3,58.12,0.58
4616,2008,Missouri,"Jackson, MO",339266,124687,"McCain, John S. III",210824,"Obama, Barack H.",3755,86137,R,36.75,62.14,1.11
7731,2012,Missouri,"Jackson, MO",311577,122708,"Romney, W. Mitt",183953,"Obama, Barack H.",4916,61245,R,39.38,59.04,1.58
10847,2016,Missouri,"Jackson, MO",301876,116211,"Trump, Donald J.",168972,"Clinton, Hillary Rodham",16693,52761,R,38.5,55.97,5.53


In [116]:
#Drop the old data for Jackson County and Kansas City from mo_v
mo_v = mo_v.loc[(mo_v['Area'] != 'Jackson, MO') & (mo_v['Area'] != 'Kansas City, MO')]
mo_v.head()

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
1454,2004,Missouri,"Adair, MO",11404,6367,"Bush, George W.",4938,"Kerry, John F.",99,1429,R,55.83,43.3,0.87
4569,2008,Missouri,"Adair, MO",11871,5891,"McCain, John S. III",5735,"Obama, Barack H.",245,156,R,49.63,48.31,2.06
7684,2012,Missouri,"Adair, MO",10126,5651,"Romney, W. Mitt",4219,"Obama, Barack H.",256,1432,R,55.81,41.67,2.53
10800,2016,Missouri,"Adair, MO",10226,6030,"Trump, Donald J.",3500,"Clinton, Hillary Rodham",696,2530,R,58.97,34.23,6.81
1455,2004,Missouri,"Andrew, MO",8266,5135,"Bush, George W.",3069,"Kerry, John F.",62,2066,R,62.12,37.13,0.75


In [117]:
list(mo_v.Area.unique())

['Adair, MO',
 'Andrew, MO',
 'Atchison, MO',
 'Audrain, MO',
 'Barry, MO',
 'Barton, MO',
 'Bates, MO',
 'Benton, MO',
 'Bollinger, MO',
 'Boone, MO',
 'Buchanan, MO',
 'Butler, MO',
 'Caldwell, MO',
 'Callaway, MO',
 'Camden, MO',
 'Cape Girardeau, MO',
 'Carroll, MO',
 'Carter, MO',
 'Cass, MO',
 'Cedar, MO',
 'Chariton, MO',
 'Christian, MO',
 'Clark, MO',
 'Clay, MO',
 'Clinton, MO',
 'Cole, MO',
 'Cooper, MO',
 'Crawford, MO',
 'Dade, MO',
 'Dallas, MO',
 'Daviess, MO',
 'Dekalb, MO',
 'Dent, MO',
 'Douglas, MO',
 'Dunklin, MO',
 'Franklin, MO',
 'Gasconade, MO',
 'Gentry, MO',
 'Greene, MO',
 'Grundy, MO',
 'Harrison, MO',
 'Henry, MO',
 'Hickory, MO',
 'Holt, MO',
 'Howard, MO',
 'Howell, MO',
 'Iron, MO',
 'Jasper, MO',
 'Jefferson, MO',
 'Johnson, MO',
 'Knox, MO',
 'Laclede, MO',
 'Lafayette, MO',
 'Lawrence, MO',
 'Lewis, MO',
 'Lincoln, MO',
 'Linn, MO',
 'Livingston, MO',
 'Macon, MO',
 'Madison, MO',
 'Maries, MO',
 'Marion, MO',
 'Mcdonald, MO',
 'Mercer, MO',
 'Miller,

In [118]:
list(mo_i.GeoName.unique())

['Adair, MO',
 'Andrew, MO',
 'Atchison, MO',
 'Audrain, MO',
 'Barry, MO',
 'Barton, MO',
 'Bates, MO',
 'Benton, MO',
 'Bollinger, MO',
 'Boone, MO',
 'Buchanan, MO',
 'Butler, MO',
 'Caldwell, MO',
 'Callaway, MO',
 'Camden, MO',
 'Cape Girardeau, MO',
 'Carroll, MO',
 'Carter, MO',
 'Cass, MO',
 'Cedar, MO',
 'Chariton, MO',
 'Christian, MO',
 'Clark, MO',
 'Clay, MO',
 'Clinton, MO',
 'Cole, MO',
 'Cooper, MO',
 'Crawford, MO',
 'Dade, MO',
 'Dallas, MO',
 'Daviess, MO',
 'DeKalb, MO',
 'Dent, MO',
 'Douglas, MO',
 'Dunklin, MO',
 'Franklin, MO',
 'Gasconade, MO',
 'Gentry, MO',
 'Greene, MO',
 'Grundy, MO',
 'Harrison, MO',
 'Henry, MO',
 'Hickory, MO',
 'Holt, MO',
 'Howard, MO',
 'Howell, MO',
 'Iron, MO',
 'Jackson, MO',
 'Jasper, MO',
 'Jefferson, MO',
 'Johnson, MO',
 'Knox, MO',
 'Laclede, MO',
 'Lafayette, MO',
 'Lawrence, MO',
 'Lewis, MO',
 'Lincoln, MO',
 'Linn, MO',
 'Livingston, MO',
 'Macon, MO',
 'Madison, MO',
 'Maries, MO',
 'Marion, MO',
 'McDonald, MO',
 'Mercer

In [119]:
#Fix any counties in mo_i that are written incorrectly
mo_v['Area'] = np.where((mo_v['Area'] == 'Mcdonald, MO'), 'McDonald, MO', mo_v['Area'])
mo_i['GeoName'] = np.where((mo_i['GeoName'] == 'MOniteau, MO'), 'Moniteau, MO', mo_i['GeoName'])
mo_i['GeoName'] = np.where((mo_i['GeoName'] == 'MOnroe, MO'), 'Monroe, MO', mo_i['GeoName'])
mo_i['GeoName'] = np.where((mo_i['GeoName'] == 'MOntgomery, MO'), 'Montgomery, MO', mo_i['GeoName'])
mo_i['GeoName'] = np.where((mo_i['GeoName'] == 'MOrgan, MO'), 'Morgan, MO', mo_i['GeoName'])

In [120]:
#Drop the old Missouri data from vote and add in the new figures
vote = vote.loc[(vote.State != 'Missouri')]
vote = pd.concat([vote, mo_v, jackson])
vote = vote.sort_values(by=['Year','State','Area'])
vote.loc[(vote.State == 'Missouri') & (vote.Area.str.contains('J'))].tail()

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
7734,2012,Missouri,"Johnson, MO",21021,12763,"Romney, W. Mitt",7667,"Obama, Barack H.",591,5096,R,60.72,36.47,2.81
10847,2016,Missouri,"Jackson, MO",301876,116211,"Trump, Donald J.",168972,"Clinton, Hillary Rodham",16693,52761,R,38.5,55.97,5.53
10848,2016,Missouri,"Jasper, MO",48326,35070,"Trump, Donald J.",10572,"Clinton, Hillary Rodham",2684,24498,R,72.57,21.88,5.55
10849,2016,Missouri,"Jefferson, MO",106238,69036,"Trump, Donald J.",31568,"Clinton, Hillary Rodham",5634,37468,R,64.98,29.71,5.31
10850,2016,Missouri,"Johnson, MO",21132,13719,"Trump, Donald J.",5930,"Clinton, Hillary Rodham",1483,7789,R,64.92,28.06,7.02


In [121]:
#Drop the old Missouri data from inc and add in the new figures
inc = inc.loc[(inc.State != 'Missouri')]
inc = pd.concat([inc, mo_i])
inc = inc.sort_values(by=['State','GeoName'])
inc.loc[(inc.State == 'Missouri')].head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
4428,"""29001""","Adair, MO",Personal income (thousands of dollars),Thousands of dollars,559348,576504,603828,632552,674714,711589,696616,730848,737002,753678,771041,796107,799495,Missouri
4429,"""29001""","Adair, MO",Population (persons) 1/,Number of persons,24988,24849,25007,24983,25399,25577,25625,25657,25675,25703,25521,25353,25244,Missouri
4430,"""29001""","Adair, MO",Per capita personal income (dollars) 2/,Dollars,22385,23200,24146,25319,26565,27821,27185,28485,28705,29323,30212,31401,31671,Missouri
4431,"""29003""","Andrew, MO",Personal income (thousands of dollars),Thousands of dollars,471079,478482,516393,559228,612255,615875,615615,645153,660175,683090,701118,711139,728983,Missouri
4432,"""29003""","Andrew, MO",Population (persons) 1/,Number of persons,16580,16511,16861,16834,16924,17036,17344,17199,17311,17305,17282,17314,17351,Missouri


In [122]:
#Fix the abbreviations and simplify the city description for St. Louis City.  To maintain consistency with vote, 
#drop any rows with Missouri under the GeoName column
mo_g['GeoName'] = mo_g['GeoName'].replace({'Mo':'MO'}, regex=True)
mo_g['GeoName'] = np.where((mo_g['GeoName'] == 'St. Louis (Independent City), MO'), 
                           'St. Louis City, MO', mo_g['GeoName'])
mo_g = mo_g.loc[(mo_g['GeoName'] != 'Missouri')]
mo_g.head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
50184,"""29001""","Adair, MO",All industry total,Thousands of dollars,626221,669721,713764,725289,741203,755630,794591,842475,734973,772559,798026,811574,796012,Missouri
50203,"""29001""","Adair, MO",Administrative and support and waste manage...,Thousands of dollars,4705,4673,4663,4530,6121,7148,7279,8730,8575,8574,8548,(D),(D),Missouri
50204,"""29001""","Adair, MO","Educational services, health care, and socia...",Thousands of dollars,(D),(D),(D),(D),(D),(D),(D),(D),(D),(D),(D),152453,159389,Missouri
50205,"""29001""","Adair, MO",Educational services,Thousands of dollars,(D),(D),(D),(D),(D),(D),(D),(D),(D),(D),(D),(D),(D),Missouri
50206,"""29001""","Adair, MO",Health care and social assistance,Thousands of dollars,(D),(D),(D),(D),(D),(D),(D),(D),(D),(D),(D),(D),(D),Missouri


In [123]:
#Compare the lengths of the lists of counties and independent cities for Missouri in mo_i and mo_g
print(len(list(mo_i.GeoName.unique())))
print()
print(len(list(mo_g.GeoName.unique())))

115

115


In [124]:
list(mo_g.GeoName.unique())

['Adair, MO',
 'Andrew, MO',
 'Atchison, MO',
 'Audrain, MO',
 'Barry, MO',
 'Barton, MO',
 'Bates, MO',
 'Benton, MO',
 'Bollinger, MO',
 'Boone, MO',
 'Buchanan, MO',
 'Butler, MO',
 'Caldwell, MO',
 'Callaway, MO',
 'Camden, MO',
 'Cape Girardeau, MO',
 'Carroll, MO',
 'Carter, MO',
 'Cass, MO',
 'Cedar, MO',
 'Chariton, MO',
 'Christian, MO',
 'Clark, MO',
 'Clay, MO',
 'Clinton, MO',
 'Cole, MO',
 'Cooper, MO',
 'Crawford, MO',
 'Dade, MO',
 'Dallas, MO',
 'Daviess, MO',
 'DeKalb, MO',
 'Dent, MO',
 'Douglas, MO',
 'Dunklin, MO',
 'Franklin, MO',
 'Gasconade, MO',
 'Gentry, MO',
 'Greene, MO',
 'Grundy, MO',
 'Harrison, MO',
 'Henry, MO',
 'Hickory, MO',
 'Holt, MO',
 'Howard, MO',
 'Howell, MO',
 'Iron, MO',
 'Jackson, MO',
 'Jasper, MO',
 'Jefferson, MO',
 'Johnson, MO',
 'Knox, MO',
 'Laclede, MO',
 'Lafayette, MO',
 'Lawrence, MO',
 'Lewis, MO',
 'Lincoln, MO',
 'Linn, MO',
 'Livingston, MO',
 'Macon, MO',
 'Madison, MO',
 'Maries, MO',
 'Marion, MO',
 'McDonald, MO',
 'Mercer

In [125]:
#Fix any counties in mo_g that are written incorrectly
mo_g['GeoName'] = np.where((mo_g['GeoName'] == 'MOniteau, MO'), 'Moniteau, MO', mo_g['GeoName'])
mo_g['GeoName'] = np.where((mo_g['GeoName'] == 'MOnroe, MO'), 'Monroe, MO', mo_g['GeoName'])
mo_g['GeoName'] = np.where((mo_g['GeoName'] == 'MOntgomery, MO'), 'Montgomery, MO', mo_g['GeoName'])
mo_g['GeoName'] = np.where((mo_g['GeoName'] == 'MOrgan, MO'), 'Morgan, MO', mo_g['GeoName'])

In [126]:
#Drop the old Missouri data from gdp and add in the new figures
gdp = gdp.loc[(gdp.State != 'Missouri')]
gdp = pd.concat([gdp, mo_g])
gdp = gdp.sort_values(by=['State','GeoName'])
gdp.loc[(gdp.State == 'Missouri')].head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
50184,"""29001""","Adair, MO",All industry total,Thousands of dollars,626221,669721,713764,725289,741203,755630,794591,842475,734973,772559,798026,811574,796012,Missouri
50203,"""29001""","Adair, MO",Administrative and support and waste manage...,Thousands of dollars,4705,4673,4663,4530,6121,7148,7279,8730,8575,8574,8548,(D),(D),Missouri
50204,"""29001""","Adair, MO","Educational services, health care, and socia...",Thousands of dollars,(D),(D),(D),(D),(D),(D),(D),(D),(D),(D),(D),152453,159389,Missouri
50205,"""29001""","Adair, MO",Educational services,Thousands of dollars,(D),(D),(D),(D),(D),(D),(D),(D),(D),(D),(D),(D),(D),Missouri
50206,"""29001""","Adair, MO",Health care and social assistance,Thousands of dollars,(D),(D),(D),(D),(D),(D),(D),(D),(D),(D),(D),(D),(D),Missouri


## Part 4I: Fixing Georgia 

In [127]:
#Slice Georgia from the datasets and compare the lengths of their lists of individual counties
ga_i = inc.loc[(inc.State == 'Georgia')]
ga_i = ga_i.sort_values(by=['GeoName'])
print(list(ga_i.GeoName.unique()))
print()
print(len(list(ga_i.GeoName.unique())))
print()
ga_g = gdp.loc[(gdp.State == 'Georgia')]
ga_g = ga_g.sort_values(by=['GeoName'])
print(list(ga_g.GeoName.unique()))
print()
print(len(list(ga_g.GeoName.unique())))
print()
ga_v = vote.loc[(vote.State == 'Georgia')]
ga_v = ga_v.sort_values(by=['Area','Year'])
print(list(ga_v.Area.unique()))
print()
print(len(list(ga_v.Area.unique())))

['Appling, GA', 'Atkinson, GA', 'Bacon, GA', 'Baker, GA', 'Baldwin, GA', 'Banks, GA', 'Barrow, GA', 'Bartow, GA', 'Ben Hill, GA', 'Berrien, GA', 'Bibb, GA', 'Bleckley, GA', 'Brantley, GA', 'Brooks, GA', 'Bryan, GA', 'Bulloch, GA', 'Burke, GA', 'Butts, GA', 'Calhoun, GA', 'Camden, GA', 'Candler, GA', 'Carroll, GA', 'Catoosa, GA', 'Charlton, GA', 'Chatham, GA', 'Chattahoochee, GA', 'Chattooga, GA', 'Cherokee, GA', 'Clarke, GA', 'Clay, GA', 'Clayton, GA', 'Clinch, GA', 'Cobb, GA', 'Coffee, GA', 'Colquitt, GA', 'Columbia, GA', 'Cook, GA', 'Coweta, GA', 'Crawford, GA', 'Crisp, GA', 'Dade, GA', 'Dawson, GA', 'DeKalb, GA', 'Decatur, GA', 'Dodge, GA', 'Dooly, GA', 'Dougherty, GA', 'Douglas, GA', 'Early, GA', 'Echols, GA', 'Effingham, GA', 'Elbert, GA', 'Emanuel, GA', 'Evans, GA', 'Fannin, GA', 'Fayette, GA', 'Floyd, GA', 'Forsyth, GA', 'Franklin, GA', 'Fulton, GA', 'Georgia', 'Gilmer, GA', 'Glascock, GA', 'Glynn, GA', 'Gordon, GA', 'Grady, GA', 'Greene, GA', 'Gwinnett, GA', 'Habersham, GA', 'H

In [128]:
vote.loc[(vote.Year == 2016) & ((vote.Area == 'Fulton, GA') | (vote.Area == 'Milton, GA'))]

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
9763,2016,Georgia,"Fulton, GA",431391,117783,"Trump, Donald J.",297051,"Clinton, Hillary Rodham",16557,179268,D,27.3,68.86,3.83
9804,2016,Georgia,"Milton, GA",0,0,"Trump, Donald J.",0,"Clinton, Hillary Rodham",0,0,R,0.0,0.0,0.0


In [129]:
#Drop any rows with Georgia under the GeoName column in inc and gdp, as well as Milton under the Area column in vote, 
#as this name refers to a city already incorporated under Fulton County
inc = inc.loc[(inc.GeoName != 'Georgia')]
gdp = gdp.loc[(gdp.GeoName != 'Georgia')]
vote = vote.loc[(vote.Area != 'Milton, GA')]
inc.loc[(inc['GeoName'].str.contains('GA'))].head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
1098,"""13001""","Appling, GA",Personal income (thousands of dollars),Thousands of dollars,375363,391874,393768,417019,435948,448686,463822,501239,512395,525901,546011,585322,587321,Georgia
1099,"""13001""","Appling, GA",Population (persons) 1/,Number of persons,17740,17769,17725,17985,18133,18144,18334,18459,18385,18370,18472,18423,18428,Georgia
1100,"""13001""","Appling, GA",Per capita personal income (dollars) 2/,Dollars,21159,22054,22215,23187,24042,24729,25298,27154,27870,28628,29559,31771,31871,Georgia
1101,"""13003""","Atkinson, GA",Personal income (thousands of dollars),Thousands of dollars,165742,175016,173369,177023,191772,187278,182613,197405,201210,207201,207822,229242,226066,Georgia
1102,"""13003""","Atkinson, GA",Population (persons) 1/,Number of persons,7985,8096,8197,8346,8366,8386,8364,8360,8255,8264,8215,8340,8244,Georgia


In [130]:
gdp.loc[(gdp['GeoName'].str.contains('GA'))].head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
12444,"""13001""","Appling, GA",All industry total,Thousands of dollars,936590,899392,994090,996955,1065060,1110641,1238589,1390910,1422221,1428211,1497465,1510707,1538554,Georgia
12445,"""13001""","Appling, GA",Private industries,Thousands of dollars,859835,816017,913269,915747,983689,1023219,1143307,1296610,1328679,1329891,1399495,1410282,1440993,Georgia
12446,"""13001""","Appling, GA","Agriculture, forestry, fishing and hunting",Thousands of dollars,30032,26583,19234,26495,28014,27307,23861,44500,46455,63102,59231,75321,69660,Georgia
12447,"""13001""","Appling, GA","Mining, quarrying, and oil and gas extraction",Thousands of dollars,0,0,0,0,0,0,0,0,0,0,0,0,0,Georgia
12448,"""13001""","Appling, GA",Utilities,Thousands of dollars,599415,552758,633785,620673,671430,718641,819426,937057,958825,936423,988890,965424,993307,Georgia


In [131]:
vote.loc[(vote.Year == 2016) & (vote.State == 'Georgia') & (vote.Area.str.contains('Mi'))]

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
9803,2016,Georgia,"Miller, GA",2544,1891,"Trump, Donald J.",623,"Clinton, Hillary Rodham",30,1268,R,74.33,24.49,1.18
9805,2016,Georgia,"Mitchell, GA",7880,4279,"Trump, Donald J.",3493,"Clinton, Hillary Rodham",108,786,R,54.3,44.33,1.37


## Part 4J: Fixing South Dakota 

In [132]:
#It should be noted that Shannon County in South Dakota was renamed as Oglala Lakota County in 2015, so slice that in
#vote, inc, and gdp
vote.loc[(vote.Area == 'Shannon, SD') | (vote.Area == 'Oglala Lakota, SD')]

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
2389,2004,South Dakota,"Shannon, SD",4214,526,"Bush, George W.",3566,"Kerry, John F.",122,3040,D,12.48,84.62,2.9
5504,2008,South Dakota,"Shannon, SD",3350,331,"McCain, John S. III",2971,"Obama, Barack H.",48,2640,D,9.88,88.69,1.43
8619,2012,South Dakota,"Shannon, SD",3145,188,"Romney, W. Mitt",2937,"Obama, Barack H.",20,2749,D,5.98,93.39,0.64
11730,2016,South Dakota,"Oglala Lakota, SD",2905,241,"Trump, Donald J.",2510,"Clinton, Hillary Rodham",154,2269,D,8.3,86.4,5.3


In [133]:
inc.loc[(inc.State == 'South Dakota') & ((inc.GeoName.str.contains('Shannon')) | 
                                         (inc.GeoName.str.contains('Oglala Lakota')))]

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
7263,"""46102""","Oglala Lakota, SD*",Personal income (thousands of dollars),Thousands of dollars,217493,226998,222367,230196,252903,267001,295744,314280,313258,309069,331544,338835,343736,South Dakota
7264,"""46102""","Oglala Lakota, SD*",Population (persons) 1/,Number of persons,12983,13150,13404,13345,13368,13425,13636,13896,14037,14126,14213,14358,14425,South Dakota
7265,"""46102""","Oglala Lakota, SD*",Per capita personal income (dollars) 2/,Dollars,16752,17262,16590,17250,18919,19888,21688,22617,22317,21879,23327,23599,23829,South Dakota


In [134]:
gdp.loc[(gdp.State == 'South Dakota') & ((gdp.GeoName.str.contains('Shannon')) | 
                                         (gdp.GeoName.str.contains('Oglala Lakota')))].head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
82314,"""46102""","Oglala Lakota, SD*",All industry total,Thousands of dollars,212413,220895,222897,220428,237137,244665,263571,273418,269309,257897,290212,305776,314874,South Dakota
82315,"""46102""","Oglala Lakota, SD*",Private industries,Thousands of dollars,60411,66582,63329,58934,65975,68904,75346,82602,81854,85622,109781,119746,123298,South Dakota
82316,"""46102""","Oglala Lakota, SD*","Agriculture, forestry, fishing and hunting",Thousands of dollars,(D),(D),(D),(D),(D),(D),(D),(D),(D),(D),(D),(D),(D),South Dakota
82317,"""46102""","Oglala Lakota, SD*","Mining, quarrying, and oil and gas extraction",Thousands of dollars,0,0,0,0,0,0,0,0,0,0,0,0,0,South Dakota
82318,"""46102""","Oglala Lakota, SD*",Utilities,Thousands of dollars,0,(D),(D),161,393,413,364,34,(D),(D),(D),(D),(D),South Dakota


In [135]:
#Correct the county's name in all three datasets for consistency and simplicity purposes
vote['Area'] = np.where((vote['Area'] == 'Shannon, SD'), 'Oglala Lakota, SD', vote['Area'])
inc['GeoName'] = np.where((inc['GeoName'] == 'Oglala Lakota, SD*'), 'Oglala Lakota, SD', inc['GeoName'])
gdp['GeoName'] = np.where((gdp['GeoName'] == 'Oglala Lakota, SD*'), 'Oglala Lakota, SD', gdp['GeoName'])

## Part 4K: Fixing Hawaii 

In [136]:
#Slice Hawaii from the datasets and compare the lengths of their lists of individual counties
hi_i = inc.loc[(inc.State == 'Hawaii')]
hi_i = hi_i.sort_values(by=['GeoName'])
print(list(hi_i.GeoName.unique()))
print()
print(len(list(hi_i.GeoName.unique())))
print()
hi_g = gdp.loc[(gdp.State == 'Hawaii')]
hi_g = hi_g.sort_values(by=['GeoName'])
print(list(hi_g.GeoName.unique()))
print()
print(len(list(hi_g.GeoName.unique())))
print()
hi_v = vote.loc[(vote.State == 'Hawaii')]
hi_v = hi_v.sort_values(by=['Area','Year'])
print(list(hi_v.Area.unique()))
print()
print(len(list(hi_v.Area.unique())))

['Hawaii', 'Hawaii, HI', 'Honolulu, HI', 'Kauai, HI', 'Maui + Kalawao, HI*']

5

['Hawaii', 'Hawaii, HI', 'Honolulu, HI', 'Kauai, HI', 'Maui + Kalawao, HI*']

5

['Hawaii, HI', 'Honolulu, HI', 'Kauai, HI', 'Maui, HI']

4


In [137]:
#Drop any rows with Hawaii under the GeoName column in inc and gdp.  Noting how Maui and Kalawao Counties have their
#income and GDP figures consolidated together, fix its name discrepancy in all three datasets
inc = inc.loc[(inc.GeoName != 'Hawaii')]
gdp = gdp.loc[(gdp.GeoName != 'Hawaii')]
vote['Area'] = np.where((vote['Area'] == 'Maui, HI'), 'Maui and Kalawao, HI', vote['Area'])
inc['GeoName'] = np.where((inc['GeoName'] == 'Maui + Kalawao, HI*'), 'Maui and Kalawao, HI', inc['GeoName'])
gdp['GeoName'] = np.where((gdp['GeoName'] == 'Maui + Kalawao, HI*'), 'Maui and Kalawao, HI', gdp['GeoName'])
inc.loc[(inc.State == 'Hawaii')]

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
1578,"""15001""","Hawaii, HI",Personal income (thousands of dollars),Thousands of dollars,4379204,4848840,5227476,5570974,5903907,5675130,5869367,6142665,6369930,6443742,6863938,7405863,7748127,Hawaii
1579,"""15001""","Hawaii, HI",Population (persons) 1/,Number of persons,162852,168237,173536,177733,181506,183629,185363,187079,189161,191459,193711,195975,198316,Hawaii
1580,"""15001""","Hawaii, HI",Per capita personal income (dollars) 2/,Dollars,26891,28821,30123,31345,32527,30905,31664,32835,33675,33656,35434,37790,39070,Hawaii
1581,"""15003""","Honolulu, HI",Personal income (thousands of dollars),Thousands of dollars,33368413,35728717,38249805,40176577,42118992,42371822,43397685,45532320,47177994,47853464,50354664,52729171,54567246,Hawaii
1582,"""15003""","Honolulu, HI",Population (persons) 1/,Number of persons,907997,918181,926954,925335,933680,943177,956285,967336,977994,986059,987414,991064,992268,Hawaii
1583,"""15003""","Honolulu, HI",Per capita personal income (dollars) 2/,Dollars,36749,38912,41264,43418,45111,44925,45382,47070,48240,48530,50997,53205,54992,Hawaii
1584,"""15007""","Kauai, HI",Personal income (thousands of dollars),Thousands of dollars,1888102,2024507,2159942,2302505,2412045,2322396,2375385,2492278,2599394,2700501,2889753,3073497,3216155,Hawaii
1585,"""15007""","Kauai, HI",Population (persons) 1/,Number of persons,62095,62863,63465,64490,65603,66518,67205,67888,68671,69632,70288,71021,71537,Hawaii
1586,"""15007""","Kauai, HI",Per capita personal income (dollars) 2/,Dollars,30407,32205,34034,35703,36767,34914,35345,36712,37853,38782,41113,43276,44958,Hawaii
1587,"""15901""","Maui and Kalawao, HI",Personal income (thousands of dollars),Thousands of dollars,4341656,4704262,5063867,5373022,5624965,5320683,5536704,5877361,6308599,6318893,6737768,7167703,7516459,Hawaii


## Part 5: Adjusting for Alaska and DC in the Income and GDP Data 

In [138]:
inc.loc[(inc.State == 'Alaska') | (inc.State == 'District of Columbia')]

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
9399,"""02000""",Alaska,Personal income (thousands of dollars),Thousands of dollars,24338346,25956877,27726673,29838859,32825068,32896441,35293829,37827809,39169240,38859491,41110392,42505840,41745626,Alaska
9400,"""02000""",Alaska,Population (persons) 1/,Number of persons,659286,666946,675302,680300,687455,698895,713910,722128,730443,737068,736283,737498,741456,Alaska
9401,"""02000""",Alaska,Per capita personal income (dollars) 2/,Dollars,36916,38919,41058,43861,47749,47069,49437,52384,53624,52722,55835,57635,56302,Alaska
9402,"""11000""",District of Columbia,Personal income (thousands of dollars),Thousands of dollars,28501723,30163906,32001079,34480237,35462270,35798825,38481501,41786280,43426932,44189236,47259734,50963559,53375784,District of Columbia
9403,"""11000""",District of Columbia,Population (persons) 1/,Number of persons,567754,567136,570681,574404,580236,592228,605226,619800,634924,650581,662328,675400,685815,District of Columbia
9404,"""11000""",District of Columbia,Per capita personal income (dollars) 2/,Dollars,50201,53186,56075,60028,61117,60448,63582,67419,68397,67923,71354,75457,77828,District of Columbia


In [139]:
print(states)

['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado', 'Connecticut', 'Delaware', 'District of Columbia', 'Florida', 'Georgia', 'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico', 'New York', 'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming']


In [140]:
#Eliminate the rows in inc that refer to an entire state under the GeoName column, except for Alaska and the District 
#of Columbia, as these were missing or did not have counties in the first place
most = ['Alabama','Arizona','Arkansas','California','Colorado','Connecticut','Delaware','Florida',
        'Georgia','Hawaii','Idaho','Illinois','Indiana','Iowa','Kansas','Kentucky','Louisiana',
        'Maine','Maryland','Massachusetts','Michigan','Minnesota','Mississippi','Missouri','Montana',
        'Nebraska','Nevada','New Hampshire','New Jersey','New Mexico','New York','North Carolina','North Dakota',
        'Ohio','Oklahoma','Oregon','Pennsylvania','Rhode Island','South Carolina','South Dakota','Tennessee',
        'Texas','Utah','Vermont','Virginia','Washington','West Virginia','Wisconsin','Wyoming']
inc = inc.loc[~(inc['GeoName'].isin(most))]
inc.loc[(inc.State == 'Hawaii')]

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
1578,"""15001""","Hawaii, HI",Personal income (thousands of dollars),Thousands of dollars,4379204,4848840,5227476,5570974,5903907,5675130,5869367,6142665,6369930,6443742,6863938,7405863,7748127,Hawaii
1579,"""15001""","Hawaii, HI",Population (persons) 1/,Number of persons,162852,168237,173536,177733,181506,183629,185363,187079,189161,191459,193711,195975,198316,Hawaii
1580,"""15001""","Hawaii, HI",Per capita personal income (dollars) 2/,Dollars,26891,28821,30123,31345,32527,30905,31664,32835,33675,33656,35434,37790,39070,Hawaii
1581,"""15003""","Honolulu, HI",Personal income (thousands of dollars),Thousands of dollars,33368413,35728717,38249805,40176577,42118992,42371822,43397685,45532320,47177994,47853464,50354664,52729171,54567246,Hawaii
1582,"""15003""","Honolulu, HI",Population (persons) 1/,Number of persons,907997,918181,926954,925335,933680,943177,956285,967336,977994,986059,987414,991064,992268,Hawaii
1583,"""15003""","Honolulu, HI",Per capita personal income (dollars) 2/,Dollars,36749,38912,41264,43418,45111,44925,45382,47070,48240,48530,50997,53205,54992,Hawaii
1584,"""15007""","Kauai, HI",Personal income (thousands of dollars),Thousands of dollars,1888102,2024507,2159942,2302505,2412045,2322396,2375385,2492278,2599394,2700501,2889753,3073497,3216155,Hawaii
1585,"""15007""","Kauai, HI",Population (persons) 1/,Number of persons,62095,62863,63465,64490,65603,66518,67205,67888,68671,69632,70288,71021,71537,Hawaii
1586,"""15007""","Kauai, HI",Per capita personal income (dollars) 2/,Dollars,30407,32205,34034,35703,36767,34914,35345,36712,37853,38782,41113,43276,44958,Hawaii
1587,"""15901""","Maui and Kalawao, HI",Personal income (thousands of dollars),Thousands of dollars,4341656,4704262,5063867,5373022,5624965,5320683,5536704,5877361,6308599,6318893,6737768,7167703,7516459,Hawaii


In [141]:
#Confirm that both vote and inc have all the states together
print(states)
print()
print(list(vote.State.unique()))
print()
print(list(inc.State.unique()))

['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado', 'Connecticut', 'Delaware', 'District of Columbia', 'Florida', 'Georgia', 'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico', 'New York', 'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming']

['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado', 'Connecticut', 'Delaware', 'District of Columbia', 'Florida', 'Georgia', 'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada', 

In [142]:
#Eliminate the rows in gdp that refer to an entire state under the GeoName column, except for Alaska, the District of
#Columbia, and Hawaii, as these were missing or did not have counties in the first place
gdp = gdp.loc[~(gdp['GeoName'].isin(most))]
gdp.head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
34,"""01001""","Autauga, AL",All industry total,Thousands of dollars,946964,969849,1080320,1136583,1043678,1124580,1201882,1316842,1487130,1457702,1503623,1660498,1716235,Alabama
35,"""01001""","Autauga, AL",Private industries,Thousands of dollars,808170,825221,918707,961717,878021,933062,1011015,1121150,1282522,1239612,1266330,1401135,1467403,Alabama
36,"""01001""","Autauga, AL","Agriculture, forestry, fishing and hunting",Thousands of dollars,39394,39267,44400,41844,21181,22771,20272,30138,17004,28355,17169,16500,22791,Alabama
37,"""01001""","Autauga, AL","Mining, quarrying, and oil and gas extraction",Thousands of dollars,2464,3954,6568,7947,8183,6918,8703,7973,10313,14681,14485,13596,17077,Alabama
38,"""01001""","Autauga, AL",Utilities,Thousands of dollars,81339,67098,111611,129539,106810,145888,214034,305643,443613,376088,377660,464229,456310,Alabama


In [143]:
#Confirm that both vote and gdp have all the states together
print(states)
print()
print(list(vote.State.unique()))
print()
print(list(gdp.State.unique()))

['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado', 'Connecticut', 'Delaware', 'District of Columbia', 'Florida', 'Georgia', 'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico', 'New York', 'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming']

['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado', 'Connecticut', 'Delaware', 'District of Columbia', 'Florida', 'Georgia', 'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada', 

## Part 6: Fixing Incorrectly Written Names in the Voting Data 

In [144]:
#Seeing from Missouri that one county started with Mc, this and the next two cells are preliminary checks to see if 
#other states have similar counties
vote.loc[(vote['Area'].str.contains('Mc'))]

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
455,2004,Georgia,"Mcduffie, GA",7773,4846,"Bush, George W.",2899,"Kerry, John F.",28,1947,R,62.34,37.30,0.36
456,2004,Georgia,"Mcintosh, GA",5379,2837,"Bush, George W.",2523,"Kerry, John F.",19,314,R,52.74,46.90,0.35
627,2004,Illinois,"Mcdonough, IL",14929,7656,"Bush, George W.",7119,"Kerry, John F.",154,537,R,51.28,47.69,1.03
628,2004,Illinois,"Mchenry, IL",127948,76412,"Bush, George W.",50330,"Kerry, John F.",1206,26082,R,59.72,39.34,0.94
629,2004,Illinois,"Mclean, IL",71620,41276,"Bush, George W.",29877,"Kerry, John F.",467,11399,R,57.63,41.72,0.65
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11805,2016,Tennessee,"Mcnairy, TN",10038,7841,"Trump, Donald J.",1848,"Clinton, Hillary Rodham",349,5993,R,78.11,18.41,3.47
12000,2016,Texas,"Mcculloch, TX",3103,2552,"Trump, Donald J.",482,"Clinton, Hillary Rodham",69,2070,R,82.24,15.53,2.22
12001,2016,Texas,"Mclennan, TX",79075,48260,"Trump, Donald J.",27063,"Clinton, Hillary Rodham",3752,21197,R,61.03,34.22,4.74
12002,2016,Texas,"Mcmullen, TX",499,454,"Trump, Donald J.",40,"Clinton, Hillary Rodham",5,414,R,90.98,8.02,1.00


In [145]:
inc.loc[(inc['GeoName'].str.contains('Mc'))]

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
1377,"""13189""","McDuffie, GA",Personal income (thousands of dollars),Thousands of dollars,581925,598125,608852,610145,614804,607524,610125,635227,647205,653388,679449,712197,730214,Georgia
1378,"""13189""","McDuffie, GA",Population (persons) 1/,Number of persons,21256,21419,21564,21527,21790,21836,21803,21614,21630,21499,21524,21462,21456,Georgia
1379,"""13189""","McDuffie, GA",Per capita personal income (dollars) 2/,Dollars,27377,27925,28235,28343,28215,27822,27984,29390,29922,30392,31567,33184,34033,Georgia
1380,"""13191""","McIntosh, GA",Personal income (thousands of dollars),Thousands of dollars,252379,262490,283317,300011,316387,316467,319798,333186,340229,338531,352411,376046,393031,Georgia
1381,"""13191""","McIntosh, GA",Population (persons) 1/,Number of persons,12182,12408,12950,13623,13973,14268,14311,14223,13883,14027,14064,14031,14057,Georgia
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8068,"""48311""","McMullen, TX",Population (persons) 1/,Number of persons,735,789,765,773,736,699,711,697,730,760,799,828,806,Texas
8069,"""48311""","McMullen, TX",Per capita personal income (dollars) 2/,Dollars,35856,31753,35475,34745,41230,40415,36761,56516,56411,69837,92319,96803,63082,Texas
9009,"""54047""","McDowell, WV",Personal income (thousands of dollars),Thousands of dollars,426751,431530,452644,462492,503136,519896,550301,599347,619361,596092,562028,555109,533389,West Virginia
9010,"""54047""","McDowell, WV",Population (persons) 1/,Number of persons,24264,23711,23199,22746,22494,22276,22097,21730,21318,20958,20385,19762,19192,West Virginia


In [146]:
gdp.loc[(gdp['GeoName'].str.contains('Mc'))]

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
15606,"""13189""","McDuffie, GA",All industry total,Thousands of dollars,524076,567099,606855,599635,564987,572344,570788,573561,553654,559252,590592,627168,636561,Georgia
15607,"""13189""","McDuffie, GA",Private industries,Thousands of dollars,437221,478353,515852,504824,471905,474760,471401,476786,463936,474247,500628,535881,547096,Georgia
15608,"""13189""","McDuffie, GA","Agriculture, forestry, fishing and hunting",Thousands of dollars,33162,(D),(D),(D),26382,(D),31437,(D),25853,23300,26416,27557,29616,Georgia
15609,"""13189""","McDuffie, GA","Mining, quarrying, and oil and gas extraction",Thousands of dollars,9797,8636,9101,10329,10073,8983,8999,6903,6131,2631,1682,1568,1650,Georgia
15610,"""13189""","McDuffie, GA",Utilities,Thousands of dollars,(D),(D),(D),(D),(D),(D),(D),(D),(D),(D),(D),(D),(D),Georgia
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102131,"""54047""","McDowell, WV",Trade,Thousands of dollars,28066,32191,35482,35583,(D),(D),(D),(D),(D),(D),(D),(D),(D),West Virginia
102132,"""54047""","McDowell, WV",Transportation and utilities,Thousands of dollars,20895,24692,27477,25670,28588,28440,33889,41234,45181,45124,32394,25516,27945,West Virginia
102133,"""54047""","McDowell, WV",Manufacturing and information,Thousands of dollars,6760,6925,14104,14773,17966,8818,10194,9213,9958,23985,31181,22785,8203,West Virginia
102134,"""54047""","McDowell, WV",Private goods-producing industries 2/,Thousands of dollars,139022,(D),237187,306325,400497,316620,375163,414567,365795,352831,278148,194105,146627,West Virginia


In [147]:
#Slice out the rows with Mc as a string under the GeoName and Area columns
mc_i = inc.loc[(inc['GeoName'].str.contains('Mc'))]
print(list(mc_i['GeoName'].unique()))
print()
mc_g = gdp.loc[(gdp['GeoName'].str.contains('Mc'))]
print(list(mc_g['GeoName'].unique()))
print()
mc_v = vote.loc[(vote['Area'].str.contains('Mc'))]
print(list(mc_v['Area'].unique()))

['McDuffie, GA', 'McIntosh, GA', 'McDonough, IL', 'McHenry, IL', 'McLean, IL', 'McPherson, KS', 'McCracken, KY', 'McCreary, KY', 'McLean, KY', 'McLeod, MN', 'McDonald, MO', 'McCone, MT', 'McPherson, NE', 'McKinley, NM', 'McDowell, NC', 'McHenry, ND', 'McIntosh, ND', 'McKenzie, ND', 'McLean, ND', 'McClain, OK', 'McCurtain, OK', 'McIntosh, OK', 'McKean, PA', 'McCormick, SC', 'McCook, SD', 'McPherson, SD', 'McMinn, TN', 'McNairy, TN', 'McCulloch, TX', 'McLennan, TX', 'McMullen, TX', 'McDowell, WV']

['McDuffie, GA', 'McIntosh, GA', 'McDonough, IL', 'McHenry, IL', 'McLean, IL', 'McPherson, KS', 'McCracken, KY', 'McCreary, KY', 'McLean, KY', 'McLeod, MN', 'McDonald, MO', 'McCone, MT', 'McPherson, NE', 'McKinley, NM', 'McDowell, NC', 'McHenry, ND', 'McIntosh, ND', 'McKenzie, ND', 'McLean, ND', 'McClain, OK', 'McCurtain, OK', 'McIntosh, OK', 'McKean, PA', 'McCormick, SC', 'McCook, SD', 'McPherson, SD', 'McMinn, TN', 'McNairy, TN', 'McCulloch, TX', 'McLennan, TX', 'McMullen, TX', 'McDowell, WV

In [148]:
#Fix the counties that are written incorrectly in mc_v, except for McDonald County in Missouri as that has already 
#been taken care of
mc_v['Area'] = np.where((mc_v['Area'] == 'Mcduffie, GA'), 'McDuffie, GA', mc_v['Area'])
mc_v['Area'] = np.where((mc_v['Area'] == 'Mcintosh, GA'), 'McIntosh, GA', mc_v['Area'])
mc_v['Area'] = np.where((mc_v['Area'] == 'Mcdonough, IL'), 'McDonough, IL', mc_v['Area'])
mc_v['Area'] = np.where((mc_v['Area'] == 'Mchenry, IL'), 'McHenry, IL', mc_v['Area'])
mc_v['Area'] = np.where((mc_v['Area'] == 'Mclean, IL'), 'McLean, IL', mc_v['Area'])
mc_v['Area'] = np.where((mc_v['Area'] == 'Mcpherson, KS'), 'McPherson, KS', mc_v['Area'])
mc_v['Area'] = np.where((mc_v['Area'] == 'Mccracken, KY'), 'McCracken, KY', mc_v['Area'])
mc_v['Area'] = np.where((mc_v['Area'] == 'Mccreary, KY'), 'McCreary, KY', mc_v['Area'])
mc_v['Area'] = np.where((mc_v['Area'] == 'Mclean, KY'), 'McLean, KY', mc_v['Area'])
mc_v['Area'] = np.where((mc_v['Area'] == 'Mcleod, MN'), 'McLeod, MN', mc_v['Area'])
mc_v['Area'] = np.where((mc_v['Area'] == 'Mccone, MT'), 'McCone, MT', mc_v['Area'])
mc_v['Area'] = np.where((mc_v['Area'] == 'Mcpherson, NE'), 'McPherson, NE', mc_v['Area'])
mc_v['Area'] = np.where((mc_v['Area'] == 'Mckinley, NM'), 'McKinley, NM', mc_v['Area'])
mc_v['Area'] = np.where((mc_v['Area'] == 'Mcdowell, NC'), 'McDowell, NC', mc_v['Area'])
mc_v['Area'] = np.where((mc_v['Area'] == 'Mchenry, ND'), 'McHenry, ND', mc_v['Area'])
mc_v['Area'] = np.where((mc_v['Area'] == 'Mcintosh, ND'), 'McIntosh, ND', mc_v['Area'])
mc_v['Area'] = np.where((mc_v['Area'] == 'Mckenzie, ND'), 'McKenzie, ND', mc_v['Area'])
mc_v['Area'] = np.where((mc_v['Area'] == 'Mclean, ND'), 'McLean, ND', mc_v['Area'])
mc_v['Area'] = np.where((mc_v['Area'] == 'Mcclain, OK'), 'McClain, OK', mc_v['Area'])
mc_v['Area'] = np.where((mc_v['Area'] == 'Mccurtain, OK'), 'McCurtain, OK', mc_v['Area'])
mc_v['Area'] = np.where((mc_v['Area'] == 'Mcintosh, OK'), 'McIntosh, OK', mc_v['Area'])
mc_v['Area'] = np.where((mc_v['Area'] == 'Mckean, PA'), 'McKean, PA', mc_v['Area'])
mc_v['Area'] = np.where((mc_v['Area'] == 'Mccormick, SC'), 'McCormick, SC', mc_v['Area'])
mc_v['Area'] = np.where((mc_v['Area'] == 'Mccook, SD'), 'McCook, SD', mc_v['Area'])
mc_v['Area'] = np.where((mc_v['Area'] == 'Mcpherson, SD'), 'McPherson, SD', mc_v['Area'])
mc_v['Area'] = np.where((mc_v['Area'] == 'Mcminn, TN'), 'McMinn, TN', mc_v['Area'])
mc_v['Area'] = np.where((mc_v['Area'] == 'Mcnairy, TN'), 'McNairy, TN', mc_v['Area'])
mc_v['Area'] = np.where((mc_v['Area'] == 'Mcculloch, TX'), 'McCulloch, TX', mc_v['Area'])
mc_v['Area'] = np.where((mc_v['Area'] == 'Mclennan, TX'), 'McLennan, TX', mc_v['Area'])
mc_v['Area'] = np.where((mc_v['Area'] == 'Mcmullen, TX'), 'McMullen, TX', mc_v['Area'])
mc_v['Area'] = np.where((mc_v['Area'] == 'Mcdowell, WV'), 'McDowell, WV', mc_v['Area'])
mc_v.head()

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
455,2004,Georgia,"McDuffie, GA",7773,4846,"Bush, George W.",2899,"Kerry, John F.",28,1947,R,62.34,37.3,0.36
456,2004,Georgia,"McIntosh, GA",5379,2837,"Bush, George W.",2523,"Kerry, John F.",19,314,R,52.74,46.9,0.35
627,2004,Illinois,"McDonough, IL",14929,7656,"Bush, George W.",7119,"Kerry, John F.",154,537,R,51.28,47.69,1.03
628,2004,Illinois,"McHenry, IL",127948,76412,"Bush, George W.",50330,"Kerry, John F.",1206,26082,R,59.72,39.34,0.94
629,2004,Illinois,"McLean, IL",71620,41276,"Bush, George W.",29877,"Kerry, John F.",467,11399,R,57.63,41.72,0.65


In [149]:
#Drop the old county data from vote and add in the new figures
vote = vote.loc[~(vote['Area'].str.contains('Mc'))]
vote = pd.concat([vote, mc_v])
vote = vote.sort_values(by=['Year','State','Area'])
vote.loc[(vote['Area'].str.contains('Mc'))].head()

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
455,2004,Georgia,"McDuffie, GA",7773,4846,"Bush, George W.",2899,"Kerry, John F.",28,1947,R,62.34,37.3,0.36
456,2004,Georgia,"McIntosh, GA",5379,2837,"Bush, George W.",2523,"Kerry, John F.",19,314,R,52.74,46.9,0.35
627,2004,Illinois,"McDonough, IL",14929,7656,"Bush, George W.",7119,"Kerry, John F.",154,537,R,51.28,47.69,1.03
628,2004,Illinois,"McHenry, IL",127948,76412,"Bush, George W.",50330,"Kerry, John F.",1206,26082,R,59.72,39.34,0.94
629,2004,Illinois,"McLean, IL",71620,41276,"Bush, George W.",29877,"Kerry, John F.",467,11399,R,57.63,41.72,0.65


In [150]:
#Slice out the rows with De as a string under the GeoName and Area columns
de_i = inc.loc[(inc['GeoName'].str.contains('De'))]
print(list(de_i['GeoName'].unique()))
print()
de_g = gdp.loc[(gdp['GeoName'].str.contains('De'))]
print(list(de_g['GeoName'].unique()))
print()
de_v = vote.loc[(vote['Area'].str.contains('De'))]
print(list(de_v['Area'].unique()))

['DeKalb, AL', 'Desha, AR', 'Del Norte, CA', 'Delta, CO', 'Denver, CO', 'DeSoto, FL', 'DeKalb, GA', 'Decatur, GA', 'De Witt, IL', 'DeKalb, IL', 'DeKalb, IN', 'Dearborn, IN', 'Decatur, IN', 'Delaware, IN', 'Decatur, IA', 'Delaware, IA', 'Des Moines, IA', 'Decatur, KS', 'De Soto, LA', 'Delta, MI', 'DeSoto, MS', 'DeKalb, MO', 'Dent, MO', 'Deer Lodge, MT', 'Deuel, NE', 'De Baca, NM', 'Delaware, NY', 'Defiance, OH', 'Delaware, OH', 'Delaware, OK', 'Dewey, OK', 'Deschutes, OR', 'Delaware, PA', 'Deuel, SD', 'Dewey, SD', 'DeKalb, TN', 'Decatur, TN', 'DeWitt, TX', 'Deaf Smith, TX', 'Delta, TX', 'Denton, TX']

['DeKalb, AL', 'Desha, AR', 'Del Norte, CA', 'Delta, CO', 'Denver, CO', 'DeSoto, FL', 'DeKalb, GA', 'Decatur, GA', 'De Witt, IL', 'DeKalb, IL', 'DeKalb, IN', 'Dearborn, IN', 'Decatur, IN', 'Delaware, IN', 'Decatur, IA', 'Delaware, IA', 'Des Moines, IA', 'Decatur, KS', 'De Soto, LA', 'Delta, MI', 'DeSoto, MS', 'DeKalb, MO', 'Dent, MO', 'Deer Lodge, MT', 'Deuel, NE', 'De Baca, NM', 'Delaware

In [151]:
#Correct the misspelled counties among those listed above
de_v['Area'] = np.where((de_v['Area'] == 'Dekalb, AL'), 'DeKalb, AL', de_v['Area'])
de_v['Area'] = np.where((de_v['Area'] == 'Desoto, FL'), 'DeSoto, FL', de_v['Area'])
de_v['Area'] = np.where((de_v['Area'] == 'Dekalb, GA'), 'DeKalb, GA', de_v['Area'])
de_v['Area'] = np.where((de_v['Area'] == 'Dekalb, IL'), 'DeKalb, IL', de_v['Area'])
de_v['Area'] = np.where((de_v['Area'] == 'Dekalb, IN'), 'DeKalb, IN', de_v['Area'])
de_v['Area'] = np.where((de_v['Area'] == 'Dekalb, MO'), 'DeKalb, MO', de_v['Area'])
de_v['Area'] = np.where((de_v['Area'] == 'Dekalb, TN'), 'DeKalb, TN', de_v['Area'])
de_v['Area'] = np.where((de_v['Area'] == 'De Soto, MS'), 'DeSoto, MS', de_v['Area'])
de_v['Area'] = np.where((de_v['Area'] == 'De Witt, TX'), 'DeWitt, TX', de_v['Area'])
de_v.head()

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
24,2004,Alabama,"DeKalb, AL",24169,16904,"Bush, George W.",7092,"Kerry, John F.",173,9812,R,69.94,29.34,0.72
103,2004,Arkansas,"Desha, AR",4647,1729,"Bush, George W.",2851,"Kerry, John F.",67,1122,D,37.21,61.35,1.44
165,2004,California,"Del Norte, CA",9421,5356,"Bush, George W.",3892,"Kerry, John F.",173,1464,R,56.85,41.31,1.84
231,2004,Colorado,"Delta, CO",14159,9722,"Bush, George W.",4224,"Kerry, John F.",213,5498,R,68.66,29.83,1.5
232,2004,Colorado,"Denver, CO",238826,69903,"Bush, George W.",166135,"Kerry, John F.",2788,96232,D,29.27,69.56,1.17


In [152]:
#Drop the old county data from vote and add in the new figures
vote = vote.loc[~(vote['Area'].str.contains('De'))]
vote = pd.concat([vote, de_v])
vote = vote.sort_values(by=['Year','State','Area'])
vote.loc[(vote['Area'].str.contains('DeK'))].head()

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
24,2004,Alabama,"DeKalb, AL",24169,16904,"Bush, George W.",7092,"Kerry, John F.",173,9812,R,69.94,29.34,0.72
402,2004,Georgia,"DeKalb, GA",276509,73570,"Bush, George W.",200787,"Kerry, John F.",2152,127217,D,26.61,72.61,0.78
585,2004,Illinois,"DeKalb, IL",40768,21095,"Bush, George W.",19263,"Kerry, John F.",410,1832,R,51.74,47.25,1.01
684,2004,Indiana,"DeKalb, IN",15403,10468,"Bush, George W.",4810,"Kerry, John F.",125,5658,R,67.96,31.23,0.81
1485,2004,Missouri,"DeKalb, MO",4686,2941,"Bush, George W.",1707,"Kerry, John F.",38,1234,R,62.76,36.43,0.81


In [153]:
#Slice out the rows with La as a string under the GeoName and Area columns
la_i = inc.loc[(inc['GeoName'].str.contains('La'))]
print(list(la_i['GeoName'].unique()))
print()
la_g = gdp.loc[(gdp['GeoName'].str.contains('La'))]
print(list(la_g['GeoName'].unique()))
print()
la_v = vote.loc[(vote['Area'].str.contains('La'))]
print(list(la_v['Area'].unique()))

['Lamar, AL', 'Lauderdale, AL', 'Lawrence, AL', 'La Paz, AZ*', 'Lafayette, AR', 'Lawrence, AR', 'Lake, CA', 'Lassen, CA', 'La Plata, CO', 'Lake, CO', 'Larimer, CO', 'Las Animas, CO', 'Lafayette, FL', 'Lake, FL', 'Lamar, GA', 'Lanier, GA', 'Laurens, GA', 'Bear Lake, ID', 'Latah, ID', 'LaSalle, IL', 'Lake, IL', 'Lawrence, IL', 'LaPorte, IN', 'Lagrange, IN', 'Lake, IN', 'Lawrence, IN', 'Labette, KS', 'Lane, KS', 'Larue, KY', 'Laurel, KY', 'Lawrence, KY', 'LaSalle, LA', 'Lafayette, LA', 'Lafourche, LA', 'St. Landry, LA', 'Lake, MI', 'Lapeer, MI', 'Lac qui Parle, MN', 'Lake of the Woods, MN', 'Lake, MN', 'Mille Lacs, MN', 'Red Lake, MN', 'Lafayette, MS', 'Lamar, MS', 'Lauderdale, MS', 'Lawrence, MS', 'Laclede, MO', 'Lafayette, MO', 'Lawrence, MO', 'Lake, MT', 'Lancaster, NE', 'Lander, NV', 'St. Lawrence, NY', 'LaMoure, ND', 'Lake, OH', 'Lawrence, OH', 'Latimer, OK', 'Lake, OR', 'Lane, OR', 'Lackawanna, PA', 'Lancaster, PA', 'Lawrence, PA', 'Lancaster, SC', 'Laurens, SC', 'Lake, SD', 'Lawren

In [154]:
#Correct the misspelled counties among those listed above
la_i['GeoName'] = np.where((la_i['GeoName'] == 'La Paz, AZ*'), 'La Paz, AZ', la_i['GeoName'])
la_v['Area'] = np.where((la_v['Area'] == 'La Salle, IL'), 'LaSalle, IL', la_v['Area'])
la_v['Area'] = np.where((la_v['Area'] == 'Lagrange, IN'), 'LaGrange, IN', la_v['Area'])
la_i['GeoName'] = np.where((la_i['GeoName'] == 'Lagrange, IN'), 'LaGrange, IN', la_i['GeoName'])
la_g['GeoName'] = np.where((la_g['GeoName'] == 'Lagrange, IN'), 'LaGrange, IN', la_g['GeoName'])
la_v['Area'] = np.where((la_v['Area'] == 'La Porte, IN'), 'LaPorte, IN', la_v['Area'])
la_v['Area'] = np.where((la_v['Area'] == 'La Salle, LA'), 'LaSalle, LA', la_v['Area'])
la_v['Area'] = np.where((la_v['Area'] == 'Lake Of The Woods, MN'), 'Lake of the Woods, MN', la_v['Area'])
la_v['Area'] = np.where((la_v['Area'] == 'Lac Qui Parle, MN'), 'Lac qui Parle, MN', la_v['Area'])
la_v['Area'] = np.where((la_v['Area'] == 'La Moure, ND'), 'LaMoure, ND', la_v['Area'])
la_i['GeoName'] = np.where((la_i['GeoName'] == 'Oglala Lakota, SD*'), 'Oglala Lakota, SD', la_i['GeoName'])
la_g['GeoName'] = np.where((la_g['GeoName'] == 'Oglala Lakota, SD*'), 'Oglala Lakota, SD', la_g['GeoName'])
la_v['Area'] = np.where((la_v['Area'] == 'Fond Du Lac, WI'), 'Fond du Lac, WI', la_v['Area'])

In [155]:
#Drop the old county data from vote and add in the new figures
vote = vote.loc[~(vote['Area'].str.contains('La'))]
vote = pd.concat([vote, la_v])
vote = vote.sort_values(by=['Year','State','Area'])
vote.loc[(vote['Area'].str.contains('LaG'))]

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent
712,2004,Indiana,"LaGrange, IN",9003,6430,"Bush, George W.",2523,"Kerry, John F.",50,3907,R,71.42,28.02,0.56
3827,2008,Indiana,"LaGrange, IN",9491,5702,"McCain, John S. III",3663,"Obama, Barack H.",126,2039,R,60.08,38.59,1.33
6942,2012,Indiana,"LaGrange, IN",9316,6231,"Romney, W. Mitt",2898,"Obama, Barack H.",187,3333,R,66.88,31.11,2.01
10058,2016,Indiana,"LaGrange, IN",9566,7025,"Trump, Donald J.",2080,"Clinton, Hillary Rodham",461,4945,R,73.44,21.74,4.82


In [156]:
#Drop the old county data from inc and add in the new figures
inc = inc.loc[~(inc['GeoName'].str.contains('La'))]
inc = pd.concat([inc, la_i])
inc = inc.sort_values(by=['State','GeoName'])
inc.loc[(inc['GeoName'].str.contains('LaG'))]

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
2166,"""18087""","LaGrange, IN",Personal income (thousands of dollars),Thousands of dollars,853832,887766,930256,954446,936216,853480,954845,1063526,1172850,1270973,1351401,1399942,1452290,Indiana
2167,"""18087""","LaGrange, IN",Population (persons) 1/,Number of persons,36017,36283,36613,36931,37181,37119,37159,37469,37662,38089,38435,38616,39113,Indiana
2168,"""18087""","LaGrange, IN",Per capita personal income (dollars) 2/,Dollars,23706,24468,25408,25844,25180,22993,25696,28384,31141,33369,35161,36253,37131,Indiana


In [157]:
#Drop the old county data from gdp and add in the new figures
gdp = gdp.loc[~(gdp['GeoName'].str.contains('La'))]
gdp = pd.concat([gdp, la_g])
gdp = gdp.sort_values(by=['State','GeoName'])
gdp.loc[(gdp['GeoName'].str.contains('LaG'))].head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
24548,"""18087""","LaGrange, IN",All industry total,Thousands of dollars,982915,1034356,1106686,1107428,942405,772328,979551,1021049,1121630,1247494,1417204,1579409,1633498,Indiana
24549,"""18087""","LaGrange, IN",Private industries,Thousands of dollars,909626,958993,1018768,1029322,861269,689187,898382,941780,1041393,1166397,1329213,1492773,1546474,Indiana
24550,"""18087""","LaGrange, IN","Agriculture, forestry, fishing and hunting",Thousands of dollars,50959,30295,46107,56417,39009,22621,62298,91383,123916,146727,207899,257648,172511,Indiana
24551,"""18087""","LaGrange, IN","Mining, quarrying, and oil and gas extraction",Thousands of dollars,518,647,705,699,585,415,440,286,356,376,324,348,492,Indiana
24552,"""18087""","LaGrange, IN",Utilities,Thousands of dollars,1902,1983,2151,2205,2427,2568,2611,2796,2865,2910,3111,3676,3823,Indiana


In [158]:
#Fix Isle of Wight County, Virginia's description
vote['Area'] = np.where((vote.Area == 'Isle Of Wight, VA'), 'Isle of Wight, VA', vote['Area'])
inc['GeoName'] = np.where((inc.GeoName == 'Isle Of Wight, VA'), 'Isle of Wight, VA', inc['GeoName'])
gdp['GeoName'] = np.where((gdp.GeoName == 'Isle Of Wight, VA'), 'Isle of Wight, VA', gdp['GeoName'])
inc.loc[inc.GeoName == 'Isle of Wight, VA']

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
8607,"""51093""","Isle of Wight, VA",Personal income (thousands of dollars),Thousands of dollars,1110794,1200881,1339912,1446069,1553068,1567333,1599186,1660428,1707048,1725407,1754071,1835960,1877776,Virginia
8608,"""51093""","Isle of Wight, VA",Population (persons) 1/,Number of persons,31899,32566,33716,34581,34955,35270,35316,35296,35343,35526,35865,36159,36326,Virginia
8609,"""51093""","Isle of Wight, VA",Per capita personal income (dollars) 2/,Dollars,34822,36875,39741,41817,44430,44438,45282,47043,48299,48567,48908,50775,51692,Virginia


## Part 7: Continuing the Rest of the Cleanup Process

In [159]:
#Clean up the descriptions for some of the key measures under inc, and be sure to remove any asterisks from the
#GeoName column
inc['Description'] = np.where(inc['Description'].str.contains('Population'), 
                              'Population (persons)', inc['Description'])
inc['Description'] = np.where(inc['Description'].str.contains('capita'), 
                              'Per capita personal income (dollars)', inc['Description'])
inc['GeoName'] = inc['GeoName'].replace({'\*': ''}, regex=True)
inc.head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
3,"""01001""","Autauga, AL",Personal income (thousands of dollars),Thousands of dollars,1332117,1429633,1528773,1649284,1750849,1764260,1826597,1896347,1927064,1944878,2019288,2131416,2203281,Alabama
4,"""01001""","Autauga, AL",Population (persons),Number of persons,48366,49676,51328,52405,53277,54135,54773,55227,54954,54727,54893,54864,55243,Alabama
5,"""01001""","Autauga, AL",Per capita personal income (dollars),Dollars,27542,28779,29784,31472,32863,32590,33348,34337,35067,35538,36786,38849,39883,Alabama
6,"""01003""","Baldwin, AL",Personal income (thousands of dollars),Thousands of dollars,4744351,5243158,5863953,6222347,6280045,6218741,6618292,7067055,7274734,7448839,7887975,8444952,8956578,Alabama
7,"""01003""","Baldwin, AL",Population (persons),Number of persons,156266,162183,168121,172404,175827,179406,183112,186558,190145,194885,199183,202939,207601,Alabama


In [160]:
#Verify the data types found in inc
inc.dtypes

GeoFIPS        object
GeoName        object
Description    object
Unit           object
2004           object
2005           object
2006           object
2007           object
2008           object
2009           object
2010           object
2011           object
2012           object
2013           object
2014           object
2015           object
2016           object
State          object
dtype: object

In [161]:
#Check for any strange values under the year columns
print(inc['2004'].sort_values().head())
print()
print(inc['2005'].sort_values().head())
print()
print(inc['2006'].sort_values().head())
print()
print(inc['2007'].sort_values().head())
print()
print(inc['2008'].sort_values().head())
print()
print(inc['2009'].sort_values().head())
print()
print(inc['2010'].sort_values().head())
print()
print(inc['2011'].sort_values().head())
print()
print(inc['2012'].sort_values().head())
print()
print(inc['2013'].sort_values().head())
print()
print(inc['2014'].sort_values().head())
print()
print(inc['2015'].sort_values().head())
print()
print(inc['2016'].sort_values().head())

9324       (NA)
9326       (NA)
9325       (NA)
3756    1000105
3187      10008
Name: 2004, dtype: object

9324        (NA)
9326        (NA)
9325        (NA)
3618    10019742
7792       10031
Name: 2005, dtype: object

9324       (NA)
9325       (NA)
9326       (NA)
7077    1000323
5806     100056
Name: 2006, dtype: object

9326        (NA)
9325        (NA)
9324        (NA)
442       100043
8871    10011248
Name: 2007, dtype: object

9326       (NA)
9325       (NA)
9324       (NA)
3408    1000067
3168    1000824
Name: 2008, dtype: object

9326     (NA)
9324     (NA)
9325     (NA)
7276    10013
8560    10013
Name: 2009, dtype: object

9326      (NA)
9325      (NA)
9324      (NA)
1120    100050
5716     10009
Name: 2010, dtype: object

9326       (NA)
9324       (NA)
9325       (NA)
4714    1000040
696      100018
Name: 2011, dtype: object

9326      (NA)
9325      (NA)
9324      (NA)
5716     10003
7731    100032
Name: 2012, dtype: object

9325       (NA)
9324       (NA)
9326       (NA)

In [162]:
#Slice out the counties that have NA somewhere in their rows
nulls = inc.loc[(inc['2004'].str.contains('NA')) | (inc['2006'].str.contains('NA')) | 
                (inc['2006'].str.contains('NA')) | (inc['2007'].str.contains('NA')) | 
                (inc['2008'].str.contains('NA')) | (inc['2009'].str.contains('NA')) | 
                (inc['2010'].str.contains('NA')) | (inc['2011'].str.contains('NA')) | 
                (inc['2012'].str.contains('NA')) | (inc['2013'].str.contains('NA')) | 
                (inc['2014'].str.contains('NA')) | (inc['2015'].str.contains('NA')) | 
                (inc['2016'].str.contains('NA'))]
nulls

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
9324,"""55901""","Shawano (includes Menominee), WI",Personal income (thousands of dollars),Thousands of dollars,(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),Wisconsin
9325,"""55901""","Shawano (includes Menominee), WI",Population (persons),Number of persons,(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),Wisconsin
9326,"""55901""","Shawano (includes Menominee), WI",Per capita personal income (dollars),Dollars,(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),Wisconsin


In [163]:
#As a preliminary check, compare the lengths of the lists of counties in Wisconsin from both vote and inc
wi_v = vote.loc[(vote.State == 'Wisconsin')]
print(len(list(wi_v.Area.unique())))
print()
wi_i = inc.loc[(inc.State == 'Wisconsin')]
print(len(list(wi_i.GeoName.unique())))

72

73


In [164]:
#Check if any other states have counties with an includes string
inc.loc[(inc.GeoName.str.contains('includes'))]

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
1656,"""16043""","Fremont (includes Yellowstone Park), ID",Personal income (thousands of dollars),Thousands of dollars,261959,266152,294936,313468,324544,340368,336409,363815,365768,374712,385169,404644,418946,Idaho
1657,"""16043""","Fremont (includes Yellowstone Park), ID",Population (persons),Number of persons,12640,12610,12770,13005,13112,13173,13225,13109,12966,12872,12803,12812,12914,Idaho
1658,"""16043""","Fremont (includes Yellowstone Park), ID",Per capita personal income (dollars),Dollars,20725,21106,23096,24104,24752,25838,25437,27753,28210,29111,30084,31583,32441,Idaho
9324,"""55901""","Shawano (includes Menominee), WI",Personal income (thousands of dollars),Thousands of dollars,(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),Wisconsin
9325,"""55901""","Shawano (includes Menominee), WI",Population (persons),Number of persons,(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),Wisconsin
9326,"""55901""","Shawano (includes Menominee), WI",Per capita personal income (dollars),Dollars,(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),(NA),Wisconsin


In [165]:
#Using the last slice as a reference, compare the lengths of the lists of counties in Idaho from both vote and inc
id_v = vote.loc[(vote.State == 'Idaho')]
print(len(list(id_v.Area.unique())))
print()
id_i = inc.loc[(inc.State == 'Idaho')]
print(len(list(id_i.GeoName.unique())))

44

44


In [166]:
#Update Fremont County, Idaho's description and drop the null counties
inc['GeoName'] = np.where((inc['GeoName'] == 'Fremont (includes Yellowstone Park), ID'), 
                           'Fremont, ID', inc['GeoName'])
inc = inc.loc[~(inc.GeoName == 'Shawano (includes Menominee), WI')]
inc.loc[(inc['GeoName'] == 'Fremont, ID') | (inc['GeoName'] == 'Menominee, WI') | (inc['GeoName'] == 'Shawano, WI')]

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
1656,"""16043""","Fremont, ID",Personal income (thousands of dollars),Thousands of dollars,261959,266152,294936,313468,324544,340368,336409,363815,365768,374712,385169,404644,418946,Idaho
1657,"""16043""","Fremont, ID",Population (persons),Number of persons,12640,12610,12770,13005,13112,13173,13225,13109,12966,12872,12803,12812,12914,Idaho
1658,"""16043""","Fremont, ID",Per capita personal income (dollars),Dollars,20725,21106,23096,24104,24752,25838,25437,27753,28210,29111,30084,31583,32441,Idaho
9225,"""55078""","Menominee, WI",Personal income (thousands of dollars),Thousands of dollars,85021,84706,85862,93030,94484,104395,108167,110904,116036,120798,122794,134596,131558,Wisconsin
9226,"""55078""","Menominee, WI",Population (persons),Number of persons,4406,4353,4312,4293,4214,4191,4269,4372,4363,4383,4507,4509,4527,Wisconsin
9227,"""55078""","Menominee, WI",Per capita personal income (dollars),Dollars,19297,19459,19912,21670,22421,24909,25338,25367,26595,27561,27245,29851,29061,Wisconsin
9282,"""55115""","Shawano, WI",Personal income (thousands of dollars),Thousands of dollars,1118170,1142386,1192946,1266995,1321697,1293319,1354926,1420492,1462668,1491017,1555025,1589562,1608630,Wisconsin
9283,"""55115""","Shawano, WI",Population (persons),Number of persons,41273,41437,41639,41829,41894,41889,41939,41671,41509,41409,41345,41051,40952,Wisconsin
9284,"""55115""","Shawano, WI",Per capita personal income (dollars),Dollars,27092,27569,28650,30290,31549,30875,32307,34088,35237,36007,37611,38722,39281,Wisconsin


In [167]:
#Convert the year columns into floats
inc['2004'] = inc['2004'].astype(int)
inc['2005'] = inc['2005'].astype(int)
inc['2006'] = inc['2006'].astype(int)
inc['2007'] = inc['2007'].astype(int)
inc['2008'] = inc['2008'].astype(int)
inc['2009'] = inc['2009'].astype(int)
inc['2010'] = inc['2010'].astype(int)
inc['2011'] = inc['2011'].astype(int)
inc['2012'] = inc['2012'].astype(int)
inc['2013'] = inc['2013'].astype(int)
inc['2014'] = inc['2014'].astype(int)
inc['2015'] = inc['2015'].astype(int)
inc['2016'] = inc['2016'].astype(int)
inc.dtypes

GeoFIPS        object
GeoName        object
Description    object
Unit           object
2004            int64
2005            int64
2006            int64
2007            int64
2008            int64
2009            int64
2010            int64
2011            int64
2012            int64
2013            int64
2014            int64
2015            int64
2016            int64
State          object
dtype: object

In [168]:
#Check for the same counties with the includes string in gdp
gdp.loc[(gdp['GeoName'].str.contains('includes'))].head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
18768,"""16043""","Fremont (includes Yellowstone Park), ID",All industry total,Thousands of dollars,198986,188344,253684,273241,277549,368282,282016,317249,292306,306948,302587,318928,330677,Idaho
18769,"""16043""","Fremont (includes Yellowstone Park), ID",Private industries,Thousands of dollars,144590,131304,193161,210534,211768,302238,216260,252550,226764,240749,236676,251469,262182,Idaho
18770,"""16043""","Fremont (includes Yellowstone Park), ID","Agriculture, forestry, fishing and hunting",Thousands of dollars,25827,4154,(D),36197,35849,124759,31527,76367,46464,41912,(D),33863,30802,Idaho
18771,"""16043""","Fremont (includes Yellowstone Park), ID","Mining, quarrying, and oil and gas extraction",Thousands of dollars,175,574,644,746,990,800,792,752,519,483,575,462,391,Idaho
18772,"""16043""","Fremont (includes Yellowstone Park), ID",Utilities,Thousands of dollars,6769,6377,(D),7181,9050,10612,10007,9958,8941,10959,(D),10293,9818,Idaho


In [169]:
#Update Fremont County, Idaho's description again and test for any discrepancies in the counties' names
gdp['GeoName'] = np.where((gdp['GeoName'] == 'Fremont (includes Yellowstone Park), ID'), 
                           'Fremont, ID', gdp['GeoName'])
a = list(vote.Area.unique())
b = list(inc.GeoName.unique())
c = list(gdp.GeoName.unique())
print(list(set(a) - set(b)))
print()
print(list(set(b) - set(a)))
print()
print(list(set(a) - set(c)))
print()
print(list(set(c) - set(a)))
print()
print(list(set(b) - set(c)))
print()
print(list(set(c) - set(b)))
print()

['Du Page, IL', 'Richland , SC', 'Dona Ana, NM', 'Lewis And Clark, MT', 'St. John The Baptist, LA', 'Greenville , SC']

['Greenville, SC', 'St. John the Baptist, LA', 'Lewis and Clark, MT', 'Richland, SC', 'Doña Ana, NM', 'DuPage, IL']

['Richland , SC', 'Du Page, IL', 'Dona Ana, NM', 'Adams, CO', 'Lewis And Clark, MT', 'St. John The Baptist, LA', 'Broomfield, CO', 'Boulder, CO', 'Jefferson, CO', 'Weld, CO', 'Greenville , SC']

['Greenville, SC', 'St. John the Baptist, LA', 'Broomfield, CO*', 'Lewis and Clark, MT', 'Richland, SC', 'Jefferson, CO*', 'Doña Ana, NM', 'Adams, CO*', 'Weld, CO*', 'Boulder, CO*', 'DuPage, IL']

['Adams, CO', 'Broomfield, CO', 'Boulder, CO', 'Jefferson, CO', 'Weld, CO']

['Broomfield, CO*', 'Jefferson, CO*', 'Adams, CO*', 'Weld, CO*', 'Boulder, CO*']



In [170]:
#Correct the counties with such discrepancies as shown above and Make sure that the lengths of the lists of 
#individual counties match in each dataset
gdp['GeoName'] = np.where((gdp['GeoName'] == 'Adams, CO*'), 'Adams, CO', gdp['GeoName'])
gdp['GeoName'] = np.where((gdp['GeoName'] == 'Boulder, CO*'), 'Boulder, CO', gdp['GeoName'])
gdp['GeoName'] = np.where((gdp['GeoName'] == 'Broomfield, CO*'), 'Broomfield, CO', gdp['GeoName'])
gdp['GeoName'] = np.where((gdp['GeoName'] == 'Jefferson, CO*'), 'Jefferson, CO', gdp['GeoName'])
gdp['GeoName'] = np.where((gdp['GeoName'] == 'Weld, CO*'), 'Weld, CO', gdp['GeoName'])
vote['Area'] = np.where((vote['Area'] == 'Du Page, IL'), 'DuPage, IL', vote['Area'])
vote['Area'] = np.where((vote['Area'] == 'Greenville , SC'), 'Greenville, SC', vote['Area'])
vote['Area'] = np.where((vote['Area'] == 'St. John The Baptist, LA'), 'St. John the Baptist, LA', vote['Area'])
vote['Area'] = np.where((vote['Area'] == 'Richland , SC'), 'Richland, SC', vote['Area'])
vote['Area'] = np.where((vote['Area'] == 'Dona Ana, NM'), 'Doña Ana, NM', vote['Area'])
vote['Area'] = np.where((vote['Area'] == 'Lewis And Clark, MT'), 'Lewis and Clark, MT', vote['Area'])
print(len(vote.Area.unique()))
print()
print(len(inc.GeoName.unique()))
print()
print(len(gdp.GeoName.unique()))

3085

3085

3085


In [171]:
#List the unique industries under the Description column
list(gdp.Description.unique())

['All industry total',
 ' Private industries',
 '  Agriculture, forestry, fishing and hunting',
 '  Mining, quarrying, and oil and gas extraction',
 '  Utilities',
 '  Construction',
 '  Manufacturing',
 '   Durable goods manufacturing',
 '   Nondurable goods manufacturing',
 '  Wholesale trade',
 '  Retail trade',
 '  Transportation and warehousing',
 '  Information',
 '  Finance, insurance, real estate, rental, and leasing',
 '   Finance and insurance',
 '   Real estate and rental and leasing',
 '  Professional and business services',
 '   Professional, scientific, and technical services',
 '   Management of companies and enterprises',
 '   Administrative and support and waste management and remediation services',
 '  Educational services, health care, and social assistance',
 '   Educational services',
 '   Health care and social assistance',
 '  Arts, entertainment, recreation, accommodation, and food services',
 '   Arts, entertainment, and recreation',
 '   Accommodation and food

In [172]:
#Keep only the industries with broad descriptions, as these form the composition of each county's economy
gdp = gdp.loc[(gdp.Description == '  Utilities') | 
              (gdp.Description == '  Construction') | (gdp.Description == '  Manufacturing') | 
              (gdp.Description == '  Transportation and warehousing') | (gdp.Description == '  Information') | 
              (gdp.Description == '  Finance, insurance, real estate, rental, and leasing') | 
              (gdp.Description == '  Professional and business services') | 
              (gdp.Description == '  Educational services, health care, and social assistance') | 
              (gdp.Description == '  Arts, entertainment, recreation, accommodation, and food services') | 
              (gdp.Description == '  Other services (except government and government enterprises)') | 
              (gdp.Description == 'Government and government enterprises') |
              (gdp.Description == 'Natural resources and mining') | (gdp.Description == 'Trade')]

In [173]:
#Clean up the industries' descriptions (the Trade description is good as it is)
#gdp['Description'] = np.where((gdp.Description == 'All industry total'), 'Total GDP', gdp.Description)
gdp['Description'] = np.where((gdp.Description == '  Utilities'), 'Utilities', gdp.Description)
gdp['Description'] = np.where((gdp.Description == '  Construction'), 'Construction', gdp.Description)
gdp['Description'] = np.where((gdp.Description == '  Manufacturing'), 'Manufacturing', gdp.Description)
gdp['Description'] = np.where((gdp.Description == '  Transportation and warehousing'), 
                              'Transportation', gdp.Description)
gdp['Description'] = np.where((gdp.Description == '  Information'), 'Information', gdp.Description)
gdp['Description'] = np.where((gdp.Description == '  Finance, insurance, real estate, rental, and leasing'), 
                              'Finance', gdp.Description)
gdp['Description'] = np.where((gdp.Description == '  Professional and business services'), 
                              'Professional', gdp.Description)
gdp['Description'] = np.where((gdp.Description == '  Educational services, health care, and social assistance'), 
                              'Education and Health', gdp.Description)
gdp['Description'] = np.where((gdp.Description == '  Arts, entertainment, recreation, accommodation, and food services'), 
                              'Food and Recreation', gdp.Description)
gdp['Description'] = np.where((gdp.Description == '  Other services (except government and government enterprises)'), 
                              'Other', gdp.Description)
gdp['Description'] = np.where((gdp.Description == 'Government and government enterprises'),
                              'Government', gdp.Description)
gdp['Description'] = np.where((gdp.Description == 'Natural resources and mining'), 
                              'Natural Resources', gdp.Description)
gdp.head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
38,"""01001""","Autauga, AL",Utilities,Thousands of dollars,81339,67098,111611,129539,106810,145888,214034,305643,443613,376088,377660,464229,456310,Alabama
39,"""01001""","Autauga, AL",Construction,Thousands of dollars,49238,45636,49644,68217,50474,44517,38084,44495,46135,44961,41907,44482,47297,Alabama
40,"""01001""","Autauga, AL",Manufacturing,Thousands of dollars,192625,207433,228140,208033,185294,204019,204926,190401,201070,191982,226805,251270,280563,Alabama
45,"""01001""","Autauga, AL",Transportation,Thousands of dollars,14354,16462,19876,20234,15024,11910,12332,14944,16466,15429,14597,16877,16124,Alabama
46,"""01001""","Autauga, AL",Information,Thousands of dollars,11902,11854,12418,11649,9022,6240,5567,5212,8921,13805,17050,17600,25814,Alabama


In [174]:
#Check the data types in gdp
gdp.dtypes

GeoFIPS        object
GeoName        object
Description    object
Unit           object
2004           object
2005           object
2006           object
2007           object
2008           object
2009           object
2010           object
2011           object
2012           object
2013           object
2014           object
2015           object
2016           object
State          object
dtype: object

In [175]:
#Check for any strange values under the year columns
print(gdp['2004'].sort_values().head())
print()
print(gdp['2005'].sort_values().head())
print()
print(gdp['2006'].sort_values().head())
print()
print(gdp['2007'].sort_values().head())
print()
print(gdp['2008'].sort_values().head())
print()
print(gdp['2009'].sort_values().head())
print()
print(gdp['2010'].sort_values().head())
print()
print(gdp['2011'].sort_values().head())
print()
print(gdp['2012'].sort_values().head())
print()
print(gdp['2013'].sort_values().head())
print()
print(gdp['2014'].sort_values().head())
print()
print(gdp['2015'].sort_values().head())
print()
print(gdp['2016'].sort_values().head())

106483    (D)
78578     (D)
24968     (D)
24967     (D)
78600     (D)
Name: 2004, dtype: object

106483    (D)
17232     (D)
57940     (D)
57964     (D)
57965     (D)
Name: 2005, dtype: object

106483    (D)
52366     (D)
52371     (D)
11418     (D)
52372     (D)
Name: 2006, dtype: object

106483    (D)
58049     (D)
58050     (D)
14325     (D)
58054     (D)
Name: 2007, dtype: object

106483    (D)
50646     (D)
50655     (D)
50791     (D)
50774     (D)
Name: 2008, dtype: object

73230    (D)
82189    (D)
82190    (D)
82198    (D)
82206    (D)
Name: 2009, dtype: object

53180    (D)
83805    (D)
83821    (D)
16850    (D)
16841    (D)
Name: 2010, dtype: object

24355    (D)
21482    (D)
81150    (D)
81153    (D)
21465    (D)
Name: 2011, dtype: object

106483    (D)
31104     (D)
92710     (D)
31122     (D)
70527     (D)
Name: 2012, dtype: object

106483    (D)
27138     (D)
27137     (D)
27118     (D)
27109     (D)
Name: 2013, dtype: object

53180    (D)
43973    (D)
43990    (D)
44075 

In [176]:
gdp.tail()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
106477,"""56045""","Weston, WY",Food and Recreation,Thousands of dollars,(D),(D),4840,(D),(D),5393,5129,4769,(D),(D),(D),(D),4416,Wyoming
106480,"""56045""","Weston, WY",Other,Thousands of dollars,3756,4068,4817,4825,4680,3940,3714,3382,4130,4381,4178,3793,3431,Wyoming
106481,"""56045""","Weston, WY",Government,Thousands of dollars,35767,36914,39838,44529,48209,49955,50468,51598,54887,58915,59527,60272,61697,Wyoming
106482,"""56045""","Weston, WY",Natural Resources,Thousands of dollars,27781,54443,55318,63958,76119,51045,42306,39764,35902,(D),51059,33323,27200,Wyoming
106483,"""56045""","Weston, WY",Trade,Thousands of dollars,(D),(D),(D),(D),(D),20620,21630,21257,(D),(D),(D),(D),(D),Wyoming


In [177]:
#Seeing that there are D's found in the data, indicating unreported figures, replace these with zeros by default, and
#then convert the year columns into floats and billions
gdp['2004'] = np.where(gdp['2004'].str.contains('D'), 0 , gdp['2004'])
gdp['2005'] = np.where(gdp['2005'].str.contains('D'), 0 , gdp['2005'])
gdp['2006'] = np.where(gdp['2006'].str.contains('D'), 0 , gdp['2006'])
gdp['2007'] = np.where(gdp['2007'].str.contains('D'), 0 , gdp['2007'])
gdp['2008'] = np.where(gdp['2008'].str.contains('D'), 0 , gdp['2008'])
gdp['2009'] = np.where(gdp['2009'].str.contains('D'), 0 , gdp['2009'])
gdp['2010'] = np.where(gdp['2010'].str.contains('D'), 0 , gdp['2010'])
gdp['2011'] = np.where(gdp['2011'].str.contains('D'), 0 , gdp['2011'])
gdp['2012'] = np.where(gdp['2012'].str.contains('D'), 0 , gdp['2012'])
gdp['2013'] = np.where(gdp['2013'].str.contains('D'), 0 , gdp['2013'])
gdp['2014'] = np.where(gdp['2014'].str.contains('D'), 0 , gdp['2014'])
gdp['2015'] = np.where(gdp['2015'].str.contains('D'), 0 , gdp['2015'])
gdp['2016'] = np.where(gdp['2016'].str.contains('D'), 0 , gdp['2016'])
gdp['2004'] = gdp['2004'].astype(float)
gdp['2005'] = gdp['2005'].astype(float)
gdp['2006'] = gdp['2006'].astype(float)
gdp['2007'] = gdp['2007'].astype(float)
gdp['2008'] = gdp['2008'].astype(float)
gdp['2009'] = gdp['2009'].astype(float)
gdp['2010'] = gdp['2010'].astype(float)
gdp['2011'] = gdp['2011'].astype(float)
gdp['2012'] = gdp['2012'].astype(float)
gdp['2013'] = gdp['2013'].astype(float)
gdp['2014'] = gdp['2014'].astype(float)
gdp['2015'] = gdp['2015'].astype(float)
gdp['2016'] = gdp['2016'].astype(float)
gdp.iloc[:,4:17] = gdp.iloc[:,4:17].apply(lambda x: x*1000/(1*10**9))
gdp['Unit'] = str('Billions of dollars')
gdp.head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
38,"""01001""","Autauga, AL",Utilities,Billions of dollars,0.081339,0.067098,0.111611,0.129539,0.10681,0.145888,0.214034,0.305643,0.443613,0.376088,0.37766,0.464229,0.45631,Alabama
39,"""01001""","Autauga, AL",Construction,Billions of dollars,0.049238,0.045636,0.049644,0.068217,0.050474,0.044517,0.038084,0.044495,0.046135,0.044961,0.041907,0.044482,0.047297,Alabama
40,"""01001""","Autauga, AL",Manufacturing,Billions of dollars,0.192625,0.207433,0.22814,0.208033,0.185294,0.204019,0.204926,0.190401,0.20107,0.191982,0.226805,0.25127,0.280563,Alabama
45,"""01001""","Autauga, AL",Transportation,Billions of dollars,0.014354,0.016462,0.019876,0.020234,0.015024,0.01191,0.012332,0.014944,0.016466,0.015429,0.014597,0.016877,0.016124,Alabama
46,"""01001""","Autauga, AL",Information,Billions of dollars,0.011902,0.011854,0.012418,0.011649,0.009022,0.00624,0.005567,0.005212,0.008921,0.013805,0.01705,0.0176,0.025814,Alabama


In [178]:
#Make a temporary data frame that groups the GeoFIPS codes and counties together by the sums of all the industries
#listed
total_gdp = gdp.groupby(['GeoFIPS','GeoName'])['2004','2005','2006','2007','2008','2009','2010','2011',
                                               '2012','2013','2014','2015','2016'].sum()
total_gdp['Description'] = str('Total GDP')
total_gdp['Unit'] = str('Billions of dollars')
total_gdp = pd.DataFrame(total_gdp)
total_gdp

Unnamed: 0_level_0,Unnamed: 1_level_0,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,Description,Unit
GeoFIPS,GeoName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
"""01001""","Autauga, AL",0.946963,0.969849,1.080319,1.136582,1.043680,1.124581,1.201882,1.316843,1.487128,1.457700,1.503624,1.660498,1.716235,Total GDP,Billions of dollars
"""01003""","Baldwin, AL",3.927737,4.473373,4.369740,4.834320,4.646661,4.520188,4.705936,4.807109,5.051126,5.306440,5.512316,5.851579,6.183518,Total GDP,Billions of dollars
"""01005""","Barbour, AL",0.681321,0.672010,0.761096,0.672612,0.569649,0.564304,0.551170,0.637092,0.636427,0.740815,0.695952,0.697792,0.673165,Total GDP,Billions of dollars
"""01007""","Bibb, AL",0.239625,0.241001,0.269903,0.294965,0.281890,0.295128,0.322094,0.356409,0.319041,0.351260,0.363125,0.345204,0.347471,Total GDP,Billions of dollars
"""01009""","Blount, AL",0.504418,0.524833,0.572324,0.727791,0.727683,0.728032,0.724063,0.702790,0.776375,0.807416,0.793163,0.840102,0.800812,Total GDP,Billions of dollars
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"""56037""","Sweetwater, WY",1.061871,2.632418,3.243595,3.562393,1.713040,1.680884,1.711371,1.740426,1.837878,1.856389,1.821377,1.856552,1.889661,Total GDP,Billions of dollars
"""56039""","Teton, WY",1.182082,1.282137,1.362242,1.435217,1.493768,1.514109,1.499874,1.501772,1.637638,1.686538,1.842657,1.964749,2.051707,Total GDP,Billions of dollars
"""56041""","Uinta, WY",0.818684,0.973575,1.116528,1.189325,1.364746,1.077439,1.107394,1.033281,1.000607,0.997536,0.989476,0.910614,0.893873,Total GDP,Billions of dollars
"""56043""","Washakie, WY",0.249556,0.197730,0.289841,0.279983,0.328551,0.355975,0.356518,0.392372,0.370922,0.367674,0.399819,0.400259,0.363856,Total GDP,Billions of dollars


In [179]:
#Reset the index for the new data frame before concatenating it to gdp
total_gdp = total_gdp.reset_index()
gdp = pd.concat([gdp, total_gdp])
gdp.tail()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
3080,"""56037""","Sweetwater, WY",Total GDP,Billions of dollars,1.061871,2.632418,3.243595,3.562393,1.71304,1.680884,1.711371,1.740426,1.837878,1.856389,1.821377,1.856552,1.889661,
3081,"""56039""","Teton, WY",Total GDP,Billions of dollars,1.182082,1.282137,1.362242,1.435217,1.493768,1.514109,1.499874,1.501772,1.637638,1.686538,1.842657,1.964749,2.051707,
3082,"""56041""","Uinta, WY",Total GDP,Billions of dollars,0.818684,0.973575,1.116528,1.189325,1.364746,1.077439,1.107394,1.033281,1.000607,0.997536,0.989476,0.910614,0.893873,
3083,"""56043""","Washakie, WY",Total GDP,Billions of dollars,0.249556,0.19773,0.289841,0.279983,0.328551,0.355975,0.356518,0.392372,0.370922,0.367674,0.399819,0.400259,0.363856,
3084,"""56045""","Weston, WY",Total GDP,Billions of dollars,0.179308,0.147899,0.162386,0.375698,0.417122,0.253881,0.259986,0.275096,0.295615,0.152571,0.257657,0.275543,0.225217,


In [180]:
#Fill the null rows under the State column with the appropriate name
gdp['State'] = np.where((gdp['GeoName'].str.contains('AL')), 'Alabama', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'Alaska'), 'Alaska', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('AZ')), 'Arizona', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('AR')), 'Arkansas', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('CA')), 'California', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('CO')), 'Colorado', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('CT')), 'Connecticut', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('DE')), 'Delaware', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'] == 'District of Columbia'), 'District of Columbia', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('FL')), 'Florida', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('GA')), 'Georgia', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('HI')), 'Hawaii', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('ID')), 'Idaho', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('IL')), 'Illinois', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('IN')), 'Indiana', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('IA')), 'Iowa', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('KS')), 'Kansas', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('KY')), 'Kentucky', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('LA')), 'Louisiana', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('ME')), 'Maine', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('MD')), 'Maryland', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('MA')), 'Massachusetts', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('MI')), 'Michigan', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('MN')), 'Minnesota', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('MS')), 'Mississippi', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('MO')), 'Missouri', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('MT')), 'Montana', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('NE')), 'Nebraska', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('NV')), 'Nevada', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('NH')), 'New Hampshire', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('NJ')), 'New Jersey', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('NM')), 'New Mexico', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('NY')), 'New York', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('NC')), 'North Carolina', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('ND')), 'North Dakota', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('OH')), 'Ohio', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('OK')), 'Oklahoma', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('OR')), 'Oregon', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('PA')), 'Pennsylvania', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('RI')), 'Rhode Island', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('SC')), 'South Carolina', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('SD')), 'South Dakota', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('TN')), 'Tennessee', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('TX')), 'Texas', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('UT')), 'Utah', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('VT')), 'Vermont', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('VA')), 'Virginia', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('WA')), 'Washington', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('WV')), 'West Virginia', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('WI')), 'Wisconsin', gdp['State'])
gdp['State'] = np.where((gdp['GeoName'].str.contains('WY')), 'Wyoming', gdp['State'])
gdp = gdp.sort_values(by=['State','GeoName'])
gdp.tail()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
106480,"""56045""","Weston, WY",Other,Billions of dollars,0.003756,0.004068,0.004817,0.004825,0.00468,0.00394,0.003714,0.003382,0.00413,0.004381,0.004178,0.003793,0.003431,Wyoming
106481,"""56045""","Weston, WY",Government,Billions of dollars,0.035767,0.036914,0.039838,0.044529,0.048209,0.049955,0.050468,0.051598,0.054887,0.058915,0.059527,0.060272,0.061697,Wyoming
106482,"""56045""","Weston, WY",Natural Resources,Billions of dollars,0.027781,0.054443,0.055318,0.063958,0.076119,0.051045,0.042306,0.039764,0.035902,0.0,0.051059,0.033323,0.0272,Wyoming
106483,"""56045""","Weston, WY",Trade,Billions of dollars,0.0,0.0,0.0,0.0,0.0,0.02062,0.02163,0.021257,0.0,0.0,0.0,0.0,0.0,Wyoming
3084,"""56045""","Weston, WY",Total GDP,Billions of dollars,0.179308,0.147899,0.162386,0.375698,0.417122,0.253881,0.259986,0.275096,0.295615,0.152571,0.257657,0.275543,0.225217,Wyoming


In [181]:
#Test if any counties voted for a third party by a plurality
list(vote.PluralityParty.unique())

['R', 'D']

In [182]:
elec.head()

Unnamed: 0,Year,Area,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes
0,2004,Alabama,9,0,0
1,2004,Alaska,3,0,0
2,2004,Arizona,10,0,0
3,2004,Arkansas,6,0,0
4,2004,California,0,55,0


In [183]:
#Loop over each of the states to test whether (and when if so) their number of counties varies in vote
for state in states:
    print(str(state), str(' - '),
          (len(list(vote.loc[(vote.State == state) & (vote.Year == 2004)].Area.unique()))),
          (len(list(vote.loc[(vote.State == state) & (vote.Year == 2008)].Area.unique()))),
          (len(list(vote.loc[(vote.State == state) & (vote.Year == 2012)].Area.unique()))),
          (len(list(vote.loc[(vote.State == state) & (vote.Year == 2016)].Area.unique()))))
    print()

Alabama  -  67 67 67 67

Alaska  -  1 1 1 1

Arizona  -  15 15 15 15

Arkansas  -  75 75 75 75

California  -  58 58 58 58

Colorado  -  64 64 64 64

Connecticut  -  8 8 8 8

Delaware  -  3 3 3 3

District of Columbia  -  1 1 1 1

Florida  -  67 67 67 67

Georgia  -  159 159 159 159

Hawaii  -  4 4 4 4

Idaho  -  44 44 44 44

Illinois  -  102 102 102 102

Indiana  -  92 92 92 92

Iowa  -  99 99 99 99

Kansas  -  105 105 105 105

Kentucky  -  120 120 120 120

Louisiana  -  64 64 64 64

Maine  -  16 16 16 16

Maryland  -  24 24 24 24

Massachusetts  -  14 14 14 14

Michigan  -  83 83 83 83

Minnesota  -  87 87 87 87

Mississippi  -  82 82 82 82

Missouri  -  115 115 115 115

Montana  -  56 56 56 56

Nebraska  -  93 93 93 93

Nevada  -  17 17 17 17

New Hampshire  -  10 10 10 10

New Jersey  -  21 21 21 21

New Mexico  -  33 33 33 33

New York  -  62 62 62 62

North Carolina  -  100 100 100 100

North Dakota  -  53 53 53 53

Ohio  -  88 88 88 88

Oklahoma  -  77 77 77 77

Oregon  -  36 36

In [184]:
#Loop over each of the states to confirm that the number of counties for each state matches with inc, taking into
#account new or defunct counties over time
for state in states:
    print(str(state), str(' - '),
          (len(list(inc.loc[(inc.State == state)].GeoName.unique()))))
    print()

Alabama  -  67

Alaska  -  1

Arizona  -  15

Arkansas  -  75

California  -  58

Colorado  -  64

Connecticut  -  8

Delaware  -  3

District of Columbia  -  1

Florida  -  67

Georgia  -  159

Hawaii  -  4

Idaho  -  44

Illinois  -  102

Indiana  -  92

Iowa  -  99

Kansas  -  105

Kentucky  -  120

Louisiana  -  64

Maine  -  16

Maryland  -  24

Massachusetts  -  14

Michigan  -  83

Minnesota  -  87

Mississippi  -  82

Missouri  -  115

Montana  -  56

Nebraska  -  93

Nevada  -  17

New Hampshire  -  10

New Jersey  -  21

New Mexico  -  33

New York  -  62

North Carolina  -  100

North Dakota  -  53

Ohio  -  88

Oklahoma  -  77

Oregon  -  36

Pennsylvania  -  67

Rhode Island  -  5

South Carolina  -  46

South Dakota  -  66

Tennessee  -  95

Texas  -  254

Utah  -  29

Vermont  -  14

Virginia  -  105

Washington  -  39

West Virginia  -  55

Wisconsin  -  72

Wyoming  -  23



In [185]:
#For each state in the next couple of cells, slice from elec the rows corresponding to that state and for each year 
#replicate its rows to whatever number of counties it has (except for Alaska, the District of Columbia, and Hawaii).
#States that saw shifts in their number of counties will be attended to later
al_e = elec.loc[elec.Area == 'Alabama']
al_e = pd.DataFrame(np.repeat(al_e.values, 67, axis=0), columns=al_e.columns)
az_e = elec.loc[elec.Area == 'Arizona']
az_e = pd.DataFrame(np.repeat(az_e.values, 15, axis=0), columns=az_e.columns)
ar_e = elec.loc[elec.Area == 'Arkansas']
ar_e = pd.DataFrame(np.repeat(ar_e.values, 75, axis=0), columns=ar_e.columns)
ca_e = elec.loc[elec.Area == 'California']
ca_e = pd.DataFrame(np.repeat(ca_e.values, 58, axis=0), columns=ca_e.columns)
co_e = elec.loc[elec.Area == 'Colorado']
co_e = pd.DataFrame(np.repeat(co_e.values, 64, axis=0), columns=co_e.columns)
ct_e = elec.loc[elec.Area == 'Connecticut']
ct_e = pd.DataFrame(np.repeat(ct_e.values, 8, axis=0), columns=ct_e.columns)
de_e = elec.loc[elec.Area == 'Delaware']
de_e = pd.DataFrame(np.repeat(de_e.values, 3, axis=0), columns=de_e.columns)
fl_e = elec.loc[elec.Area == 'Florida']
fl_e = pd.DataFrame(np.repeat(fl_e.values, 67, axis=0), columns=fl_e.columns)
ga_e = elec.loc[elec.Area == 'Georgia']
ga_e = pd.DataFrame(np.repeat(ga_e.values, 159, axis=0), columns=ga_e.columns)
hi_e = elec.loc[elec.Area == 'Hawaii']
hi_e = pd.DataFrame(np.repeat(hi_e.values, 4, axis=0), columns=hi_e.columns)
id_e = elec.loc[elec.Area == 'Idaho']
id_e = pd.DataFrame(np.repeat(id_e.values, 44, axis=0), columns=id_e.columns)
il_e = elec.loc[elec.Area == 'Illinois']
il_e = pd.DataFrame(np.repeat(il_e.values, 102, axis=0), columns=il_e.columns)
in_e = elec.loc[elec.Area == 'Indiana']
in_e = pd.DataFrame(np.repeat(in_e.values, 92, axis=0), columns=in_e.columns)
ia_e = elec.loc[elec.Area == 'Iowa']
ia_e = pd.DataFrame(np.repeat(ia_e.values, 99, axis=0), columns=ia_e.columns)
ks_e = elec.loc[elec.Area == 'Kansas']
ks_e = pd.DataFrame(np.repeat(ks_e.values, 105, axis=0), columns=ks_e.columns)
ky_e = elec.loc[elec.Area == 'Kentucky']
ky_e = pd.DataFrame(np.repeat(ky_e.values, 120, axis=0), columns=ky_e.columns)
la_e = elec.loc[elec.Area == 'Louisiana']
la_e = pd.DataFrame(np.repeat(la_e.values, 64, axis=0), columns=la_e.columns)
me_e = elec.loc[elec.Area == 'Maine']
me_e = pd.DataFrame(np.repeat(me_e.values, 16, axis=0), columns=me_e.columns)
md_e = elec.loc[elec.Area == 'Maryland']
md_e = pd.DataFrame(np.repeat(md_e.values, 24, axis=0), columns=md_e.columns)
ma_e = elec.loc[elec.Area == 'Massachusetts']
ma_e = pd.DataFrame(np.repeat(ma_e.values, 14, axis=0), columns=ma_e.columns)
mi_e = elec.loc[elec.Area == 'Michigan']
mi_e = pd.DataFrame(np.repeat(mi_e.values, 83, axis=0), columns=mi_e.columns)
mn_e = elec.loc[elec.Area == 'Minnesota']
mn_e = pd.DataFrame(np.repeat(mn_e.values, 87, axis=0), columns=mn_e.columns)
ms_e = elec.loc[elec.Area == 'Mississippi']
ms_e = pd.DataFrame(np.repeat(ms_e.values, 82, axis=0), columns=ms_e.columns)
mo_e = elec.loc[elec.Area == 'Missouri']
mo_e = pd.DataFrame(np.repeat(mo_e.values, 115, axis=0), columns=mo_e.columns)
mt_e = elec.loc[elec.Area == 'Montana']
mt_e = pd.DataFrame(np.repeat(mt_e.values, 56, axis=0), columns=mt_e.columns)
ne_e = elec.loc[elec.Area == 'Nebraska']
ne_e = pd.DataFrame(np.repeat(ne_e.values, 93, axis=0), columns=ne_e.columns)
nv_e = elec.loc[elec.Area == 'Nevada']
nv_e = pd.DataFrame(np.repeat(nv_e.values, 17, axis=0), columns=nv_e.columns)
nh_e = elec.loc[elec.Area == 'New Hampshire']
nh_e = pd.DataFrame(np.repeat(nh_e.values, 10, axis=0), columns=nh_e.columns)
nj_e = elec.loc[elec.Area == 'New Jersey']
nj_e = pd.DataFrame(np.repeat(nj_e.values, 21, axis=0), columns=nj_e.columns)
nm_e = elec.loc[elec.Area == 'New Mexico']
nm_e = pd.DataFrame(np.repeat(nm_e.values, 33, axis=0), columns=nm_e.columns)
ny_e = elec.loc[elec.Area == 'New York']
ny_e = pd.DataFrame(np.repeat(ny_e.values, 62, axis=0), columns=ny_e.columns)
nc_e = elec.loc[elec.Area == 'North Carolina']
nc_e = pd.DataFrame(np.repeat(nc_e.values, 100, axis=0), columns=nc_e.columns)
nd_e = elec.loc[elec.Area == 'North Dakota']
nd_e = pd.DataFrame(np.repeat(nd_e.values, 53, axis=0), columns=nd_e.columns)
oh_e = elec.loc[elec.Area == 'Ohio']
oh_e = pd.DataFrame(np.repeat(oh_e.values, 88, axis=0), columns=oh_e.columns)
ok_e = elec.loc[elec.Area == 'Oklahoma']
ok_e = pd.DataFrame(np.repeat(ok_e.values, 77, axis=0), columns=ok_e.columns)
or_e = elec.loc[elec.Area == 'Oregon']
or_e = pd.DataFrame(np.repeat(or_e.values, 36, axis=0), columns=or_e.columns)
pa_e = elec.loc[elec.Area == 'Pennsylvania']
pa_e = pd.DataFrame(np.repeat(pa_e.values, 67, axis=0), columns=pa_e.columns)
ri_e = elec.loc[elec.Area == 'Rhode Island']
ri_e = pd.DataFrame(np.repeat(ri_e.values, 5, axis=0), columns=ri_e.columns)
sc_e = elec.loc[elec.Area == 'South Carolina']
sc_e = pd.DataFrame(np.repeat(sc_e.values, 46, axis=0), columns=sc_e.columns)
sd_e = elec.loc[elec.Area == 'South Dakota']
sd_e = pd.DataFrame(np.repeat(sd_e.values, 66, axis=0), columns=sd_e.columns)
tn_e = elec.loc[elec.Area == 'Tennessee']
tn_e = pd.DataFrame(np.repeat(tn_e.values, 95, axis=0), columns=tn_e.columns)
tx_e = elec.loc[elec.Area == 'Texas']
tx_e = pd.DataFrame(np.repeat(tx_e.values, 254, axis=0), columns=tx_e.columns)
ut_e = elec.loc[elec.Area == 'Utah']
ut_e = pd.DataFrame(np.repeat(ut_e.values, 29, axis=0), columns=ut_e.columns)
vt_e = elec.loc[elec.Area == 'Vermont']
vt_e = pd.DataFrame(np.repeat(vt_e.values, 14, axis=0), columns=vt_e.columns)
va_e = elec.loc[elec.Area == 'Virginia']
va_e = pd.DataFrame(np.repeat(va_e.values, 105, axis=0), columns=va_e.columns)
wa_e = elec.loc[elec.Area == 'Washington']
wa_e = pd.DataFrame(np.repeat(wa_e.values, 39, axis=0), columns=wa_e.columns)
wv_e = elec.loc[elec.Area == 'West Virginia']
wv_e = pd.DataFrame(np.repeat(wv_e.values, 55, axis=0), columns=wv_e.columns)
wi_e = elec.loc[elec.Area == 'Wisconsin']
wi_e = pd.DataFrame(np.repeat(wi_e.values, 72, axis=0), columns=wi_e.columns)
wy_e = elec.loc[elec.Area == 'Wyoming']
wy_e = pd.DataFrame(np.repeat(wy_e.values, 23, axis=0), columns=wy_e.columns)

In [186]:
hi_e

Unnamed: 0,Year,Area,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes
0,2004,Hawaii,0,4,0
1,2004,Hawaii,0,4,0
2,2004,Hawaii,0,4,0
3,2004,Hawaii,0,4,0
4,2008,Hawaii,0,4,0
5,2008,Hawaii,0,4,0
6,2008,Hawaii,0,4,0
7,2008,Hawaii,0,4,0
8,2012,Hawaii,0,4,0
9,2012,Hawaii,0,4,0


In [187]:
elec.dtypes

Year                    int64
Area                   object
ElectoralRepVotes       int64
ElectoralDemVotes       int64
ElectoralOtherVotes     int64
dtype: object

In [188]:
#Slice elec to include only Alaska and the District of Columbia before concatenating the other states in
elec = elec.loc[(elec.Area == 'Alaska') | (elec.Area == 'District of Columbia')]
elec = pd.concat([elec,al_e,az_e,ar_e,ca_e,co_e,ct_e,de_e,fl_e,ga_e,
                  hi_e,id_e,il_e,in_e,ia_e,ks_e,ky_e,la_e,me_e,md_e,
                  ma_e,mi_e,mn_e,ms_e,mo_e,mt_e,ne_e,nv_e,nh_e,nj_e,
                  nm_e,ny_e,nc_e,nd_e,oh_e,ok_e,or_e,pa_e,ri_e,sc_e,
                  sd_e,tn_e,tx_e,ut_e,vt_e,va_e,wa_e,wv_e,wi_e,wy_e])
elec = elec.sort_values(by=['Year','Area'])
elec.tail()

Unnamed: 0,Year,Area,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes
87,2016,Wyoming,3,0,0
88,2016,Wyoming,3,0,0
89,2016,Wyoming,3,0,0
90,2016,Wyoming,3,0,0
91,2016,Wyoming,3,0,0


In [189]:
#Make sure that all of the states have been added in to elec and confirm that elec and vote have the same number of
#rows
print(len(list(elec.Area.unique())))
print()
print(elec.shape)
print()
print(vote.shape)

51

(12340, 5)

(12340, 14)


In [190]:
#Reset the indices for vote and elec before concatenating the last three columns of the latter to the former
vote = vote.reset_index(drop=True)
elec = elec.reset_index(drop=True)
vote = pd.concat([vote, elec.iloc[:,2:]], axis=1)
print(vote.shape)
print()
vote.head()

(12340, 17)



Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes
0,2004,Alabama,"Autauga, AL",20081,15196,"Bush, George W.",4758,"Kerry, John F.",127,10438,R,75.67,23.69,0.63,9,0,0
1,2004,Alabama,"Baldwin, AL",69320,52971,"Bush, George W.",15599,"Kerry, John F.",750,37372,R,76.42,22.5,1.08,9,0,0
2,2004,Alabama,"Barbour, AL",10777,5899,"Bush, George W.",4832,"Kerry, John F.",46,1067,R,54.74,44.84,0.43,9,0,0
3,2004,Alabama,"Bibb, AL",7600,5472,"Bush, George W.",2089,"Kerry, John F.",39,3383,R,72.0,27.49,0.51,9,0,0
4,2004,Alabama,"Blount, AL",21504,17386,"Bush, George W.",3938,"Kerry, John F.",180,13448,R,80.85,18.31,0.84,9,0,0


In [191]:
inc.head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
3,"""01001""","Autauga, AL",Personal income (thousands of dollars),Thousands of dollars,1332117,1429633,1528773,1649284,1750849,1764260,1826597,1896347,1927064,1944878,2019288,2131416,2203281,Alabama
4,"""01001""","Autauga, AL",Population (persons),Number of persons,48366,49676,51328,52405,53277,54135,54773,55227,54954,54727,54893,54864,55243,Alabama
5,"""01001""","Autauga, AL",Per capita personal income (dollars),Dollars,27542,28779,29784,31472,32863,32590,33348,34337,35067,35538,36786,38849,39883,Alabama
6,"""01003""","Baldwin, AL",Personal income (thousands of dollars),Thousands of dollars,4744351,5243158,5863953,6222347,6280045,6218741,6618292,7067055,7274734,7448839,7887975,8444952,8956578,Alabama
7,"""01003""","Baldwin, AL",Population (persons),Number of persons,156266,162183,168121,172404,175827,179406,183112,186558,190145,194885,199183,202939,207601,Alabama


In [192]:
#For the purposes of testing the relationship between income and party preference, slice out the aggregate income data
#from inc and keep only the per-capita figures
inc = inc.loc[~(inc.Description.str.contains('thousands'))]
inc = inc.reset_index(drop=True)
inc.head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
0,"""01001""","Autauga, AL",Population (persons),Number of persons,48366,49676,51328,52405,53277,54135,54773,55227,54954,54727,54893,54864,55243,Alabama
1,"""01001""","Autauga, AL",Per capita personal income (dollars),Dollars,27542,28779,29784,31472,32863,32590,33348,34337,35067,35538,36786,38849,39883,Alabama
2,"""01003""","Baldwin, AL",Population (persons),Number of persons,156266,162183,168121,172404,175827,179406,183112,186558,190145,194885,199183,202939,207601,Alabama
3,"""01003""","Baldwin, AL",Per capita personal income (dollars),Dollars,30361,32329,34879,36092,35717,34663,36143,37881,38259,38222,39602,41613,43143,Alabama
4,"""01005""","Barbour, AL",Population (persons),Number of persons,28287,28027,27861,27757,27808,27657,27327,27341,27169,26937,26755,26283,25806,Alabama


In [193]:
#As a convenience, set the GeoName and State columns next to each other in inc and gdp, and update 
inc = inc.reindex(columns = ['GeoFIPS','State','GeoName','Description','Unit','2004','2005','2006','2007','2008',
                             '2009','2010','2011','2012','2013','2014','2015','2016'])
inc = inc.reindex(columns = ['GeoFIPS','State','GeoName','Description','Unit','2004','2005','2006','2007','2008',
                             '2009','2010','2011','2012','2013','2014','2015','2016'])

In [194]:
#Calculate the absolute and relative changes of the three variables from between the years in inc
inc['Total_Ab'] = inc['2016']-inc['2004']
inc['Total_Re'] = (inc['Total_Ab']/inc['2004'])*100
inc.head()

Unnamed: 0,GeoFIPS,State,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,Total_Ab,Total_Re
0,"""01001""",Alabama,"Autauga, AL",Population (persons),Number of persons,48366,49676,51328,52405,53277,54135,54773,55227,54954,54727,54893,54864,55243,6877,14.218666
1,"""01001""",Alabama,"Autauga, AL",Per capita personal income (dollars),Dollars,27542,28779,29784,31472,32863,32590,33348,34337,35067,35538,36786,38849,39883,12341,44.80793
2,"""01003""",Alabama,"Baldwin, AL",Population (persons),Number of persons,156266,162183,168121,172404,175827,179406,183112,186558,190145,194885,199183,202939,207601,51335,32.851036
3,"""01003""",Alabama,"Baldwin, AL",Per capita personal income (dollars),Dollars,30361,32329,34879,36092,35717,34663,36143,37881,38259,38222,39602,41613,43143,12782,42.100063
4,"""01005""",Alabama,"Barbour, AL",Population (persons),Number of persons,28287,28027,27861,27757,27808,27657,27327,27341,27169,26937,26755,26283,25806,-2481,-8.770813


In [195]:
#Some counties are missing votes because they were previously categorized alongside names under the Area column that
#didn't refer to themselves.  To correct these vote totals, start off by slicing Rhode Island in 2016, and use
#Wikipedia's corresponding article as a reference to make the necessary adjustments
ri16 = vote.loc[(vote.State == 'Rhode Island') & (vote.Year == 2016)]
ri16

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes
11537,2016,Rhode Island,"Bristol, RI",25475,8965,"Trump, Donald J.",14609,"Clinton, Hillary Rodham",1901,5644,D,35.19,57.35,7.46,0,4,0
11538,2016,Rhode Island,"Kent, RI",81453,37736,"Trump, Donald J.",37788,"Clinton, Hillary Rodham",5929,52,D,46.33,46.39,7.28,0,4,0
11539,2016,Rhode Island,"Newport, RI",41045,15077,"Trump, Donald J.",22851,"Clinton, Hillary Rodham",3117,7774,D,36.73,55.67,7.6,0,4,0
11540,2016,Rhode Island,"Providence, RI",248452,90881,"Trump, Donald J.",142879,"Clinton, Hillary Rodham",14692,51998,D,36.58,57.51,5.92,0,4,0
11541,2016,Rhode Island,"Washington, RI",66369,27230,"Trump, Donald J.",33741,"Clinton, Hillary Rodham",5398,6511,D,41.03,50.84,8.13,0,4,0


In [196]:
vote['RepVotes'] = np.where(((vote.Area == 'Kent, RI') & (vote.Year == 2016)), 38336, vote['RepVotes'])

vote['RepVotes'] = np.where(((vote.Area == 'Providence, RI') & (vote.Year == 2016)), 90210, vote['RepVotes'])
vote['DemVotes'] = np.where(((vote.Area == 'Providence, RI') & (vote.Year == 2016)), 143571, vote['DemVotes'])
vote['ThirdVotes'] = np.where(((vote.Area == 'Providence, RI') & (vote.Year == 2016)), 14693, vote['ThirdVotes'])

vote.loc[(vote.State == 'Rhode Island') & (vote.Year == 2016)]

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes
11537,2016,Rhode Island,"Bristol, RI",25475,8965,"Trump, Donald J.",14609,"Clinton, Hillary Rodham",1901,5644,D,35.19,57.35,7.46,0,4,0
11538,2016,Rhode Island,"Kent, RI",81453,38336,"Trump, Donald J.",37788,"Clinton, Hillary Rodham",5929,52,D,46.33,46.39,7.28,0,4,0
11539,2016,Rhode Island,"Newport, RI",41045,15077,"Trump, Donald J.",22851,"Clinton, Hillary Rodham",3117,7774,D,36.73,55.67,7.6,0,4,0
11540,2016,Rhode Island,"Providence, RI",248452,90210,"Trump, Donald J.",143571,"Clinton, Hillary Rodham",14693,51998,D,36.58,57.51,5.92,0,4,0
11541,2016,Rhode Island,"Washington, RI",66369,27230,"Trump, Donald J.",33741,"Clinton, Hillary Rodham",5398,6511,D,41.03,50.84,8.13,0,4,0


In [197]:
#Slice New York in 2016
ny16 = vote.loc[(vote.State == 'New York') & (vote.Year == 2016)]
ny16.iloc[10:20]

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes
11064,2016,New York,"Columbia, NY",30902,13756,"Trump, Donald J.",15284,"Clinton, Hillary Rodham",1862,1528,D,44.51,49.46,6.02,0,29,0
11065,2016,New York,"Cortland, NY",20244,9900,"Trump, Donald J.",8771,"Clinton, Hillary Rodham",1573,1129,R,48.9,43.33,7.77,0,29,0
11066,2016,New York,"Delaware, NY",19792,11942,"Trump, Donald J.",6627,"Clinton, Hillary Rodham",1223,5315,R,60.34,33.48,6.18,0,29,0
11067,2016,New York,"Dutchess, NY",130934,61797,"Trump, Donald J.",62261,"Clinton, Hillary Rodham",6876,464,D,47.2,47.55,5.25,0,29,0
11068,2016,New York,"Erie, NY",423625,188303,"Trump, Donald J.",215456,"Clinton, Hillary Rodham",19866,27153,D,44.45,50.86,4.69,0,29,0
11069,2016,New York,"Essex, NY",17218,7958,"Trump, Donald J.",7762,"Clinton, Hillary Rodham",1498,196,R,46.22,45.08,8.7,0,29,0
11070,2016,New York,"Franklin, NY",16952,8221,"Trump, Donald J.",7297,"Clinton, Hillary Rodham",1434,924,R,48.5,43.05,8.46,0,29,0
11071,2016,New York,"Fulton, NY",21214,13462,"Trump, Donald J.",6496,"Clinton, Hillary Rodham",1256,6966,R,63.46,30.62,5.92,0,29,0
11072,2016,New York,"Genesee, NY",26432,16915,"Trump, Donald J.",7650,"Clinton, Hillary Rodham",1867,9265,R,63.99,28.94,7.06,0,29,0
11073,2016,New York,"Greene, NY",22050,13073,"Trump, Donald J.",7405,"Clinton, Hillary Rodham",1572,5668,R,59.29,33.58,7.13,0,29,0


In [198]:
vote['RepVotes'] = np.where(((vote.Area == 'Dutchess, NY') & (vote.Year == 2016)), 61821, vote['RepVotes'])
vote['DemVotes'] = np.where(((vote.Area == 'Dutchess, NY') & (vote.Year == 2016)), 62285, vote['DemVotes'])
vote['ThirdVotes'] = np.where(((vote.Area == 'Dutchess, NY') & (vote.Year == 2016)), 6912, vote['ThirdVotes'])

vote.loc[(vote.State == 'New York') & (vote.Year == 2016) & (vote.Area.str.contains('D'))]

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes
11066,2016,New York,"Delaware, NY",19792,11942,"Trump, Donald J.",6627,"Clinton, Hillary Rodham",1223,5315,R,60.34,33.48,6.18,0,29,0
11067,2016,New York,"Dutchess, NY",130934,61821,"Trump, Donald J.",62285,"Clinton, Hillary Rodham",6912,464,D,47.2,47.55,5.25,0,29,0


In [199]:
#Slice Ohio in 2012
oh12 = vote.loc[(vote.State == 'Ohio') & (vote.Year == 2012)]
oh12.iloc[10:20]

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes
8194,2012,Ohio,"Champaign, OH",18421,11045,"Romney, W. Mitt",7044,"Obama, Barack H.",332,4001,R,59.96,38.24,1.8,0,18,0
8195,2012,Ohio,"Clark, OH",64144,31816,"Romney, W. Mitt",31298,"Obama, Barack H.",1030,518,R,49.6,48.79,1.6,0,18,0
8196,2012,Ohio,"Clermont, OH",96271,64208,"Romney, W. Mitt",30458,"Obama, Barack H.",1605,33750,R,66.7,31.64,1.67,0,18,0
8197,2012,Ohio,"Clinton, OH",18143,12009,"Romney, W. Mitt",5791,"Obama, Barack H.",343,6218,R,66.19,31.92,1.89,0,18,0
8198,2012,Ohio,"Columbiana, OH",46051,25251,"Romney, W. Mitt",19821,"Obama, Barack H.",979,5430,R,54.83,43.04,2.12,0,18,0
8199,2012,Ohio,"Coshocton, OH",15739,8390,"Romney, W. Mitt",6940,"Obama, Barack H.",409,1450,R,53.31,44.09,2.6,0,18,0
8200,2012,Ohio,"Crawford, OH",19803,11852,"Romney, W. Mitt",7507,"Obama, Barack H.",444,4345,R,59.85,37.91,2.24,0,18,0
8201,2012,Ohio,"Cuyahoga, OH",644331,190660,"Romney, W. Mitt",447273,"Obama, Barack H.",6398,256613,D,29.59,69.42,1.0,0,18,0
8202,2012,Ohio,"Darke, OH",25401,18108,"Romney, W. Mitt",6826,"Obama, Barack H.",467,11282,R,71.29,26.87,1.83,0,18,0
8203,2012,Ohio,"Defiance, OH",18288,10176,"Romney, W. Mitt",7732,"Obama, Barack H.",380,2444,R,55.64,42.28,2.08,0,18,0


In [200]:
vote['RepVotes'] = np.where(((vote.Area == 'Clark, OH') & (vote.Year == 2012)), 31820, vote['RepVotes'])
vote['DemVotes'] = np.where(((vote.Area == 'Clark, OH') & (vote.Year == 2012)), 31297, vote['DemVotes'])
vote['ThirdVotes'] = np.where(((vote.Area == 'Clark, OH') & (vote.Year == 2012)), 1184, vote['ThirdVotes'])

vote.loc[(vote.State == 'Ohio') & (vote.Year == 2012) & (vote.Area.str.contains('C'))]

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes
8193,2012,Ohio,"Carroll, OH",13262,7315,"Romney, W. Mitt",5543,"Obama, Barack H.",404,1772,R,55.16,41.8,3.04,0,18,0
8194,2012,Ohio,"Champaign, OH",18421,11045,"Romney, W. Mitt",7044,"Obama, Barack H.",332,4001,R,59.96,38.24,1.8,0,18,0
8195,2012,Ohio,"Clark, OH",64144,31820,"Romney, W. Mitt",31297,"Obama, Barack H.",1184,518,R,49.6,48.79,1.6,0,18,0
8196,2012,Ohio,"Clermont, OH",96271,64208,"Romney, W. Mitt",30458,"Obama, Barack H.",1605,33750,R,66.7,31.64,1.67,0,18,0
8197,2012,Ohio,"Clinton, OH",18143,12009,"Romney, W. Mitt",5791,"Obama, Barack H.",343,6218,R,66.19,31.92,1.89,0,18,0
8198,2012,Ohio,"Columbiana, OH",46051,25251,"Romney, W. Mitt",19821,"Obama, Barack H.",979,5430,R,54.83,43.04,2.12,0,18,0
8199,2012,Ohio,"Coshocton, OH",15739,8390,"Romney, W. Mitt",6940,"Obama, Barack H.",409,1450,R,53.31,44.09,2.6,0,18,0
8200,2012,Ohio,"Crawford, OH",19803,11852,"Romney, W. Mitt",7507,"Obama, Barack H.",444,4345,R,59.85,37.91,2.24,0,18,0
8201,2012,Ohio,"Cuyahoga, OH",644331,190660,"Romney, W. Mitt",447273,"Obama, Barack H.",6398,256613,D,29.59,69.42,1.0,0,18,0


In [201]:
#Slice Indiana in 2016
in16 = vote.loc[(vote.State == 'Indiana') & (vote.Year == 2016)]
in16.head(10)

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes
9923,2016,Indiana,"Adams, IN",13081,9642,"Trump, Donald J.",2802,"Clinton, Hillary Rodham",637,6840,R,73.71,21.42,4.87,11,0,0
9924,2016,Indiana,"Allen, IN",146809,83801,"Trump, Donald J.",55222,"Clinton, Hillary Rodham",7786,28579,R,57.08,37.61,5.31,11,0,0
9925,2016,Indiana,"Bartholomew, IN",32489,20639,"Trump, Donald J.",9841,"Clinton, Hillary Rodham",2009,10798,R,63.53,30.29,6.18,11,0,0
9926,2016,Indiana,"Benton, IN",3666,2579,"Trump, Donald J.",860,"Clinton, Hillary Rodham",227,1719,R,70.35,23.46,6.19,11,0,0
9927,2016,Indiana,"Blackford, IN",4840,3350,"Trump, Donald J.",1243,"Clinton, Hillary Rodham",247,2107,R,69.21,25.68,5.1,11,0,0
9928,2016,Indiana,"Boone, IN",32178,19654,"Trump, Donald J.",10181,"Clinton, Hillary Rodham",2343,9473,R,61.08,31.64,7.28,11,0,0
9929,2016,Indiana,"Brown, IN",7944,5015,"Trump, Donald J.",2518,"Clinton, Hillary Rodham",411,2497,R,63.13,31.7,5.17,11,0,0
9930,2016,Indiana,"Carroll, IN",8642,6273,"Trump, Donald J.",1891,"Clinton, Hillary Rodham",478,4382,R,72.59,21.88,5.53,11,0,0
9931,2016,Indiana,"Cass, IN",14204,9697,"Trump, Donald J.",3758,"Clinton, Hillary Rodham",749,5939,R,68.27,26.46,5.28,11,0,0
9932,2016,Indiana,"Clark, IN",51104,30012,"Trump, Donald J.",18791,"Clinton, Hillary Rodham",2301,11221,R,58.73,36.77,4.5,11,0,0


In [202]:
in16.iloc[10:20]

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes
9933,2016,Indiana,"Clay, IN",11279,8528,"Trump, Donald J.",2306,"Clinton, Hillary Rodham",445,6222,R,75.61,20.45,3.95,11,0,0
9934,2016,Indiana,"Clinton, IN",11936,8531,"Trump, Donald J.",2819,"Clinton, Hillary Rodham",586,5712,R,71.47,23.62,4.91,11,0,0
9935,2016,Indiana,"Crawford, IN",4591,3013,"Trump, Donald J.",1323,"Clinton, Hillary Rodham",255,1690,R,65.63,28.82,5.55,11,0,0
9936,2016,Indiana,"Daviess, IN",10768,8545,"Trump, Donald J.",1800,"Clinton, Hillary Rodham",423,6745,R,79.36,16.72,3.93,11,0,0
9937,2016,Indiana,"DeKalb, IN",16901,12054,"Trump, Donald J.",3941,"Clinton, Hillary Rodham",906,8113,R,71.32,23.32,5.36,11,0,0
9938,2016,Indiana,"Dearborn, IN",23984,18110,"Trump, Donald J.",4883,"Clinton, Hillary Rodham",991,13227,R,75.51,20.36,4.13,11,0,0
9939,2016,Indiana,"Decatur, IN",11427,8790,"Trump, Donald J.",2121,"Clinton, Hillary Rodham",516,6669,R,76.92,18.56,4.51,11,0,0
9940,2016,Indiana,"Delaware, IN",44699,24217,"Trump, Donald J.",18100,"Clinton, Hillary Rodham",2382,6117,R,54.18,40.49,5.33,11,0,0
9941,2016,Indiana,"Dubois, IN",19984,13365,"Trump, Donald J.",5389,"Clinton, Hillary Rodham",1230,7976,R,66.88,26.97,6.16,11,0,0
9942,2016,Indiana,"Elkhart, IN",65398,41810,"Trump, Donald J.",20667,"Clinton, Hillary Rodham",2921,21143,R,63.93,31.6,4.46,11,0,0


In [203]:
in16.iloc[20:30]

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes
9943,2016,Indiana,"Fayette, IN",9530,6839,"Trump, Donald J.",2252,"Clinton, Hillary Rodham",439,4587,R,71.76,23.63,4.61,11,0,0
9944,2016,Indiana,"Floyd, IN",37225,21427,"Trump, Donald J.",13939,"Clinton, Hillary Rodham",1859,7488,R,57.56,37.45,4.99,11,0,0
9945,2016,Indiana,"Fountain, IN",7475,5661,"Trump, Donald J.",1476,"Clinton, Hillary Rodham",338,4185,R,75.73,19.75,4.52,11,0,0
9946,2016,Indiana,"Franklin, IN",11007,8665,"Trump, Donald J.",1967,"Clinton, Hillary Rodham",375,6698,R,78.72,17.87,3.4,11,0,0
9947,2016,Indiana,"Fulton, IN",8374,6010,"Trump, Donald J.",1960,"Clinton, Hillary Rodham",404,4050,R,71.77,23.41,4.82,11,0,0
9948,2016,Indiana,"Gibson, IN",15481,11079,"Trump, Donald J.",3720,"Clinton, Hillary Rodham",682,7359,R,71.57,24.03,4.41,11,0,0
9949,2016,Indiana,"Grant, IN",25335,17009,"Trump, Donald J.",7029,"Clinton, Hillary Rodham",1297,9980,R,67.14,27.74,5.12,11,0,0
9950,2016,Indiana,"Greene, IN",13801,10277,"Trump, Donald J.",2929,"Clinton, Hillary Rodham",595,7348,R,74.47,21.22,4.31,11,0,0
9951,2016,Indiana,"Hamilton, IN",153630,87299,"Trump, Donald J.",57214,"Clinton, Hillary Rodham",9117,30085,R,56.82,37.24,5.93,11,0,0
9952,2016,Indiana,"Hancock, IN",36171,25067,"Trump, Donald J.",8903,"Clinton, Hillary Rodham",2201,16164,R,69.3,24.61,6.09,11,0,0


In [204]:
in16.iloc[30:40]

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes
9953,2016,Indiana,"Harrison, IN",18541,12933,"Trump, Donald J.",4776,"Clinton, Hillary Rodham",832,8157,R,69.75,25.76,4.48,11,0,0
9954,2016,Indiana,"Hendricks, IN",75258,48326,"Trump, Donald J.",22595,"Clinton, Hillary Rodham",4337,25731,R,64.21,30.02,5.76,11,0,0
9955,2016,Indiana,"Henry, IN",20155,13895,"Trump, Donald J.",5124,"Clinton, Hillary Rodham",1136,8771,R,68.94,25.42,5.64,11,0,0
9956,2016,Indiana,"Howard, IN",36754,23675,"Trump, Donald J.",11215,"Clinton, Hillary Rodham",1864,12460,R,64.41,30.51,5.07,11,0,0
9957,2016,Indiana,"Huntington, IN",16043,11649,"Trump, Donald J.",3506,"Clinton, Hillary Rodham",888,8143,R,72.61,21.85,5.54,11,0,0
9958,2016,Indiana,"Jackson, IN",17566,12857,"Trump, Donald J.",3843,"Clinton, Hillary Rodham",866,9014,R,73.19,21.88,4.93,11,0,0
9959,2016,Indiana,"Jasper, IN",13378,9382,"Trump, Donald J.",3329,"Clinton, Hillary Rodham",667,6053,R,70.13,24.88,4.98,11,0,0
9960,2016,Indiana,"Jay, IN",7986,5697,"Trump, Donald J.",1889,"Clinton, Hillary Rodham",400,3808,R,71.34,23.65,5.01,11,0,0
9961,2016,Indiana,"Jefferson, IN",13535,8538,"Trump, Donald J.",4325,"Clinton, Hillary Rodham",672,4213,R,63.08,31.95,4.97,11,0,0
9962,2016,Indiana,"Jennings, IN",11148,8222,"Trump, Donald J.",2364,"Clinton, Hillary Rodham",562,5858,R,73.75,21.21,5.05,11,0,0


In [205]:
in16.iloc[40:50]

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes
9963,2016,Indiana,"Johnson, IN",66500,45456,"Trump, Donald J.",17318,"Clinton, Hillary Rodham",3726,28138,R,68.35,26.04,5.61,11,0,0
9964,2016,Indiana,"Knox, IN",15516,11077,"Trump, Donald J.",3772,"Clinton, Hillary Rodham",667,7305,R,71.39,24.31,4.3,11,0,0
9965,2016,Indiana,"Kosciusko, IN",32074,23927,"Trump, Donald J.",6311,"Clinton, Hillary Rodham",1836,17616,R,74.6,19.68,5.72,11,0,0
9966,2016,Indiana,"LaGrange, IN",9566,7025,"Trump, Donald J.",2080,"Clinton, Hillary Rodham",461,4945,R,73.44,21.74,4.82,11,0,0
9967,2016,Indiana,"LaPorte, IN",45191,22678,"Trump, Donald J.",19795,"Clinton, Hillary Rodham",2718,2883,R,50.18,43.8,6.02,11,0,0
9968,2016,Indiana,"Lake, IN",200247,75565,"Trump, Donald J.",116896,"Clinton, Hillary Rodham",7786,41331,D,37.74,58.38,3.89,11,0,0
9969,2016,Indiana,"Lawrence, IN",19152,14035,"Trump, Donald J.",4210,"Clinton, Hillary Rodham",907,9825,R,73.28,21.98,4.73,11,0,0
9970,2016,Indiana,"Madison, IN",53660,32376,"Trump, Donald J.",18595,"Clinton, Hillary Rodham",2689,13781,R,60.34,34.65,5.01,11,0,0
9971,2016,Indiana,"Marion, IN",361070,130228,"Trump, Donald J.",212676,"Clinton, Hillary Rodham",18166,82448,D,36.07,58.9,5.03,11,0,0
9972,2016,Indiana,"Marshall, IN",18080,12286,"Trump, Donald J.",4798,"Clinton, Hillary Rodham",996,7488,R,67.95,26.54,5.51,11,0,0


In [206]:
in16.iloc[50:60]

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes
9973,2016,Indiana,"Martin, IN",4809,3697,"Trump, Donald J.",881,"Clinton, Hillary Rodham",231,2816,R,76.88,18.32,4.8,11,0,0
9974,2016,Indiana,"Miami, IN",13482,9975,"Trump, Donald J.",2766,"Clinton, Hillary Rodham",741,7209,R,73.99,20.52,5.49,11,0,0
9975,2016,Indiana,"Monroe, IN",58349,20527,"Trump, Donald J.",34183,"Clinton, Hillary Rodham",3639,13656,D,35.18,58.58,6.23,11,0,0
9976,2016,Indiana,"Montgomery, IN",15101,11051,"Trump, Donald J.",3362,"Clinton, Hillary Rodham",688,7689,R,73.18,22.26,4.56,11,0,0
9977,2016,Indiana,"Morgan, IN",35450,23671,"Trump, Donald J.",10037,"Clinton, Hillary Rodham",1742,13634,R,66.77,28.31,4.91,11,0,0
9978,2016,Indiana,"Newton, IN",5809,4077,"Trump, Donald J.",1404,"Clinton, Hillary Rodham",328,2673,R,70.18,24.17,5.65,11,0,0
9979,2016,Indiana,"Noble, IN",16890,12198,"Trump, Donald J.",3904,"Clinton, Hillary Rodham",788,8294,R,72.22,23.11,4.67,11,0,0
9980,2016,Indiana,"Ohio, IN",2917,2118,"Trump, Donald J.",686,"Clinton, Hillary Rodham",113,1432,R,72.61,23.52,3.87,11,0,0
9981,2016,Indiana,"Orange, IN",8193,5803,"Trump, Donald J.",2048,"Clinton, Hillary Rodham",342,3755,R,70.83,25.0,4.17,11,0,0
9982,2016,Indiana,"Owen, IN",8555,6151,"Trump, Donald J.",1946,"Clinton, Hillary Rodham",458,4205,R,71.9,22.75,5.35,11,0,0


In [207]:
in16.iloc[60:70]

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes
9983,2016,Indiana,"Parke, IN",6582,4863,"Trump, Donald J.",1441,"Clinton, Hillary Rodham",278,3422,R,73.88,21.89,4.22,11,0,0
9984,2016,Indiana,"Perry, IN",8055,4556,"Trump, Donald J.",3062,"Clinton, Hillary Rodham",437,1494,R,56.56,38.01,5.42,11,0,0
9985,2016,Indiana,"Pike, IN",5977,4398,"Trump, Donald J.",1297,"Clinton, Hillary Rodham",282,3101,R,73.58,21.7,4.72,11,0,0
9986,2016,Indiana,"Porter, IN",76492,38719,"Trump, Donald J.",33531,"Clinton, Hillary Rodham",4242,5188,R,50.62,43.84,5.55,11,0,0
9987,2016,Indiana,"Posey, IN",12448,8393,"Trump, Donald J.",3515,"Clinton, Hillary Rodham",540,4878,R,67.42,28.24,4.34,11,0,0
9988,2016,Indiana,"Pulaski, IN",5459,3854,"Trump, Donald J.",1327,"Clinton, Hillary Rodham",278,2527,R,70.6,24.31,5.09,11,0,0
9989,2016,Indiana,"Putnam, IN",14715,10637,"Trump, Donald J.",3356,"Clinton, Hillary Rodham",722,7281,R,72.29,22.81,4.91,11,0,0
9990,2016,Indiana,"Randolph, IN",10456,7515,"Trump, Donald J.",2446,"Clinton, Hillary Rodham",495,5069,R,71.87,23.39,4.73,11,0,0
9991,2016,Indiana,"Ripley, IN",12810,9806,"Trump, Donald J.",2471,"Clinton, Hillary Rodham",533,7335,R,76.55,19.29,4.16,11,0,0
9992,2016,Indiana,"Rush, IN",7214,5292,"Trump, Donald J.",1525,"Clinton, Hillary Rodham",397,3767,R,73.36,21.14,5.5,11,0,0


In [208]:
in16.iloc[70:80]

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes
9993,2016,Indiana,"Scott, IN",9101,6074,"Trump, Donald J.",2642,"Clinton, Hillary Rodham",385,3432,R,66.74,29.03,4.23,11,0,0
9994,2016,Indiana,"Shelby, IN",17994,12718,"Trump, Donald J.",4247,"Clinton, Hillary Rodham",1029,8471,R,70.68,23.6,5.72,11,0,0
9995,2016,Indiana,"Spencer, IN",9979,6572,"Trump, Donald J.",2861,"Clinton, Hillary Rodham",546,3711,R,65.86,28.67,5.47,11,0,0
9996,2016,Indiana,"St. Joseph, IN",109452,52019,"Trump, Donald J.",52247,"Clinton, Hillary Rodham",5186,228,D,47.53,47.74,4.74,11,0,0
9997,2016,Indiana,"Starke, IN",9226,6367,"Trump, Donald J.",2489,"Clinton, Hillary Rodham",370,3878,R,69.01,26.98,4.01,11,0,0
9998,2016,Indiana,"Steuben, IN",14557,10127,"Trump, Donald J.",3741,"Clinton, Hillary Rodham",689,6386,R,69.57,25.7,4.74,11,0,0
9999,2016,Indiana,"Sullivan, IN",8559,6138,"Trump, Donald J.",2113,"Clinton, Hillary Rodham",308,4025,R,71.71,24.69,3.6,11,0,0
10000,2016,Indiana,"Switzerland, IN",3699,2558,"Trump, Donald J.",930,"Clinton, Hillary Rodham",211,1628,R,69.15,25.14,5.7,11,0,0
10001,2016,Indiana,"Tippecanoe, IN",58317,30711,"Trump, Donald J.",27207,"Clinton, Hillary Rodham",399,3504,R,52.66,46.65,0.68,11,0,0
10002,2016,Indiana,"Tipton, IN",7510,5589,"Trump, Donald J.",1587,"Clinton, Hillary Rodham",334,4002,R,74.42,21.13,4.45,11,0,0


In [209]:
in16.iloc[80:]

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes
10003,2016,Indiana,"Union, IN",3291,2445,"Trump, Donald J.",715,"Clinton, Hillary Rodham",131,1730,R,74.29,21.73,3.98,11,0,0
10004,2016,Indiana,"Vanderburgh, IN",71957,40422,"Trump, Donald J.",28296,"Clinton, Hillary Rodham",3239,12126,R,56.18,39.32,4.5,11,0,0
10005,2016,Indiana,"Vermillion, IN",6939,4513,"Trump, Donald J.",2081,"Clinton, Hillary Rodham",345,2432,R,65.04,29.99,4.98,11,0,0
10006,2016,Indiana,"Vigo, IN",39796,21924,"Trump, Donald J.",15922,"Clinton, Hillary Rodham",1950,6002,R,55.09,40.01,4.9,11,0,0
10007,2016,Indiana,"Wabash, IN",13466,9819,"Trump, Donald J.",3018,"Clinton, Hillary Rodham",629,6801,R,72.92,22.41,4.67,11,0,0
10008,2016,Indiana,"Warren, IN",3930,2898,"Trump, Donald J.",839,"Clinton, Hillary Rodham",193,2059,R,73.74,21.35,4.91,11,0,0
10009,2016,Indiana,"Warrick, IN",29609,19133,"Trump, Donald J.",9086,"Clinton, Hillary Rodham",1390,10047,R,64.62,30.69,4.69,11,0,0
10010,2016,Indiana,"Washington, IN",11339,8204,"Trump, Donald J.",2636,"Clinton, Hillary Rodham",499,5568,R,72.35,23.25,4.4,11,0,0
10011,2016,Indiana,"Wayne, IN",25579,16028,"Trump, Donald J.",8322,"Clinton, Hillary Rodham",1229,7706,R,62.66,32.53,4.81,11,0,0
10012,2016,Indiana,"Wells, IN",13178,9999,"Trump, Donald J.",2585,"Clinton, Hillary Rodham",594,7414,R,75.88,19.62,4.51,11,0,0


In [210]:
vote['RepVotes'] = np.where((vote['Area'] == 'Whitley, IN') & (vote.Year == 2016), 11358, vote['RepVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Clay, IN') & (vote.Year == 2016), 1022, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'White, IN') & (vote.Year == 2016), 6893, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'White, IN') & (vote.Year == 2016), 2590, vote['DemVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'White, IN') & (vote.Year == 2016), 613, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Wells, IN') & (vote.Year == 2016), 10005, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Wells, IN') & (vote.Year == 2016), 2586, vote['DemVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Wells, IN') & (vote.Year == 2016), 695, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Washington, IN') & (vote.Year == 2016), 8209, vote['RepVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Washington, IN') & (vote.Year == 2016), 537, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Warrick, IN') & (vote.Year == 2016), 19113, vote['RepVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Warrick, IN') & (vote.Year == 2016), 1740, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Wabash, IN') & (vote.Year == 2016), 9821, vote['RepVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Wabash, IN') & (vote.Year == 2016), 713, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Vigo, IN') & (vote.Year == 2016), 21937, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Vigo, IN') & (vote.Year == 2016), 15931, vote['DemVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Vigo, IN') & (vote.Year == 2016), 2259, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Vanderburgh, IN') & (vote.Year == 2016), 40496, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Vanderburgh, IN') & (vote.Year == 2016), 28530, vote['DemVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Vanderburgh, IN') & (vote.Year == 2016), 4343, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Tippecanoe, IN') & (vote.Year == 2016), 30768, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Tippecanoe, IN') & (vote.Year == 2016), 27282, vote['DemVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Tippecanoe, IN') & (vote.Year == 2016), 5289, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Steuben, IN') & (vote.Year == 2016), 10133, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Steuben, IN') & (vote.Year == 2016), 3744, vote['DemVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'St. Joseph, IN') & (vote.Year == 2016), 52021, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'St. Joseph, IN') & (vote.Year == 2016), 52252, vote['DemVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'St. Joseph, IN') & (vote.Year == 2016), 7569, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Randolph, IN') & (vote.Year == 2016), 7517, vote['RepVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Randolph, IN') & (vote.Year == 2016), 560, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Posey, IN') & (vote.Year == 2016), 8404, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Posey, IN') & (vote.Year == 2016), 3521, vote['DemVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Posey, IN') & (vote.Year == 2016), 542, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Porter, IN') & (vote.Year == 2016), 38832, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Porter, IN') & (vote.Year == 2016), 33676, vote['DemVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Porter, IN') & (vote.Year == 2016), 5745, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Owen, IN') & (vote.Year == 2016), 6153, vote['RepVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Morgan, IN') & (vote.Year == 2016), 23674, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Morgan, IN') & (vote.Year == 2016), 6040, vote['DemVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Morgan, IN') & (vote.Year == 2016), 1732, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Montgomery, IN') & (vote.Year == 2016), 11059, vote['RepVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Montgomery, IN') & (vote.Year == 2016), 735, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Monroe, IN') & (vote.Year == 2016), 20592, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Monroe, IN') & (vote.Year == 2016), 34216, vote['DemVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Monroe, IN') & (vote.Year == 2016), 3646, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Marshall, IN') & (vote.Year == 2016), 12288, vote['RepVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Marshall, IN') & (vote.Year == 2016), 1155, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Marion, IN') & (vote.Year == 2016), 130360, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Marion, IN') & (vote.Year == 2016), 212899, vote['DemVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Marion, IN') & (vote.Year == 2016), 23620, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'LaPorte, IN') & (vote.Year == 2016), 22687, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'LaPorte, IN') & (vote.Year == 2016), 19798, vote['DemVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'LaPorte, IN') & (vote.Year == 2016), 3124, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Lake, IN') & (vote.Year == 2016), 75625, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Lake, IN') & (vote.Year == 2016), 116935, vote['DemVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Lake, IN') & (vote.Year == 2016), 10241, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Kosciusko, IN') & (vote.Year == 2016), 23935, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Kosciusko, IN') & (vote.Year == 2016), 6311, vote['DemVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Kosciusko, IN') & (vote.Year == 2016), 2193, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Jennings, IN') & (vote.Year == 2016), 8224, vote['RepVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Jefferson, IN') & (vote.Year == 2016), 8546, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Jefferson, IN') & (vote.Year == 2016), 4326, vote['DemVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Jefferson, IN') & (vote.Year == 2016), 781, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Jackson, IN') & (vote.Year == 2016), 3843, vote['RepVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Jackson, IN') & (vote.Year == 2016), 965, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Hendricks, IN') & (vote.Year == 2016), 48337, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Hendricks, IN') & (vote.Year == 2016), 22600, vote['DemVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Hendricks, IN') & (vote.Year == 2016), 5247, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Harrison, IN') & (vote.Year == 2016), 12943, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Harrison, IN') & (vote.Year == 2016), 4783, vote['DemVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Hancock, IN') & (vote.Year == 2016), 25074, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Hancock, IN') & (vote.Year == 2016), 8904, vote['DemVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Hancock, IN') & (vote.Year == 2016), 2518, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Hamilton, IN') & (vote.Year == 2016), 87404, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Hamilton, IN') & (vote.Year == 2016), 57263, vote['DemVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Hamilton, IN') & (vote.Year == 2016), 11291, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Grant, IN') & (vote.Year == 2016), 17008, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Grant, IN') & (vote.Year == 2016), 7010, vote['DemVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Grant, IN') & (vote.Year == 2016), 1554, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Gibson, IN') & (vote.Year == 2016), 11081, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Gibson, IN') & (vote.Year == 2016), 3721, vote['DemVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Franklin, IN') & (vote.Year == 2016), 8669, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Franklin, IN') & (vote.Year == 2016), 1969, vote['DemVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Fountain, IN') & (vote.Year == 2016), 5662, vote['RepVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Fountain, IN') & (vote.Year == 2016), 397, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Floyd, IN') & (vote.Year == 2016), 21432, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Floyd, IN') & (vote.Year == 2016), 13945, vote['DemVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Floyd, IN') & (vote.Year == 2016), 2645, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Elkhart, IN') & (vote.Year == 2016), 41867, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Elkhart, IN') & (vote.Year == 2016), 20740, vote['DemVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Elkhart, IN') & (vote.Year == 2016), 3629, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Delaware, IN') & (vote.Year == 2016), 24263, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Delaware, IN') & (vote.Year == 2016), 18153, vote['DemVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Delaware, IN') & (vote.Year == 2016), 3089, vote['ThirdVotes'])

vote['DemVotes'] = np.where((vote['Area'] == 'DeKalb, IN') & (vote.Year == 2016), 3942, vote['DemVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Decatur, IN') & (vote.Year == 2016), 8490, vote['RepVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Decatur, IN') & (vote.Year == 2016), 567, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Crawford, IN') & (vote.Year == 2016), 3015, vote['RepVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Crawford, IN') & (vote.Year == 2016), 304, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Clay, IN') & (vote.Year == 2016), 8531, vote['RepVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Clay, IN') & (vote.Year == 2016), 498, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Clark, IN') & (vote.Year == 2016), 30035, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Clark, IN') & (vote.Year == 2016), 18808, vote['DemVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Clark, IN') & (vote.Year == 2016), 2946, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Cass, IN') & (vote.Year == 2016), 9701, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Cass, IN') & (vote.Year == 2016), 3759, vote['DemVotes'])

vote['DemVotes'] = np.where((vote['Area'] == 'Carroll, IN') & (vote.Year == 2016), 1892, vote['DemVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Brown, IN') & (vote.Year == 2016), 5016, vote['RepVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Bartholomew, IN') & (vote.Year == 2016), 20640, vote['RepVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Bartholomew, IN') & (vote.Year == 2016), 2236, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Allen, IN') & (vote.Year == 2016), 83930, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Allen, IN') & (vote.Year == 2016), 55382, vote['DemVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Allen, IN') & (vote.Year == 2016), 9320, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Adams, IN') & (vote.Year == 2016), 9648, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Adams, IN') & (vote.Year == 2016), 2805, vote['DemVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Adams, IN') & (vote.Year == 2016), 741, vote['ThirdVotes'])

vote.loc[(vote.State == 'Indiana') & (vote.Year == 2016)].head()

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes
9923,2016,Indiana,"Adams, IN",13081,9648,"Trump, Donald J.",2805,"Clinton, Hillary Rodham",741,6840,R,73.71,21.42,4.87,11,0,0
9924,2016,Indiana,"Allen, IN",146809,83930,"Trump, Donald J.",55382,"Clinton, Hillary Rodham",9320,28579,R,57.08,37.61,5.31,11,0,0
9925,2016,Indiana,"Bartholomew, IN",32489,20640,"Trump, Donald J.",9841,"Clinton, Hillary Rodham",2236,10798,R,63.53,30.29,6.18,11,0,0
9926,2016,Indiana,"Benton, IN",3666,2579,"Trump, Donald J.",860,"Clinton, Hillary Rodham",227,1719,R,70.35,23.46,6.19,11,0,0
9927,2016,Indiana,"Blackford, IN",4840,3350,"Trump, Donald J.",1243,"Clinton, Hillary Rodham",247,2107,R,69.21,25.68,5.1,11,0,0


In [211]:
#Slice Hawaii across all four years
hi = vote.loc[(vote.State == 'Hawaii')]
hi

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes
518,2004,Hawaii,"Hawaii, HI",57702,22032,"Bush, George W.",35116,"Kerry, John F.",554,13084,D,38.18,60.86,0.96,0,4,0
519,2004,Hawaii,"Honolulu, HI",298547,144157,"Bush, George W.",152500,"Kerry, John F.",1890,8343,D,48.29,51.08,0.63,0,4,0
520,2004,Hawaii,"Kauai, HI",24876,9740,"Bush, George W.",14916,"Kerry, John F.",220,5176,D,39.15,59.96,0.88,0,4,0
521,2004,Hawaii,"Maui and Kalawao, HI",47430,18187,"Bush, George W.",28803,"Kerry, John F.",440,10616,D,38.34,60.73,0.93,0,4,0
3603,2008,Hawaii,"Hawaii, HI",66916,14866,"McCain, John S. III",50819,"Obama, Barack H.",1231,35953,D,22.22,75.94,1.84,0,4,0
3604,2008,Hawaii,"Honolulu, HI",306813,88164,"McCain, John S. III",214239,"Obama, Barack H.",4410,126075,D,28.74,69.83,1.44,0,4,0
3605,2008,Hawaii,"Kauai, HI",27224,6245,"McCain, John S. III",20416,"Obama, Barack H.",563,14171,D,22.94,74.99,2.07,0,4,0
3606,2008,Hawaii,"Maui and Kalawao, HI",51789,11154,"McCain, John S. III",39727,"Obama, Barack H.",908,28573,D,21.54,76.71,1.75,0,4,0
6688,2012,Hawaii,"Hawaii, HI",63454,14753,"Romney, W. Mitt",47224,"Obama, Barack H.",1477,32471,D,23.25,74.42,2.33,0,4,0
6689,2012,Hawaii,"Honolulu, HI",296742,88461,"Romney, W. Mitt",204349,"Obama, Barack H.",3932,115888,D,29.81,68.86,1.33,0,4,0


In [212]:
vote['RepVotes'] = np.where((vote['Area'] == 'Maui and Kalawao, HI') & (vote.Year == 2004), 18201, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Maui and Kalawao, HI') & (vote.Year == 2004), 28829, vote['DemVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Maui and Kalawao, HI') & (vote.Year == 2008), 11160, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Maui and Kalawao, HI') & (vote.Year == 2008), 39751, vote['DemVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Maui and Kalawao, HI') & (vote.Year == 2008), 909, vote['ThirdVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Maui and Kalawao, HI') & (vote.Year == 2012), 11604, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Maui and Kalawao, HI') & (vote.Year == 2012), 36077, vote['DemVotes'])

vote['RepVotes'] = np.where((vote['Area'] == 'Maui and Kalawao, HI') & (vote.Year == 2016), 13447, vote['RepVotes'])
vote['DemVotes'] = np.where((vote['Area'] == 'Maui and Kalawao, HI') & (vote.Year == 2016), 33494, vote['DemVotes'])
vote['ThirdVotes'] = np.where((vote['Area'] == 'Maui and Kalawao, HI') & (vote.Year == 2016), 5024, vote['ThirdVotes'])

In [213]:
#Update the TotalVotes, RepVotesTotalPercent, DemVotesTotalPercent, PluralityVotes, and PluralityParty columns to 
#reflect the rows that have had their values recently corrected in the previous cells.  Note in the PluralityVotes 
#column that positive values indicate a Republican plurality, and negative values a Democrat one
vote['TotalVotes'] = vote['RepVotes'] + vote['DemVotes'] + vote['ThirdVotes']
vote['RepVotesTotalPercent'] = (vote['RepVotes']/vote['TotalVotes'])*100
vote['DemVotesTotalPercent'] = (vote['DemVotes']/vote['TotalVotes'])*100
vote['ThirdVotesTotalPercent'] = (vote['ThirdVotes']/vote['TotalVotes'])*100
vote['PluralityVotes'] = vote['RepVotes'] - vote['DemVotes']
vote['PluralityParty'] = np.where((vote['RepVotes'] > vote['DemVotes']), 'R', vote['PluralityParty'])
vote['PluralityParty'] = np.where((vote['RepVotes'] < vote['DemVotes']), 'D', vote['PluralityParty'])
vote.loc[(vote.Area == 'Kent, RI')]

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes
2283,2004,Rhode Island,"Kent, RI",77842,33699,"Bush, George W.",42830,"Kerry, John F.",1313,-9131,D,43.291539,55.021711,1.68675,0,4,0
5368,2008,Rhode Island,"Kent, RI",84074,33780,"McCain, John S. III",48406,"Obama, Barack H.",1888,-14626,D,40.17889,57.575469,2.245641,0,4,0
8453,2012,Rhode Island,"Kent, RI",78923,31567,"Romney, W. Mitt",45564,"Obama, Barack H.",1792,-13997,D,39.997212,57.73222,2.270568,0,4,0
11538,2016,Rhode Island,"Kent, RI",82053,38336,"Trump, Donald J.",37788,"Clinton, Hillary Rodham",5929,548,R,46.721022,46.053161,7.225817,0,4,0


In [214]:
#List the candidates from each party
print(list(vote['RepCandidate'].unique()))
print()
print(list(vote['DemCandidate'].unique()))

['Bush, George W.', 'Bush, George W. ', 'McCain, John S. III', 'Romney, W. Mitt', 'Romney, W. Mitt  ', 'Trump, Donald J.', 'Trump, Donald J. ']

['Kerry, John F.', 'Kerry, John F. ', 'Obama, Barack H.', 'Obama, Barack H. ', 'Clinton, Hillary Rodham', 'Clinton, Hillary Rodham ']


In [215]:
#Fix the rows that have added an unnecessary space at the end of a candidate's name
vote['RepCandidate'] = np.where(vote['RepCandidate'] == 'Bush, George W. ', 'Bush, George W.', 
                                vote['RepCandidate'])
vote['RepCandidate'] = np.where(vote['RepCandidate'] == 'Romney, W. Mitt  ', 'Romney, W. Mitt', 
                                vote['RepCandidate'])
vote['RepCandidate'] = np.where(vote['RepCandidate'] == 'Trump, Donald J. ', 'Trump, Donald J.', 
                                vote['RepCandidate'])
vote['DemCandidate'] = np.where(vote['DemCandidate'] == 'Kerry, John F. ', 'Kerry, John F.', 
                                vote['DemCandidate'])
vote['DemCandidate'] = np.where(vote['DemCandidate'] == 'Obama, Barack H. ', 'Obama, Barack H.', 
                                vote['DemCandidate'])
vote['DemCandidate'] = np.where((vote['DemCandidate'] == 'Clinton, Hillary Rodham') | 
                                (vote['DemCandidate'] == 'Clinton, Hillary Rodham '), 'Clinton, Hillary R.', 
                                vote['DemCandidate'])
vote.tail()

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes
12335,2016,Wyoming,"Sweetwater, WY",17130,12154,"Trump, Donald J.",3231,"Clinton, Hillary R.",1745,8923,R,70.951547,18.861646,10.186807,3,0,0
12336,2016,Wyoming,"Teton, WY",12627,3921,"Trump, Donald J.",7314,"Clinton, Hillary R.",1392,-3393,D,31.052507,57.923497,11.023996,3,0,0
12337,2016,Wyoming,"Uinta, WY",8470,6154,"Trump, Donald J.",1202,"Clinton, Hillary R.",1114,4952,R,72.656434,14.191263,13.152302,3,0,0
12338,2016,Wyoming,"Washakie, WY",3814,2911,"Trump, Donald J.",532,"Clinton, Hillary R.",371,2379,R,76.324069,13.94861,9.72732,3,0,0
12339,2016,Wyoming,"Weston, WY",3526,3033,"Trump, Donald J.",299,"Clinton, Hillary R.",194,2734,R,86.018151,8.479864,5.501985,3,0,0


In [216]:
#Create a new data frame by grouping the State and Area columns of vote together to calculate the average voting
#percentages for the two major parties and third ones over each election in the data
vote_groups = vote.groupby(['State','Area'])['RepVotesTotalPercent','DemVotesTotalPercent',
                                             'ThirdVotesTotalPercent'].mean()
vote_groups = pd.DataFrame(vote_groups)
vote_groups = vote_groups.rename(columns = {'RepVotesTotalPercent':'RepAveragePercent',
                                            'DemVotesTotalPercent':'DemAveragePercent',
                                            'ThirdVotesTotalPercent':'ThirdAveragePercent'})
vote_groups

Unnamed: 0_level_0,Unnamed: 1_level_0,RepAveragePercent,DemAveragePercent,ThirdAveragePercent
State,Area,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alabama,"Autauga, AL",73.636951,24.944773,1.418276
Alabama,"Baldwin, AL",76.360048,21.812804,1.827148
Alabama,"Barbour, AL",51.365440,47.920566,0.713994
Alabama,"Bibb, AL",73.418284,25.371230,1.210486
Alabama,"Blount, AL",85.119047,13.399093,1.481859
...,...,...,...,...
Wyoming,"Sweetwater, WY",66.521484,28.405306,5.073210
Wyoming,"Teton, WY",38.900963,56.340835,4.758202
Wyoming,"Uinta, WY",73.530346,20.839022,5.630632
Wyoming,"Washakie, WY",75.704660,20.086538,4.208802


In [217]:
print(list(gdp.Description.unique()))

['Utilities', 'Construction', 'Manufacturing', 'Transportation', 'Information', 'Finance', 'Professional', 'Education and Health', 'Food and Recreation', 'Other', 'Government', 'Natural Resources', 'Trade', 'Total GDP']


In [218]:
gdp.head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
38,"""01001""","Autauga, AL",Utilities,Billions of dollars,0.081339,0.067098,0.111611,0.129539,0.10681,0.145888,0.214034,0.305643,0.443613,0.376088,0.37766,0.464229,0.45631,Alabama
39,"""01001""","Autauga, AL",Construction,Billions of dollars,0.049238,0.045636,0.049644,0.068217,0.050474,0.044517,0.038084,0.044495,0.046135,0.044961,0.041907,0.044482,0.047297,Alabama
40,"""01001""","Autauga, AL",Manufacturing,Billions of dollars,0.192625,0.207433,0.22814,0.208033,0.185294,0.204019,0.204926,0.190401,0.20107,0.191982,0.226805,0.25127,0.280563,Alabama
45,"""01001""","Autauga, AL",Transportation,Billions of dollars,0.014354,0.016462,0.019876,0.020234,0.015024,0.01191,0.012332,0.014944,0.016466,0.015429,0.014597,0.016877,0.016124,Alabama
46,"""01001""","Autauga, AL",Information,Billions of dollars,0.011902,0.011854,0.012418,0.011649,0.009022,0.00624,0.005567,0.005212,0.008921,0.013805,0.01705,0.0176,0.025814,Alabama


In [219]:
#For each industry, make a temporary data frame corresponding to its description, and from there calculate its GDP
#share for each year, using the rows pertaining to Total GDP as a reference
u1 = gdp.loc[(gdp.Description == 'Total GDP')]
u2 = gdp.loc[(gdp.Description == 'Utilities')]
u1['2004-I'] = u2['2004'].values
u1['2005-I'] = u2['2005'].values
u1['2006-I'] = u2['2006'].values
u1['2007-I'] = u2['2007'].values
u1['2008-I'] = u2['2008'].values
u1['2009-I'] = u2['2009'].values
u1['2010-I'] = u2['2010'].values
u1['2011-I'] = u2['2011'].values
u1['2012-I'] = u2['2012'].values
u1['2013-I'] = u2['2013'].values
u1['2014-I'] = u2['2014'].values
u1['2015-I'] = u2['2015'].values
u1['2016-I'] = u2['2016'].values
u1['2004-Share'] = (u1['2004-I']/u1['2004'])*100
u1['2005-Share'] = (u1['2005-I']/u1['2005'])*100
u1['2006-Share'] = (u1['2006-I']/u1['2006'])*100
u1['2007-Share'] = (u1['2007-I']/u1['2007'])*100
u1['2008-Share'] = (u1['2008-I']/u1['2008'])*100
u1['2009-Share'] = (u1['2009-I']/u1['2009'])*100
u1['2010-Share'] = (u1['2010-I']/u1['2010'])*100
u1['2011-Share'] = (u1['2011-I']/u1['2011'])*100
u1['2012-Share'] = (u1['2012-I']/u1['2012'])*100
u1['2013-Share'] = (u1['2013-I']/u1['2013'])*100
u1['2014-Share'] = (u1['2014-I']/u1['2014'])*100
u1['2015-Share'] = (u1['2015-I']/u1['2015'])*100
u1['2016-Share'] = (u1['2016-I']/u1['2016'])*100
u1['Description'] = str('Utilities')

c1 = gdp.loc[(gdp.Description == 'Total GDP')]
c2 = gdp.loc[(gdp.Description == 'Construction')]
c1['2004-I'] = c2['2004'].values
c1['2005-I'] = c2['2005'].values
c1['2006-I'] = c2['2006'].values
c1['2007-I'] = c2['2007'].values
c1['2008-I'] = c2['2008'].values
c1['2009-I'] = c2['2009'].values
c1['2010-I'] = c2['2010'].values
c1['2011-I'] = c2['2011'].values
c1['2012-I'] = c2['2012'].values
c1['2013-I'] = c2['2013'].values
c1['2014-I'] = c2['2014'].values
c1['2015-I'] = c2['2015'].values
c1['2016-I'] = c2['2016'].values
c1['2004-Share'] = (c1['2004-I']/c1['2004'])*100
c1['2005-Share'] = (c1['2005-I']/c1['2005'])*100
c1['2006-Share'] = (c1['2006-I']/c1['2006'])*100
c1['2007-Share'] = (c1['2007-I']/c1['2007'])*100
c1['2008-Share'] = (c1['2008-I']/c1['2008'])*100
c1['2009-Share'] = (c1['2009-I']/c1['2009'])*100
c1['2010-Share'] = (c1['2010-I']/c1['2010'])*100
c1['2011-Share'] = (c1['2011-I']/c1['2011'])*100
c1['2012-Share'] = (c1['2012-I']/c1['2012'])*100
c1['2013-Share'] = (c1['2013-I']/c1['2013'])*100
c1['2014-Share'] = (c1['2014-I']/c1['2014'])*100
c1['2015-Share'] = (c1['2015-I']/c1['2015'])*100
c1['2016-Share'] = (c1['2016-I']/c1['2016'])*100
c1['Description'] = str('Construction')

m1 = gdp.loc[(gdp.Description == 'Total GDP')]
m2 = gdp.loc[(gdp.Description == 'Manufacturing')]
m1['2004-I'] = m2['2004'].values
m1['2005-I'] = m2['2005'].values
m1['2006-I'] = m2['2006'].values
m1['2007-I'] = m2['2007'].values
m1['2008-I'] = m2['2008'].values
m1['2009-I'] = m2['2009'].values
m1['2010-I'] = m2['2010'].values
m1['2011-I'] = m2['2011'].values
m1['2012-I'] = m2['2012'].values
m1['2013-I'] = m2['2013'].values
m1['2014-I'] = m2['2014'].values
m1['2015-I'] = m2['2015'].values
m1['2016-I'] = m2['2016'].values
m1['2004-Share'] = (m1['2004-I']/m1['2004'])*100
m1['2005-Share'] = (m1['2005-I']/m1['2005'])*100
m1['2006-Share'] = (m1['2006-I']/m1['2006'])*100
m1['2007-Share'] = (m1['2007-I']/m1['2007'])*100
m1['2008-Share'] = (m1['2008-I']/m1['2008'])*100
m1['2009-Share'] = (m1['2009-I']/m1['2009'])*100
m1['2010-Share'] = (m1['2010-I']/m1['2010'])*100
m1['2011-Share'] = (m1['2011-I']/m1['2011'])*100
m1['2012-Share'] = (m1['2012-I']/m1['2012'])*100
m1['2013-Share'] = (m1['2013-I']/m1['2013'])*100
m1['2014-Share'] = (m1['2014-I']/m1['2014'])*100
m1['2015-Share'] = (m1['2015-I']/m1['2015'])*100
m1['2016-Share'] = (m1['2016-I']/m1['2016'])*100
m1['Description'] = str('Manufacturing')

tran1 = gdp.loc[(gdp.Description == 'Total GDP')]
tran2 = gdp.loc[(gdp.Description == 'Transportation')]
tran1['2004-I'] = tran2['2004'].values
tran1['2005-I'] = tran2['2005'].values
tran1['2006-I'] = tran2['2006'].values
tran1['2007-I'] = tran2['2007'].values
tran1['2008-I'] = tran2['2008'].values
tran1['2009-I'] = tran2['2009'].values
tran1['2010-I'] = tran2['2010'].values
tran1['2011-I'] = tran2['2011'].values
tran1['2012-I'] = tran2['2012'].values
tran1['2013-I'] = tran2['2013'].values
tran1['2014-I'] = tran2['2014'].values
tran1['2015-I'] = tran2['2015'].values
tran1['2016-I'] = tran2['2016'].values
tran1['2004-Share'] = (tran1['2004-I']/tran1['2004'])*100
tran1['2005-Share'] = (tran1['2005-I']/tran1['2005'])*100
tran1['2006-Share'] = (tran1['2006-I']/tran1['2006'])*100
tran1['2007-Share'] = (tran1['2007-I']/tran1['2007'])*100
tran1['2008-Share'] = (tran1['2008-I']/tran1['2008'])*100
tran1['2009-Share'] = (tran1['2009-I']/tran1['2009'])*100
tran1['2010-Share'] = (tran1['2010-I']/tran1['2010'])*100
tran1['2011-Share'] = (tran1['2011-I']/tran1['2011'])*100
tran1['2012-Share'] = (tran1['2012-I']/tran1['2012'])*100
tran1['2013-Share'] = (tran1['2013-I']/tran1['2013'])*100
tran1['2014-Share'] = (tran1['2014-I']/tran1['2014'])*100
tran1['2015-Share'] = (tran1['2015-I']/tran1['2015'])*100
tran1['2016-Share'] = (tran1['2016-I']/tran1['2016'])*100
tran1['Description'] = str('Transportation')

i1 = gdp.loc[(gdp.Description == 'Total GDP')]
i2 = gdp.loc[(gdp.Description == 'Information')]
i1['2004-I'] = i2['2004'].values
i1['2005-I'] = i2['2005'].values
i1['2006-I'] = i2['2006'].values
i1['2007-I'] = i2['2007'].values
i1['2008-I'] = i2['2008'].values
i1['2009-I'] = i2['2009'].values
i1['2010-I'] = i2['2010'].values
i1['2011-I'] = i2['2011'].values
i1['2012-I'] = i2['2012'].values
i1['2013-I'] = i2['2013'].values
i1['2014-I'] = i2['2014'].values
i1['2015-I'] = i2['2015'].values
i1['2016-I'] = i2['2016'].values
i1['2004-Share'] = (i1['2004-I']/i1['2004'])*100
i1['2005-Share'] = (i1['2005-I']/i1['2005'])*100
i1['2006-Share'] = (i1['2006-I']/i1['2006'])*100
i1['2007-Share'] = (i1['2007-I']/i1['2007'])*100
i1['2008-Share'] = (i1['2008-I']/i1['2008'])*100
i1['2009-Share'] = (i1['2009-I']/i1['2009'])*100
i1['2010-Share'] = (i1['2010-I']/i1['2010'])*100
i1['2011-Share'] = (i1['2011-I']/i1['2011'])*100
i1['2012-Share'] = (i1['2012-I']/i1['2012'])*100
i1['2013-Share'] = (i1['2013-I']/i1['2013'])*100
i1['2014-Share'] = (i1['2014-I']/i1['2014'])*100
i1['2015-Share'] = (i1['2015-I']/i1['2015'])*100
i1['2016-Share'] = (i1['2016-I']/i1['2016'])*100
i1['Description'] = str('Information')

f1 = gdp.loc[(gdp.Description == 'Total GDP')]
f2 = gdp.loc[(gdp.Description == 'Finance')]
f1['2004-I'] = f2['2004'].values
f1['2005-I'] = f2['2005'].values
f1['2006-I'] = f2['2006'].values
f1['2007-I'] = f2['2007'].values
f1['2008-I'] = f2['2008'].values
f1['2009-I'] = f2['2009'].values
f1['2010-I'] = f2['2010'].values
f1['2011-I'] = f2['2011'].values
f1['2012-I'] = f2['2012'].values
f1['2013-I'] = f2['2013'].values
f1['2014-I'] = f2['2014'].values
f1['2015-I'] = f2['2015'].values
f1['2016-I'] = f2['2016'].values
f1['2004-Share'] = (f1['2004-I']/f1['2004'])*100
f1['2005-Share'] = (f1['2005-I']/f1['2005'])*100
f1['2006-Share'] = (f1['2006-I']/f1['2006'])*100
f1['2007-Share'] = (f1['2007-I']/f1['2007'])*100
f1['2008-Share'] = (f1['2008-I']/f1['2008'])*100
f1['2009-Share'] = (f1['2009-I']/f1['2009'])*100
f1['2010-Share'] = (f1['2010-I']/f1['2010'])*100
f1['2011-Share'] = (f1['2011-I']/f1['2011'])*100
f1['2012-Share'] = (f1['2012-I']/f1['2012'])*100
f1['2013-Share'] = (f1['2013-I']/f1['2013'])*100
f1['2014-Share'] = (f1['2014-I']/f1['2014'])*100
f1['2015-Share'] = (f1['2015-I']/f1['2015'])*100
f1['2016-Share'] = (f1['2016-I']/f1['2016'])*100
f1['Description'] = str('Finance')

p1 = gdp.loc[(gdp.Description == 'Total GDP')]
p2 = gdp.loc[(gdp.Description == 'Professional')]
p1['2004-I'] = p2['2004'].values
p1['2005-I'] = p2['2005'].values
p1['2006-I'] = p2['2006'].values
p1['2007-I'] = p2['2007'].values
p1['2008-I'] = p2['2008'].values
p1['2009-I'] = p2['2009'].values
p1['2010-I'] = p2['2010'].values
p1['2011-I'] = p2['2011'].values
p1['2012-I'] = p2['2012'].values
p1['2013-I'] = p2['2013'].values
p1['2014-I'] = p2['2014'].values
p1['2015-I'] = p2['2015'].values
p1['2016-I'] = p2['2016'].values
p1['2004-Share'] = (p1['2004-I']/p1['2004'])*100
p1['2005-Share'] = (p1['2005-I']/p1['2005'])*100
p1['2006-Share'] = (p1['2006-I']/p1['2006'])*100
p1['2007-Share'] = (p1['2007-I']/p1['2007'])*100
p1['2008-Share'] = (p1['2008-I']/p1['2008'])*100
p1['2009-Share'] = (p1['2009-I']/p1['2009'])*100
p1['2010-Share'] = (p1['2010-I']/p1['2010'])*100
p1['2011-Share'] = (p1['2011-I']/p1['2011'])*100
p1['2012-Share'] = (p1['2012-I']/p1['2012'])*100
p1['2013-Share'] = (p1['2013-I']/p1['2013'])*100
p1['2014-Share'] = (p1['2014-I']/p1['2014'])*100
p1['2015-Share'] = (p1['2015-I']/p1['2015'])*100
p1['2016-Share'] = (p1['2016-I']/p1['2016'])*100
p1['Description'] = str('Professional')

e1 = gdp.loc[(gdp.Description == 'Total GDP')]
e2 = gdp.loc[(gdp.Description == 'Education and Health')]
e1['2004-I'] = e2['2004'].values
e1['2005-I'] = e2['2005'].values
e1['2006-I'] = e2['2006'].values
e1['2007-I'] = e2['2007'].values
e1['2008-I'] = e2['2008'].values
e1['2009-I'] = e2['2009'].values
e1['2010-I'] = e2['2010'].values
e1['2011-I'] = e2['2011'].values
e1['2012-I'] = e2['2012'].values
e1['2013-I'] = e2['2013'].values
e1['2014-I'] = e2['2014'].values
e1['2015-I'] = e2['2015'].values
e1['2016-I'] = e2['2016'].values
e1['2004-Share'] = (e1['2004-I']/e1['2004'])*100
e1['2005-Share'] = (e1['2005-I']/e1['2005'])*100
e1['2006-Share'] = (e1['2006-I']/e1['2006'])*100
e1['2007-Share'] = (e1['2007-I']/e1['2007'])*100
e1['2008-Share'] = (e1['2008-I']/e1['2008'])*100
e1['2009-Share'] = (e1['2009-I']/e1['2009'])*100
e1['2010-Share'] = (e1['2010-I']/e1['2010'])*100
e1['2011-Share'] = (e1['2011-I']/e1['2011'])*100
e1['2012-Share'] = (e1['2012-I']/e1['2012'])*100
e1['2013-Share'] = (e1['2013-I']/e1['2013'])*100
e1['2014-Share'] = (e1['2014-I']/e1['2014'])*100
e1['2015-Share'] = (e1['2015-I']/e1['2015'])*100
e1['2016-Share'] = (e1['2016-I']/e1['2016'])*100
e1['Description'] = str('Education and Health')

r1 = gdp.loc[(gdp.Description == 'Total GDP')]
r2 = gdp.loc[(gdp.Description == 'Food and Recreation')]
r1['2004-I'] = r2['2004'].values
r1['2005-I'] = r2['2005'].values
r1['2006-I'] = r2['2006'].values
r1['2007-I'] = r2['2007'].values
r1['2008-I'] = r2['2008'].values
r1['2009-I'] = r2['2009'].values
r1['2010-I'] = r2['2010'].values
r1['2011-I'] = r2['2011'].values
r1['2012-I'] = r2['2012'].values
r1['2013-I'] = r2['2013'].values
r1['2014-I'] = r2['2014'].values
r1['2015-I'] = r2['2015'].values
r1['2016-I'] = r2['2016'].values
r1['2004-Share'] = (r1['2004-I']/r1['2004'])*100
r1['2005-Share'] = (r1['2005-I']/r1['2005'])*100
r1['2006-Share'] = (r1['2006-I']/r1['2006'])*100
r1['2007-Share'] = (r1['2007-I']/r1['2007'])*100
r1['2008-Share'] = (r1['2008-I']/r1['2008'])*100
r1['2009-Share'] = (r1['2009-I']/r1['2009'])*100
r1['2010-Share'] = (r1['2010-I']/r1['2010'])*100
r1['2011-Share'] = (r1['2011-I']/r1['2011'])*100
r1['2012-Share'] = (r1['2012-I']/r1['2012'])*100
r1['2013-Share'] = (r1['2013-I']/r1['2013'])*100
r1['2014-Share'] = (r1['2014-I']/r1['2014'])*100
r1['2015-Share'] = (r1['2015-I']/r1['2015'])*100
r1['2016-Share'] = (r1['2016-I']/r1['2016'])*100
r1['Description'] = str('Food and Recreation')

o1 = gdp.loc[(gdp.Description == 'Total GDP')]
o2 = gdp.loc[(gdp.Description == 'Other')]
o1['2004-I'] = o2['2004'].values
o1['2005-I'] = o2['2005'].values
o1['2006-I'] = o2['2006'].values
o1['2007-I'] = o2['2007'].values
o1['2008-I'] = o2['2008'].values
o1['2009-I'] = o2['2009'].values
o1['2010-I'] = o2['2010'].values
o1['2011-I'] = o2['2011'].values
o1['2012-I'] = o2['2012'].values
o1['2013-I'] = o2['2013'].values
o1['2014-I'] = o2['2014'].values
o1['2015-I'] = o2['2015'].values
o1['2016-I'] = o2['2016'].values
o1['2004-Share'] = (o1['2004-I']/o1['2004'])*100
o1['2005-Share'] = (o1['2005-I']/o1['2005'])*100
o1['2006-Share'] = (o1['2006-I']/o1['2006'])*100
o1['2007-Share'] = (o1['2007-I']/o1['2007'])*100
o1['2008-Share'] = (o1['2008-I']/o1['2008'])*100
o1['2009-Share'] = (o1['2009-I']/o1['2009'])*100
o1['2010-Share'] = (o1['2010-I']/o1['2010'])*100
o1['2011-Share'] = (o1['2011-I']/o1['2011'])*100
o1['2012-Share'] = (o1['2012-I']/o1['2012'])*100
o1['2013-Share'] = (o1['2013-I']/o1['2013'])*100
o1['2014-Share'] = (o1['2014-I']/o1['2014'])*100
o1['2015-Share'] = (o1['2015-I']/o1['2015'])*100
o1['2016-Share'] = (o1['2016-I']/o1['2016'])*100
o1['Description'] = str('Other')

g1 = gdp.loc[(gdp.Description == 'Total GDP')]
g2 = gdp.loc[(gdp.Description == 'Government')]
g1['2004-I'] = g2['2004'].values
g1['2005-I'] = g2['2005'].values
g1['2006-I'] = g2['2006'].values
g1['2007-I'] = g2['2007'].values
g1['2008-I'] = g2['2008'].values
g1['2009-I'] = g2['2009'].values
g1['2010-I'] = g2['2010'].values
g1['2011-I'] = g2['2011'].values
g1['2012-I'] = g2['2012'].values
g1['2013-I'] = g2['2013'].values
g1['2014-I'] = g2['2014'].values
g1['2015-I'] = g2['2015'].values
g1['2016-I'] = g2['2016'].values
g1['2004-Share'] = (g1['2004-I']/g1['2004'])*100
g1['2005-Share'] = (g1['2005-I']/g1['2005'])*100
g1['2006-Share'] = (g1['2006-I']/g1['2006'])*100
g1['2007-Share'] = (g1['2007-I']/g1['2007'])*100
g1['2008-Share'] = (g1['2008-I']/g1['2008'])*100
g1['2009-Share'] = (g1['2009-I']/g1['2009'])*100
g1['2010-Share'] = (g1['2010-I']/g1['2010'])*100
g1['2011-Share'] = (g1['2011-I']/g1['2011'])*100
g1['2012-Share'] = (g1['2012-I']/g1['2012'])*100
g1['2013-Share'] = (g1['2013-I']/g1['2013'])*100
g1['2014-Share'] = (g1['2014-I']/g1['2014'])*100
g1['2015-Share'] = (g1['2015-I']/g1['2015'])*100
g1['2016-Share'] = (g1['2016-I']/g1['2016'])*100
g1['Description'] = str('Government')

n1 = gdp.loc[(gdp.Description == 'Total GDP')]
n2 = gdp.loc[(gdp.Description == 'Natural Resources')]
n1['2004-I'] = n2['2004'].values
n1['2005-I'] = n2['2005'].values
n1['2006-I'] = n2['2006'].values
n1['2007-I'] = n2['2007'].values
n1['2008-I'] = n2['2008'].values
n1['2009-I'] = n2['2009'].values
n1['2010-I'] = n2['2010'].values
n1['2011-I'] = n2['2011'].values
n1['2012-I'] = n2['2012'].values
n1['2013-I'] = n2['2013'].values
n1['2014-I'] = n2['2014'].values
n1['2015-I'] = n2['2015'].values
n1['2016-I'] = n2['2016'].values
n1['2004-Share'] = (n1['2004-I']/n1['2004'])*100
n1['2005-Share'] = (n1['2005-I']/n1['2005'])*100
n1['2006-Share'] = (n1['2006-I']/n1['2006'])*100
n1['2007-Share'] = (n1['2007-I']/n1['2007'])*100
n1['2008-Share'] = (n1['2008-I']/n1['2008'])*100
n1['2009-Share'] = (n1['2009-I']/n1['2009'])*100
n1['2010-Share'] = (n1['2010-I']/n1['2010'])*100
n1['2011-Share'] = (n1['2011-I']/n1['2011'])*100
n1['2012-Share'] = (n1['2012-I']/n1['2012'])*100
n1['2013-Share'] = (n1['2013-I']/n1['2013'])*100
n1['2014-Share'] = (n1['2014-I']/n1['2014'])*100
n1['2015-Share'] = (n1['2015-I']/n1['2015'])*100
n1['2016-Share'] = (n1['2016-I']/n1['2016'])*100
n1['Description'] = str('Natural Resources')

trad1 = gdp.loc[(gdp.Description == 'Total GDP')]
trad2 = gdp.loc[(gdp.Description == 'Trade')]
trad1['2004-I'] = trad2['2004'].values
trad1['2005-I'] = trad2['2005'].values
trad1['2006-I'] = trad2['2006'].values
trad1['2007-I'] = trad2['2007'].values
trad1['2008-I'] = trad2['2008'].values
trad1['2009-I'] = trad2['2009'].values
trad1['2010-I'] = trad2['2010'].values
trad1['2011-I'] = trad2['2011'].values
trad1['2012-I'] = trad2['2012'].values
trad1['2013-I'] = trad2['2013'].values
trad1['2014-I'] = trad2['2014'].values
trad1['2015-I'] = trad2['2015'].values
trad1['2016-I'] = trad2['2016'].values
trad1['2004-Share'] = (trad1['2004-I']/trad1['2004'])*100
trad1['2005-Share'] = (trad1['2005-I']/trad1['2005'])*100
trad1['2006-Share'] = (trad1['2006-I']/trad1['2006'])*100
trad1['2007-Share'] = (trad1['2007-I']/trad1['2007'])*100
trad1['2008-Share'] = (trad1['2008-I']/trad1['2008'])*100
trad1['2009-Share'] = (trad1['2009-I']/trad1['2009'])*100
trad1['2010-Share'] = (trad1['2010-I']/trad1['2010'])*100
trad1['2011-Share'] = (trad1['2011-I']/trad1['2011'])*100
trad1['2012-Share'] = (trad1['2012-I']/trad1['2012'])*100
trad1['2013-Share'] = (trad1['2013-I']/trad1['2013'])*100
trad1['2014-Share'] = (trad1['2014-I']/trad1['2014'])*100
trad1['2015-Share'] = (trad1['2015-I']/trad1['2015'])*100
trad1['2016-Share'] = (trad1['2016-I']/trad1['2016'])*100
trad1['Description'] = str('Trade')

print(list(gdp.columns))
print()
u1.head()

['GeoFIPS', 'GeoName', 'Description', 'Unit', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', 'State']



Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,...,2007-Share,2008-Share,2009-Share,2010-Share,2011-Share,2012-Share,2013-Share,2014-Share,2015-Share,2016-Share
0,"""01001""","Autauga, AL",Utilities,Billions of dollars,0.946963,0.969849,1.080319,1.136582,1.04368,1.124581,...,11.397242,10.23398,12.972654,17.808237,23.210284,29.830183,25.800096,25.116652,27.957215,26.587851
1,"""01003""","Baldwin, AL",Utilities,Billions of dollars,3.927737,4.473373,4.36974,4.83432,4.646661,4.520188,...,0.402104,0.457038,0.47852,0.468175,0.485469,0.443505,0.454429,0.470202,0.450323,0.476557
2,"""01005""","Barbour, AL",Utilities,Billions of dollars,0.681321,0.67201,0.761096,0.672612,0.569649,0.564304,...,1.097958,1.236551,1.299831,1.318649,1.282075,1.190082,1.165068,1.254109,1.327186,1.391635
3,"""01007""","Bibb, AL",Utilities,Billions of dollars,0.239625,0.241001,0.269903,0.294965,0.28189,0.295128,...,0.0,0.538863,0.0,0.0,0.522153,0.518429,0.494505,0.485783,0.0,0.59746
4,"""01009""","Blount, AL",Utilities,Billions of dollars,0.504418,0.524833,0.572324,0.727791,0.727683,0.728032,...,0.624218,0.586657,0.61261,0.524264,0.620811,0.530027,0.538508,0.626353,0.601951,0.653587


In [220]:
#Concatenate the industries and their shares together in a temporary data frame
test = pd.concat([u1, c1, m1, tran1, i1, f1, p1, e1, r1, o1, g1, n1, trad1])
test = test.drop(['2004','2005','2006','2007','2008','2009','2010','2011','2012','2013','2014','2015','2016'], axis=1)
test = test.rename(columns = {'2004-I':'2004', '2005-I':'2005', '2006-I':'2006', '2007-I':'2007',
                              '2008-I':'2008', '2009-I':'2009', '2010-I':'2010', '2011-I':'2011',
                              '2012-I':'2012', '2013-I':'2013', '2014-I':'2014', '2015-I':'2015', '2016-I':'2016'})
test = test.reset_index(drop=True)
print(test.shape)
print()
test.head()

(40105, 31)



Unnamed: 0,GeoFIPS,GeoName,Description,Unit,State,2004,2005,2006,2007,2008,...,2007-Share,2008-Share,2009-Share,2010-Share,2011-Share,2012-Share,2013-Share,2014-Share,2015-Share,2016-Share
0,"""01001""","Autauga, AL",Utilities,Billions of dollars,Alabama,0.081339,0.067098,0.111611,0.129539,0.10681,...,11.397242,10.23398,12.972654,17.808237,23.210284,29.830183,25.800096,25.116652,27.957215,26.587851
1,"""01003""","Baldwin, AL",Utilities,Billions of dollars,Alabama,0.01682,0.017515,0.018825,0.019439,0.021237,...,0.402104,0.457038,0.47852,0.468175,0.485469,0.443505,0.454429,0.470202,0.450323,0.476557
2,"""01005""","Barbour, AL",Utilities,Billions of dollars,Alabama,0.009381,0.006128,0.00734,0.007385,0.007044,...,1.097958,1.236551,1.299831,1.318649,1.282075,1.190082,1.165068,1.254109,1.327186,1.391635
3,"""01007""","Bibb, AL",Utilities,Billions of dollars,Alabama,0.002276,0.001535,0.001605,0.0,0.001519,...,0.0,0.538863,0.0,0.0,0.522153,0.518429,0.494505,0.485783,0.0,0.59746
4,"""01009""","Blount, AL",Utilities,Billions of dollars,Alabama,0.003801,0.003805,0.004299,0.004543,0.004269,...,0.624218,0.586657,0.61261,0.524264,0.620811,0.530027,0.538508,0.626353,0.601951,0.653587


In [221]:
#Slice from gdp the rows referring to aggregate GDP figures, and set its share columns to 100 by default 
total_gdp = gdp.loc[(gdp.Description == 'Total GDP')]
total_gdp['2004-Share'] = 100
total_gdp['2005-Share'] = 100
total_gdp['2006-Share'] = 100
total_gdp['2007-Share'] = 100
total_gdp['2008-Share'] = 100
total_gdp['2009-Share'] = 100
total_gdp['2010-Share'] = 100
total_gdp['2011-Share'] = 100
total_gdp['2012-Share'] = 100
total_gdp['2013-Share'] = 100
total_gdp['2014-Share'] = 100
total_gdp['2015-Share'] = 100
total_gdp['2016-Share'] = 100
total_gdp.head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,...,2007-Share,2008-Share,2009-Share,2010-Share,2011-Share,2012-Share,2013-Share,2014-Share,2015-Share,2016-Share
0,"""01001""","Autauga, AL",Total GDP,Billions of dollars,0.946963,0.969849,1.080319,1.136582,1.04368,1.124581,...,100,100,100,100,100,100,100,100,100,100
1,"""01003""","Baldwin, AL",Total GDP,Billions of dollars,3.927737,4.473373,4.36974,4.83432,4.646661,4.520188,...,100,100,100,100,100,100,100,100,100,100
2,"""01005""","Barbour, AL",Total GDP,Billions of dollars,0.681321,0.67201,0.761096,0.672612,0.569649,0.564304,...,100,100,100,100,100,100,100,100,100,100
3,"""01007""","Bibb, AL",Total GDP,Billions of dollars,0.239625,0.241001,0.269903,0.294965,0.28189,0.295128,...,100,100,100,100,100,100,100,100,100,100
4,"""01009""","Blount, AL",Total GDP,Billions of dollars,0.504418,0.524833,0.572324,0.727791,0.727683,0.728032,...,100,100,100,100,100,100,100,100,100,100


In [222]:
#Concatenate the industries and total GDP figures together as gdp
gdp = pd.concat([test, total_gdp])
gdp = gdp.sort_values(by=['State','GeoName'])
gdp = gdp.reset_index(drop=True)
gdp.head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,State,2004,2005,2006,2007,2008,...,2007-Share,2008-Share,2009-Share,2010-Share,2011-Share,2012-Share,2013-Share,2014-Share,2015-Share,2016-Share
0,"""01001""","Autauga, AL",Utilities,Billions of dollars,Alabama,0.081339,0.067098,0.111611,0.129539,0.10681,...,11.397242,10.23398,12.972654,17.808237,23.210284,29.830183,25.800096,25.116652,27.957215,26.587851
1,"""01001""","Autauga, AL",Construction,Billions of dollars,Alabama,0.049238,0.045636,0.049644,0.068217,0.050474,...,6.001943,4.836157,3.958541,3.168697,3.378915,3.102288,3.08438,2.787066,2.678835,2.755858
2,"""01001""","Autauga, AL",Manufacturing,Billions of dollars,Alabama,0.192625,0.207433,0.22814,0.208033,0.185294,...,18.303387,17.753909,18.141779,17.050426,14.458899,13.520692,13.1702,15.083891,15.132207,16.347586
3,"""01001""","Autauga, AL",Transportation,Billions of dollars,Alabama,0.014354,0.016462,0.019876,0.020234,0.015024,...,1.78025,1.439522,1.059061,1.026057,1.134835,1.107235,1.058448,0.970788,1.016382,0.939498
4,"""01001""","Autauga, AL",Information,Billions of dollars,Alabama,0.011902,0.011854,0.012418,0.011649,0.009022,...,1.024915,0.864441,0.554873,0.46319,0.395795,0.599881,0.94704,1.133927,1.059923,1.504106


In [223]:
gdp.tail()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,State,2004,2005,2006,2007,2008,...,2007-Share,2008-Share,2009-Share,2010-Share,2011-Share,2012-Share,2013-Share,2014-Share,2015-Share,2016-Share
43185,"""56045""","Weston, WY",Other,Billions of dollars,Wyoming,0.003756,0.004068,0.004817,0.004825,0.00468,...,1.284276,1.121974,1.551908,1.428538,1.229389,1.397087,2.87145,1.621536,1.376555,1.52342
43186,"""56045""","Weston, WY",Government,Billions of dollars,Wyoming,0.035767,0.036914,0.039838,0.044529,0.048209,...,11.852339,11.55753,19.676541,19.411814,18.756361,18.567055,38.614809,23.103195,21.8739,27.394468
43187,"""56045""","Weston, WY",Natural Resources,Billions of dollars,Wyoming,0.027781,0.054443,0.055318,0.063958,0.076119,...,17.02378,18.248618,20.105876,16.272415,14.45459,12.144851,0.0,19.816655,12.093575,12.077241
43188,"""56045""","Weston, WY",Trade,Billions of dollars,Wyoming,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,8.121915,8.319679,7.727121,0.0,0.0,0.0,0.0,0.0
43189,"""56045""","Weston, WY",Total GDP,Billions of dollars,Wyoming,0.179308,0.147899,0.162386,0.375698,0.417122,...,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0


In [224]:
inc.head()

Unnamed: 0,GeoFIPS,State,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,Total_Ab,Total_Re
0,"""01001""",Alabama,"Autauga, AL",Population (persons),Number of persons,48366,49676,51328,52405,53277,54135,54773,55227,54954,54727,54893,54864,55243,6877,14.218666
1,"""01001""",Alabama,"Autauga, AL",Per capita personal income (dollars),Dollars,27542,28779,29784,31472,32863,32590,33348,34337,35067,35538,36786,38849,39883,12341,44.80793
2,"""01003""",Alabama,"Baldwin, AL",Population (persons),Number of persons,156266,162183,168121,172404,175827,179406,183112,186558,190145,194885,199183,202939,207601,51335,32.851036
3,"""01003""",Alabama,"Baldwin, AL",Per capita personal income (dollars),Dollars,30361,32329,34879,36092,35717,34663,36143,37881,38259,38222,39602,41613,43143,12782,42.100063
4,"""01005""",Alabama,"Barbour, AL",Population (persons),Number of persons,28287,28027,27861,27757,27808,27657,27327,27341,27169,26937,26755,26283,25806,-2481,-8.770813


In [225]:
#Calculate the absolute and relative changes of the industries between the years in gdp
gdp['Total_Ab'] = gdp['2016']-gdp['2004']
gdp['Total_Re'] = (gdp['Total_Ab']/gdp['2004'])*100
gdp.head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,State,2004,2005,2006,2007,2008,...,2009-Share,2010-Share,2011-Share,2012-Share,2013-Share,2014-Share,2015-Share,2016-Share,Total_Ab,Total_Re
0,"""01001""","Autauga, AL",Utilities,Billions of dollars,Alabama,0.081339,0.067098,0.111611,0.129539,0.10681,...,12.972654,17.808237,23.210284,29.830183,25.800096,25.116652,27.957215,26.587851,0.374971,460.997799
1,"""01001""","Autauga, AL",Construction,Billions of dollars,Alabama,0.049238,0.045636,0.049644,0.068217,0.050474,...,3.958541,3.168697,3.378915,3.102288,3.08438,2.787066,2.678835,2.755858,-0.001941,-3.942077
2,"""01001""","Autauga, AL",Manufacturing,Billions of dollars,Alabama,0.192625,0.207433,0.22814,0.208033,0.185294,...,18.141779,17.050426,14.458899,13.520692,13.1702,15.083891,15.132207,16.347586,0.087938,45.652433
3,"""01001""","Autauga, AL",Transportation,Billions of dollars,Alabama,0.014354,0.016462,0.019876,0.020234,0.015024,...,1.059061,1.026057,1.134835,1.107235,1.058448,0.970788,1.016382,0.939498,0.00177,12.331058
4,"""01001""","Autauga, AL",Information,Billions of dollars,Alabama,0.011902,0.011854,0.012418,0.011649,0.009022,...,0.554873,0.46319,0.395795,0.599881,0.94704,1.133927,1.059923,1.504106,0.013912,116.887918


In [226]:
gdp.loc[(gdp.GeoName == 'Broomfield, CO')]

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,State,2004,2005,2006,2007,2008,...,2009-Share,2010-Share,2011-Share,2012-Share,2013-Share,2014-Share,2015-Share,2016-Share,Total_Ab,Total_Re
3122,"""08014""","Broomfield, CO",Utilities,Billions of dollars,Colorado,0.002455,0.002062,0.002615,0.001502,0.002105,...,0.016034,0.01752,0.017466,0.00737,0.0,0.017439,0.021022,0.0,-0.002455,-100.0
3123,"""08014""","Broomfield, CO",Construction,Billions of dollars,Colorado,0.074455,0.077835,0.089941,0.0852,0.093985,...,1.706718,1.72155,1.550387,1.479661,1.657521,2.081573,2.336259,2.653176,0.101219,135.946545
3124,"""08014""","Broomfield, CO",Manufacturing,Billions of dollars,Colorado,0.469278,0.580422,0.712,0.931904,0.982008,...,25.944161,31.990907,38.379126,42.129564,36.511122,31.148477,24.486842,18.741311,0.771635,164.430252
3125,"""08014""","Broomfield, CO",Transportation,Billions of dollars,Colorado,0.0,0.0,0.0,0.0,0.0,...,0.20042,0.119976,0.230525,0.243591,0.310658,0.367124,0.428994,0.479289,0.031735,inf
3126,"""08014""","Broomfield, CO",Information,Billions of dollars,Colorado,0.850312,0.805042,0.923302,1.057729,1.190607,...,26.236872,24.497734,20.699302,17.035462,18.778587,20.46934,25.075207,27.048447,0.940639,110.622807
3127,"""08014""","Broomfield, CO",Finance,Billions of dollars,Colorado,0.350061,0.401637,0.408606,0.452243,0.475091,...,10.093274,9.090797,8.163884,7.90088,8.199793,8.760645,10.178524,11.378344,0.40333,115.217062
3128,"""08014""","Broomfield, CO",Professional,Billions of dollars,Colorado,0.0,0.678762,0.79968,0.900922,0.912078,...,18.055703,15.57669,15.115287,16.05591,18.507427,20.133871,20.779262,22.302999,1.476742,inf
3129,"""08014""","Broomfield, CO",Education and Health,Billions of dollars,Colorado,0.053082,0.055336,0.061412,0.068645,0.079941,...,1.761119,1.688383,1.619386,1.636712,1.867131,2.064645,2.104428,2.338524,0.101758,191.699635
3130,"""08014""","Broomfield, CO",Food and Recreation,Billions of dollars,Colorado,0.100512,0.11887,0.160251,0.15391,0.166568,...,2.692961,2.40937,2.337308,2.291221,2.95646,2.692171,2.99429,3.460408,0.128611,127.955866
3131,"""08014""","Broomfield, CO",Other,Billions of dollars,Colorado,0.045572,0.049578,0.046677,0.045181,0.047588,...,0.996397,0.88088,0.851711,0.819426,0.842421,0.941544,1.0674,1.14472,0.030223,66.319231


In [227]:
#In Broomfield County, Colorado's case, it is a relatively young county, having only been around since 2001.  In other
#counties that likewise report zeros for a given industry, there are some cases where they report an infinite rate
#of growth between election years.  To solve this, convert these infinite rows to zeros by default, and likewise fill
#any null values that come from dividing by zero
gdp.replace([np.inf, -np.inf], 0, inplace=True)
gdp = gdp.fillna(0)
gdp.loc[(gdp.GeoName == 'Broomfield, CO')]

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,State,2004,2005,2006,2007,2008,...,2009-Share,2010-Share,2011-Share,2012-Share,2013-Share,2014-Share,2015-Share,2016-Share,Total_Ab,Total_Re
3122,"""08014""","Broomfield, CO",Utilities,Billions of dollars,Colorado,0.002455,0.002062,0.002615,0.001502,0.002105,...,0.016034,0.01752,0.017466,0.00737,0.0,0.017439,0.021022,0.0,-0.002455,-100.0
3123,"""08014""","Broomfield, CO",Construction,Billions of dollars,Colorado,0.074455,0.077835,0.089941,0.0852,0.093985,...,1.706718,1.72155,1.550387,1.479661,1.657521,2.081573,2.336259,2.653176,0.101219,135.946545
3124,"""08014""","Broomfield, CO",Manufacturing,Billions of dollars,Colorado,0.469278,0.580422,0.712,0.931904,0.982008,...,25.944161,31.990907,38.379126,42.129564,36.511122,31.148477,24.486842,18.741311,0.771635,164.430252
3125,"""08014""","Broomfield, CO",Transportation,Billions of dollars,Colorado,0.0,0.0,0.0,0.0,0.0,...,0.20042,0.119976,0.230525,0.243591,0.310658,0.367124,0.428994,0.479289,0.031735,0.0
3126,"""08014""","Broomfield, CO",Information,Billions of dollars,Colorado,0.850312,0.805042,0.923302,1.057729,1.190607,...,26.236872,24.497734,20.699302,17.035462,18.778587,20.46934,25.075207,27.048447,0.940639,110.622807
3127,"""08014""","Broomfield, CO",Finance,Billions of dollars,Colorado,0.350061,0.401637,0.408606,0.452243,0.475091,...,10.093274,9.090797,8.163884,7.90088,8.199793,8.760645,10.178524,11.378344,0.40333,115.217062
3128,"""08014""","Broomfield, CO",Professional,Billions of dollars,Colorado,0.0,0.678762,0.79968,0.900922,0.912078,...,18.055703,15.57669,15.115287,16.05591,18.507427,20.133871,20.779262,22.302999,1.476742,0.0
3129,"""08014""","Broomfield, CO",Education and Health,Billions of dollars,Colorado,0.053082,0.055336,0.061412,0.068645,0.079941,...,1.761119,1.688383,1.619386,1.636712,1.867131,2.064645,2.104428,2.338524,0.101758,191.699635
3130,"""08014""","Broomfield, CO",Food and Recreation,Billions of dollars,Colorado,0.100512,0.11887,0.160251,0.15391,0.166568,...,2.692961,2.40937,2.337308,2.291221,2.95646,2.692171,2.99429,3.460408,0.128611,127.955866
3131,"""08014""","Broomfield, CO",Other,Billions of dollars,Colorado,0.045572,0.049578,0.046677,0.045181,0.047588,...,0.996397,0.88088,0.851711,0.819426,0.842421,0.941544,1.0674,1.14472,0.030223,66.319231


In [228]:
inc.head()

Unnamed: 0,GeoFIPS,State,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,Total_Ab,Total_Re
0,"""01001""",Alabama,"Autauga, AL",Population (persons),Number of persons,48366,49676,51328,52405,53277,54135,54773,55227,54954,54727,54893,54864,55243,6877,14.218666
1,"""01001""",Alabama,"Autauga, AL",Per capita personal income (dollars),Dollars,27542,28779,29784,31472,32863,32590,33348,34337,35067,35538,36786,38849,39883,12341,44.80793
2,"""01003""",Alabama,"Baldwin, AL",Population (persons),Number of persons,156266,162183,168121,172404,175827,179406,183112,186558,190145,194885,199183,202939,207601,51335,32.851036
3,"""01003""",Alabama,"Baldwin, AL",Per capita personal income (dollars),Dollars,30361,32329,34879,36092,35717,34663,36143,37881,38259,38222,39602,41613,43143,12782,42.100063
4,"""01005""",Alabama,"Barbour, AL",Population (persons),Number of persons,28287,28027,27861,27757,27808,27657,27327,27341,27169,26937,26755,26283,25806,-2481,-8.770813


In [229]:
gdp.head()

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,State,2004,2005,2006,2007,2008,...,2009-Share,2010-Share,2011-Share,2012-Share,2013-Share,2014-Share,2015-Share,2016-Share,Total_Ab,Total_Re
0,"""01001""","Autauga, AL",Utilities,Billions of dollars,Alabama,0.081339,0.067098,0.111611,0.129539,0.10681,...,12.972654,17.808237,23.210284,29.830183,25.800096,25.116652,27.957215,26.587851,0.374971,460.997799
1,"""01001""","Autauga, AL",Construction,Billions of dollars,Alabama,0.049238,0.045636,0.049644,0.068217,0.050474,...,3.958541,3.168697,3.378915,3.102288,3.08438,2.787066,2.678835,2.755858,-0.001941,-3.942077
2,"""01001""","Autauga, AL",Manufacturing,Billions of dollars,Alabama,0.192625,0.207433,0.22814,0.208033,0.185294,...,18.141779,17.050426,14.458899,13.520692,13.1702,15.083891,15.132207,16.347586,0.087938,45.652433
3,"""01001""","Autauga, AL",Transportation,Billions of dollars,Alabama,0.014354,0.016462,0.019876,0.020234,0.015024,...,1.059061,1.026057,1.134835,1.107235,1.058448,0.970788,1.016382,0.939498,0.00177,12.331058
4,"""01001""","Autauga, AL",Information,Billions of dollars,Alabama,0.011902,0.011854,0.012418,0.011649,0.009022,...,0.554873,0.46319,0.395795,0.599881,0.94704,1.133927,1.059923,1.504106,0.013912,116.887918


In [230]:
vote.head()

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes
0,2004,Alabama,"Autauga, AL",20081,15196,"Bush, George W.",4758,"Kerry, John F.",127,10438,R,75.673522,23.694039,0.632439,9,0,0
1,2004,Alabama,"Baldwin, AL",69320,52971,"Bush, George W.",15599,"Kerry, John F.",750,37372,R,76.415176,22.502885,1.081939,9,0,0
2,2004,Alabama,"Barbour, AL",10777,5899,"Bush, George W.",4832,"Kerry, John F.",46,1067,R,54.73694,44.836225,0.426835,9,0,0
3,2004,Alabama,"Bibb, AL",7600,5472,"Bush, George W.",2089,"Kerry, John F.",39,3383,R,72.0,27.486842,0.513158,9,0,0
4,2004,Alabama,"Blount, AL",21504,17386,"Bush, George W.",3938,"Kerry, John F.",180,13448,R,80.850074,18.312872,0.837054,9,0,0


In [231]:
#Slice out the GeoFIPS column from inc and reset its index numbers
fips = inc.loc[(inc.Description.str.contains('Population'))]
fips = fips.iloc[:,0:3]
fips = fips.reset_index(drop=True)
fips

Unnamed: 0,GeoFIPS,State,GeoName
0,"""01001""",Alabama,"Autauga, AL"
1,"""01003""",Alabama,"Baldwin, AL"
2,"""01005""",Alabama,"Barbour, AL"
3,"""01007""",Alabama,"Bibb, AL"
4,"""01009""",Alabama,"Blount, AL"
...,...,...,...
3080,"""56037""",Wyoming,"Sweetwater, WY"
3081,"""56039""",Wyoming,"Teton, WY"
3082,"""56041""",Wyoming,"Uinta, WY"
3083,"""56043""",Wyoming,"Washakie, WY"


In [232]:
#Slice each individual year and add the counties' GeoFIPS codes alongside them.  To avoid problems concatenating,
#reset the index numbers for the latter three years
v4 = vote.loc[(vote.Year == 2004)]
v4 = pd.concat([v4,fips.iloc[:,0]], axis=1)
v8 = vote.loc[(vote.Year == 2008)]
v8 = v8.reset_index(drop=True)
v8 = pd.concat([v8,fips.iloc[:,0]], axis=1)
v12 = vote.loc[(vote.Year == 2012)]
v12 = v12.reset_index(drop=True)
v12 = pd.concat([v12,fips.iloc[:,0]], axis=1)
v16 = vote.loc[(vote.Year == 2016)]
v16 = v16.reset_index(drop=True)
v16 = pd.concat([v16,fips.iloc[:,0]], axis=1)
v16.head()

Unnamed: 0,Year,State,Area,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes,GeoFIPS
0,2016,Alabama,"Autauga, AL",24973,18172,"Trump, Donald J.",5936,"Clinton, Hillary R.",865,12236,R,72.766588,23.769671,3.463741,9,0,0,"""01001"""
1,2016,Alabama,"Baldwin, AL",95215,72883,"Trump, Donald J.",18458,"Clinton, Hillary R.",3874,54425,R,76.545712,19.385601,4.068687,9,0,0,"""01003"""
2,2016,Alabama,"Barbour, AL",10469,5454,"Trump, Donald J.",4871,"Clinton, Hillary R.",144,583,R,52.096666,46.527844,1.37549,9,0,0,"""01005"""
3,2016,Alabama,"Bibb, AL",8819,6738,"Trump, Donald J.",1874,"Clinton, Hillary R.",207,4864,R,76.40322,21.249575,2.347205,9,0,0,"""01007"""
4,2016,Alabama,"Blount, AL",25588,22859,"Trump, Donald J.",2156,"Clinton, Hillary R.",573,20703,R,89.334844,8.425825,2.239331,9,0,0,"""01009"""


In [233]:
#Redefine vote with these smaller dataframes, and reset its index numbers
vote = pd.concat([v4,v8,v12,v16])
vote = vote.sort_values(by=['Year','State','Area'])
vote = vote.reset_index(drop=True)
vote = vote.reindex(columns = ['GeoFIPS','State','Area','Year','TotalVotes','RepVotes','RepCandidate','DemVotes',
                               'DemCandidate','ThirdVotes','PluralityVotes','PluralityParty','RepVotesTotalPercent',
                               'DemVotesTotalPercent','ThirdVotesTotalPercent','ElectoralRepVotes',
                               'ElectoralDemVotes','ElectoralOtherVotes'])
vote.tail()

Unnamed: 0,GeoFIPS,State,Area,Year,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes
12335,"""56037""",Wyoming,"Sweetwater, WY",2016,17130,12154,"Trump, Donald J.",3231,"Clinton, Hillary R.",1745,8923,R,70.951547,18.861646,10.186807,3,0,0
12336,"""56039""",Wyoming,"Teton, WY",2016,12627,3921,"Trump, Donald J.",7314,"Clinton, Hillary R.",1392,-3393,D,31.052507,57.923497,11.023996,3,0,0
12337,"""56041""",Wyoming,"Uinta, WY",2016,8470,6154,"Trump, Donald J.",1202,"Clinton, Hillary R.",1114,4952,R,72.656434,14.191263,13.152302,3,0,0
12338,"""56043""",Wyoming,"Washakie, WY",2016,3814,2911,"Trump, Donald J.",532,"Clinton, Hillary R.",371,2379,R,76.324069,13.94861,9.72732,3,0,0
12339,"""56045""",Wyoming,"Weston, WY",2016,3526,3033,"Trump, Donald J.",299,"Clinton, Hillary R.",194,2734,R,86.018151,8.479864,5.501985,3,0,0


In [234]:
#Add a VictoriousParty column whose value depends on which major party a state awards all (or in a few cases most when
#a third party is involved) of its electoral votes for a given year
vote['VictoriousParty'] = 'None'
vote['VictoriousParty'] = np.where((vote['ElectoralRepVotes'] > vote['ElectoralDemVotes']), 'R',
                                   vote['VictoriousParty'])
vote['VictoriousParty'] = np.where((vote['ElectoralRepVotes'] < vote['ElectoralDemVotes']), 'D',
                                   vote['VictoriousParty'])
vote.head()

Unnamed: 0,GeoFIPS,State,Area,Year,TotalVotes,RepVotes,RepCandidate,DemVotes,DemCandidate,ThirdVotes,PluralityVotes,PluralityParty,RepVotesTotalPercent,DemVotesTotalPercent,ThirdVotesTotalPercent,ElectoralRepVotes,ElectoralDemVotes,ElectoralOtherVotes,VictoriousParty
0,"""01001""",Alabama,"Autauga, AL",2004,20081,15196,"Bush, George W.",4758,"Kerry, John F.",127,10438,R,75.673522,23.694039,0.632439,9,0,0,R
1,"""01003""",Alabama,"Baldwin, AL",2004,69320,52971,"Bush, George W.",15599,"Kerry, John F.",750,37372,R,76.415176,22.502885,1.081939,9,0,0,R
2,"""01005""",Alabama,"Barbour, AL",2004,10777,5899,"Bush, George W.",4832,"Kerry, John F.",46,1067,R,54.73694,44.836225,0.426835,9,0,0,R
3,"""01007""",Alabama,"Bibb, AL",2004,7600,5472,"Bush, George W.",2089,"Kerry, John F.",39,3383,R,72.0,27.486842,0.513158,9,0,0,R
4,"""01009""",Alabama,"Blount, AL",2004,21504,17386,"Bush, George W.",3938,"Kerry, John F.",180,13448,R,80.850074,18.312872,0.837054,9,0,0,R


In [235]:
vote_groups

Unnamed: 0_level_0,Unnamed: 1_level_0,RepAveragePercent,DemAveragePercent,ThirdAveragePercent
State,Area,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alabama,"Autauga, AL",73.636951,24.944773,1.418276
Alabama,"Baldwin, AL",76.360048,21.812804,1.827148
Alabama,"Barbour, AL",51.365440,47.920566,0.713994
Alabama,"Bibb, AL",73.418284,25.371230,1.210486
Alabama,"Blount, AL",85.119047,13.399093,1.481859
...,...,...,...,...
Wyoming,"Sweetwater, WY",66.521484,28.405306,5.073210
Wyoming,"Teton, WY",38.900963,56.340835,4.758202
Wyoming,"Uinta, WY",73.530346,20.839022,5.630632
Wyoming,"Washakie, WY",75.704660,20.086538,4.208802


In [236]:
#Reset the index for vote_groups and add the GeoFIPS column alongside it to correspond with each county
vote_groups = vote_groups.reset_index()
vote_groups = pd.concat([v16['GeoFIPS'], vote_groups], axis=1)
vote_groups.head()

Unnamed: 0,GeoFIPS,State,Area,RepAveragePercent,DemAveragePercent,ThirdAveragePercent
0,"""01001""",Alabama,"Autauga, AL",73.636951,24.944773,1.418276
1,"""01003""",Alabama,"Baldwin, AL",76.360048,21.812804,1.827148
2,"""01005""",Alabama,"Barbour, AL",51.36544,47.920566,0.713994
3,"""01007""",Alabama,"Bibb, AL",73.418284,25.37123,1.210486
4,"""01009""",Alabama,"Blount, AL",85.119047,13.399093,1.481859


In [237]:
#Create a new column that classifies the counties by how strongly they vote for either major party across all years on
#average
vote_groups['PartyPref'] = 'Tossup'
vote_groups['PartyPref'] = np.where(((vote_groups['RepAveragePercent'] - vote_groups['DemAveragePercent']) >= 15),
                                    'Solid R', vote_groups['PartyPref'])
vote_groups['PartyPref'] = np.where(((vote_groups['RepAveragePercent'] - vote_groups['DemAveragePercent']) >= 10) &
                                    ((vote_groups['RepAveragePercent'] - vote_groups['DemAveragePercent']) < 15),
                                    'Likely R', vote_groups['PartyPref'])
vote_groups['PartyPref'] = np.where(((vote_groups['RepAveragePercent'] - vote_groups['DemAveragePercent']) >= 5) &
                                    ((vote_groups['RepAveragePercent'] - vote_groups['DemAveragePercent']) < 10),
                                    'Tilt R', vote_groups['PartyPref'])
vote_groups['PartyPref'] = np.where(((vote_groups['DemAveragePercent'] - vote_groups['RepAveragePercent']) >= 5) &
                                    ((vote_groups['DemAveragePercent'] - vote_groups['RepAveragePercent']) < 10),
                                    'Tilt D', vote_groups['PartyPref'])
vote_groups['PartyPref'] = np.where(((vote_groups['DemAveragePercent'] - vote_groups['RepAveragePercent']) >= 10) &
                                    ((vote_groups['DemAveragePercent'] - vote_groups['RepAveragePercent']) < 15),
                                    'Likely D', vote_groups['PartyPref'])
vote_groups['PartyPref'] = np.where(((vote_groups['DemAveragePercent'] - vote_groups['RepAveragePercent']) >= 15),
                                    'Solid D', vote_groups['PartyPref'])
vote_groups.head(15)

Unnamed: 0,GeoFIPS,State,Area,RepAveragePercent,DemAveragePercent,ThirdAveragePercent,PartyPref
0,"""01001""",Alabama,"Autauga, AL",73.636951,24.944773,1.418276,Solid R
1,"""01003""",Alabama,"Baldwin, AL",76.360048,21.812804,1.827148,Solid R
2,"""01005""",Alabama,"Barbour, AL",51.36544,47.920566,0.713994,Tossup
3,"""01007""",Alabama,"Bibb, AL",73.418284,25.37123,1.210486,Solid R
4,"""01009""",Alabama,"Blount, AL",85.119047,13.399093,1.481859,Solid R
5,"""01011""",Alabama,"Bullock, AL",26.26765,73.344143,0.388207,Solid D
6,"""01013""",Alabama,"Butler, AL",56.329521,43.097998,0.572481,Likely R
7,"""01015""",Alabama,"Calhoun, AL",66.387228,31.906232,1.70654,Solid R
8,"""01017""",Alabama,"Chambers, AL",55.244703,43.767413,0.987884,Likely R
9,"""01019""",Alabama,"Cherokee, AL",75.10362,23.361164,1.535216,Solid R


In [238]:
inc.head()

Unnamed: 0,GeoFIPS,State,GeoName,Description,Unit,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,Total_Ab,Total_Re
0,"""01001""",Alabama,"Autauga, AL",Population (persons),Number of persons,48366,49676,51328,52405,53277,54135,54773,55227,54954,54727,54893,54864,55243,6877,14.218666
1,"""01001""",Alabama,"Autauga, AL",Per capita personal income (dollars),Dollars,27542,28779,29784,31472,32863,32590,33348,34337,35067,35538,36786,38849,39883,12341,44.80793
2,"""01003""",Alabama,"Baldwin, AL",Population (persons),Number of persons,156266,162183,168121,172404,175827,179406,183112,186558,190145,194885,199183,202939,207601,51335,32.851036
3,"""01003""",Alabama,"Baldwin, AL",Per capita personal income (dollars),Dollars,30361,32329,34879,36092,35717,34663,36143,37881,38259,38222,39602,41613,43143,12782,42.100063
4,"""01005""",Alabama,"Barbour, AL",Population (persons),Number of persons,28287,28027,27861,27757,27808,27657,27327,27341,27169,26937,26755,26283,25806,-2481,-8.770813


In [239]:
gdp.loc[(gdp.GeoName == 'Autauga, AL')]

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,State,2004,2005,2006,2007,2008,...,2009-Share,2010-Share,2011-Share,2012-Share,2013-Share,2014-Share,2015-Share,2016-Share,Total_Ab,Total_Re
0,"""01001""","Autauga, AL",Utilities,Billions of dollars,Alabama,0.081339,0.067098,0.111611,0.129539,0.10681,...,12.972654,17.808237,23.210284,29.830183,25.800096,25.116652,27.957215,26.587851,0.374971,460.997799
1,"""01001""","Autauga, AL",Construction,Billions of dollars,Alabama,0.049238,0.045636,0.049644,0.068217,0.050474,...,3.958541,3.168697,3.378915,3.102288,3.08438,2.787066,2.678835,2.755858,-0.001941,-3.942077
2,"""01001""","Autauga, AL",Manufacturing,Billions of dollars,Alabama,0.192625,0.207433,0.22814,0.208033,0.185294,...,18.141779,17.050426,14.458899,13.520692,13.1702,15.083891,15.132207,16.347586,0.087938,45.652433
3,"""01001""","Autauga, AL",Transportation,Billions of dollars,Alabama,0.014354,0.016462,0.019876,0.020234,0.015024,...,1.059061,1.026057,1.134835,1.107235,1.058448,0.970788,1.016382,0.939498,0.00177,12.331058
4,"""01001""","Autauga, AL",Information,Billions of dollars,Alabama,0.011902,0.011854,0.012418,0.011649,0.009022,...,0.554873,0.46319,0.395795,0.599881,0.94704,1.133927,1.059923,1.504106,0.013912,116.887918
5,"""01001""","Autauga, AL",Finance,Billions of dollars,Alabama,0.16207,0.162654,0.168607,0.185357,0.177828,...,15.479276,15.264394,14.442116,13.675151,14.325787,14.000841,13.344972,13.141673,0.063472,39.163324
6,"""01001""","Autauga, AL",Professional,Billions of dollars,Alabama,0.027695,0.032596,0.035347,0.037168,0.040336,...,3.724676,3.498264,3.205242,2.798683,2.761542,2.439772,2.378082,2.573424,0.016471,59.472829
7,"""01001""","Autauga, AL",Education and Health,Billions of dollars,Alabama,0.036768,0.039298,0.042385,0.047684,0.056765,...,5.583591,5.483983,5.186116,4.801671,4.854428,4.752518,4.464685,4.592495,0.04205,114.365753
8,"""01001""","Autauga, AL",Food and Recreation,Billions of dollars,Alabama,0.038479,0.037655,0.038293,0.037735,0.036903,...,3.345246,3.203809,2.913711,2.734802,2.932496,3.254803,2.910332,3.215352,0.016704,43.410692
9,"""01001""","Autauga, AL",Other,Billions of dollars,Alabama,0.032253,0.034738,0.037279,0.038397,0.041144,...,3.817777,3.737721,3.279738,2.905466,3.045483,2.927993,2.698106,2.509738,0.01082,33.547267


In [240]:
#Replicate the rows in vote_groups twice before adding the PartyPref column to inc
v1 = pd.concat([vote_groups]*2, ignore_index=True)
v1 = v1.sort_values(by=['State','Area']).reset_index(drop=True)
inc = pd.concat([inc, v1['PartyPref']], axis=1)
inc.head()

Unnamed: 0,GeoFIPS,State,GeoName,Description,Unit,2004,2005,2006,2007,2008,...,2010,2011,2012,2013,2014,2015,2016,Total_Ab,Total_Re,PartyPref
0,"""01001""",Alabama,"Autauga, AL",Population (persons),Number of persons,48366,49676,51328,52405,53277,...,54773,55227,54954,54727,54893,54864,55243,6877,14.218666,Solid R
1,"""01001""",Alabama,"Autauga, AL",Per capita personal income (dollars),Dollars,27542,28779,29784,31472,32863,...,33348,34337,35067,35538,36786,38849,39883,12341,44.80793,Solid R
2,"""01003""",Alabama,"Baldwin, AL",Population (persons),Number of persons,156266,162183,168121,172404,175827,...,183112,186558,190145,194885,199183,202939,207601,51335,32.851036,Solid R
3,"""01003""",Alabama,"Baldwin, AL",Per capita personal income (dollars),Dollars,30361,32329,34879,36092,35717,...,36143,37881,38259,38222,39602,41613,43143,12782,42.100063,Solid R
4,"""01005""",Alabama,"Barbour, AL",Population (persons),Number of persons,28287,28027,27861,27757,27808,...,27327,27341,27169,26937,26755,26283,25806,-2481,-8.770813,Tossup


In [241]:
#Replicate the rows in vote_groups 14 times before adding the PartyPref column to gdp
v2 = pd.concat([vote_groups]*14, ignore_index=True)
v2 = v2.sort_values(by=['State','Area']).reset_index(drop=True)
gdp = pd.concat([gdp, v2['PartyPref']], axis=1)
gdp.loc[(gdp.GeoName == 'Autauga, AL')]

Unnamed: 0,GeoFIPS,GeoName,Description,Unit,State,2004,2005,2006,2007,2008,...,2010-Share,2011-Share,2012-Share,2013-Share,2014-Share,2015-Share,2016-Share,Total_Ab,Total_Re,PartyPref
0,"""01001""","Autauga, AL",Utilities,Billions of dollars,Alabama,0.081339,0.067098,0.111611,0.129539,0.10681,...,17.808237,23.210284,29.830183,25.800096,25.116652,27.957215,26.587851,0.374971,460.997799,Solid R
1,"""01001""","Autauga, AL",Construction,Billions of dollars,Alabama,0.049238,0.045636,0.049644,0.068217,0.050474,...,3.168697,3.378915,3.102288,3.08438,2.787066,2.678835,2.755858,-0.001941,-3.942077,Solid R
2,"""01001""","Autauga, AL",Manufacturing,Billions of dollars,Alabama,0.192625,0.207433,0.22814,0.208033,0.185294,...,17.050426,14.458899,13.520692,13.1702,15.083891,15.132207,16.347586,0.087938,45.652433,Solid R
3,"""01001""","Autauga, AL",Transportation,Billions of dollars,Alabama,0.014354,0.016462,0.019876,0.020234,0.015024,...,1.026057,1.134835,1.107235,1.058448,0.970788,1.016382,0.939498,0.00177,12.331058,Solid R
4,"""01001""","Autauga, AL",Information,Billions of dollars,Alabama,0.011902,0.011854,0.012418,0.011649,0.009022,...,0.46319,0.395795,0.599881,0.94704,1.133927,1.059923,1.504106,0.013912,116.887918,Solid R
5,"""01001""","Autauga, AL",Finance,Billions of dollars,Alabama,0.16207,0.162654,0.168607,0.185357,0.177828,...,15.264394,14.442116,13.675151,14.325787,14.000841,13.344972,13.141673,0.063472,39.163324,Solid R
6,"""01001""","Autauga, AL",Professional,Billions of dollars,Alabama,0.027695,0.032596,0.035347,0.037168,0.040336,...,3.498264,3.205242,2.798683,2.761542,2.439772,2.378082,2.573424,0.016471,59.472829,Solid R
7,"""01001""","Autauga, AL",Education and Health,Billions of dollars,Alabama,0.036768,0.039298,0.042385,0.047684,0.056765,...,5.483983,5.186116,4.801671,4.854428,4.752518,4.464685,4.592495,0.04205,114.365753,Solid R
8,"""01001""","Autauga, AL",Food and Recreation,Billions of dollars,Alabama,0.038479,0.037655,0.038293,0.037735,0.036903,...,3.203809,2.913711,2.734802,2.932496,3.254803,2.910332,3.215352,0.016704,43.410692,Solid R
9,"""01001""","Autauga, AL",Other,Billions of dollars,Alabama,0.032253,0.034738,0.037279,0.038397,0.041144,...,3.737721,3.279738,2.905466,3.045483,2.927993,2.698106,2.509738,0.01082,33.547267,Solid R


## Part 8: Exporting the Data 

In [242]:
#Export vote, inc, gdp, and vote_groups to Excel to make visualizations on Tableau
#vote.to_excel(r'vote_10-05.xlsx', index=True)
#inc.to_excel(r'inc_10-08.xlsx', index=True)
#gdp.to_excel(r'gdp_10-08.xlsx', index=True)
#vote_groups.to_excel(r'vote_groups_09-30.xlsx', index=True)

## References 

In [243]:
#https://www.datasciencemadesimple.com/repeat-or-replicate-the-dataframe-in-pandas-python/
#https://pythonexamples.org/pandas-dataframe-replace-values-in-column-based-on-condition/
#https://stackoverflow.com/questions/56759142/pandas-replace-the-asterisk-sign-from-two-columns-in-one-go
#https://stackoverflow.com/questions/50788508/how-can-i-replicate-rows-in-pandas
#https://stackoverflow.com/questions/42523287/pandas-groupby-and-calculate-time-difference-from-first-element-in-each-group
#https://stackoverflow.com/questions/17477979/dropping-infinite-values-from-dataframes-in-pandas
#https://en.wikipedia.org/wiki/Milton,_Georgia
#https://en.wikipedia.org/wiki/Kalawao_County,_Hawaii
#https://en.wikipedia.org/wiki/Maui_County,_Hawaii
#https://en.wikipedia.org/wiki/Adams_County,_Indiana
#https://en.wikipedia.org/wiki/Allen_County,_Indiana
#https://en.wikipedia.org/wiki/Bartholomew_County,_Indiana
#https://en.wikipedia.org/wiki/Brown_County,_Indiana
#https://en.wikipedia.org/wiki/Carroll_County,_Indiana
#https://en.wikipedia.org/wiki/Cass_County,_Indiana
#https://en.wikipedia.org/wiki/Clark_County,_Indiana
#https://en.wikipedia.org/wiki/Clay_County,_Indiana
#https://en.wikipedia.org/wiki/Crawford_County,_Indiana
#https://en.wikipedia.org/wiki/Decatur_County,_Indiana
#https://en.wikipedia.org/wiki/DeKalb_County,_Indiana
#https://en.wikipedia.org/wiki/Delaware_County,_Indiana
#https://en.wikipedia.org/wiki/Elkhart_County,_Indiana
#https://en.wikipedia.org/wiki/Floyd_County,_Indiana
#https://en.wikipedia.org/wiki/Fountain_County,_Indiana
#https://en.wikipedia.org/wiki/Franklin_County,_Indiana
#https://en.wikipedia.org/wiki/Gibson_County,_Indiana
#https://en.wikipedia.org/wiki/Grant_County,_Indiana
#https://en.wikipedia.org/wiki/Hamilton_County,_Indiana
#https://en.wikipedia.org/wiki/Hancock_County,_Indiana
#https://en.wikipedia.org/wiki/Harrison_County,_Indiana
#https://en.wikipedia.org/wiki/Hendricks_County,_Indiana
#https://en.wikipedia.org/wiki/Jackson_County,_Indiana
#https://en.wikipedia.org/wiki/Jefferson_County,_Indiana
#https://en.wikipedia.org/wiki/Jennings_County,_Indiana
#https://en.wikipedia.org/wiki/Kosciusko_County,_Indiana
#https://en.wikipedia.org/wiki/Lake_County,_Indiana
#https://en.wikipedia.org/wiki/LaPorte_County,_Indiana
#https://en.wikipedia.org/wiki/Marion_County,_Indiana
#https://en.wikipedia.org/wiki/Marshall_County,_Indiana
#https://en.wikipedia.org/wiki/Monroe_County,_Indiana
#https://en.wikipedia.org/wiki/Montgomery_County,_Indiana
#https://en.wikipedia.org/wiki/Morgan_County,_Indiana
#https://en.wikipedia.org/wiki/Owen_County,_Indiana
#https://en.wikipedia.org/wiki/Porter_County,_Indiana
#https://en.wikipedia.org/wiki/Posey_County,_Indiana
#https://en.wikipedia.org/wiki/Randolph_County,_Indiana
#https://en.wikipedia.org/wiki/St._Joseph_County,_Indiana
#https://en.wikipedia.org/wiki/Steuben_County,_Indiana
#https://en.wikipedia.org/wiki/Tippecanoe_County,_Indiana
#https://en.wikipedia.org/wiki/Vanderburgh_County,_Indiana
#https://en.wikipedia.org/wiki/Vigo_County,_Indiana
#https://en.wikipedia.org/wiki/Warrick_County,_Indiana
#https://en.wikipedia.org/wiki/Washington_County,_Indiana
#https://en.wikipedia.org/wiki/Wells_County,_Indiana
#https://en.wikipedia.org/wiki/White_County,_Indiana
#https://en.wikipedia.org/wiki/Whitley_County,_Indiana
#https://en.wikipedia.org/wiki/Kansas_City,_Missouri
#https://en.wikipedia.org/wiki/Dutchess_County,_New_York
#https://en.wikipedia.org/wiki/Clark_County,_Ohio
#https://en.wikipedia.org/wiki/Oglala_Lakota_County,_South_Dakota
#https://en.wikipedia.org/wiki/2004_United_States_presidential_election_in_Hawaii
#https://en.wikipedia.org/wiki/2008_United_States_presidential_election_in_Hawaii
#https://en.wikipedia.org/wiki/2012_United_States_presidential_election_in_Hawaii
#https://en.wikipedia.org/wiki/2016_United_States_presidential_election_in_Hawaii
#https://en.wikipedia.org/wiki/2012_United_States_presidential_election_in_Ohio
#https://en.wikipedia.org/wiki/2016_United_States_presidential_election_in_Indiana
#https://en.wikipedia.org/wiki/2016_United_States_presidential_election_in_New_York
#https://en.wikipedia.org/wiki/2016_United_States_presidential_election_in_Rhode_Island