In [1]:
# library
import pandas as pd
import matplotlib

%matplotlib inline



In [2]:
# Directory layout: every path is derived from dirData and keeps its trailing '/'
dirData = '../data/'

# external/ and its election/ sub-folder
dirDataExt = '{}external/'.format(dirData)
dirElection = '{}election/'.format(dirDataExt)

# processed/ is where this notebook reads its input and writes its CSVs
dirDataProc = '{}processed/'.format(dirData)

In [3]:
# read in the slimmed election data
# dirDataProc already ends with '/', so the relative part must not start with
# one (otherwise the path becomes '.../processed//elections/...').
# thousands=',' parses figures written with comma separators as numbers.
# The file's unnamed leading column is kept as a regular column by
# index_col=False and shows up as 'Unnamed: 0'.
iaElec = pd.read_csv(dirDataProc + 'elections/election-populations.csv', index_col=False,
                    thousands=',')

iaElec.head()

Unnamed: 0,Race,Total Population,Citizen Population,Registered Population,Voting Population,Year
0,Total,2320.0,2232.0,1745.0,1548.0,2012
1,Male,1135.0,1084.0,838.0,733.0,2012
2,Female,1186.0,1148.0,906.0,816.0,2012
3,White alone,2173.0,2123.0,1673.0,1482.0,2012
4,.White non-Hispanic alone,2067.0,2057.0,1644.0,1455.0,2012


### Formatting

In [4]:
def _cleanRaceLabel(label):
    """Turn one raw Race label into its cleaned, readable form."""
    # year 2000 has 'All' instead of total
    if label == 'All':
        label = 'Total'

    # strip the '.' characters, then the ' alone' text
    label = label.replace('.', '').replace(' alone', '')

    # 'N-H <race>' is rewritten as '<race> non-Hispanic'
    if label[:3] == 'N-H':
        label = label[4:] + ' non-Hispanic'

    return label


# apply the whole clean-up to every row in a single pass
iaElec['Race'] = iaElec['Race'].map(_cleanRaceLabel)

In [6]:
# Look at the first 50 rows to check the cleaned Race labels
iaElec.iloc[:50]

Unnamed: 0,Race,Total Population,Citizen Population,Registered Population,Voting Population,Year
0,Total,2320.0,2232.0,1745.0,1548.0,2012
1,Male,1135.0,1084.0,838.0,733.0,2012
2,Female,1186.0,1148.0,906.0,816.0,2012
3,White,2173.0,2123.0,1673.0,1482.0,2012
4,White non-Hispanic,2067.0,2057.0,1644.0,1455.0,2012
5,Black,54.0,51.0,35.0,33.0,2012
6,Asian,61.0,35.0,21.0,17.0,2012
7,Hispanic,120.0,71.0,32.0,30.0,2012
8,White or in combination,2197.0,2138.0,1685.0,1494.0,2012
9,Black or in combination,67.0,59.0,41.0,39.0,2012


In [7]:
# Get just the populations we need
primaryRaces = ['Total', 'Asian', 'Hispanic', 'White', 'Black']

# Take an independent copy of the filtered rows: adding a column to a bare
# boolean-mask selection of iaElec triggers pandas' SettingWithCopyWarning
# and may not modify the frame that was intended.
iaPrimaryRaces = iaElec[iaElec['Race'].isin(primaryRaces)].copy()


In [43]:
# Save the primary-race subset as CSV, without the DataFrame index
primaryRacesPath = dirDataProc + 'elections/election-primary-races.csv'
iaPrimaryRaces.to_csv(primaryRacesPath, index=False)

In [21]:
# Per-election-year summary of Hispanic participation.
#
# NOTE(review): this cell used to iterate a frame called `ia` that is not
# defined anywhere in the notebook, so it failed on a fresh kernel. It now
# uses iaPrimaryRaces, whose 2012 Total/Hispanic rows reproduce the recorded
# output -- confirm that this is the frame that was intended.
#
# Rows are looked up by their Race label instead of by position (the old
# iloc[0] / iloc[1] silently assumed "Total first, then Hispanic"); a year
# without one of the two labels now raises a KeyError instead of reporting
# numbers for the wrong group.
for year, group in iaPrimaryRaces.groupby('Year'):
    byRace = group.set_index('Race')
    total = byRace.loc['Total']
    hispanic = byRace.loc['Hispanic']

    # share of Hispanic citizens who voted
    hispanicTurnout = (hispanic['Voting Population'] / hispanic['Citizen Population'])*100
    # Hispanic share of the citizen population (printed as '% Voters who were hispanic')
    hispanicCitizenShare = (hispanic['Citizen Population'] / total['Citizen Population'])*100
    # Hispanic share of all votes cast
    hispanicVoteShare = (hispanic['Voting Population'] / total['Voting Population'])*100

    print(year)
    print('Eligible Population: {}'.format(total['Citizen Population']))
    print('Hispanic Population: {}'.format(hispanic['Citizen Population']))
    print('\t% Hispanics who voted: {}%'.format(hispanicTurnout))
    print('\t% Voters who were hispanic: {}'.format(hispanicCitizenShare))
    print('\tHispanics % contribution of all of the votes: {}'.format(hispanicVoteShare))
    print('\n')


1996
Eligible Population: 1948.0
Hispanic Population: 24.0
	% Hispanics who voted: 25.0%
	% Voters who were hispanic: 1.23203285421
	Hispanics % contribution of all of the votes: 0.464756003098


2000
Eligible Population: 2008.0
Hispanic Population: 29.0
	% Hispanics who voted: 65.5172413793%
	% Voters who were hispanic: 1.44422310757
	Hispanics % contribution of all of the votes: 1.40428677014


2004
Eligible Population: 2136.0
Hispanic Population: 24.0
	% Hispanics who voted: 50.0%
	% Voters who were hispanic: 1.12359550562
	Hispanics % contribution of all of the votes: 0.788436268068


2008
Eligible Population: 2137.0
Hispanic Population: 31.0
	% Hispanics who voted: 64.5161290323%
	% Voters who were hispanic: 1.45063172672
	Hispanics % contribution of all of the votes: 1.33244503664


2012
Eligible Population: 2232.0
Hispanic Population: 71.0
	% Hispanics who voted: 42.2535211268%
	% Voters who were hispanic: 3.18100358423
	Hispanics % contribution of all of the votes: 1.9379844961

In [11]:
# Show the 'Total' row of every election year.
# NOTE(review): this line originally began an 'Engagement' column assignment
# but was left unfinished (unclosed parentheses, i.e. a SyntaxError). The
# intended formula cannot be recovered from the notebook, so only the row
# selection that matches the recorded output is kept.
# TODO: restore the 'Engagement' computation once its definition is known.
iaPrimaryRaces[iaPrimaryRaces['Race'] == 'Total']

Unnamed: 0,Race,Total Population,Citizen Population,Registered Population,Voting Population,Year
0,Total,2320.0,2232.0,1745.0,1548.0,2012
11,Total,2244.0,2137.0,1630.0,1501.0,2008
22,Total,2212.0,2136.0,1674.0,1522.0,2004
34,Total,2110.0,2008.0,1524.0,1353.0,2000
42,Total,2848.0,1948.0,1543.0,1291.0,1996


In [17]:
# Reshape to one row per race with one column per election year
votesByYear = iaPrimaryRaces.pivot(index='Year', columns='Race', values='Voting Population')

# flip so the races become rows, then turn the Race index into a regular column
lineChart = votesByYear.T.reset_index()

lineChart.head()

Year,Race,1996,2000,2004,2008,2012
0,Asian,,,11.0,11.0,17.0
1,Black,20.0,34.0,23.0,42.0,33.0
2,Hispanic,6.0,19.0,12.0,20.0,30.0
3,Total,1291.0,1353.0,1522.0,1501.0,1548.0
4,White,1265.0,1320.0,1473.0,1422.0,1482.0


In [18]:
# Save the lineChart table as CSV, without the DataFrame index
lineChartPath = dirDataProc + 'elections/election-voting-populations.csv'
lineChart.to_csv(lineChartPath, index=False)