# This notebook downloads the 2012/2016 county-level vote data

In [1]:
import urllib
import urllib.request

import pandas as pd
from bs4 import BeautifulSoup # for scraping 2016 results

## 2012: Guardian consolidated county-level vote data

In [2]:
# Import the Guardian's consolidated 2012 county-level results.
# https://www.theguardian.com/news/datablog/2012/nov/07/us-2012-election-county-results-download#data
# NOTE: the xlrd package is needed (not installed by default)
# to read data and format information from older Excel files (i.e. .xls).
# Read the selected columns of the 'FULL DATA' worksheet into a DataFrame.
guardian_full = pd.read_excel(
    'http://image.guardian.co.uk/sys-files/Guardian/documents/2012/11/14/US_elect_county.xls',
    sheet_name='FULL DATA',          # current spelling of the old `sheetname` keyword
    header=0,                        # first worksheet row holds the column names
    index_col=None,
    converters={'FIPS Code': str},   # read FIPS codes as text rather than numbers
    # Excel column letters to load; `usecols` replaces the removed `parse_cols`.
    # `convert_float=True` was the default behaviour and is no longer passed.
    usecols="A,D,E,K,M,T,Y,AF,AK,AR,AW,BD,BI,BP",
)

guardian_full.head()



Unnamed: 0,State Postal,FIPS Code,County Name,TOTAL VOTES CAST,Party,Votes,Party.1,Votes.1,Party.2,Votes.2,Party.3,Votes.3,Party.4,Votes.4
0,AK,0,Alaska,220596,Dem,91696,GOP,121234,Lib,5539.0,Grn,2127.0,,
1,AK,2000,Alaska,220596,Dem,91696,GOP,121234,Lib,5539.0,Grn,2127.0,,
2,AL,0,Alabama,2064699,Dem,793620,GOP,1252453,Ind,2961.0,Ind,12283.0,Ind,3382.0
3,AL,1001,Autauga,23909,Dem,6354,GOP,17366,Ind,31.0,Ind,136.0,Ind,22.0
4,AL,1003,Baldwin,84988,Dem,18329,GOP,65772,Ind,122.0,Ind,597.0,Ind,168.0


In [3]:
# save the 2012 Guardian frame as a comma-separated file in the working directory
guardian_full.to_csv(path_or_buf='2012_data_Guardian.csv', sep=',')

## 2016: Townhall county-by-county vote data

In [4]:
# Each page also carries a summary table that rolls results up to the state
# level; this predicate lets the scraper leave that table out.
def cond(x):
    """Return True for a county results table class string, False otherwise.

    A value qualifies when it starts with "table ec-table" and does not
    contain "table ec-table ec-table-summary". Empty or missing values
    (``None``, ``""``) never qualify.
    """
    if not x:
        return False
    is_results_table = x.startswith("table ec-table")
    is_summary_table = "table ec-table ec-table-summary" in x
    return is_results_table and not is_summary_table

In [5]:
# state abbreviations (one results page is fetched per entry)
states = [
    'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL',
    'GA', 'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME',
    'MD', 'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH',
    'NJ', 'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI',
    'SC', 'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI',
    'WY',
]

# scraped rows are collected here; the first entry is the csv header row
data = [
    ['state_abbr', 'county_name', 'party', 'votes_total_2016'],
]

In [6]:
# Loop through each state's web page
# http://townhall.com/election/2016/president/%s/county, where %s is the state abbr,
# and append one [state, county, party, votes] row per result row to `data`.
for state in states:
    url = 'http://townhall.com/election/2016/president/' + state + '/county'
    # the context manager closes the HTTP response as soon as the page is read
    with urllib.request.urlopen(url) as response:
        r = response.read()
    soup = BeautifulSoup(r, "html.parser")

    # every <table> whose class attribute passes `cond`
    tables = soup.find_all('table', attrs={'class': cond})

    for table in tables:
        # only handle tables that are not nested inside another <table>
        if table.find_parent("table") is not None:
            continue

        table_body = table.find('tbody')
        rows = table_body.find_all('tr')
        for row in rows:
            cols = row.find_all('td')
            if len(cols) == 4:
                # a four-cell row carries the county name in its first cell;
                # `county` then stays set for the shorter rows that follow
                divs = cols[0].find_all('div')
                county = divs[0].text.strip()
                party = cols[1]['class'][0]
                votes_cell = cols[2]
            else:
                # all other rows: party class and vote count both come from the second cell
                party = cols[1]['class'][0]
                votes_cell = cols[1]

            # drop thousands separators and turn every '-' into '0' before converting
            total_votes_2016 = int(votes_cell.text.strip().replace(',','').replace('-','0'))

            # combine each row's results
            rowData = [state, county, party, total_votes_2016]
            data.append(rowData)

In [7]:
# Build the results frame from the scraped rows: data[0] holds the column
# names and every later entry is one [state, county, party, votes] record.
# Supplying the header through `columns=` keeps it out of the data, so there
# is no header row to slice off and no column assignment on a slice.
townhall = pd.DataFrame(
    data[1:],
    columns=data[0],
    index=range(1, len(data)),  # label rows from 1 rather than 0
)
townhall['votes_total_2016'] = townhall['votes_total_2016'].astype('float64')
print(townhall.shape[0])  # number of scraped result rows
townhall.head()

14188


Unnamed: 0,state_abbr,county_name,party,votes_total_2016
1,AL,Autauga,GOP,18110.0
2,AL,Autauga,DEM,5908.0
3,AL,Autauga,IND,538.0
4,AL,Autauga,IND,105.0
5,AL,Baldwin,GOP,72780.0


In [8]:
# save the 2016 Townhall frame as a comma-separated file in the working directory
townhall.to_csv(path_or_buf='2016_data_Townhall.csv', sep=',')