In [133]:
import requests
import pandas as pd
import numpy as np
import json

from collections import Counter

In [134]:
# candidate = 'Yang'

def load_and_process_candidate_data(candidate_name):
    contributions = pd.read_csv('candidate_data/{}-2017-2018.csv'.format(candidate))

    is_individual = contributions['entity_type_desc']=='INDIVIDUAL'
    contributions = contributions[is_individual]
    contributions.head(10)

    contributions['contributor_compound_key'] = contributions[['contributor_name', 'contributor_street_1']].apply(lambda x : '{} {}'.format(x[0],x[1]), axis=1)
    contributions['Full_City'] = contributions[['contributor_city', 'contributor_state']].apply(lambda x : '{}, {}'.format(x[0],x[1]), axis=1)
    contributions['formatted_zip'] = contributions['contributor_zip'].apply(str).str.slice(0,5)

    print(contributions['contributor_compound_key'].nunique())
    return contributions

In [135]:
# Fuzzy Search

cities = pd.read_csv('data/uscities.csv', encoding="ISO-8859-1")
zipcodes = pd.read_csv('data/uszips.csv', encoding="ISO-8859-1")

def find_county_by_city(city):   
        try:
            index = cities.index[cities['contribution_data_format'] == city][0]
            return cities.get_value(index, 'county_name')
        except IndexError:
            return None

def find_county_by_zip(zipcode):

    try:
        index = zipcodes.index[zipcodes['zip'] == int(zipcode)][0]
        return zipcodes.get_value(index, 'county_name')
    except IndexError:
        return None
    except ValueError:
        return None

def find_county(city, zipcode, city_dict, zipcode_dict):

    if city in ny_dict.keys():
        return ny_dict[city]

    try:
        return zipcode_dict[str(int(zipcode))]

    except:
        if city_dict[city] != None:
            return city_dict[city]
        else:
            return None

ny_dict = {
    'QUEENS, NY': 'Queens',
    'BROOKLYN, NY': 'Kings',
    'MANHATTAN, NY': 'New York',
    'STATEN ISLAND, NY': 'Richmond',
    'BRONX, NY': 'Bronx'
}


def encode_counties(contributions, candidate):
    
    with open('data/city_county_dict.json', 'r') as f:
        city_county_dict = json.load(f)

    with open('data/zip_county_dict.json', 'r') as f:
        zip_county_dict = json.load(f)

    unique_cities = list(contributions.Full_City.unique())
    unique_zipcodes = list(contributions.formatted_zip.unique())

    for city in unique_cities:
        if city not in city_county_dict.keys(): 
            city_county_dict[city] = find_county_by_city(city)

    for zipcode in unique_zipcodes:
            try:
                if str(int(zipcode)) not in zip_county_dict.keys():
                    zip_county_dict[str(int(zipcode))] = find_county_by_zip(zipcode)
            except ValueError:
                pass

#     print('Finished Constructing County Dicts')

    contributions['County'] = contributions[['Full_City', 'formatted_zip']].apply(lambda x: find_county(x[0], x[1], city_county_dict, zip_county_dict), axis=1)
    contributions.to_csv('data/{}_Counties.csv'.format(candidate))
    
    with open('data/city_county_dict.json', 'w') as f:
        city_county_dict = json.dump(city_county_dict, f)

    with open('data/zip_county_dict.json', 'w') as f:
        zip_county_dict = json.dump(zip_county_dict, f)

    return contributions



In [136]:
# by_county = contributions.groupby(['County'])['contribution_receipt_amount'].sum().reset_index()
# by_county.sort_values(['contribution_receipt_amount'])

In [137]:
with open('data/state_id_codes.json', 'r') as f:
    state_codes = json.load(f)
    
def get_county_counts(contributions):

#     contributions = pd.read_csv('data/{}_Counties.csv'.format(candidate))

    contributions['Encoded_County'] = contributions[['County', 'contributor_state']].apply(lambda x : '{} - {} - {}'.format(state_codes[x[1]],x[0],x[1]) if x[1] in state_codes.keys() else None, axis=1)

    out_dict = Counter([x for x in list(contributions['Encoded_County']) if x != None and ' - nan - ' not in x])
    return out_dict


In [156]:
with open('data/state_id_codes.json', 'r') as f:
    state_codes = json.load(f)

def get_county_funding(contributions):
    contributions['County_State'] = contributions[["County", "contributor_state"]].apply(lambda x: '{} - {} - {}'.format(state_codes[x[1]], x[0], x[1]) if x[1] in state_codes.keys() else None, axis=1)    
    
    funds = contributions.groupby("County_State").sum()[['contribution_receipt_amount']]
    funds = funds.reset_index()
    funds['County'] = funds[['County_State']].apply(lambda x: x[0].split(' - ')[1], axis=1)
    funds['State'] = funds[['County_State']].apply(lambda x: x[0].split(' - ')[2], axis=1)
    funds = funds.set_index('County_State')
    return funds[['County', 'State', 'contribution_receipt_amount']]


{'Alameda - CA': 3200.0,
 'Alexandria (city) - VA': 1000.0,
 'Anne Arundel - MD': 500.0,
 'Arlington - VA': 2000.0,
 'Bexar - TX': 81300.0,
 'Dallas - TX': 1000.0,
 'Denver - CO': 300.0,
 'District of Columbia - DC': 6950.0,
 'Harris - TX': 42875.0,
 'Hidalgo - TX': 2700.0,
 'Kings - NY': 500.0,
 'Marin - CA': 250.0,
 'Montgomery - MD': 1000.0,
 'Montgomery - TX': 2700.0,
 'New York - NY': 1000.0,
 'None - TX': 5400.0,
 'Orange - CA': 2700.0,
 'Orleans - LA': 2700.0,
 'Pulaski - AR': 250.0,
 'San Francisco - CA': 1000.0,
 'San Mateo - CA': 6900.0,
 'Santa Clara - CA': 250.0,
 'Travis - TX': 25550.0}

In [139]:

out_dict = {}
meta_data = {}
candidates = ['Trump', 'Yang', 'Delaney', 'Castro']

for candidate in candidates:
    meta_data[candidate] = {}

    contributions = load_and_process_candidate_data(candidate)
    contributions = encode_counties(contributions, candidate)
    
    county_counts = get_county_counts(contributions)
    county_fundraising = get_county_funding(contributions)
    
    top_counties = county_fundraising.sort_values(['contribution_receipt_amount'], ascending=False)
    top_states = county_fundraising.groupby(['State']).sum().sort_values(['contribution_receipt_amount'], ascending=False).to_dict()
    
    meta_data[candidate]['Counties'] = {' - '.join(k.split(' - ')[1:]): v for k,v in (a.sort_values(['contribution_receipt_amount'], ascending=False).to_dict()['contribution_receipt_amount']).items()}
    meta_data[candidate]['States'] = top_states['contribution_receipt_amount']
    
    for k, v in county_counts.items():
        if k in out_dict.keys():
            out_dict[k]['Count'][candidate] = v
            out_dict[k]['Fundraising'][candidate] = county_fundraising.loc[k, 'contribution_receipt_amount']
        else:
            out_dict[k] = {'Count': {candidate: v}, 'Fundraising': {candidate: county_fundraising.loc[k, 'contribution_receipt_amount']}, 'State_ID': k.split(' - ')[0], 'County': k.split(' - ')[1], 'State': k.split(' - ')[2]}

for k, v in out_dict.items():
    for candidate in candidates:
        if candidate not in v['Count'].keys():
            v['Count'][candidate] = 0
        if candidate not in v['Fundraising'].keys():
            v['Fundraising'][candidate] = 0.0

with open('processed_data/new_county_counts.json', 'w') as f:
    json.dump(out_dict, f)

with oepn('processed_data/candidate_meta_data.json', 'w') as f:
    json.dump(meta_data, f)

out_dict

  if self.run_code(code, result):


92741
2364
795
113


{'48 - Williamson - TX': {'Count': {'Castro': 0,
   'Delaney': 0,
   'Trump': 767,
   'Yang': 1},
  'County': 'Williamson',
  'Fundraising': {'Castro': 0.0,
   'Delaney': 0.0,
   'Trump': 46561.790000000015,
   'Yang': 1.0},
  'State': 'TX',
  'State_ID': '48'},
 '13 - DeKalb - GA': {'Count': {'Castro': 0,
   'Delaney': 3,
   'Trump': 422,
   'Yang': 5},
  'County': 'DeKalb',
  'Fundraising': {'Castro': 0.0,
   'Delaney': 1500.0,
   'Trump': 17756.03,
   'Yang': 90.0},
  'State': 'GA',
  'State_ID': '13'},
 '27 - St. Louis - MN': {'Count': {'Castro': 0,
   'Delaney': 0,
   'Trump': 126,
   'Yang': 1},
  'County': 'St. Louis',
  'Fundraising': {'Castro': 0.0,
   'Delaney': 0.0,
   'Trump': 7816.81,
   'Yang': 20.0},
  'State': 'MN',
  'State_ID': '27'},
 '47 - Davidson - TN': {'Count': {'Castro': 0,
   'Delaney': 0,
   'Trump': 1078,
   'Yang': 8},
  'County': 'Davidson',
  'Fundraising': {'Castro': 0.0,
   'Delaney': 0.0,
   'Trump': 79248.52999999998,
   'Yang': 53.0},
  'State': 'TN'

In [140]:
with open('data/counties.geojson', 'r', encoding="ISO-8859-1") as f:
    county_geojson = json.load(f)

    
for i, geo_county in enumerate(county_geojson['features']):
    geo_county['properties']['COUNT'] = {candidate: 0 for candidate in candidates}
    geo_county['properties']['FUNDRAISING'] = {candidate: 0.0 for candidate in candidates}
    
    for county in out_dict.values():
        if county['County'] == geo_county['properties']['NAME'] and county['State_ID'] == geo_county['properties']['STATE']:
            geo_county['properties']['COUNT'] = county['Count']
            geo_county['properties']['FUNDRAISING'] = {k: round(v, 0) for k, v in county['Fundraising'].items()}
            
    
    county_geojson['features'][i] = geo_county


In [141]:
with open('processed_data/counties.geojson', 'w') as f:
    json.dump(county_geojson, f)

In [142]:
funds = 0
people = 0

for county in county_geojson['features']:
    for candidate in county['properties']['COUNT'].keys():
        people += county['properties']['COUNT'][candidate]
        funds += county['properties']['FUNDRAISING'][candidate]

print(funds, people, 1.0*funds/people)

24811682.0 312349 79.4357657620162


### Top Zipcodes

In [None]:
# by_zip = contributions.groupby(['formatted_zip'])['contribution_receipt_amount'].sum().reset_index()
# by_zip.sort_values(['contribution_receipt_amount'])
# by_zip[['formatted_zip', 'contribution_receipt_amount']].head()