# Donations to L.A. County 2nd District supervisorial candidates

By [Matt Stiles](https://www.latimes.com/people/matt-stiles) / Los Angeles Times

Questions? matt.stiles@latimes.com

### Load Python libraries

In [1]:
import pandas as pd
import geopandas as gpd
from urllib.request import urlopen 
import pyarrow
import jenkspy
import matplotlib.pyplot as plt
%matplotlib inline
import json
import numpy as np
from altair import datum
import altair as alt
import altair_latimes as lat
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')
alt.renderers.set_embed_options(actions=False)

RendererRegistry.enable('default')

### Read all donations candidates since 2007, downloaded [here](https://efs.lacounty.gov/public_search_results.cfm?viewtype=xl&requesttimeout=1000&showall=yes&rept_type=ALLCon&CITY=la&LNM_CRIT=&FNM_CRIT=&CNM_CRIT=&ST_CRIT=&ZIP_CRIT=&ENM_CRIT=&OCC_CRIT=&S_USER=&S_OFFICE=&CID_CRIT=&CMT_CRIT=&D_BDATE=&D_EDATE=&S_BAMT=&S_EAMT=&ELECTION_ID=29&SCHEDULE=A%2CB%2CC&SUBMITBTN=Search%20Now%20%3D%3D%3E&TO_ELEC_DATE=&FROM_RPT_DATE=&TO_RPT_DATE=), and clean headers

In [2]:
src = pd.read_csv('input/donations_20201026.csv',\
                  dtype={'contributor_zip_code': 'str'}, low_memory=False)

In [3]:
src.columns = src.columns.str.strip().str.lower().str.replace(' ', '_')\
                    .str.replace('(', '').str.replace(')', '').str.replace('-','_')

### How many records we talking?

In [4]:
len(src)

90117

### Create a clean dataframe from the original import

In [5]:
contributions = pd.DataFrame(src)

### Clean up null values the table and other quirks

In [6]:
contributions.contributor_address_2 = contributions.contributor_address_2.fillna('')
contributions.occupation = contributions.occupation.fillna('')
contributions.employer = contributions.employer.fillna('')
contributions.description = contributions.description.fillna('')
contributions.contributor_first_name = contributions.contributor_first_name.fillna('')
contributions.contributor_zip_code_ext = contributions.contributor_zip_code_ext.fillna('')

In [7]:
contributions['amount_rcvd'] = contributions['amount_rcvd'].astype(int)
contributions['monthyear'] = pd.to_datetime(contributions['date']).map(lambda dt: dt.replace(day=1))
contributions['type'] = contributions['type'].str.replace("Small Contributor Committee", "Small Donor Committee")
contributions['first_name'] = contributions['first_name'].str.upper()
contributions['last_name'] = contributions['last_name'].str.upper()
contributions['committee_name'] = contributions['committee_name'].str.upper()
contributions['contributor_zip_code'] = contributions['contributor_zip_code'].astype(str).str.zfill(5)
contributions['contributor_zip_code'] = contributions['contributor_zip_code'].str.replace('.0','',regex=False)

In [8]:
contributions['candidate_name'] =  contributions["first_name"].str.title() + ' ' + contributions["last_name"].str.title()
contributions['candidate_name'] = contributions['candidate_name'].str.replace('Holly J. Mitchell', 'Holly Mitchell')
contributions['first_name'] = contributions['first_name'].str.replace('HOLLY J.', 'HOLLY')

### Change the zip code field so it merges with other tables later

In [9]:
contributions.rename(columns={"contributor_zip_code": "zipcode"}, inplace=True)

### What do the records look like?

In [10]:
contributions.sort_values(by='date', ascending=False).head()

Unnamed: 0,last_name,first_name,committee_id,committee_name,office_type,district_number,schedule,type,period_beg_date,period_end_date,...,int_city,int_state,int_zip_code,int_occupation,int_employer,doc_id,rec_id,x,monthyear,candidate_name
0,COOLEY,STEVE,1235308,D.A. STEVE COOLEY OFFICEHOLDER ACCOUNT,DAT,,A,O,00:00.0,00:00.0,...,,,,,,,31.0,2523.0,2020-11-01,Steve Cooley
67586,HAHN,JANICE,1376011,JANICE HAHN FOR SUPERVISOR 2016,BSU,C04,A,I,00:00.0,00:00.0,...,,,,,,,2327.0,156075.0,2020-11-01,Janice Hahn
60082,ENGLANDER,MITCHELL,1377028,MITCHELL ENGLANDER FOR SUPERVISOR 2016,CSU,C05,A,I,00:00.0,00:00.0,...,,,,,,,2212.0,147635.0,2020-11-01,Mitchell Englander
60081,ENGLANDER,MITCHELL,1377028,MITCHELL ENGLANDER FOR SUPERVISOR 2016,CSU,C05,A,I,00:00.0,00:00.0,...,,,,,,,2212.0,147631.0,2020-11-01,Mitchell Englander
60080,CARR,ELAN,1375984,CARR FOR SUPERVISOR 2016,CSU,,A,I,00:00.0,00:00.0,...,,,,,,,2189.0,146063.0,2020-11-01,Elan Carr


### Sluggify candidate names

In [11]:
contributions['candidate_slug'] =  \
    contributions["first_name"].str.lower() + '_' + contributions["last_name"].str.lower()
contributions['candidate_slug']\
= contributions['candidate_slug'].\
str.strip().str.lower().str.replace(\
                    ' ', '_').str.replace('(', '').str.replace(')', '')\
.str.replace('.', '').str.replace('\'', '')

### Process the dates now to make life easier througout

In [12]:
contributions['date'] = pd.to_datetime(contributions['date'])
contributions['year'], contributions['month'] = contributions['date'].dt.year, contributions['date'].dt.month
contributions['year'] = contributions['year'].astype(str)
contributions['month'] = contributions['month'].astype(str)
contributions['months'] = contributions['date'].dt.strftime('%m/%Y').astype(str)
contributions['week'] = contributions['date'].dt.isocalendar().week
contributions['week'] = contributions['week'].astype(str)

### Define the candidates

In [13]:
contributions.loc[contributions['candidate_name'] == 'Jaqueline Lacey', 'candidate_name'] = 'Jackie Lacey'

In [14]:
contributions.loc[contributions['candidate_name'] == 'Jacqueline Lacey', 'candidate_name'] = 'Jackie Lacey'

In [15]:
candidates = (['Jaqueline Lacey', 'Jacqueline Lacey', 'Herb Wesson', 'Holly Mitchell', 'George Gascon', 'Jackie Lacey'])

### Filter data for recent contributions

In [16]:
recent_contributions = contributions[(contributions['date'] > '2018-06-30')]

### Make a dataframe with our donations

In [17]:
our_candidates = pd.DataFrame(recent_contributions[recent_contributions['candidate_name'].isin(candidates)])

### How many donations are we talking about? 

In [18]:
len(our_candidates)

9922

In [19]:
our_candidates.candidate_name.value_counts()

Holly Mitchell    3402
George Gascon     2852
Herb Wesson       2586
Jackie Lacey      1082
Name: candidate_name, dtype: int64

### How much did the candidates raise individually? 

In [20]:
our_totals = our_candidates.groupby(['candidate_name'])['amount_rcvd']\
    .sum().reset_index().sort_values(by='amount_rcvd', ascending=False)

In [21]:
our_totals

Unnamed: 0,candidate_name,amount_rcvd
1,Herb Wesson,2563607
2,Holly Mitchell,1635217
0,George Gascon,1164325
3,Jackie Lacey,689971


In [22]:
sum_all_donations = our_totals['amount_rcvd'].sum()

### And how much has been raised by the candidates in total?

In [23]:
our_totals['amount_rcvd'].sum()

6053120

In [24]:
avg_totals = our_candidates.groupby(['candidate_name']).agg({'amount_rcvd':'mean'}).reset_index().sort_values(by='amount_rcvd', ascending=False)

### What is each candidate's average donation size? 

In [25]:
avg_totals.round(2)

Unnamed: 0,candidate_name,amount_rcvd
1,Herb Wesson,991.34
3,Jackie Lacey,637.68
2,Holly Mitchell,480.66
0,George Gascon,408.25


In [26]:
our_candidates.to_csv('output/our_candidates.csv')

### How many individual donations did each candidate receive? 

In [27]:
# Crudely remap donation 'type' codes to be more descriptive
abbr_indiv_else = {'C' : 'Other', 'I' : 'Individual', 'O' : 'Other', 'S' : 'Other'}
our_candidates = our_candidates.replace({"type": abbr_indiv_else})

In [28]:
our_candidates_indiv = pd.pivot_table(our_candidates, values='amount_rcvd', \
                    index=['candidate_name'], columns=['type'], aggfunc=[np.size], fill_value='0').reset_index()

In [29]:
our_candidates_indiv = pd.DataFrame(our_candidates_indiv.to_records())

In [30]:
our_candidates_indiv.columns = our_candidates_indiv.columns.str.strip().str.lower().str.replace(',_', '')\
                    .str.replace('(', '').str.replace(')', '').str.replace("'", '')\
                    .str.replace('size, ','').str.replace(',', '')

In [31]:
our_candidates_indiv = pd.DataFrame(our_candidates_indiv)

### Which share of each candidates donations came from individuals?

In [32]:
our_candidates_indiv.head()

Unnamed: 0,index,candidate_name,individual,other
0,0,George Gascon,2785,67
1,1,Herb Wesson,1821,765
2,2,Holly Mitchell,2870,532
3,3,Jackie Lacey,880,201


In [33]:
our_candidates_indiv['share_indy'] = \
    ((our_candidates_indiv['individual'] /\
      (our_candidates_indiv['individual'] + our_candidates_indiv['other']))*100).round()

In [34]:
our_candidates_indiv['share_other'] = \
    ((our_candidates_indiv['other'] /\
      (our_candidates_indiv['individual'] + our_candidates_indiv['other']))*100).round()

In [35]:
our_candidates_indiv.head()

Unnamed: 0,index,candidate_name,individual,other,share_indy,share_other
0,0,George Gascon,2785,67,98.0,2.0
1,1,Herb Wesson,1821,765,70.0,30.0
2,2,Holly Mitchell,2870,532,84.0,16.0
3,3,Jackie Lacey,880,201,81.0,19.0


### Normalized bar chart for the share of individual donations

In [36]:
ind_other_totals = our_candidates.groupby(['type', 'candidate_name']).agg({'amount_rcvd':'sum'}).reset_index()

In [37]:
ind_other_totals

Unnamed: 0,type,candidate_name,amount_rcvd
0,Individual,George Gascon,1106175
1,Individual,Herb Wesson,1693879
2,Individual,Holly Mitchell,1071413
3,Individual,Jackie Lacey,525920
4,Other,George Gascon,58150
5,Other,Herb Wesson,869728
6,Other,Holly Mitchell,563804
7,Other,Jackie Lacey,144051


In [38]:
bars = alt.Chart(ind_other_totals, title='Share of donations from individuals').mark_bar().encode(
    x=alt.X('sum(amount_rcvd)', stack="normalize", title="Share of donations",axis=alt.Axis(format='%', tickCount=6)),
    y=alt.Y('candidate_name', title=""),
    color=alt.Color('type',legend=alt.Legend(title="", orient='top')),
    order=alt.Order('sum(amount_rcvd)', sort='ascending')
)
(bars).properties(height=175, width=800)

## When exactly is each donation? 

### How much did each candidate raise — by month — in 2019 and 2020? 

In [39]:
our_candidates_time = our_candidates.groupby(['monthyear','year','month', 'candidate_name'], as_index=False).\
            agg({'amount_rcvd': ['size', 'mean', 'sum']}, as_index=False)

In [40]:
flat_columns = [''.join(t) for t in our_candidates_time.columns]
our_candidates_time.columns = flat_columns

In [41]:
our_candidates_time['amount_rcvdmean'] = our_candidates_time['amount_rcvdmean'].astype(int)
our_candidates_time['amount_rcvdmean'] = our_candidates_time['amount_rcvdmean'].round()
our_candidates_time.head(10)

Unnamed: 0,monthyear,year,month,candidate_name,amount_rcvdsize,amount_rcvdmean,amount_rcvdsum
0,2020-11-01,2020.0,11.0,George Gascon,2852,408,1164325
1,2020-11-01,2020.0,11.0,Herb Wesson,2586,991,2563607
2,2020-11-01,2020.0,11.0,Holly Mitchell,3402,480,1635217
3,2020-11-01,2020.0,11.0,Jackie Lacey,1082,637,689971


In [42]:
our_candidates_time['months'] = our_candidates_time['monthyear'].dt.strftime('%b').astype(str)
our_candidates_time['week'] = our_candidates_time['monthyear'].dt.isocalendar().week
our_candidates_time['year'] = our_candidates_time['monthyear'].dt.strftime('%Y').astype(str)

In [43]:
our_candidates_time

Unnamed: 0,monthyear,year,month,candidate_name,amount_rcvdsize,amount_rcvdmean,amount_rcvdsum,months,week
0,2020-11-01,2020,11.0,George Gascon,2852,408,1164325,Nov,44
1,2020-11-01,2020,11.0,Herb Wesson,2586,991,2563607,Nov,44
2,2020-11-01,2020,11.0,Holly Mitchell,3402,480,1635217,Nov,44
3,2020-11-01,2020,11.0,Jackie Lacey,1082,637,689971,Nov,44


In [44]:
our_candidates.tail()

Unnamed: 0,last_name,first_name,committee_id,committee_name,office_type,district_number,schedule,type,period_beg_date,period_end_date,...,doc_id,rec_id,x,monthyear,candidate_name,candidate_slug,year,month,months,week
90005,WESSON,HERB,1414475,WESSON FOR SUPERVISOR 2020,CSU,C02,C,Other,00:00.0,00:00.0,...,,2752.0,1502.0,2020-11-01,Herb Wesson,herb_wesson,2020.0,11.0,11/2020,45
90012,WESSON,HERB,1414475,WESSON FOR SUPERVISOR 2020,CSU,C02,C,Individual,00:00.0,00:00.0,...,,2581.0,1419.0,2020-11-01,Herb Wesson,herb_wesson,2020.0,11.0,11/2020,45
90018,LACEY,JACKIE,1334856,COMMITTEE TO ELECT JACKIE LACEY FOR DA 2012,DAT,,C,Other,00:00.0,00:00.0,...,,1559.0,669.0,2020-11-01,Jackie Lacey,jackie_lacey,2020.0,11.0,11/2020,45
90089,LACEY,JACKIE,1334856,COMMITTEE TO ELECT JACKIE LACEY FOR DA 2012,DAT,,C,Individual,00:00.0,00:00.0,...,,1559.0,668.0,2020-11-01,Jackie Lacey,jackie_lacey,2020.0,11.0,11/2020,45
90103,MITCHELL,HOLLY,1415889,HOLLY J. MITCHELL FOR COUNTY SUPERVISOR 2020,CSU,C02,C,Individual,00:00.0,00:00.0,...,,2710.0,1490.0,2020-11-01,Holly Mitchell,holly_mitchell,2020.0,11.0,11/2020,45


In [45]:
alt.Chart(districttwo_time).mark_bar().encode(
    y='amount_rcvdsum:Q',
    x=alt.X('month:N', timeUnit='month', title='')
).properties(
    width=250,
    height=180
).facet(
    facet='candidate_name:N',
    columns=3
)

NameError: name 'districttwo_time' is not defined

In [None]:
districttwo_time.to_csv('output/districttwo_time.csv')

### How much did they raise by ZIP? 

In [None]:
zip_totals = districttwo.groupby(['zipcode'])['amount_rcvd']\
    .sum().reset_index().sort_values(by='amount_rcvd', ascending=False)

In [None]:
zip_totals.head()

In [None]:
zip_totals_candidate = districttwo[districttwo['candidate_name'] == 'Albert Robles'].groupby(['zipcode'])['amount_rcvd']\
    .sum().reset_index().sort_values(by='amount_rcvd', ascending=False)
zip_totals_candidate.head()

In [None]:
zip_totals.to_csv('output/zip_totals.csv')

In [None]:
zip_totals_candidates = districttwo.groupby(['candidate_name', 'zipcode'])['amount_rcvd']\
    .sum().reset_index().sort_values(by='amount_rcvd', ascending=False)

---

## Geography

### Load 2nd District boundary data

In [None]:
#http://public.gis.lacounty.gov/public/rest/services/LACounty_Dynamic/Demographics/MapServer/3
district = gpd.read_file('/Users/mhustiles/data/data/GIS/LA/second-district.geojson')

### Read the zip codes boundaries

In [None]:
#http://public.gis.lacounty.gov/public/rest/services/LACounty_Dynamic/Administrative_Boundaries/MapServer/5
# zips_old = gpd.read_file('/Users/mhustiles/data/github/notebooks/\
# campaign-finance/input/la_county_gis_zip_codes/la_county_gis_zip_codes_1569271283260.geojson')
zips = gpd.read_file('/Users/mhustiles/data/github/AGStoShapefile/\
backupdir/WealthiestZipCodes2017/WealthiestZipCodesCA.geojson')

In [None]:
zips.plot()

### We only need L.A. County

In [None]:
zips = gpd.GeoDataFrame(zips[zips['COUNTY_NAME'].str.contains('Los Angeles County')])

In [None]:
zips.plot()

In [None]:
zips.columns = zips.columns.str.strip().str.lower().str.replace(' ', '_')\
    .str.replace('(', '').str.replace(')', '')

In [None]:
zips.rename(columns={"id": "zipcode"}, inplace=True)
zips = zips.set_crs("EPSG:4326", allow_override=True)
zips = zips.set_crs(epsg=4326)

### We don't need all the income demographics for this story

In [None]:
zips.drop(['objectid',
           'st_abbrev',
          'hai_cy',
          'incmort_cy',
          'wlthrnk_cy',
          'domstate',
           'domcbsa',
           'cbsa_name',
           'shape__area',
           'shape__length',
           'avgnw_cy',
          'county_name'], axis=1, inplace=True)

In [None]:
zips.loc[0]

In [None]:
zips_in_district = gpd.sjoin(zips, district, how="inner", op='intersects')

### Export 2nd District boundary data to MBTiles

In [None]:
district.plot()

### Only zips that overlap with the district

In [None]:
zips_in_district.plot()

In [None]:
zips_in_district.rename(columns={"id": "zipcode"}, inplace=True)

In [None]:
zips_in_district_geo = zips_in_district.merge(zip_totals, on='zipcode')

In [None]:
zips_in_district_geo['amount_rcvd'] = zips_in_district_geo['amount_rcvd'].astype(int).fillna('0')

In [None]:
zips_in_district_geo.sort_values(by='amount_rcvd', ascending=False).head(1)

In [None]:
zips_in_district_geo.dtypes

In [None]:
zips_in_district_export = gpd.GeoDataFrame(zips_in_district_geo)

### Export file showing all donations by distrit zips

In [None]:
zips_in_district_export.to_file('output/zips_in_district_export.shp')

---

### How much did each of the candidates raise, by zip?

In [None]:
# pivot to a wide table for mapping

sum_by_zip = pd.pivot_table(districttwo, values='amount_rcvd', index=['zipcode'], 
                            columns=['candidate_slug'], aggfunc=np.sum, fill_value=0).reset_index()

In [None]:
sum_by_zip["total"] = sum_by_zip.sum(axis=1)

In [None]:
# flatten the multiindex frame

sum_by_zip.columns = sum_by_zip.columns.to_series().str.join('')

### How much did all of the candidates raise by zip?

In [None]:
sum_by_zip = sum_by_zip.sort_values("total", ascending=False)

In [None]:
sum_by_zip.head()

### Top ZIP Codes

In [None]:
sum_by_zip[['zipcode', 'total']].sort_values(by='total', ascending=False).head()

### How many zips donated? 

In [None]:
sum_by_zip.total.count()

### Which 2nd District candidate received the most in each zip? 

In [None]:
#create a dummy field because i'm bad at idxmax; 
sum_by_zip['tie'] = '0'

In [None]:
#reshape dataframe
sum_by_zip_winner = pd.DataFrame(sum_by_zip[['zipcode', 'tie','herb_wesson','holly_mitchell','total']])

In [None]:
sum_by_zip_winner['tie'] = sum_by_zip_winner['tie'].astype(int)
sum_by_zip_winner['zipcode'] = sum_by_zip_winner['zipcode'].astype(str)

### Who won each zip?

In [None]:
sum_by_zip_winner['winner_dist_two'] = sum_by_zip_winner[['tie','herb_wesson','holly_mitchell']].idxmax(axis=1)

### How much did each zip winner raise from there? 

In [None]:
sum_by_zip_winner['winner_total'] = sum_by_zip_winner[['tie','herb_wesson','holly_mitchell']].max(axis=1)

In [None]:
sum_by_zip_winner['winner_dist_two'] = sum_by_zip_winner['winner_dist_two'].str.title().str.replace('_',' ')

In [None]:
sum_by_zip_winner['winner_dist_two'].value_counts('normalize')

### Which were the top zips for each candidate? 

In [None]:
sum_by_zip_winner.sort_values(by='winner_total', ascending=False).head()

In [None]:
sum_by_zip_winner.to_csv('output/sum_by_hood_winner.csv')

### Remove rows with no contributions

In [None]:
sum_by_zip_winner = sum_by_zip_winner[(sum_by_zip_winner['winner_total'] != 0)]

In [None]:
len(sum_by_zip_winner)

### How many zips did each candidate win?

In [None]:
sum_by_zip_winner.winner_dist_two.value_counts()

---

### How much did the candidates raise in the 2nd District ZIPs alone? 

In [None]:
donations_in_district_zips = zips_in_district.merge(zip_totals_candidates,on='zipcode')

### Share raised inside the district raised by all the candidates? 

In [None]:
((donations_in_district_zips.amount_rcvd.sum() /second_district_totals['amount_rcvd'].sum()) * 100).round(3)

### Share raised inside the district raised by each candidate? 

In [None]:
in_district_totals = donations_in_district_zips.groupby(['candidate_name']).agg('sum').reset_index()

In [None]:
in_district_totals[['candidate_name','amount_rcvd']]

In [None]:
in_district_share = in_district_totals.merge(second_district_sum, on='candidate_name')
in_district_share_slim = pd.DataFrame(in_district_share[['candidate_name','amount_rcvd_y', 'amount_rcvd_x']])

In [None]:
in_district_share.head(10)

### What share did candidates raise from outside the district?

In [None]:
in_district_share_slim.rename(columns={"amount_rcvd_y": "total_raised",\
                                      "amount_rcvd_x": "inside_raised"}, inplace=True)

In [None]:
in_district_share_slim['share_inside'] = ((in_district_share_slim['inside_raised'] /\
                                   in_district_share_slim['total_raised'] ) *100).round()

In [None]:
in_district_share_slim['share_outside'] = 100 - in_district_share_slim['share_inside']

In [None]:
in_district_share_slim.head(6)

In [None]:
in_district_share_slim[['candidate_name','share_inside','share_outside']]\
    .to_csv('output/in_district_share_slim.csv')

### Merge the geography back onto our 2nd District winners table

In [None]:
totals_by_LA_zips = zips.merge(sum_by_zip_winner, on='zipcode')

In [None]:
totals_by_LA_zips_slim = totals_by_LA_zips[['zipcode','geometry','herb_wesson','holly_mitchell', 'total','winner_dist_two','winner_total']]

### Export geojson for web app and Turf

In [None]:
totals_by_LA_zips_slim.to_file('output/second-district-zips-update.geojson', driver='GeoJSON')
totals_by_LA_zips_slim.to_file('/Users/mhustiles/data/github/bigbuilder/bigbuilder/pages/county-supervisor-2nd-district-campaign-donations/static/second-district-zips-update.geojson', driver='GeoJSON')

### Generate winners by zips to MBTiles for Mapbox

In [None]:
!tippecanoe --generate-ids --force -r1 -pk -pf -o \
output/second-district-zips-update.mbtiles \
output/second-district-zips-update.geojson

---

### Wesson donations

In [None]:
wesson = districttwo[districttwo['candidate_name'] == 'Herb Wesson']

In [None]:
wessonoutside = wesson[wesson['type'] == 'Other']

In [None]:
wessonlarge = wesson[wesson['amount_rcvd'] == 1500]
len(wessonlarge)

### Mitchell donations

In [None]:
mitchell = districttwo[districttwo['candidate_name'] == 'Holly Mitchell']

In [None]:
mitchelloutside = mitchell[mitchell['type'] == 'Other']

In [None]:
mitchelllarge = mitchell[mitchell['amount_rcvd'] == 1500]
len(mitchelllarge)