# Donations to L.A. County candidates

By [Matt Stiles](https://www.latimes.com/people/matt-stiles) / Los Angeles Times

Questions? matt.stiles@latimes.com

### Load Python libraries

In [462]:
import pandas as pd
import geopandas as gpd
from urllib.request import urlopen 
import pyarrow
import jenkspy
import matplotlib.pyplot as plt
%matplotlib inline
import json
import numpy as np
from altair import datum
import altair as alt
import altair_latimes as lat
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')
alt.renderers.set_embed_options(actions=False)

RendererRegistry.enable('default')

### Read all donations candidates since 2007, downloaded [here](https://efs.lacounty.gov/public_search_results.cfm?viewtype=xl&requesttimeout=1000&showall=yes&rept_type=ALLCon&CITY=la&LNM_CRIT=&FNM_CRIT=&CNM_CRIT=&ST_CRIT=&ZIP_CRIT=&ENM_CRIT=&OCC_CRIT=&S_USER=&S_OFFICE=&CID_CRIT=&CMT_CRIT=&D_BDATE=&D_EDATE=&S_BAMT=&S_EAMT=&ELECTION_ID=29&SCHEDULE=A%2CB%2CC&SUBMITBTN=Search%20Now%20%3D%3D%3E&TO_ELEC_DATE=&FROM_RPT_DATE=&TO_RPT_DATE=), and clean headers

In [463]:
src = pd.read_csv('input/donations_20201215.csv',\
                  dtype={'contributor_zip_code': 'str'}, low_memory=False)

In [464]:
src.columns = src.columns.str.strip().str.lower().str.replace(' ', '_')\
                    .str.replace('(', '').str.replace(')', '').str.replace('-','_')

### How many records we talking?

In [465]:
len(src)

90126

### Create a clean dataframe from the original import

In [466]:
contributions = pd.DataFrame(src)

### Clean up null values the table and other quirks

In [467]:
contributions.contributor_address_2 = contributions.contributor_address_2.fillna('')
contributions.occupation = contributions.occupation.fillna('')
contributions.employer = contributions.employer.fillna('')
contributions.description = contributions.description.fillna('')
contributions.contributor_first_name = contributions.contributor_first_name.fillna('')
contributions.contributor_zip_code_ext = contributions.contributor_zip_code_ext.fillna('')

In [468]:
date_cols = ['period_beg_date', 'period_end_date', 'election_date', 'date']

In [469]:
for d in contributions[date_cols]:
    contributions[d] = contributions[d].str.replace('00:00.0', '', regex=False)

In [470]:
contributions = contributions[~contributions['date'].notnull()]

In [471]:
contributions['amount_rcvd'] = contributions['amount_rcvd'].astype(int)
contributions['monthyear'] = pd.to_datetime(contributions['date']).map(lambda dt: dt.replace(day=1))
contributions['type'] = contributions['type'].str.replace("Small Contributor Committee", "Small Donor Committee")
contributions['first_name'] = contributions['first_name'].str.upper()
contributions['last_name'] = contributions['last_name'].str.upper()
contributions['committee_name'] = contributions['committee_name'].str.upper()
contributions['contributor_zip_code'] = contributions['contributor_zip_code'].astype(str).str.zfill(5)
contributions['contributor_zip_code'] = contributions['contributor_zip_code'].str.replace('.0','',regex=False)

In [472]:
contributions['candidate_name'] =  contributions["first_name"].str.title() + ' ' + contributions["last_name"].str.title()
contributions['candidate_name'] = contributions['candidate_name'].str.replace('Holly J. Mitchell', 'Holly Mitchell')
contributions['first_name'] = contributions['first_name'].str.replace('HOLLY J.', 'HOLLY')

### Change the zip code field so it merges with other tables later

In [473]:
contributions.rename(columns={"contributor_zip_code": "zipcode"}, inplace=True)

### What do the records look like?

In [474]:
contributions.iloc[0]

last_name                                       NaN
first_name                                      NaN
committee_id                                    NaN
committee_name                                  NaN
office_type                                     NaN
district_number                                 NaN
schedule                                          A
type                                            NaN
period_beg_date                                 NaN
period_end_date                                 NaN
election_date                                   NaN
date                                            NaN
amount_rcvd                                   42310
amount_pd                                       NaN
description                 Unitemized - Schedule A
contributor_first_name                             
contributor_last_name                           NaN
contributor_address                             NaN
contributor_address_2                              
contributor_

### Sluggify candidate names

In [214]:
contributions['candidate_slug'] =  \
    contributions["first_name"].str.lower() + '_' + contributions["last_name"].str.lower()
contributions['candidate_slug']\
= contributions['candidate_slug'].\
str.strip().str.lower().str.replace(\
                    ' ', '_').str.replace('(', '').str.replace(')', '')\
.str.replace('.', '').str.replace('\'', '')

### Process the dates now to make life easier througout

In [215]:
contributions['date'] = pd.to_datetime(contributions['date'])
contributions['year'], contributions['month'] = contributions['date'].dt.year, contributions['date'].dt.month
contributions['year'] = contributions['year'].astype(str)
contributions['month'] = contributions['month'].astype(str)
contributions['months'] = contributions['date'].dt.strftime('%m/%Y').astype(str)
contributions['week'] = contributions['date'].dt.isocalendar().week
contributions['week'] = contributions['week'].astype(str)

### Filter data for recent contributions

In [219]:
recent_contributions = contributions[(contributions['date'] > '2019-01-01')]

In [218]:
recent_contributions.dtypes

last_name                           object
first_name                          object
committee_id                        object
committee_name                      object
office_type                         object
district_number                     object
schedule                            object
type                                object
period_beg_date                     object
period_end_date                     object
election_date                       object
date                        datetime64[ns]
amount_rcvd                          int64
amount_pd                          float64
description                         object
contributor_first_name              object
contributor_last_name               object
contributor_address                float64
contributor_address_2               object
contributor_city                    object
contributor_state                   object
zipcode                             object
contributor_zip_code_ext            object
occupation 