# Analyzing fire department payrolls in California: 2011-2018

This notebook analyzes government payroll [data](https://publicpay.ca.gov/Reports/RawExport.aspx) compiled and released annually by the California state controller's office. The data include anonymized salary information for all employees at cities, counties, special districts and state government.

---

### Load python tools

In [1]:
import pandas as pd
import geopandas as gpd
import cpi
from urllib.request import urlopen 
import pyarrow
import jenkspy
import matplotlib.pyplot as plt
%matplotlib inline
import json
import numpy as np
from altair import datum
import altair as alt
import altair_latimes as lat
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')
pd.options.display.float_format = '{:,.0f}'.format

### Read controller payroll data

In [2]:
# processed here: 00-california-payroll-processing
src = pd.read_feather('/Users/mhustiles/data/data/controller/output/payroll.feather')

In [3]:
src['year'] = src['year'].astype(str)

In [4]:
payroll = pd.DataFrame(src[(src['year'] != '2009') & (src['year'] != '2010')])

### How many records, years in the dataframe? 

In [5]:
len(payroll)

9756210

In [6]:
payroll.head(1)

Unnamed: 0,index,year,type,population,employer,department,position,overtime,wages,benefits,employercounty,basewages,adjusted_overtime,adjusted_wages,adjusted_benefits,adjusted_basewages
374773,374773,2017,STATE,0,"ADMINISTRATIVE LAW, OFFICE OF",NOT LISTED,ASSISTANT CHIEF COUNSEL,0,169110,46725,NAN,169110,0,176379,48733,176379


### Combine employer name, type (ie "Los Angeles" vs. "Los Angeles County"

In [7]:
payroll['employerfull'] = (payroll['employer'] + '_' + payroll['type']).str.replace(' ','_')

### Read in our processed table of fire titles, convert to a list

In [8]:
positions = pd.read_csv('input/positions.csv')

In [9]:
positions_list = positions[positions['category'] == 'firefighter']['position'].tolist()

### Tag each record it the employee's title matches our 'firefighter' list

In [10]:
# payroll['fire'] = \
#     (payroll['position'].isin(positions_list) & payroll['department'].str.contains('FIRE')) | \
#     (payroll['position'].isin(positions_list) & payroll['employer'].str.contains('FIRE'))

In [11]:
payroll_fire = pd.DataFrame(payroll[(payroll['position'].isin(positions_list) &\
                                     payroll['department'].str.contains('FIRE')) |\
                                    (payroll['position'].isin(positions_list) &\
                                     payroll['employer'].str.contains('FIRE')) |\
                                   (payroll['position'].isin(positions_list) &\
                                     payroll['position'].str.contains('FIRE'))])

In [12]:
len(payroll_fire)

347574

### Read just the 'firefighter' payroll records that included some amount of overtime into a dataframe

In [13]:
# payroll_fire = pd.DataFrame(payroll[(payroll['fire'] == True)])
# non_payroll_fire = pd.DataFrame(payroll[(payroll['fire'] == False)])

### How many fire agenies

In [14]:
entities = payroll_fire.groupby(['employer']).agg('size')
years = payroll_fire.groupby(['year']).agg('size')

In [15]:
years.head()

year
2011    37726
2012    37671
2013    37562
2014    37417
2015    38441
dtype: int64

In [16]:
len(entities)

586

### Export fire payroll dataframe to csv 

In [17]:
payroll_fire.to_csv('/Users/mhustiles/data/data/controller/output/payroll_fire.csv')

---

### Statewide aggregates

In [18]:
state = payroll_fire.groupby(['year']).agg({'adjusted_overtime':'sum', \
                                            'adjusted_overtime':'sum', \
                                            'adjusted_wages':'sum', \
                                            'adjusted_basewages':'sum', \
                                            'adjusted_benefits':'sum'}).reset_index()

In [19]:
state.head(8)

Unnamed: 0,year,adjusted_overtime,adjusted_wages,adjusted_basewages,adjusted_benefits
0,2011,838877369,4182727880,3343850511,1294307252
1,2012,951936641,4255059134,3303122494,1346184409
2,2013,1021348278,4264986368,3243638090,1403209497
3,2014,1094546068,4268715795,3174169727,1464576166
4,2015,1221411273,4619236939,3397825665,1577961644
5,2016,1269955539,4770777111,3500821572,1648770858
6,2017,1354261182,4905734068,3551472886,1542622657
7,2018,1388759908,4989322946,3600563038,1610651254


### How much has overtime increased statewide? 

In [20]:
'{:,.0f}%'.format((((state.iloc[7,1] - state.iloc[0,1]) / state.iloc[0,1])*100).round(2))

'66%'

In [21]:
'${:,.0f}'.format((state.iloc[7,1] - state.iloc[0,1]))

'$549,882,539'

In [22]:
state_bar = alt.Chart(state).mark_bar().encode(
    x=alt.X('year:N', axis=alt.Axis(format='', tickCount=5)),
    y=alt.Y("sum(adjusted_overtime):Q", title=" ", axis=alt.Axis(format='$,N', tickCount=6))
)

In [23]:
state_bar_text = state_bar.mark_text(
    align='center',
    baseline='top',
    dx=0,
    dy=-20 # Nudges text to right so it doesn't appear on top of the bar
).encode(
    text=alt.Text('sum(adjusted_overtime):Q', format="$,.3s"))

In [24]:
(state_bar + state_bar_text)\
.properties(height=400, width=600, title='Statewide firefighter overtime spending, 2011-19')

---

### Group by employer/type and sum overtime expenses by year

In [25]:
payroll_fire_mean = payroll_fire.groupby(['year', 'type']).agg({'adjusted_overtime':'mean', 'adjusted_basewages':'mean'}).reset_index()

In [26]:
payroll_fire_mean.head(100)

Unnamed: 0,year,type,adjusted_overtime,adjusted_basewages
0,2011,CITY,23962,105657
1,2011,COUNTY,33913,89702
2,2011,SPECIAL DISTRICT,14664,65147
3,2011,STATE,12787,59215
4,2012,CITY,27840,104543
5,2012,COUNTY,33225,82974
6,2012,SPECIAL DISTRICT,15588,63790
7,2012,STATE,18575,64370
8,2013,CITY,30863,103929
9,2013,COUNTY,36545,82445


### Pivot on employer type and widen the dataframe across the years

In [27]:
payroll_fire_mean_pivot = pd.pivot_table(payroll_fire_mean, \
                            values='adjusted_overtime', index=['type'], columns=['year']).reset_index().fillna(0)

In [28]:
payroll_fire_mean_pivot.columns = payroll_fire_mean_pivot.columns.map(str)

In [29]:
payroll_fire_mean_pivot.head()

year,type,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,CITY,23962,27840,30863,33081,36494,36335,37891,38254,35373
1,COUNTY,33913,33225,36545,38581,42511,46739,48338,47438,45746
2,SPECIAL DISTRICT,14664,15588,15185,17292,18659,18994,22065,22663,22005
3,STATE,12787,18575,19100,21572,21863,20058,23871,27120,19357


### Trim the data set to early, middle and current years

In [30]:
fire_pivot = payroll_fire_mean_pivot[['type', '2011','2014','2019']].reset_index()

### Select only those places that had some over time

In [31]:
fire_pivot = fire_pivot[(fire_pivot['2011'] > 1) &\
                        (fire_pivot['2014'] > 1) &\
                        (fire_pivot['2019'] > 1)]

In [32]:
fire_pivot = fire_pivot[['type','2011','2014','2019']]

### What was the percentage increase in adjusted overtime, by place?

In [33]:
fire_pivot['increase_adjusted_pct'] = (((fire_pivot['2019'] - \
                                    fire_pivot['2011'])/fire_pivot['2011'])*100).astype(int)

In [34]:
fire_pivot.sort_values(by='increase_adjusted_pct', ascending=False).head(10)

year,type,2011,2014,2019,increase_adjusted_pct
3,STATE,12787,21572,19357,51
2,SPECIAL DISTRICT,14664,17292,22005,50
0,CITY,23962,33081,35373,47
1,COUNTY,33913,38581,45746,34


In [35]:
fire_pivot.to_csv('output/fire_pivot.csv')

### How much did overtime, in adjusted dollars, go up for each agency type?

In [36]:
employers_grouped = payroll_fire.groupby(['type','year']).agg({'overtime': 'mean'}).reset_index()

In [37]:
employers_grouped.dtypes

type         object
year         object
overtime    float64
dtype: object

In [38]:
chart = alt.Chart(employers_grouped).mark_bar().encode(
    x=alt.X('year:N', title='', axis=alt.Axis(format='', tickCount=6)),
    y=alt.Y('overtime:Q', title='', axis=alt.Axis(format='$,N', tickCount=6)),
    facet=alt.Facet('type:N', title='Average firefighter overtime, by CA fire agency type and year')
).properties(width=250,height=250)

In [39]:
chart.configure_header(
    titleColor='Black',
    titleFontSize=18,
    labelColor='Black',
    labelFontSize=11
)

### How much has each agency spent — in CPI-inflated dollars — on overtime in the past decade? 

In [40]:
agency_overtime = payroll_fire.groupby(['employerfull', 'type', 'employercounty','year'])\
                    .agg({'adjusted_overtime':'sum', \
                          'adjusted_wages':'sum',\
                          'adjusted_basewages':'sum',\
                          'overtime':'sum', \
                          'wages':'sum'}).reset_index()

In [41]:
agency_overtime.head()

Unnamed: 0,employerfull,type,employercounty,year,adjusted_overtime,adjusted_wages,adjusted_basewages,overtime,wages
0,ALAMEDA_CITY,CITY,ALAMEDA,2011,803203,14023072,13219868,706696,12338155
1,ALAMEDA_CITY,CITY,ALAMEDA,2012,557327,13023898,12466571,500510,11696174
2,ALAMEDA_CITY,CITY,ALAMEDA,2013,638283,13825347,13187065,581609,12597783
3,ALAMEDA_CITY,CITY,ALAMEDA,2014,1259160,14540816,13281656,1165970,13464660
4,ALAMEDA_CITY,CITY,ALAMEDA,2015,2342254,15850867,13508612,2171480,14695177


### What's the share of each agency's wages that is overtime?

In [42]:
agency_overtime['ot_share_wages'] = ((agency_overtime['overtime'] /\
                                      agency_overtime['wages'])*100).round(2)

In [43]:
agency_overtime = pd.DataFrame(agency_overtime\
#                                [agency_overtime['adjusted_overtime'] > 0]\
                              )

In [44]:
agency_overtime.to_csv('/Users/mhustiles/data/data/controller/output/agency_overtime.csv')

### What about agencies in our area? 

In [45]:
area_counties = ['LOS ANGELES', 'ORANGE', 'KERN', 'VENTURA', 'RIVERSIDE', 'SAN BERNARDINO']

In [46]:
la_agency_overtime = agency_overtime[(agency_overtime['employercounty'] == 'LOS ANGELES') &\
                                         (agency_overtime['type'] != 'SPECIAL DISTRICT')]

In [47]:
la_area_agency_overtime = agency_overtime[(agency_overtime['employercounty'].isin(area_counties)) &\
                                         (agency_overtime['type'] != 'SPECIAL DISTRICT')]

In [48]:
la_agency_overtime.head(5)

Unnamed: 0,employerfull,type,employercounty,year,adjusted_overtime,adjusted_wages,adjusted_basewages,overtime,wages,ot_share_wages
26,ALHAMBRA_CITY,CITY,LOS ANGELES,2011,2422368,8747377,6325009,2131313,7696352,28
27,ALHAMBRA_CITY,CITY,LOS ANGELES,2012,2620875,8626075,6005200,2353689,7746688,30
28,ALHAMBRA_CITY,CITY,LOS ANGELES,2013,2478488,8716864,6238376,2258421,7942886,28
29,ALHAMBRA_CITY,CITY,LOS ANGELES,2014,2548421,8531589,5983168,2359814,7900172,30
30,ALHAMBRA_CITY,CITY,LOS ANGELES,2015,2214889,8262582,6047693,2053401,7660156,27


In [49]:
agency_overtime[agency_overtime.employerfull.str.contains('')].\
    sort_values(by='overtime', ascending=False).head(10)

Unnamed: 0,employerfull,type,employercounty,year,adjusted_overtime,adjusted_wages,adjusted_basewages,overtime,wages,ot_share_wages
2223,LOS_ANGELES_CITY,CITY,LOS ANGELES,2019,207856755,628542751,420685996,207856755,628542751,33
2232,LOS_ANGELES_COUNTY,COUNTY,LOS ANGELES,2019,206244625,552503321,346258696,206244625,552503321,37
2231,LOS_ANGELES_COUNTY,COUNTY,LOS ANGELES,2018,203838869,547049838,343210969,200211091,537313837,37
2230,LOS_ANGELES_COUNTY,COUNTY,LOS ANGELES,2017,206097766,544509701,338411935,197603369,522067528,38
2221,LOS_ANGELES_CITY,CITY,LOS ANGELES,2017,204805434,612039575,407234140,196364301,586814132,33
2222,LOS_ANGELES_CITY,CITY,LOS ANGELES,2018,198411726,606685551,408273826,194880536,595888197,33
2229,LOS_ANGELES_COUNTY,COUNTY,LOS ANGELES,2016,200750586,529889247,329138660,188461673,497452166,38
2220,LOS_ANGELES_CITY,CITY,LOS ANGELES,2016,198223610,583796306,385572696,186089385,548059314,34
2219,LOS_ANGELES_CITY,CITY,LOS ANGELES,2015,199022322,582418739,383396417,184511567,539954479,34
506,CALFIRE_STATE,STATE,NAN,2018,185538996,686704325,501165329,182236906,674482853,27


In [50]:
agency_overtime[agency_overtime.type == 'CITY'].sort_values(by='adjusted_overtime', ascending=False).head(10)

Unnamed: 0,employerfull,type,employercounty,year,adjusted_overtime,adjusted_wages,adjusted_basewages,overtime,wages,ot_share_wages
2223,LOS_ANGELES_CITY,CITY,LOS ANGELES,2019,207856755,628542751,420685996,207856755,628542751,33
2221,LOS_ANGELES_CITY,CITY,LOS ANGELES,2017,204805434,612039575,407234140,196364301,586814132,33
2219,LOS_ANGELES_CITY,CITY,LOS ANGELES,2015,199022322,582418739,383396417,184511567,539954479,34
2222,LOS_ANGELES_CITY,CITY,LOS ANGELES,2018,198411726,606685551,408273826,194880536,595888197,33
2220,LOS_ANGELES_CITY,CITY,LOS ANGELES,2016,198223610,583796306,385572696,186089385,548059314,34
2218,LOS_ANGELES_CITY,CITY,LOS ANGELES,2014,174201837,514491846,340290009,161309278,476414656,34
2217,LOS_ANGELES_CITY,CITY,LOS ANGELES,2013,154272930,534463557,380190626,140574907,487008088,29
2216,LOS_ANGELES_CITY,CITY,LOS ANGELES,2012,125749783,511540078,385790295,112930198,459391030,25
2215,LOS_ANGELES_CITY,CITY,LOS ANGELES,2011,109199314,514016858,404817544,96078670,452256101,21
3655,SAN_FRANCISCO_CITY,CITY,SAN FRANCISCO,2013,48060755,256643251,208582496,43793400,233855681,19


In [51]:
agency_overtime.to_csv('output/agency_overtime_share_ot.csv')

In [52]:
novato = agency_overtime[agency_overtime['employerfull'].str.contains('NOVATO')]

In [53]:
novato.head(10)

Unnamed: 0,employerfull,type,employercounty,year,adjusted_overtime,adjusted_wages,adjusted_basewages,overtime,wages,ot_share_wages
2837,NOVATO_FIRE_PROTECTION_DISTRICT_SPECIAL_DISTRICT,SPECIAL DISTRICT,MARIN,2011,1213118,7245158,6032040,1067358,6374629,17
2838,NOVATO_FIRE_PROTECTION_DISTRICT_SPECIAL_DISTRICT,SPECIAL DISTRICT,MARIN,2012,1572424,6504211,4931787,1412123,5841138,24
2839,NOVATO_FIRE_PROTECTION_DISTRICT_SPECIAL_DISTRICT,SPECIAL DISTRICT,MARIN,2013,2268955,9772383,7503428,2067493,8904685,23
2840,NOVATO_FIRE_PROTECTION_DISTRICT_SPECIAL_DISTRICT,SPECIAL DISTRICT,MARIN,2014,2435562,10452658,8017096,2255308,9679064,23
2841,NOVATO_FIRE_PROTECTION_DISTRICT_SPECIAL_DISTRICT,SPECIAL DISTRICT,MARIN,2015,3071860,11025250,7953390,2847890,10221397,28
2842,NOVATO_FIRE_PROTECTION_DISTRICT_SPECIAL_DISTRICT,SPECIAL DISTRICT,MARIN,2016,3617766,11539930,7922164,3396305,10833515,31
2843,NOVATO_FIRE_PROTECTION_DISTRICT_SPECIAL_DISTRICT,SPECIAL DISTRICT,MARIN,2017,4014726,11914247,7899521,3849258,11423197,34
2844,NOVATO_FIRE_PROTECTION_DISTRICT_SPECIAL_DISTRICT,SPECIAL DISTRICT,MARIN,2018,3937495,11634550,7697056,3867418,11427487,34
2845,NOVATO_FIRE_PROTECTION_DISTRICT_SPECIAL_DISTRICT,SPECIAL DISTRICT,MARIN,2019,3306306,11378702,8072396,3306306,11378702,29


In [54]:
novato.iloc[7,4] 

3937494.707937253

In [55]:
'{:,.0f}%'.format((((novato.iloc[7,4] - novato.iloc[0,4]) / novato.iloc[0,4])*100).round(2))

'225%'

---

### Slice out specific large employer to CSVs to share for interviews

adjust this so it only gets the biggest departments, not just the biggest cities, counties

In [56]:
agencies_totals = agency_overtime.groupby('employerfull').agg({'adjusted_overtime':'sum'})\
    .sort_values(by='adjusted_overtime', ascending=False).reset_index().head(30)

In [57]:
agencies_totals

Unnamed: 0,employerfull,adjusted_overtime
0,LOS_ANGELES_COUNTY,1614796620
1,LOS_ANGELES_CITY,1571743711
2,CALFIRE_STATE,1194523149
3,OC_FIRE_AUTHORITY_SPECIAL_DISTRICT,403808457
4,SAN_FRANCISCO_CITY,360208960
5,SAN_DIEGO_CITY,323278734
6,KERN_COUNTY,193652296
7,VENTURA_COUNTY,179788937
8,SACRAMENTO_METRO_FIRE_SPECIAL_DISTRICT,177971688
9,OAKLAND_CITY,165893983


In [58]:
lg_agencies_list = list(agencies_totals.employerfull)

In [59]:
lg_agencies_list = ['LOS_ANGELES_COUNTY',
 'LOS_ANGELES_CITY',
 'CALFIRE_STATE',
 'OC_FIRE_AUTHORITY_SPECIAL_DISTRICT',
 'SAN_FRANCISCO_CITY',
 'SAN_DIEGO_CITY',
 'KERN_COUNTY',
 'VENTURA_COUNTY',
 'SACRAMENTO_METRO_FIRE_SPECIAL_DISTRICT',
 'LONG_BEACH_CITY',
 'OAKLAND_CITY',
 'SACRAMENTO_CITY',
 'CONTRA_COSTA_COUNTY',
 'SAN_JOSE_CITY',
 'ANAHEIM_CITY',
 'SANTA_BARBARA_COUNTY',
 'PASADENA_CITY',
 'GLENDALE_CITY',
 'SAN_RAMON_VALLEY_FIRE_PROTECTION_SPECIAL_DISTRICT',
 'RIVERSIDE_CITY',
 'ALAMEDA_COUNTY',
 'FRESNO_CITY',
 'ONTARIO_CITY',
 'SANTA_MONICA_CITY',
 'HUNTINGTON_BEACH_CITY',
 'CORONA_CITY',
 'SANTA_CLARA_COUNTY',
 'FREMONT_CITY',
 'CHULA_VISTA_CITY',
 'NEWPORT_BEACH_CITY',
'EL_SEGUNDO_CITY']

In [60]:
large_places = payroll_fire[(payroll_fire['population'] > 100000)]\
.groupby('employerfull').agg('size').reset_index(name='count')

In [61]:
large_places.head(5)

Unnamed: 0,employerfull,count
0,ALAMEDA_COUNTY,1479
1,ANAHEIM_CITY,1948
2,BAKERSFIELD_CITY,1647
3,BERKELEY_CITY,1187
4,BURBANK_CITY,1104


In [62]:
locations = []
for p in lg_agencies_list:
    locations.append(dict(location = p))

In [63]:
df = pd.DataFrame()

for l in locations:
    d = l['location']
    place = payroll_fire[payroll_fire['employerfull'] == d]
    place.to_csv(f'output/places/' + d.lower() + '_employee_list' + '.csv' , sep=',', index=False)
    place_slim = pd.DataFrame(place[['employer', 'type', 'overtime', 'wages', 'adjusted_overtime', 'year']])
    place_slim_grouped = place_slim.groupby(['employer', 'type', 'year'])\
                .agg({'overtime': 'sum', 'adjusted_overtime': 'sum', 'wages': 'sum'}).reset_index()
    place_slim_grouped.to_csv(f'output/places/' + d.lower() + '_summary' + '.csv' , sep=',', index=False)
    place_slim_grouped_pivot = pd.pivot_table(place_slim_grouped, \
                values='adjusted_overtime', index=['employer', 'type'], columns=['year']).reset_index().fillna(0)
    place_slim_grouped_pivot_slim = pd.DataFrame(place_slim_grouped_pivot[['employer','type','2011','2018']])
    place_slim_grouped_pivot_slim['adjusted_ot_pct_change'] = \
                ((place_slim_grouped_pivot_slim['2018']-place_slim_grouped_pivot_slim['2011'])/\
                 place_slim_grouped_pivot_slim['2011'])*100
    place_slim_grouped_pivot_slim.to_csv(f'output/places/' + d.lower() + '_pivot' + '.csv' , sep=',', index=False)

### Create a dataframe combining and pivoting listing all the large places

In [64]:
large_places_df = payroll_fire[(payroll_fire['population'] > 10000) \
                               & (payroll_fire['employer'] != 'FRESNO')].groupby(['employer', 'type', 'year'])\
                .agg({'adjusted_overtime': 'sum'}).reset_index()

In [65]:
large_places_df_pivot = pd.DataFrame(pd.pivot_table(large_places_df, \
                values='adjusted_overtime', index=['employer', 'type'], columns=['year']).reset_index().fillna(0))

In [66]:
large_places_df_pivot['adjusted_ot_pct_change'] = \
                ((large_places_df_pivot['2019']-large_places_df_pivot['2011'])/\
                 large_places_df_pivot['2011'])*100

In [67]:
places = large_places_df_pivot[(large_places_df_pivot['2011'] > 10000) & (large_places_df_pivot['2019'] > 10000)]

In [68]:
large_places_df_pivot.head()

year,employer,type,2011,2012,2013,2014,2015,2016,2017,2018,2019,adjusted_ot_pct_change
0,ALAMEDA,CITY,803203,557327,638283,1259160,2342254,1370063,1443485,2032825,2676033,233.0
1,ALAMEDA,COUNTY,0,0,0,0,14208990,16152871,3503457,17187372,16327468,inf
2,ALBANY,CITY,582145,784939,649931,615708,660770,0,809076,941092,913016,57.0
3,ALHAMBRA,CITY,2422368,2620875,2478488,2548421,2214889,3013562,3504914,3780845,2818008,16.0
4,AMADOR,COUNTY,0,0,0,0,0,0,510179,470262,401231,inf


In [69]:
large_places_df_pivot['adjusted_ot_pct_change'] = \
                ((large_places_df_pivot['2019']-large_places_df_pivot['2011'])/\
                 large_places_df_pivot['2014'])*100

In [70]:
places = large_places_df_pivot[(large_places_df_pivot['2014'] > 0) & (large_places_df_pivot['2019'] > 0)]

In [71]:
places.sort_values(by='2019', ascending=False).head()

year,employer,type,2011,2012,2013,2014,2015,2016,2017,2018,2019,adjusted_ot_pct_change
92,LOS ANGELES,CITY,109199314,125749783,154272930,174201837,199022322,198223610,204805434,198411726,207856755,57
93,LOS ANGELES,COUNTY,143138158,147633267,162505150,165835905,178752293,200750586,206097766,203838869,206244625,38
165,SAN DIEGO,CITY,22423000,34272611,34778859,34848758,33382613,35550931,40670330,44342226,43009407,59
167,SAN FRANCISCO,CITY,33152175,44611341,48060755,40919023,42820606,44492970,36227255,34856055,35068780,5
125,OAKLAND,CITY,187966,16571623,18161242,22341243,23179979,23263548,20376972,19384231,22427180,100


In [72]:
places.to_csv('output/places/large_places.csv')

---

### Aggregating agencies by county

In [73]:
counties = agency_overtime[agency_overtime['employercounty'] != 'NAN'].\
                           groupby(['employercounty', 'year']).agg({'adjusted_overtime':'sum'}).reset_index()

In [74]:
counties_pivot = pd.pivot_table(counties, \
                            values='adjusted_overtime',\
                            index=['employercounty'], \
                            columns=['year']).reset_index().fillna(0)

In [75]:
counties_pivot.columns = counties_pivot.columns.map(str)

In [76]:
counties_pivot.to_csv('output/counties_pivot.csv')

In [77]:
counties_pivot_trim = pd.DataFrame(counties_pivot[(counties_pivot['2011'] > 0) & (counties_pivot['2019'] > 0)])

In [78]:
counties_pivot_trim['pct_increase'] = ((((counties_pivot_trim['2019'] - counties_pivot_trim['2011']) \
                               / counties_pivot_trim['2011'])*100).round(2)).astype(int)

In [79]:
counties_pivot_trim.sort_values(by='pct_increase', ascending=False).head(10)

year,employercounty,2011,2012,2013,2014,2015,2016,2017,2018,2019,pct_increase
46,SISKIYOU,589,2499,10995,98201,123143,133053,22190,63410,42048,7042
17,LASSEN,11060,82916,61651,60923,81526,79224,72508,134199,109819,892
2,AMADOR,51839,6889,79018,87354,77556,67548,557894,577878,496375,857
0,ALAMEDA,13115421,32949541,31405543,35227669,57706039,58608590,44373626,58631691,60332524,360
34,SAN BENITO,329078,248813,533575,362562,313089,649259,1111315,1636208,1454117,341
57,YUBA,108668,163692,218380,273936,355351,356981,620753,836230,441818,306
9,FRESNO,3411184,4253048,6162108,8135120,11441704,12347859,13087641,14290071,11052194,224
15,KINGS,688088,762687,1035041,1688639,1837310,1998167,2439275,2581892,2169583,215
39,SAN LUIS OBISPO,1053795,1175645,3463727,3497022,3596145,4267473,4448278,4453278,3315199,214
47,SOLANO,3566697,6153356,5459485,7366730,8792127,8503254,11508399,12037461,10580617,196


In [80]:
counties_pivot_trim['year2019'] = (counties_pivot_trim['2019']).round(0).astype(int)

In [81]:
counties_pivot_trim.dtypes

year
employercounty     object
2011              float64
2012              float64
2013              float64
2014              float64
2015              float64
2016              float64
2017              float64
2018              float64
2019              float64
pct_increase        int64
year2019            int64
dtype: object

In [82]:
breaks_increase = jenkspy.jenks_breaks(list(counties_pivot_trim.pct_increase), nb_class=7)

In [83]:
breaks_2018 = jenkspy.jenks_breaks(list(counties_pivot_trim.year2018), nb_class=7)

AttributeError: 'DataFrame' object has no attribute 'year2018'

In [None]:
def group_breaks_increase(breaks_increase_value):
    for i, b in enumerate(breaks_increase):
        if breaks_increase_value <= breaks_increase[i+1]:
            return i

In [None]:
def get_group(value):
    for i, b in enumerate(breaks_2018):
        if value <= breaks_2018[i+1]:
            return i

In [None]:
counties_pivot_trim['increase_group'] = counties_pivot_trim.pct_increase.apply(group_breaks_increase)

In [None]:
counties_pivot_trim['2018_group'] = counties_pivot_trim.year2018.apply(get_group)

In [None]:
counties_pivot_trim.head(10)

### Add counties geodataframe

In [None]:
counties = gpd.read_file('http://s3-us-west-2.amazonaws.com/boundaries.\
latimes.com/archive/1.0/boundary-set/counties-2012.geojson')

In [None]:
counties.plot()

In [None]:
counties = counties.rename(columns={'name':'employercounty'})
counties['employercounty'] = counties['employercounty'].str.upper()

In [None]:
counties_overtime = counties.merge(counties_pivot_trim, on='employercounty', how='left')

In [None]:
counties_overtime.head()

In [None]:
lat.palette['schemes'] = {
    'div_fire': ['#fdd0a2','#dadaeb','#bcbddc','#9e9ac8','#807dba','#6a51a3','#4a1486'],
    'category-6': ['#3580b1','#ec8431','#ab7fb4','#c89d29','#adc839','#829eb1'],
    'category-6': ['#3580b1','#ec8431','#ab7fb4','#c89d29','#adc839','#829eb1'],
    'fire-7': ['#fbf2c7','#f9e39c','#f8d36e','#f4bb6a','#e68a4f','#d15a40','#ab4232'],
    'fireandice-6': ['#e68a4f','#f4bb6a','#f9e39c','#dadfe2','#a6b7c6','#849eae'],
    'ice-7': ['#edefee','#dadfe2','#c4ccd2','#a6b7c6','#849eae','#607785','#47525d']}

In [None]:
counties_overtime.to_file('output/counties_overtime.geojson', driver='GeoJSON')

In [None]:
geojson = json.loads(counties_overtime.to_json())

In [None]:
features = alt.Data(values=geojson['features'])

In [None]:
geoshape = alt.Chart(features).mark_geoshape(fill='#e6e6e6', stroke='white')

In [None]:
base = alt.Chart(features).mark_geoshape( fill='#e6e6e6', stroke='white', strokeWidth=0.1)\
        .encode().properties(
    width=400,
    height=500
)

In [None]:
increase = geoshape.encode( color=alt.Color( "properties.increase_group:N", 
      scale=alt.Scale(
      domain=[0,1, 2, 3, 4, 5, 6],
      range=lat.palette['schemes']['div_fire']
        ),legend=None),)

In [None]:
year2018 = geoshape.encode( color=alt.Color( "properties.2018_group:N", 
      scale=alt.Scale(
      domain=[0,1, 2, 3, 4, 5, 6],
      range=lat.palette['schemes']['fire-7']
        ),legend=None),)

In [None]:
change = (base + increase).properties(title="Increase in overtime spending")

In [None]:
total = (base + year2018).properties(title="Overtime spending, by county")

In [None]:
total | change

---

In [None]:
labars = pd.DataFrame(la_agency_overtime[['employerfull', 'year', 'ot_share_wages']])
labars.head()

In [None]:
alt.Chart(labars).mark_area().encode(
    x=alt.X('year:N', title='', axis=alt.Axis(format='', tickCount=2, values=(2011,2019))),
    y=alt.Y("ot_share_wages:Q", title="", axis=alt.Axis(format='', tickCount=10)),
    facet=alt.Facet('employerfull:N', columns=7)
).properties(height=100, width=100, title='')

Data source: https://publicpay.ca.gov/Reports/RawExport.aspx

---

### How many firefighters outside L.A. received six-figure salaries? 

In [None]:
state_high = payroll_fire[(payroll_fire['overtime'] > 100000) &\
                             (~payroll_fire['employer'].str.contains('LOS ANGELES'))].reset_index()

In [None]:
state_highest = payroll_fire[(payroll_fire['overtime'] > 200000) &\
                             (~payroll_fire['employer'].str.contains('LOS ANGELES'))].reset_index()

In [None]:
print('$200k:',len(state_highest),'| $100k:',len(state_high))

In [None]:
state_high.sort_values(by='adjusted_overtime', ascending=False).head()

In [None]:
state_highest_years = state_highest.groupby(['year', 'type']).agg('size').reset_index(name='count')
state_highest_years.sort_values(by='count', ascending=False).head()

In [None]:
state_high_years = state_high.groupby(['year', 'type']).agg('size').reset_index(name='count')
state_high_years.sort_values(by='count', ascending=False).head()

In [None]:
alt.Chart(state_high_years).mark_bar().encode(
    x=alt.X("year:N", title=' ', axis=alt.Axis(values=(2011, 2019))),
    y=alt.Y("count:Q", title=' ', axis=alt.Axis(tickCount=6, format='2f')),
    facet='type:O'
).properties(width=200, height=200, title='Calif. $100,000 overtime firefighters, by agency type')

---

Data source: https://publicpay.ca.gov/Reports/RawExport.aspx