In [1]:
import pandas as pd
import numpy as np

***GDP DATA***

In [2]:
# Load county-level real GDP for California. Series columns arrive named
# 'REALGDPALL<code>'; keep only the numeric <code> (as an int) so the labels
# can be matched against county FIPS codes. Columns without that prefix
# (e.g. the date column) keep their original name.
realgdp_ca = pd.read_csv('gdp-by-county-ca2.csv').rename(
    columns=lambda label: (
        int(label.replace('REALGDPALL', ''))
        if label.startswith('REALGDPALL')
        else label
    )
)

In [3]:
# Build a {FIPS code -> county name} lookup for California from the public
# FIPS master list, then use it to relabel the GDP columns.
fips_df = pd.read_csv('https://raw.githubusercontent.com/kjhealy/fips-codes/master/state_and_county_fips_master.csv')
fips_ca = fips_df.loc[fips_df['state'].eq('CA')]
fips_county = {code: name for code, name in zip(fips_ca['fips'], fips_ca['name'])}
# Labels with no entry in the lookup (e.g. the date column) are left as-is.
realgdp_ca.columns = [fips_county.get(label, label) for label in realgdp_ca.columns]
# Record the units on the frame
# (Thousands of Chained 2017 USD, Not Seasonally Adjusted).
realgdp_ca.attrs['units'] = 'Thousands of Chained 2017 USD'

In [4]:
# Display the GDP table; FIPS-code column labels were replaced with county names above.
realgdp_ca

Unnamed: 0,observation_date,Alameda County,Alpine County,Amador County,Butte County,Calaveras County,Colusa County,Contra Costa County,Del Norte County,El Dorado County,...,Sonoma County,Stanislaus County,Sutter County,Tehama County,Trinity County,Tulare County,Tuolumne County,Ventura County,Yolo County,Yuba County
0,2001-01-01,86924126,84720,1360828,6790163,1098393,908729,64845726,659710,5655039,...,22424276,16559172,2532153,1419839,327124,11635782,1914694,39016318,10021979,2204099
1,2002-01-01,87945920,87647,1572280,7281711,1193532,845631,64827092,667447,6145440,...,23360909,17211668,3067456,1518222,363755,10822959,2265038,40234620,10202456,2374643
2,2003-01-01,91564585,89449,1616440,7600395,1220905,989680,65646731,692526,6297920,...,23193391,17828749,3074716,1553812,386418,11393198,2296930,42820863,10568911,2502333
3,2004-01-01,92470454,86175,1685018,7713220,1201726,814567,68905840,728039,6564920,...,23112212,18906794,3111914,1628286,373886,12631250,2317515,46880813,11055339,2426950
4,2005-01-01,94611477,87711,1677506,7912397,1290494,853126,75014351,744646,6731682,...,23574318,19533906,3005880,1739505,352092,13871037,2433650,49219057,11230028,2461033
5,2006-01-01,97221046,88802,1669942,8195172,1246157,921396,75242782,773156,6937119,...,23737230,19987363,3235685,1657115,367853,13084688,2489599,54587334,11399368,2588931
6,2007-01-01,98832535,97501,1639393,8084613,1231004,1019449,73275314,791314,6825184,...,24262255,19706375,3384828,1688061,365651,14624649,2417989,59025486,12001509,2561887
7,2008-01-01,99236414,94188,1620129,7917794,1215672,1331160,90234175,858035,6820209,...,24112663,19117205,3719483,1580861,358199,13815747,2357542,54095039,12056502,2660591
8,2009-01-01,94214542,89805,1539261,8066507,1147589,1619244,78642291,807494,6575227,...,23049293,18863423,3905209,1563087,336077,12622564,2255875,53813657,11535085,2707581
9,2010-01-01,97503433,94445,1516280,8103725,1247463,1381681,70521397,806195,6588968,...,23744974,19014237,3613524,1666055,382431,13947483,2384417,53517947,11318089,2709494


Note: GDP is reported as real total GDP in thousands of chained 2017 dollars, i.e. it is adjusted for inflation, which gives a more accurate measure of economic growth.

***MEDIAN INCOME DATA***

In [5]:
# Load county-level median household income for California and relabel its
# columns in three passes:
#   1. drop the 'A052NCEN' suffix from every 'MHICA...' series column,
#   2. strip the 'MHICA' prefix and convert the remainder to an int FIPS code,
#   3. map FIPS codes to county names via fips_county.
# Columns that do not start with 'MHICA' (e.g. the date column) are untouched.
medianinc_ca = (
    pd.read_csv('median-income-county-ca.csv')
    .rename(columns=lambda label: label.replace('A052NCEN', '') if label.startswith('MHICA') else label)
    .rename(columns=lambda label: int(label.replace('MHICA', '')) if label.startswith('MHICA') else label)
    .rename(columns=fips_county)
)
# Record the units on the frame (USD, Not Seasonally Adjusted).
medianinc_ca.attrs['units'] = 'USD, Unadjusted for inflation'

In [6]:
# Display the median-income table; its columns are labelled with county names.
medianinc_ca

Unnamed: 0,observation_date,Alameda County,Alpine County,Amador County,Butte County,Calaveras County,Colusa County,Contra Costa County,Del Norte County,El Dorado County,...,Sonoma County,Stanislaus County,Sutter County,Tehama County,Trinity County,Tulare County,Tuolumne County,Ventura County,Yolo County,Yuba County
0,2001-01-01,54925,38401,41805,31342,40890,34722,64433,28841,51861,...,52873,39300,38013,30609,27464,31587,37745,56525,41851,29927
1,2002-01-01,55595,37691,43628,32124,42563,34556,65186,29028,53182,...,53230,40000,38585,31307,28170,32033,38770,57052,42412,30860
2,2003-01-01,56225,38825,44494,33528,43462,36579,64365,29990,54131,...,52088,41619,39718,32905,29063,33190,39620,57885,43612,32802
3,2004-01-01,57659,42827,47459,34891,46052,38350,65459,31502,56629,...,53645,43072,41289,34520,30307,34809,41067,59379,44810,34493
4,2005-01-01,60937,45283,52078,36303,47639,39186,69463,32724,62199,...,58110,46769,44914,33903,31434,38179,42381,66531,49378,35786
5,2006-01-01,64285,47515,50528,40023,52745,40240,74058,33765,67605,...,60656,48252,47174,35639,33070,41117,44991,71807,50027,37558
6,2007-01-01,68263,46136,54903,39466,51447,43882,76317,35910,64256,...,62279,50367,49104,36884,35439,40444,45478,72762,55988,40602
7,2008-01-01,70217,49320,53951,40308,52850,44622,78469,36729,67019,...,62314,50094,49146,38160,34726,44383,49151,76190,57877,46715
8,2009-01-01,68258,45391,54461,41196,51564,47472,75084,38252,68778,...,61985,48550,48073,38179,33546,39876,48027,71246,56120,40947
9,2010-01-01,66937,44241,49516,41168,50745,44981,73678,35438,65201,...,58703,47442,46188,38188,35207,42377,44751,71418,54433,41045


Note: Median income is reported in nominal USD, i.e. it is *not* adjusted for inflation.

In [None]:
# California's 58 counties, kept in two spellings:
#   * ca_counties2 -- bare names ("Alameda"), the form used to filter
#     'Entity Name' in the expenditures data;
#   * ca_counties  -- names with the " County" suffix ("Alameda County"), the
#     form used for the GDP / median-income column labels.
# The bare names are the single source of truth; the suffixed list is derived
# from them so the two can never drift apart.
ca_counties2 = [
    "Alameda",
    "Alpine",
    "Amador",
    "Butte",
    "Calaveras",
    "Colusa",
    "Contra Costa",
    "Del Norte",
    "El Dorado",
    "Fresno",
    "Glenn",
    "Humboldt",
    "Imperial",
    "Inyo",
    "Kern",
    "Kings",
    "Lake",
    "Lassen",
    "Los Angeles",
    "Madera",
    "Marin",
    "Mariposa",
    "Mendocino",
    "Merced",
    "Modoc",
    "Mono",
    "Monterey",
    "Napa",
    "Nevada",
    "Orange",
    "Placer",
    "Plumas",
    "Riverside",
    "Sacramento",
    "San Benito",
    "San Bernardino",
    "San Diego",
    "San Francisco",
    "San Joaquin",
    "San Luis Obispo",
    "San Mateo",
    "Santa Barbara",
    "Santa Clara",
    "Santa Cruz",
    "Shasta",
    "Sierra",
    "Siskiyou",
    "Solano",
    "Sonoma",
    "Stanislaus",
    "Sutter",
    "Tehama",
    "Trinity",
    "Tulare",
    "Tuolumne",
    "Ventura",
    "Yolo",
    "Yuba",
]
ca_counties = [f"{name} County" for name in ca_counties2]

In [None]:
# Sanity check: every California county must appear as a column in both the
# GDP and the median-income tables. Missing names are collected per table so
# a failure reports the complete list instead of stopping at the first gap.
for table_name, table in (('realgdp_ca', realgdp_ca), ('medianinc_ca', medianinc_ca)):
    missing = [county for county in ca_counties if county not in table.columns]
    assert not missing, f"{table_name} is missing county columns: {missing}"

***PUBLIC SAFETY SPENDING DATA***

In [32]:
# Load the county expenditure records and narrow them to justice-related
# 'Public Protection' spending for the counties listed in ca_counties2.
ca_expend = pd.read_csv('County_-_Expenditures_20250513.csv')
ca_expend = ca_expend[ca_expend['Entity Name'].isin(ca_counties2)]
ca_publicprot = ca_expend[ca_expend['Category'] == 'Public Protection']
# 'Subcategory 1' values treated as justice spending (each listed once; the
# original list repeated 'Police Protection_Operating Expenditures').
# NOTE(review): 'Police Protection' is kept alongside its '_Capital Outlay' and
# '_Operating Expenditures' variants -- confirm these rows do not overlap,
# otherwise summing 'Values' would double count.
justice_cat = [
    'Detention and Correction',
    'Judicial',
    'Police Protection',
    'Police Protection_Capital Outlay',
    'Police Protection_Operating Expenditures',
]
ca_publicprot = ca_publicprot[ca_publicprot['Subcategory 1'].isin(justice_cat)]

In [None]:
# Total justice-related public-protection spending per county ('Entity Name')
# and fiscal year. Built as one chain so re-running the cell is idempotent.
publicprottotal = (
    ca_publicprot
    .groupby(['Entity Name', 'Fiscal Year'])['Values']
    .sum()
    .reset_index()
    .rename(columns={'Values': 'Total Spending'})
)
# Show the result so the table recorded under this cell is reproduced on re-run.
publicprottotal

Unnamed: 0,Entity Name,Fiscal Year,Total Spending
0,Alameda,2003,393405118
1,Alameda,2004,403815290
2,Alameda,2005,412566039
3,Alameda,2006,439809386
4,Alameda,2007,489083040
...,...,...,...
1192,Yuba,2019,51346113
1193,Yuba,2020,55720542
1194,Yuba,2021,60285475
1195,Yuba,2022,63317701


***

In [14]:
# TODO: placeholder cell -- add the next analysis step here or delete this cell