In [4]:
import pandas as pd
import numpy as np

## Prep personal income data

In [6]:
# data from Bureau of Economic Analysis

personal_income = pd.read_csv("CAINC1__ALL_AREAS_1969_2021.csv", encoding = 'unicode_escape', engine ='python')

In [7]:
personal_income = personal_income[personal_income['Description']=='Per capita personal income (dollars) 2/'].drop(
    ['Region','TableName','LineCode','IndustryClassification','Unit'],axis=1
)

In [8]:
personal_income = pd.melt(
    personal_income,id_vars=['GeoFIPS','GeoName','Description'],var_name='Year',value_name='Value'
)


In [9]:
personal_income = personal_income.drop('Description',axis=1)

In [10]:
personal_income['Value'] = personal_income['Value'].replace('(NA)',0).astype(float)

In [11]:
personal_income.rename(
    columns={'Value':'per capita personal income','Year':'year','GeoFIPS':'fips_state_county_code'},
    inplace=True
)

In [12]:
personal_income['year'] = personal_income['year'].astype(int)

In [13]:
# clean up FIPS state county code so we can merge personal income onto the crime data

personal_income['fips_state_county_code'] = \
personal_income['fips_state_county_code'].apply(lambda s:s.replace('"', "")).apply(lambda s: s.replace(' ',''))

In [14]:
personal_income = personal_income[['fips_state_county_code','year','per capita personal income']]

In [15]:
personal_income

Unnamed: 0,fips_state_county_code,year,per capita personal income
0,00000,1969,3931.0
1,01000,1969,2831.0
2,01001,1969,2780.0
3,01003,1969,2760.0
4,01005,1969,2147.0
...,...,...,...
169595,94000,2021,60215.0
169596,95000,2021,57051.0
169597,96000,2021,58094.0
169598,97000,2021,62879.0


## Prep employment data

In [173]:
# employment data from Bureau of Labor Statistics

employment = pd.read_csv('Employment.csv',dtype={'fips': object})

In [174]:
employment['employment'] = employment['employment'].replace('(NA)',0).astype(float)
employment.rename(columns={'fips':'fips_state_county_code'},inplace=True)

In [175]:
def process_fips(fips):
    if len(fips) == 4:
        return '0'+fips
    else:
        return fips

In [176]:
# clean up FIPS code

employment['fips_state_county_code'] = employment['fips_state_county_code'].apply(lambda x: process_fips(x))

In [177]:
employment = employment[['fips_state_county_code','year','employment']]

In [178]:
employment

Unnamed: 0,fips_state_county_code,year,employment
0,01001,1973,8617.0
1,01003,1973,23015.0
2,01005,1973,9961.0
3,01007,1973,3955.0
4,01009,1973,8512.0
...,...,...,...
153855,56037,2021,25444.0
153856,56039,2021,35628.0
153857,56041,2021,11554.0
153858,56043,2021,5174.0


## Prep Levitt Donohue covariates - state level

In [168]:
tables_1 = pd.read_csv('data_for_tables_1_2_3_4.csv')

In [179]:
# covariates that we want

# police and prison data are logged and lagged by one year

cols = [
    'year',
    'state',
    'ab_res_agi_rate',
    'ab_occ_cdc_rate',
    'poverty_rate',
    'popstatecensus',
    'beer_pc',
    'ln_income_pc',
    'unemployment_rate',
    'ln_lag_police_pc',
    'ln_lag_prison_pc',
]

In [180]:
tables_1 = tables_1[cols]

In [183]:
tables_1['state'] = tables_1['state'].apply(lambda x: x.lower())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tables_1['state'] = tables_1['state'].apply(lambda x: x.lower())


## Prep crime data

In [245]:
data = pd.read_csv("offenses_known_yearly_1960_2020.csv",dtype={'fips_state_county_code': object})

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [246]:
data =  data[data['state'].notna()]

## Merge personal income data onto crime data

In [247]:
data = data.merge(personal_income,how='left',on=['fips_state_county_code','year'])

## Merge employment data onto crime data

In [248]:
data = data.merge(employment,how='left',on=['fips_state_county_code','year'])

## Merge covariates onto crime data

In [249]:
data = data.merge(tables_1,how='left',on=['state','year'])

In [324]:
data.to_csv("offenses_merged.csv")