## Data Sources: Uniform Crime Report
* **2016 Data**: Table 3 - Crime in the United States by State, 2016. Retrieved from the FBI Uniform Crime Reporting (UCR) Program:
https://ucr.fbi.gov/crime-in-the-u.s/2016/crime-in-the-u.s.-2016/tables/table-3

* **2020 Data**: Table 5 - Crime in the United States by State, 2020. Available via the FBI Crime Data Explorer:
https://cde.ucr.cjis.gov/LATEST/webapp/#

In [1]:
import pandas as pd
import data_prep_functions as prep

In [2]:
spotlite = prep.aggregate_spotlite_to_state_long('Data/SPOTLITE.csv')
spotlite

Unnamed: 0,STATE,STATE_NAME,YEAR,USE_OF_FORCE_COUNT
0,AK,Alaska,2016,14
1,AK,Alaska,2020,20
2,AL,Alabama,2016,51
3,AL,Alabama,2020,75
4,AR,Arkansas,2016,35
...,...,...,...,...
97,WI,Wisconsin,2016,36
98,WV,West Virginia,2020,22
99,WV,West Virginia,2016,24
100,WY,Wyoming,2016,4


In [3]:
crimes_2016 = prep.extract_state_total_crime('Data/table-3.xlsx')
crimes_2020 = prep.extract_state_total_crime('Data/Table_05_Crime_in_the_United_States_by_State_2020.xlsx')

In [4]:
crimes = pd.concat([crimes_2016, crimes_2020])
crimes

3,STATE,YEAR,POPULATION,TOTAL_CRIME
0,AK,2016,741894,30842
1,AL,2016,4863300,169248
2,AR,2016,2988248,114134
3,AZ,2016,6931071,239015
4,CA,2016,39250017,1176866
...,...,...,...,...
46,VT,2020,623347,8667
47,WA,2020,7693612,232819
48,WI,2020,5832655,105515
49,WV,2020,1784787,31328


In [12]:
lemas_2016 = prep.read_lemas('Data/LEMAS2016.tsv')
lemas_2020 = prep.read_lemas('Data/LEMAS2020.tsv')

166 rows dropped due to FTSWORN <=0 or invalid demographic counts.
909 rows dropped due to FTSWORN <=0 or invalid demographic counts.


In [13]:
lemas = pd.concat([lemas_2020, lemas_2016])
lemas

Unnamed: 0,STATE,YEAR,FTSWORN,%_FEMALE,%_BLACK,%_HISP,CCRB,CFDBK_POLICY,AG_STATE,AG_SHERIFF,AG_LOCAL,PERS_FEMALE,PERS_BLACK_FEM,PERS_BLACK_MALE,PERS_HISP_FEM,PERS_HISP_MALE
0,AK,2020,286,0.080420,0.038462,0.055944,0.017483,0.017483,0,0,0,23,0,11,1,15
1,AL,2020,5623,0.092299,0.273164,0.012271,0.187444,0.572648,0,0,0,519,260,1276,4,65
2,AR,2020,3098,0.114913,0.122014,0.034538,0.193996,0.599419,0,0,0,356,73,305,19,88
3,AZ,2020,8751,0.113244,0.032111,0.205919,0.180322,0.736830,0,0,0,991,29,252,230,1572
4,CA,2020,60466,0.139103,0.059025,0.346840,0.514570,0.817832,0,0,0,8411,594,2975,3817,17155
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46,VT,2016,429,0.102564,0.009324,0.006993,0.731935,0.000000,1,3,9,44,1,3,0,3
47,WA,2016,5019,0.110181,0.041243,0.045228,0.402869,0.321578,1,5,31,553,25,182,19,208
48,WI,2016,6067,0.164332,0.080765,0.059502,0.395747,0.021757,1,16,66,997,108,382,51,310
49,WV,2016,1265,0.035573,0.024506,0.005534,0.004743,0.066403,1,3,19,45,2,29,1,6


In [14]:
merged = lemas.merge(crimes, on=['STATE', 'YEAR'], how='left')
data = merged.merge(spotlite, on=['STATE', 'YEAR'], how='left')

In [16]:
data

Unnamed: 0,STATE,YEAR,FTSWORN,%_FEMALE,%_BLACK,%_HISP,CCRB,CFDBK_POLICY,AG_STATE,AG_SHERIFF,AG_LOCAL,PERS_FEMALE,PERS_BLACK_FEM,PERS_BLACK_MALE,PERS_HISP_FEM,PERS_HISP_MALE,POPULATION,TOTAL_CRIME,STATE_NAME,USE_OF_FORCE_COUNT
0,AK,2020,286,0.080420,0.038462,0.055944,0.017483,0.017483,0,0,0,23,0,11,1,15,731158,22654,Alaska,20
1,AL,2020,5623,0.092299,0.273164,0.012271,0.187444,0.572648,0,0,0,519,260,1276,4,65,4921532,127483,Alabama,75
2,AR,2020,3098,0.114913,0.122014,0.034538,0.193996,0.599419,0,0,0,356,73,305,19,88,3030522,99563,Arkansas,50
3,AZ,2020,8751,0.113244,0.032111,0.205919,0.180322,0.736830,0,0,0,991,29,252,230,1572,7421401,201303,Arizona,121
4,CA,2020,60466,0.139103,0.059025,0.346840,0.514570,0.817832,0,0,0,8411,594,2975,3817,17155,39368078,1016080,California,435
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97,VT,2016,429,0.102564,0.009324,0.006993,0.731935,0.000000,1,3,9,44,1,3,0,3,624594,11591,Vermont,2
98,WA,2016,5019,0.110181,0.041243,0.045228,0.402869,0.321578,1,5,31,553,25,182,19,208,7288000,276676,Washington,55
99,WI,2016,6067,0.164332,0.080765,0.059502,0.395747,0.021757,1,16,66,997,108,382,51,310,5778708,129399,Wisconsin,36
100,WV,2016,1265,0.035573,0.024506,0.005534,0.004743,0.066403,1,3,19,45,2,29,1,6,1831102,44044,West Virginia,24


In [15]:
data.describe()

Unnamed: 0,YEAR,FTSWORN,%_FEMALE,%_BLACK,%_HISP,CCRB,CFDBK_POLICY,AG_STATE,AG_SHERIFF,AG_LOCAL,PERS_FEMALE,PERS_BLACK_FEM,PERS_BLACK_MALE,PERS_HISP_FEM,PERS_HISP_MALE,POPULATION,TOTAL_CRIME,USE_OF_FORCE_COUNT
count,102.0,102.0,102.0,102.0,102.0,102.0,102.0,102.0,102.0,102.0,102.0,102.0,102.0,102.0,102.0,102.0,102.0,102.0
mean,2018.0,8276.196078,0.120187,0.101813,0.076588,0.298231,0.410669,0.441176,5.558824,19.666667,1168.911765,259.215686,799.666667,233.078431,1057.215686,6398153.294118,166345.45098,59.637255
std,2.009877,11513.98686,0.038637,0.103242,0.096165,0.244279,0.270345,0.49898,8.495279,30.878341,1790.546883,423.396723,1064.908413,636.911883,2758.787966,7273752.789691,198709.704168,69.64902
min,2016.0,286.0,0.035573,0.002024,0.003984,0.0,0.0,0.0,0.0,0.0,23.0,0.0,1.0,0.0,3.0,582328.0,8667.0,2.0
25%,2016.0,1679.25,0.08976,0.022385,0.021685,0.130737,0.158487,0.0,0.0,0.0,203.75,3.0,40.5,5.0,41.5,1795318.5,41563.5,17.75
50%,2018.0,4206.0,0.115251,0.071154,0.038028,0.244556,0.413532,0.0,0.0,0.5,513.0,50.0,282.0,23.5,145.5,4457112.5,120965.0,43.0
75%,2020.0,9275.0,0.143206,0.131881,0.074665,0.426072,0.631446,1.0,10.0,31.0,1351.25,349.75,1231.0,92.0,437.25,7388050.75,200628.25,74.5
max,2020.0,60466.0,0.241183,0.528287,0.517425,1.0,1.0,1.0,40.0,143.0,8411.0,2027.0,4434.0,3817.0,17155.0,39368078.0,1176866.0,435.0
