In [1]:
import crime as cr
import pandas as pd, numpy as np
import chart_tools as ct
import plotly.express as px, seaborn as sns
cr.load('dist_arrests', full=True)
cr.load("dist_crime", full=True)

Unnamed: 0,year,policedistrict,type,count,subtype
0,2015.0,ACET Task Force,Murder/Manslaughter,0.0,
1,2015.0,ACET Task Force,Negligent Manslaughter,0.0,
2,2015.0,ACET Task Force,Rape,0.0,By Force
3,2015.0,ACET Task Force,Rape,0.0,Attempted
4,2015.0,ACET Task Force,Robbery,0.0,By Firearm
...,...,...,...,...,...
78838,2009.0,Colorado Div. of Gaming,Motor Vehicle Theft,0.0,Attempted
78839,2007.0,Las Animas PD,Burglary,0.0,Attempted
78840,2015.0,Pagosa Springs PD,Motor Vehicle Theft,0.0,Attempted
78841,2006.0,Phillips County Sheriff,Motor Vehicle Theft,,Attempted


## **Juvenile Arrests by District**

---

In [2]:
df = cr.df('dist_arrests')

df = df.drop(columns=['adultcount'])

# Apparently there were duplicate rows, even though there shouldn't be. Sum them.
df = df.groupby(['year', 'type', 'policedistrict']).agg('sum')
df = df.reset_index()

# Make separate columns for crime type
df = df.pivot(index=['year', 'policedistrict'], columns='type')

# Fix weird column structure as a result of the pivot
df.columns = df.columns.droplevel()
df = df.reset_index()
df.to_csv('output/dist_juvenile_arrests.csv')
df.head(3)

type,year,policedistrict,Aggravated Assault,All Other Offenses,Arson,Burglary,Curfew Violations,DUI,Disorderly Conduct,Drug Violations,...,Other Family Offenses,Other Sex Offenses,Prostitution,Rape,Robbery,Runaways,Stolen Property,Vagrancy,Vandalism,Weapons
0,2001.0,Adams County Sheriff,19.0,196.0,5.0,37.0,16.0,12.0,96.0,47.0,...,0.0,7.0,0.0,,4.0,0.0,18.0,0.0,39.0,7.0
1,2001.0,Adams State College PD,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0
2,2001.0,Alamosa County Sheriff,1.0,18.0,0.0,0.0,0.0,3.0,0.0,4.0,...,1.0,0.0,0.0,,0.0,0.0,0.0,0.0,1.0,0.0


## **Adult Arrests by District**

---

In [3]:
df = cr.df('dist_arrests')

df = df.drop(columns=['juvenilecount'])

# Apparently there were duplicate rows, even though there shouldn't be. Sum them.
df = df.groupby(['year', 'type', 'policedistrict']).agg('sum')
df = df.reset_index()

# Make separate columns for crime type
df = df.pivot(index=['year', 'policedistrict'], columns='type')

# Fix weird column structure as a result of the pivot
df.columns = df.columns.droplevel()
df = df.reset_index()
df.to_csv('output/dist_adult_arrests.csv')
df.head(3)

type,year,policedistrict,Aggravated Assault,All Other Offenses,Arson,Burglary,Curfew Violations,DUI,Disorderly Conduct,Drug Violations,...,Other Family Offenses,Other Sex Offenses,Prostitution,Rape,Robbery,Runaways,Stolen Property,Vagrancy,Vandalism,Weapons
0,2001.0,Adams County Sheriff,108.0,4296.0,2.0,31.0,0.0,607.0,73.0,176.0,...,57.0,11.0,0.0,,21.0,0.0,28.0,1.0,79.0,19.0
1,2001.0,Adams State College PD,2.0,4.0,1.0,0.0,0.0,8.0,0.0,0.0,...,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,1.0,0.0
2,2001.0,Alamosa County Sheriff,0.0,153.0,0.0,0.0,0.0,94.0,0.0,2.0,...,10.0,0.0,0.0,,0.0,0.0,0.0,0.0,7.0,0.0


## **Total Arrests by District**

---

In [4]:
df = cr.df('dist_arrests')

df['count'] = df.juvenilecount + df.adultcount

df = df.drop(columns=['juvenilecount', 'adultcount'])

# Apparently there were duplicate rows, even though there shouldn't be. Sum them.
df = df.groupby(['year', 'type', 'policedistrict']).agg('sum')
df = df.reset_index()

# Make separate columns for crime type
df = df.pivot(index=['year', 'policedistrict'], columns='type')

# Fix weird column structure as a result of the pivot
df.columns = df.columns.droplevel()
df = df.reset_index()
df.to_csv('output/dist_total_arrests.csv')
df.head(3)

type,year,policedistrict,Aggravated Assault,All Other Offenses,Arson,Burglary,Curfew Violations,DUI,Disorderly Conduct,Drug Violations,...,Other Family Offenses,Other Sex Offenses,Prostitution,Rape,Robbery,Runaways,Stolen Property,Vagrancy,Vandalism,Weapons
0,2001.0,Adams County Sheriff,127.0,4492.0,7.0,68.0,0.0,619.0,169.0,223.0,...,57.0,18.0,0.0,,25.0,0.0,46.0,0.0,118.0,26.0
1,2001.0,Adams State College PD,2.0,4.0,1.0,0.0,0.0,8.0,0.0,0.0,...,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,1.0,0.0
2,2001.0,Alamosa County Sheriff,1.0,171.0,0.0,0.0,0.0,97.0,0.0,6.0,...,11.0,0.0,0.0,,0.0,0.0,0.0,0.0,8.0,0.0


## **Crime by District**

---

It's unclear if we will use this. We would need to convert police district to county

In [5]:
df_c = cr.df('dist_crime')

# Group by the same keys as the Arrests data. This will remove subtype column automatically when we agg sum it
df_c = df_c.groupby(['year', 'type', 'policedistrict']).agg('sum').reset_index()

df_c = df_c.pivot(index=['year', 'policedistrict'], columns='type').reset_index()

df_c.head(3)

Unnamed: 0_level_0,year,policedistrict,count,count,count,count,count,count,count,count,count
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Arson,Assaults,Burglary,Larceny/Theft,Motor Vehicle Theft,Murder/Manslaughter,Negligent Manslaughter,Rape,Robbery
0,2001.0,Adams County Sheriff,60.0,906.0,794.0,2647.0,727.0,6.0,102.0,,58.0
1,2001.0,Adams State College PD,2.0,5.0,4.0,51.0,1.0,0.0,0.0,,0.0
2,2001.0,Alamosa County Sheriff,0.0,31.0,0.0,28.0,0.0,0.0,4.0,,0.0
