In [1]:
# Import libraries
import numpy as np
import pandas as pd

In [2]:
# Read in data
df = pd.read_csv('/content/allegations_202007271729.csv')

In [3]:
# Filter data to only necessary colummns
df = df[['complaint_id', 'year_received', 'precinct', 'fado_type']]
# Fix nulls and datatypes
df = df[df['precinct'].notnull()]
df['precinct'] = df['precinct'].astype(int)
df['precinct'] = df['precinct'].astype('string')
df['fado_type'] = df['fado_type'].astype('string')
# Remove 2020 data
df = df[df['year_received'] <= 2019]
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 33330 entries, 0 to 33357
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   complaint_id   33330 non-null  int64 
 1   year_received  33330 non-null  int64 
 2   precinct       33330 non-null  string
 3   fado_type      33330 non-null  string
dtypes: int64(2), string(2)
memory usage: 1.3 MB


In [4]:
# Group data by year, precinct, & fado_type -> report complaint counts
counts = (
    df.groupby(['year_received', 'precinct', 'fado_type']).size().reset_index(name='complaint_count')
)
counts

Unnamed: 0,year_received,precinct,fado_type,complaint_count
0,1985,63,Force,1
1,1985,67,Abuse of Authority,1
2,1985,71,Abuse of Authority,1
3,1985,71,Force,1
4,1985,83,Abuse of Authority,1
...,...,...,...,...
4803,2019,9,Offensive Language,3
4804,2019,90,Abuse of Authority,69
4805,2019,90,Discourtesy,2
4806,2019,90,Force,1


In [5]:
# Pivot data so that each fado_type has its own column with # of complaints
wide = counts.pivot_table(
    index=['year_received', 'precinct'],
    columns='fado_type',
    values='complaint_count',
    fill_value=0
).reset_index()

# Add column for total # of complaints (sum of fado_type columns)
wide['total_complaints'] = wide[['Abuse of Authority', 'Discourtesy', 'Force', 'Offensive Language']].sum(axis=1)

wide.columns.name = None

wide

Unnamed: 0,year_received,precinct,Abuse of Authority,Discourtesy,Force,Offensive Language,total_complaints
0,1985,63,0.0,0.0,1.0,0.0,1.0
1,1985,67,1.0,0.0,0.0,0.0,1.0
2,1985,71,1.0,0.0,1.0,0.0,2.0
3,1985,83,1.0,1.0,1.0,0.0,3.0
4,1986,14,0.0,0.0,1.0,0.0,1.0
...,...,...,...,...,...,...,...
1770,2019,84,1.0,0.0,0.0,0.0,1.0
1771,2019,88,4.0,1.0,1.0,0.0,6.0
1772,2019,9,7.0,2.0,5.0,3.0,17.0
1773,2019,90,69.0,2.0,1.0,0.0,72.0


In [6]:
# Export pandas dataframe to .json
wide.to_json('complaints_by_precinct.json', orient='records')