# Summarize by Ward
Last updated: 3/5/24

<a name="read"></a>
# 1. Read and Prepare Geocoded Fines and 311 Complaints Data

In [1]:
import pandas as pd

In [2]:
# Load the dockets summary CSV and preview the first rows.
df_fines = pd.read_csv(filepath_or_buffer="../../data/05-finalized/dockets-summary.csv")
df_fines.head(n=5)

In [3]:
# Load the unshoveled-sidewalk 311 complaints CSV and preview the first rows.
df_311 = pd.read_csv(filepath_or_buffer="../../data/05-finalized/311-complaints-unshoveled.csv")
df_311.head(n=5)

In [4]:
# Number of rows in the fines table.
df_fines.shape[0]

In [5]:
# Number of rows in the 311 complaints table.
df_311.shape[0]

<a name="community"></a>
# Aggregate Dockets by Ward

In [6]:
# Build one row per ward with the number of dockets issued by each department.
#
# NOTE(review): an earlier draft of this cell dropped rows whose ward was
# 'unknown'; no such filter is applied here -- confirm that is intended.
df_ward_fines = (
    df_fines
    # Count non-null 'docket' values for every (ward, dept) pair; pairs with
    # no dockets become 0 rather than NaN.
    .pivot_table(
        index='ward_1523',
        columns='dept',
        values='docket',
        aggfunc='count',
        fill_value=0,
    )
    # Department columns become n_<dept>.
    .add_prefix('n_')
    # Total dockets per ward = sum over all department columns.
    .assign(n_dockets=lambda dept_counts: dept_counts.sum(axis=1))
    # Turn the ward index into an ordinary 'ward' column and drop the
    # leftover 'dept' label on the column axis.
    .rename_axis(index='ward', columns=None)
    .reset_index()
    # Ward ids are kept as strings.
    .astype({'ward': str})
)

df_ward_fines.head()

In [7]:
# Sanity check: total number of dockets across all wards.
df_ward_fines.loc[:, 'n_dockets'].sum()

In [8]:
# Number of wards present in the dockets summary.
df_ward_fines.shape[0]

# Aggregate 311 Complaints by Ward

In [9]:
# Count 311 complaints per ward: one row per distinct WARD value.
df_ward_311 = df_311.groupby('WARD').size().reset_index(name='n_311_complaints')
df_ward_311.rename(columns={'WARD': 'ward'}, inplace=True)
# astype() returns a new Series, so the result has to be assigned back for the
# conversion to take effect. The ward key is stored as an integer-formatted
# string so it matches the string ward key of the dockets table when merging.
df_ward_311['ward'] = df_ward_311['ward'].astype(int).astype(str)
df_ward_311.head()

In [10]:
# Normalise the ward key to integer-formatted strings (safe to repeat if it
# has already been converted).
df_ward_311['ward'] = df_ward_311['ward'].astype(int).astype(str)
# Keep the total as the last expression so the notebook actually displays it.
df_ward_311['n_311_complaints'].sum()

In [11]:
# Number of wards present in the 311 complaints summary.
df_ward_311.shape[0]

# Merge Datasets

### Merge Dockets and 311 complaints

In [12]:
# Join per-ward docket counts with per-ward 311 complaint counts.
# This is an inner join (the pandas default), so a ward that is missing from
# either table does not appear in the result.
df_ward = df_ward_fines.merge(df_ward_311, how='inner', on='ward')
df_ward.head(n=5)

In [13]:
# Sanity check: the merged table should have one row per ward (50 expected).
df_ward.shape[0]

# Calculations

In [14]:
# Count missing values in each column of the merged table.
df_ward.isna().sum()

In [15]:
# Dockets per 311 complaint in each ward: for the n_TRANPORT counts, for the
# n_STRTSAN counts, and for all dockets combined.
df_ward['complaint_ratio_cdot'] = df_ward['n_TRANPORT'].div(df_ward['n_311_complaints'])
df_ward['complaint_ratio_streets'] = df_ward['n_STRTSAN'].div(df_ward['n_311_complaints'])
df_ward['complaint_ratio_all'] = df_ward['n_dockets'].div(df_ward['n_311_complaints'])
df_ward.head(n=5)

In [16]:
# Duplicate the ward id under a second column name for use as a map label.
df_ward['ward_label'] = df_ward.loc[:, 'ward']

In [17]:
# Preview the merged ward table with the new columns.
df_ward.head(n=5)

### citywide averages

### confirm totals

In [18]:
# Median per-ward docket count for each listed department and for all dockets.
df_ward.loc[:, ['n_POLICE', 'n_STRTSAN', 'n_TRANPORT', 'n_dockets']].median()

In [19]:
# Column totals across all wards. Restricted to numeric columns: summing the
# string columns ('ward', 'ward_label') would only concatenate the ward ids
# into one long string. The totals of the ratio columns are not meaningful.
df_ward.sum(numeric_only=True)

# Export

In [20]:
# Write the per-ward summary to CSV without the row index.
df_ward.to_csv(path_or_buf="../../results/ssw02-fines/ward-summary.csv", index=False)