# Summarize by Community
last updated 3/3/24

<a name="read"></a>
# 1. Read and Prepare Geocoded Fines and 311 Complaints Data

In [1]:
import pandas as pd

In [2]:
df_fines = pd.read_csv("../../data/05-finalized/dockets-summary.csv")
df_fines.head()

Unnamed: 0,docket,dept,violation_address,lat,long,community,ward_1523,violation_date,n_records
0,19DS68300L,STRTSAN,4710 S WESTERN AVE,41.807859,-87.68479703503766,BRIGHTON PARK,15,2019-11-13,2
1,19DS69216L,STRTSAN,1425 W MORSE AVE,42.0074513,-87.6668285,ROGERS PARK,49,2019-11-13,1
2,19DS70010L,STRTSAN,715 E 47TH ST,41.8093383,-87.6080127,GRAND BOULEVARD,4,2019-11-13,1
3,19DS72153L,STRTSAN,300 W WASHINGTON ST,41.8818694,-87.7401431,WEST GARFIELD PARK,28,2019-11-12,5
4,19DS72160L,STRTSAN,6929 N SHERIDAN RD,41.9598134,-87.654693,UPTOWN,46,2019-11-14,1


In [3]:
df_311 = pd.read_csv("../../data/05-finalized/311-complaints-unshoveled.csv")
df_311['community_caps']=df_311['COMMUNITY_NAME'].str.upper()
df_311 = df_311.drop(columns=['COMMUNITY_NAME'])
df_311.head()

Unnamed: 0,SR_NUMBER,SR_SHORT_CODE,CREATED_DATE,STREET_ADDRESS,COMMUNITY_AREA,WARD,STATUS,ORIGIN,CLOSED_DATE,LATITUDE,LONGITUDE,SR_TYPE,year,month,date,season,GEOID,community_caps
0,SR20-05649092,SWSNOREM,2020-12-17 13:24:37,5200 S BLACKSTONE AVE,41,4,Completed,Phone Call,2020-12-18 15:12:51,41.800909,-87.590515,Snow – Uncleared Sidewalk Complaint,2020,12,2020-12-17,2020-2021,41,HYDE PARK
1,SR21-00001364,SWSNOREM,2021-01-01 11:40:33,66 E CHESTNUT ST,8,42,Completed,Mobile Device,2021-01-04 14:33:44,41.898399,-87.625723,Snow – Uncleared Sidewalk Complaint,2021,1,2021-01-01,2020-2021,8,NEAR NORTH SIDE
2,SR21-00000774,SWSNOREM,2021-01-01 09:11:15,2648 N WHIPPLE ST,22,32,Completed,Internet,2021-01-06 13:37:58,41.930233,-87.703706,Snow – Uncleared Sidewalk Complaint,2021,1,2021-01-01,2020-2021,22,LOGAN SQUARE
3,SR20-05723378,SWSNOREM,2020-12-31 07:19:37,5015 N SPRINGFIELD AVE,14,39,Completed,Mobile Device,2021-01-08 15:30:57,41.972439,-87.72543,Snow – Uncleared Sidewalk Complaint,2020,12,2020-12-31,2020-2021,14,ALBANY PARK
4,SR21-00014080,SWSNOREM,2021-01-04 14:07:09,3757 N BELL AVE,5,47,Completed,Internet,2021-01-06 11:37:30,41.950316,-87.684472,Snow – Uncleared Sidewalk Complaint,2021,1,2021-01-04,2020-2021,5,NORTH CENTER


### census data

In [4]:
df_population = pd.read_csv("../../data/05-finalized/census-by-community.csv")
df_population.head()

Unnamed: 0,community_name,2020_pop,vac_hu,hu_tot,vacant,vacperc,community_caps
0,Albany Park,48396,1448.0,18230.0,5.322983,0.004331,ALBANY PARK
1,Archer Heights,14196,406.0,4538.0,29.231599,0.02277,ARCHER HEIGHTS
2,Armour Square,13890,393.513552,5860.348806,15.696404,0.024625,ARMOUR SQUARE
3,Ashburn,41098,540.0,13479.0,59.468213,0.019123,ASHBURN
4,Auburn Gresham,44878,3364.0,20617.0,119.122147,0.049388,AUBURN GRESHAM


In [5]:
len(df_population)

77

<a name="community"></a>
# Aggregate Dockets by Community
Note that 7 communities have no dockets

In [6]:
df_comm_fines = df_fines.pivot_table(index='community',
                             columns='dept',
                             values='docket',
                             aggfunc=['count'],
                             fill_value=0).reset_index()
# Flatten the MultiIndex in columns
df_comm_fines.columns = ['_'.join(col).strip() for col in df_comm_fines.columns.values]

df_comm_fines = df_comm_fines.rename(columns={'community_':'community'})
df_comm_fines.rename(columns=lambda x: x.replace('count_','n_'), inplace=True)

# Reset the index to flatten it
df_comm_fines.reset_index(drop=True, inplace=True)

df_comm_fines['n_dockets'] = df_comm_fines.filter(like='n_').sum(axis=1)
df_comm_fines['community_caps']=df_comm_fines['community']
#df_comm_fines = df_comm_fines.drop(columns=['community'])

df_comm_fines.head()

Unnamed: 0,community,n_BAFCONP,n_POLICE,n_STRTSAN,n_TRANPORT,n_unknown,n_dockets,community_caps
0,ALBANY PARK,0,0,0,15,0,15,ALBANY PARK
1,ARCHER HEIGHTS,0,0,0,14,0,14,ARCHER HEIGHTS
2,ARMOUR SQUARE,0,0,0,26,0,26,ARMOUR SQUARE
3,ASHBURN,0,0,0,2,0,2,ASHBURN
4,AUBURN GRESHAM,0,0,3,16,0,19,AUBURN GRESHAM


In [7]:
# confirm totals
df_comm_fines['n_dockets'].sum()

1918

In [8]:
len(df_comm_fines)

72

# Aggregate 311 Complaints by Community

In [9]:
df_comm_311 = df_311.groupby('community_caps').size().reset_index(name='n_311_complaints')
df_comm_311['community_name']=df_comm_311['community_caps'].str.title()
df_comm_311 = df_comm_311.drop(columns=['community_name'])
df_comm_311.head()

Unnamed: 0,community_caps,n_311_complaints
0,ALBANY PARK,400
1,ARCHER HEIGHTS,60
2,ARMOUR SQUARE,77
3,ASHBURN,108
4,AUBURN GRESHAM,128


In [10]:
len(df_comm_311)

77

In [11]:
df_comm_311['n_311_complaints'].sum()

21079

# Merge Datasets

### Merge Community Census Data with Dockets and 311 complaints

In [12]:
df_community = pd.merge(df_population,df_comm_fines, on = 'community_caps', how='left')
df_community = pd.merge(df_community,df_comm_311, on = 'community_caps')
df_community.fillna(0, inplace=True)
df_community['n_BAFCONP']=df_community['n_BAFCONP'].astype(int)
df_community['n_POLICE']=df_community['n_POLICE'].astype(int)
df_community['n_STRTSAN']=df_community['n_STRTSAN'].astype(int)
df_community['n_TRANPORT']=df_community['n_TRANPORT'].astype(int)
df_community.head()

Unnamed: 0,community_name,2020_pop,vac_hu,hu_tot,vacant,vacperc,community_caps,community,n_BAFCONP,n_POLICE,n_STRTSAN,n_TRANPORT,n_unknown,n_dockets,n_311_complaints
0,Albany Park,48396,1448.0,18230.0,5.322983,0.004331,ALBANY PARK,ALBANY PARK,0,0,0,15,0.0,15.0,400
1,Archer Heights,14196,406.0,4538.0,29.231599,0.02277,ARCHER HEIGHTS,ARCHER HEIGHTS,0,0,0,14,0.0,14.0,60
2,Armour Square,13890,393.513552,5860.348806,15.696404,0.024625,ARMOUR SQUARE,ARMOUR SQUARE,0,0,0,26,0.0,26.0,77
3,Ashburn,41098,540.0,13479.0,59.468213,0.019123,ASHBURN,ASHBURN,0,0,0,2,0.0,2.0,108
4,Auburn Gresham,44878,3364.0,20617.0,119.122147,0.049388,AUBURN GRESHAM,AUBURN GRESHAM,0,0,3,16,0.0,19.0,128


In [13]:
# confirm I have 77 communities
len(df_community)

77

In [14]:
df_community[df_community['community_name']=='Loop']

Unnamed: 0,community_name,2020_pop,vac_hu,hu_tot,vacant,vacperc,community_caps,community,n_BAFCONP,n_POLICE,n_STRTSAN,n_TRANPORT,n_unknown,n_dockets,n_311_complaints
65,Loop,42298,3598.0,24565.0,30.45673,0.028743,LOOP,LOOP,0,0,2,29,0.0,31.0,177


# Calculations

In [15]:
df_community['dp10k'] = \
(10000/4)*df_community['n_dockets']/df_community['2020_pop']

df_community['streets_p10k'] = \
(10000/4)*df_community['n_STRTSAN']/df_community['2020_pop']

df_community['cdot_p10k'] = \
(10000/4)*df_community['n_TRANPORT']/df_community['2020_pop']

df_community['police_p10k'] = \
(10000/4)*df_community['n_POLICE']/df_community['2020_pop']

df_community['n_p10k'] = \
(10000/4)*df_community['n_BAFCONP']/df_community['2020_pop']

df_community['311_p10k'] = \
(10000/4)*df_community['n_311_complaints']/df_community['2020_pop']

df_community['complaint_ratio'] = df_community['n_TRANPORT']/df_community['n_311_complaints']

In [16]:
df_community

Unnamed: 0,community_name,2020_pop,vac_hu,hu_tot,vacant,vacperc,community_caps,community,n_BAFCONP,n_POLICE,...,n_unknown,n_dockets,n_311_complaints,dp10k,streets_p10k,cdot_p10k,police_p10k,n_p10k,311_p10k,complaint_ratio
0,Albany Park,48396,1448.000000,18230.000000,5.322983,0.004331,ALBANY PARK,ALBANY PARK,0,0,...,0.0,15.0,400,0.774857,0.000000,0.774857,0.000000,0.000000,20.662865,0.037500
1,Archer Heights,14196,406.000000,4538.000000,29.231599,0.022770,ARCHER HEIGHTS,ARCHER HEIGHTS,0,0,...,0.0,14.0,60,2.465483,0.000000,2.465483,0.000000,0.000000,10.566357,0.233333
2,Armour Square,13890,393.513552,5860.348806,15.696404,0.024625,ARMOUR SQUARE,ARMOUR SQUARE,0,0,...,0.0,26.0,77,4.679626,0.000000,4.679626,0.000000,0.000000,13.858891,0.337662
3,Ashburn,41098,540.000000,13479.000000,59.468213,0.019123,ASHBURN,ASHBURN,0,0,...,0.0,2.0,108,0.121660,0.000000,0.121660,0.000000,0.000000,6.569663,0.018519
4,Auburn Gresham,44878,3364.000000,20617.000000,119.122147,0.049388,AUBURN GRESHAM,AUBURN GRESHAM,0,0,...,0.0,19.0,128,1.058425,0.167120,0.891305,0.000000,0.000000,7.130443,0.125000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72,West Lawn,33662,525.000000,9701.000000,46.394184,0.024551,WEST LAWN,WEST LAWN,0,0,...,0.0,17.0,146,1.262551,0.148535,1.114016,0.000000,0.000000,10.843087,0.102740
73,West Pullman,26104,2147.000000,11210.000000,221.071452,0.096914,WEST PULLMAN,WEST PULLMAN,0,1,...,0.0,5.0,23,0.478854,0.095771,0.287312,0.095771,0.000000,2.202728,0.130435
74,West Ridge,77122,2185.000000,28054.000000,11.765845,0.005207,WEST RIDGE,WEST RIDGE,0,0,...,0.0,24.0,804,0.777988,0.226913,0.551075,0.000000,0.000000,26.062602,0.021144
75,West Town,87781,3177.000000,41891.000000,62.407906,0.021311,WEST TOWN,WEST TOWN,0,0,...,0.0,97.0,1609,2.762557,0.028480,2.734077,0.000000,0.000000,45.824267,0.059664


### verify totals
compare these totals with ward-level summaries

* n_BAFCONP                                                         2
* n_POLICE                                                         25
* n_STRTSAN                                                       495
* n_TRANPORT                                                     1359
* n_dockets                                                      1881
* n_311_complaints                                                21079

In [17]:
df_community['n_BAFCONP'].sum()

2

In [18]:
df_community['n_POLICE'].sum()

25

In [19]:
df_community['n_STRTSAN'].sum()

497

In [20]:
df_community['n_TRANPORT'].sum()

1388

In [21]:
df_community['n_dockets'].sum()

1913.0

In [22]:
df_community['n_311_complaints'].sum()

21079

# Export

In [23]:
df_community.to_csv("../../results/ssw02-fines/community-summary.csv", index= False)