<h1>Aggregate Dockets</h1>
15 February - 2 March 2024

<a name="read"></a>
# Read and Prepare Geocoded Fines Data

In [1]:
import pandas as pd

# numpy provides np.select, which get_department uses to label each address by the department(s) that issued dockets there
import numpy as np

In [2]:
# Load the geocoded docket-detail CSV from the 04-standardized data folder.
df = pd.read_csv("../../data/04-standardized/dockets-to-details-gis.csv")

In [3]:
# Preview the loaded records. Rows that could not be geocoded show NaN in
# cleaned_address, latlong, community, ward_1523, lat and long.
df.head()

Unnamed: 0,respondent,docket,violation_num,violation_date,hearing_date,violation_address,violation_desc,case_disposition,fine_amt,dept,cleaned_address,latlong,community,ward_1523,lat,long
0,1010 W MADISON LLC,23DT000582,T000215159,2023-01-31,7/10/2023,1010 W MADISON,10-8-180 Snow and ice removal.,Non-Suit,0.0,TRANPORT,1010 W MADISON,"41.88196975,-87.6524227995416",NEAR WEST SIDE,25.0,41.88197,-87.652423
1,"1100 E 47TH STREET LLC, C/O PETER CASSEL",22DT001042,T000207342,2022-01-04,5/23/2022,1100 E 47TH ST,10-8-180 Snow and ice removal.,Non-Suit,0.0,TRANPORT,1100 E 47TH ST,"41.809698,-87.599101",KENWOOD,4.0,41.809698,-87.599101
2,"1101 BERWYN LLC, C/O JOHN BARRETT",23DT000559,T000220112,2023-02-02,3/20/2023,1101 W BERWYN,10-8-180 Snow and ice removal.,Non-Suit,0.0,TRANPORT,1101 W BERWYN,"41.978248,-87.65863472496416",EDGEWATER,48.0,41.978248,-87.658635
3,1157 W ERIE,22DT001387,T000207431,2022-02-08,4/11/2022,100 UNKNOWN,10-8-180 Snow and ice removal.,Not Liable,0.0,TRANPORT,,,,,,
4,1200B INC C/O THOMAS GEORGE,21DS20339M,QTUF62L,2021-02-05,7/14/2021,1200 W BELMONT AVE,10-8-180 Snow and ice removal.,Not Liable,0.0,STRTSAN,1200 W BELMONT AVE,"41.9401529,-87.659326",LAKE VIEW,44.0,41.940153,-87.659326


# Aggregate by Docket

In [4]:
# Collapse the detail records to one row per unique combination of docket,
# department, address, coordinates, community, ward and violation date.
# n_records is the number of detail rows folded into each combination.
# NOTE: groupby leaves out rows with NaN in any key by default, so records
# without lat/long/community/ward values are not carried into df_dockets.
df_dockets = (
    df.groupby([
        'docket',
        'dept',
        'violation_address',
        'lat',
        'long',
        'community',
        'ward_1523',
        'violation_date',
    ])
    .size()
    .reset_index(name='n_records')
)

In [5]:
# Preview the docket-level table built above.
df_dockets.head()

Unnamed: 0,docket,dept,violation_address,lat,long,community,ward_1523,violation_date,n_records
0,19DS68300L,STRTSAN,4710 S WESTERN AVE,41.807859,-87.684797,BRIGHTON PARK,15.0,2019-11-13,2
1,19DS69216L,STRTSAN,1425 W MORSE AVE,42.007451,-87.666828,ROGERS PARK,49.0,2019-11-13,1
2,19DS70010L,STRTSAN,715 E 47TH ST,41.809338,-87.608013,GRAND BOULEVARD,4.0,2019-11-13,1
3,19DS72153L,STRTSAN,300 W WASHINGTON ST,41.881869,-87.740143,WEST GARFIELD PARK,28.0,2019-11-12,5
4,19DS72160L,STRTSAN,6929 N SHERIDAN RD,41.959813,-87.654693,UPTOWN,46.0,2019-11-14,1


In [6]:
# Number of docket-level rows; the address, community and ward rollups
# are checked against this total in the validation section.
len(df_dockets)

1912

# Aggregate by Address

In [7]:
# One row per address (address text + coordinates + community), with one
# n_<DEPT> column per department holding the number of df_dockets rows
# for that address, plus n_dockets as the row total across departments.
df_addresses = (
    df_dockets
    .pivot_table(
        index=['violation_address', 'lat', 'long', 'community'],
        columns='dept',
        values='docket',
        aggfunc='count',
        fill_value=0,
    )
    # prefix every department column, e.g. STRTSAN -> n_STRTSAN
    .rename(columns=lambda dept: f'n_{dept}'.strip())
    # drop the leftover "dept" label on the column axis
    .rename_axis(columns=None)
    # at this point every column is a department count, so the row sum is the total
    .assign(n_dockets=lambda counts: counts.sum(axis=1))
    .reset_index()
)

In [8]:
def get_department(BAFCONP, POLICE, STRTSAN, TRANPORT):
    """Label each address with the department(s) that issued dockets there.

    Each argument holds the per-address docket count for one department.
    Labels are tried in priority order and the first match wins:

    1. ``'cdot_and_streets'`` - TRANPORT and STRTSAN both have at least one
    2. ``'cdot'``             - TRANPORT has at least one
    3. ``'streets'``          - STRTSAN has at least one
    4. ``'police'``           - POLICE has at least one
    5. ``'bafconp'``          - BAFCONP has at least one

    Anything else is labelled ``'unknown'``. Because of this ordering,
    TRANPORT + STRTSAN is the only combination reported as a pair; for
    example, an address with both TRANPORT and POLICE dockets is ``'cdot'``.

    Returns the array of labels produced by ``np.select``.
    """
    has_cdot = TRANPORT >= 1
    has_streets = STRTSAN >= 1

    # Insertion order of the dict is the priority order handed to np.select.
    label_rules = {
        'cdot_and_streets': has_cdot & has_streets,
        'cdot': has_cdot,
        'streets': has_streets,
        'police': POLICE >= 1,
        'bafconp': BAFCONP >= 1,
    }
    return np.select(list(label_rules.values()), list(label_rules), default='unknown')

In [9]:
# Add a `depts` label to every address row. Keyword arguments bind each
# count column to the get_department parameter of the same department.
df_addresses['depts'] = get_department(
    BAFCONP=df_addresses['n_BAFCONP'],
    POLICE=df_addresses['n_POLICE'],
    STRTSAN=df_addresses['n_STRTSAN'],
    TRANPORT=df_addresses['n_TRANPORT'],
)

In [10]:
# Spot-check the per-address department counts and the derived `depts` label.
df_addresses.head()

Unnamed: 0,violation_address,lat,long,community,n_BAFCONP,n_POLICE,n_STRTSAN,n_TRANPORT,n_dockets,depts
0,10 N KILBOURN AVE,41.880962,-87.738356,WEST GARFIELD PARK,0,0,1,0,1,streets
1,100 E CHESTNUT ST,41.898588,-87.625892,NEAR NORTH SIDE,0,0,0,1,1,cdot
2,100 N KEDZIE,41.946722,-87.707835,IRVING PARK,0,0,0,1,1,cdot
3,100 N KEDZIE AVE,41.883159,-87.706529,EAST GARFIELD PARK,0,0,3,0,3,streets
4,100 W GRAND,41.765581,-87.621695,GREATER GRAND CROSSING,0,0,0,1,1,cdot


In [11]:
# Number of distinct address rows in the rollup.
len(df_addresses)

1737

<a name="community"></a>
# Aggregate by Community
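Note that 7 communities have no dockets; because the pivot only includes communities present in `df_dockets`, they are absent from this table rather than listed with zeros.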

In [12]:
# One row per community, with one n_<DEPT> column per department holding
# the number of df_dockets rows there, plus n_dockets as the row total.
df_communities = (
    df_dockets
    .pivot_table(
        index='community',
        columns='dept',
        values='docket',
        aggfunc='count',
        fill_value=0,
    )
    # prefix every department column, e.g. TRANPORT -> n_TRANPORT
    .rename(columns=lambda dept: f'n_{dept}'.strip())
    # drop the leftover "dept" label on the column axis
    .rename_axis(columns=None)
    # every column is a department count here, so the row sum is the total
    .assign(n_dockets=lambda counts: counts.sum(axis=1))
    .reset_index()
)

df_communities.head()

Unnamed: 0,community,n_BAFCONP,n_POLICE,n_STRTSAN,n_TRANPORT,n_dockets
0,ALBANY PARK,0,0,0,15,15
1,ARCHER HEIGHTS,0,0,0,14,14
2,ARMOUR SQUARE,0,0,0,26,26
3,ASHBURN,0,0,0,2,2
4,AUBURN GRESHAM,0,0,3,16,19


In [13]:
# Spot-check a single community: the LOOP row.
df_communities.loc[df_communities['community'].eq('LOOP')]

Unnamed: 0,community,n_BAFCONP,n_POLICE,n_STRTSAN,n_TRANPORT,n_dockets
38,LOOP,0,0,2,29,31


# Aggregate by Ward

In [14]:
# One row per ward (ward_1523, exposed as `ward`), with one n_<DEPT>
# column per department and n_dockets as the row total.
df_wards = (
    df_dockets
    .pivot_table(
        index='ward_1523',
        columns='dept',
        values='docket',
        aggfunc='count',
        fill_value=0,
    )
    # prefix every department column, e.g. STRTSAN -> n_STRTSAN
    .rename(columns=lambda dept: f'n_{dept}'.strip())
    # name the index `ward` and drop the "dept" label on the column axis
    .rename_axis(index='ward', columns=None)
    # every column is a department count here, so the row sum is the total
    .assign(n_dockets=lambda counts: counts.sum(axis=1))
    .reset_index()
    # ward numbers are read as floats (e.g. 25.0); store everything as integers
    .astype(int)
)
df_wards.head()

Unnamed: 0,ward,n_BAFCONP,n_POLICE,n_STRTSAN,n_TRANPORT,n_dockets
0,1,0,0,1,61,62
1,2,0,0,0,64,64
2,3,0,0,0,91,91
3,4,0,0,21,75,96
4,5,0,0,1,7,8


# Validate all totals

In [15]:
# Reference total: every rollup below should sum to this number of rows.
len(df_dockets)

1912

In [16]:
# The per-address counts must add up to the number of docket-level rows.
# Assert it so a mismatch stops the run instead of relying on a visual
# comparison of cell outputs; the total is still displayed.
address_total = df_addresses['n_dockets'].sum()
assert address_total == len(df_dockets), (
    f"address total ({address_total}) != docket rows ({len(df_dockets)})"
)
address_total

1912

In [17]:
# The per-community counts must add up to the number of docket-level rows.
# Assert it so a mismatch stops the run instead of relying on a visual
# comparison of cell outputs; the total is still displayed.
community_total = df_communities['n_dockets'].sum()
assert community_total == len(df_dockets), (
    f"community total ({community_total}) != docket rows ({len(df_dockets)})"
)
community_total

1912

In [18]:
# The per-ward counts must add up to the number of docket-level rows.
# Assert it so a mismatch stops the run instead of relying on a visual
# comparison of cell outputs; the total is still displayed.
ward_total = df_wards['n_dockets'].sum()
assert ward_total == len(df_dockets), (
    f"ward total ({ward_total}) != docket rows ({len(df_dockets)})"
)
ward_total

1912

# Export

In [19]:
# Write each aggregate to its CSV under data/05-aggregated, leaving the
# DataFrame index out of the files.
exports = {
    "../../data/05-aggregated/dockets-summary.csv": df_dockets,
    "../../data/05-aggregated/dockets-by-address.csv": df_addresses,
    "../../data/05-aggregated/dockets-by-community.csv": df_communities,
    "../../data/05-aggregated/dockets-by-ward.csv": df_wards,
}
for csv_path, frame in exports.items():
    frame.to_csv(csv_path, index=False)