# Standardize Snow Clearance Data
3/2/24

<a name="read"></a>
# 1. Read Spatially Joined Data
<div style="color:red"> TODO: remove the two drop-duplicates steps once geocoding is working </div>

In [1]:
import pandas as pd

In [2]:
# Spatial-join outputs: one file maps addresses to community areas, the other to 2015 wards.
df_communities = pd.read_csv(
    filepath_or_buffer="../../data/03-spatially-joined/addresses-to-communities.csv"
)
df_wards = pd.read_csv(
    filepath_or_buffer="../../data/03-spatially-joined/addresses-to-wards2015.csv"
)
# Tidied docket-level details that the geographies are merged into further down.
df_dockets = pd.read_csv(
    filepath_or_buffer="../../data/01-tidied/dockets-to-details.csv"
)

In [3]:
# Row count of the address-to-community join.
df_communities.shape[0]

1914

In [4]:
# Row count of the address-to-ward join.
df_wards.shape[0]

1914

In [5]:
# Row count of the docket details table.
df_dockets.shape[0]

3662

# Merge Geographies

In [6]:
# Keep only the docket key, the cleaned address, its coordinates and the community name.
df_communities = df_communities.loc[
    :, ['docket', 'cleaned_address', 'latlong', 'community']
]
df_communities.head()

Unnamed: 0,docket,cleaned_address,latlong,community
0,21DT001189,3832 S WABASH,"41.8246153,-87.62519449555593",DOUGLAS
1,22DT001565,2831 S MICHIGAN,"41.842898074485504,-87.62367788440045",DOUGLAS
2,22DT001566,250 E 29TH ST,"41.84211911644582,-87.61996275276782",DOUGLAS
3,20DT000917,3100 S INDIANA,"41.838241,-87.622033",DOUGLAS
4,22DT001221,835 E 35TH ST,"41.8310311,-87.6245552",DOUGLAS


In [7]:
# Build the ward lookup in one non-mutating chain. The previous version called
# dropna(inplace=True) and then assigned a column on a frame derived from a column
# selection, which can raise SettingWithCopyWarning and mutates state step by step.
df_wards = (
    df_wards[['docket', 'ward']]
    .dropna(subset=['ward'])                 # rows without a ward cannot be cast to int
    .astype({'ward': int})                   # same cast as before, applied to the ward column only
    .rename(columns={'ward': 'ward_1523'})   # column name used by the merge and export below
)
df_wards.head()

Unnamed: 0,docket,ward_1523
0,21DT001240,12
1,22DT000758,12
2,20DT001445,12
3,20DT001104,12
4,20DT001102,12


### merge address data

In [8]:
# Left join on docket: every community-joined address is kept, and records with
# no ward assignment stay in the result with a missing ward_1523.
df_gis = df_communities.merge(df_wards, on="docket", how="left")
df_gis.head()

Unnamed: 0,docket,cleaned_address,latlong,community,ward_1523
0,21DT001189,3832 S WABASH,"41.8246153,-87.62519449555593",DOUGLAS,3.0
1,22DT001565,2831 S MICHIGAN,"41.842898074485504,-87.62367788440045",DOUGLAS,3.0
2,22DT001566,250 E 29TH ST,"41.84211911644582,-87.61996275276782",DOUGLAS,4.0
3,20DT000917,3100 S INDIANA,"41.838241,-87.622033",DOUGLAS,3.0
4,22DT001221,835 E 35TH ST,"41.8310311,-87.6245552",DOUGLAS,3.0


### parse lat and long

In [9]:
# Split each "lat,long" string once, then store the two pieces as separate text columns.
latlong_parts = df_gis['latlong'].str.split(',')
df_gis['lat'] = latlong_parts.str.get(0)
df_gis['long'] = latlong_parts.str.get(1)

### review and clean records with no community assigned

In [10]:
# Rows of the merged geography table that have no community value.
df_gis.loc[lambda frame: frame["community"].isna()]

Unnamed: 0,docket,cleaned_address,latlong,community,ward_1523,lat,long
1913,21DT002747,5450 S 47TH,"41.8654712,-87.7423005",,,41.8654712,-87.7423005


In [11]:
# Rows of the merged geography table that have no ward value.
df_gis.loc[lambda frame: frame["ward_1523"].isna()]

Unnamed: 0,docket,cleaned_address,latlong,community,ward_1523,lat,long
1913,21DT002747,5450 S 47TH,"41.8654712,-87.7423005",,,41.8654712,-87.7423005


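One address, 5450 S 47TH (docket 21DT002747), is not a valid address, but it was geocoded to 41.8654712,-87.7423005, at the edge of Lawndale near Roosevelt, which doesn't look right. If it is actually 5450 W. 47th, it would be in suburban Stickney, so I'm choosing to skip it. (Note: the drop in the next cell is currently commented out, so this record still appears in the exported files.)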

In [12]:
#drop record with invalid address
# NOTE(review): the drop below is commented out, so the 5450 S 47TH record
# (docket 21DT002747) is still present in df_gis and in the files exported at the end.
# The disabled snippet refers to `df` and an `address` column, neither of which is
# defined in the cells shown here; if re-enabled it would presumably need df_gis and
# `cleaned_address` instead — TODO confirm.
# index_to_drop = df[df['address'] == '5450 S 47TH'].index
# df = df.drop(index_to_drop)

In [13]:
# Manual community-area override for one address. Per the original note, this address
# isn't recognized, and it is placed in West Garfield if BLVD is used instead of ST.
df_gis.loc[
    df_gis['cleaned_address'].eq('300 W WASHINGTON ST'),
    'community',
] = 'LOOP'

<a name="docket"></a>
# Merge Geographies into Detailed Dockets

In [14]:
# Left join on docket so every docket record is kept; dockets with no match in
# df_gis end up with missing geography columns.
df_dockets_gis = df_dockets.merge(df_gis, on='docket', how='left')

In [15]:
# Row count after the merge, for comparison with the docket table's count above.
df_dockets_gis.shape[0]

3662

In [16]:
# Docket records that have no community value after the merge.
df_dockets_gis.loc[lambda frame: frame["community"].isna()]

Unnamed: 0,respondent,docket,violation_num,violation_date,hearing_date,violation_address,violation_desc,case_disposition,fine_amt,dept,cleaned_address,latlong,community,ward_1523,lat,long
3,1157 W ERIE,22DT001387,T000207431,2022-02-08,4/11/2022,100 UNKNOWN,10-8-180 Snow and ice removal.,Not Liable,0.0,TRANPORT,,,,,,
1797,CORE POWER YOGA LLC,21DT000877,T000203714,2021-02-17,4/12/2021,100 UNKNOWN,10-8-180 Snow and ice removal.,Liable,150.0,TRANPORT,,,,,,
2313,"PTS CANAL INC, C/O MARK W LEUNG",21DT000685,T000200346,2021-02-09,3/29/2021,100 UNKNOWN,10-8-180 Snow and ice removal.,Liable,150.0,TRANPORT,,,,,,
2351,ROMA PACKING CO,22DT000756,T000212869,2022-01-11,3/21/2022,100 UNKNOWN,10-8-180 Snow and ice removal.,Liable,150.0,TRANPORT,,,,,,
3348,"RAINBOW MOTEL CORP, C/O ROSARIO ORTIZ",21DT002747,T000207541,2021-01-27,8/16/2021,5450 S 47TH,10-8-180 Snow and ice removal.,Default,500.0,TRANPORT,5450 S 47TH,"41.8654712,-87.7423005",,,41.8654712,-87.7423005


# 5. Export

In [17]:
# Write the geography lookup, without the DataFrame index.
df_gis.to_csv(
    path_or_buf="../../data/04-standardized/dockets-to-gis.csv",
    index=False,
)
# Write the docket details merged with the geography columns, also without the index.
df_dockets_gis.to_csv(
    path_or_buf="../../data/04-standardized/dockets-to-details-gis.csv",
    index=False,
)