In [7]:
import pandas as pd
import json
import geopandas as gpd
from shapely.geometry import Point
import altair as alt

alt.data_transformers.enable('json')

DataTransformerRegistry.enable('json')

In [8]:
geojson_path = "Communities-Chicago.geojson"
with open(geojson_path, 'r') as f:
    communities_geojson = json.load(f)

In [9]:
# Load and preprocess CSV data
complaints_path = 'CDPH_Environmental_Complaints.csv'
complaints_df = pd.read_csv(complaints_path)

# Lowercase the 'COMPLAINT TYPE' column
complaints_df['COMPLAINT TYPE'] = complaints_df['COMPLAINT TYPE'].str.lower()

# Convert 'COMPLAINT DATE' to a datetime object and categorize decades
complaints_df['COMPLAINT DATE'] = pd.to_datetime(complaints_df['COMPLAINT DATE'])
complaints_df.dropna(subset=['COMPLAINT DATE'], inplace=True)
# Extract the year from 'COMPLAINT DATE' and store it in a new column 'COMPLAINT YEAR'
complaints_df['COMPLAINT YEAR'] = complaints_df['COMPLAINT DATE'].dt.year

# Extract latitude and longitude from the 'LOCATION' column
complaints_df['LATITUDE'] = complaints_df['LOCATION'].str.extract(r'POINT \((.*?) (.*?)\)')[1].astype(float)
complaints_df['LONGITUDE'] = complaints_df['LOCATION'].str.extract(r'POINT \((.*?) (.*?)\)')[0].astype(float)

# Load community geometries
communities_gdf = gpd.read_file(geojson_path)

# Create a GeoDataFrame with point geometries for complaints
geometry = [Point(xy) for xy in zip(complaints_df['LONGITUDE'], complaints_df['LATITUDE'])]
complaints_gdf = gpd.GeoDataFrame(complaints_df, geometry=geometry)

# Perform a spatial join to associate complaints with neighborhoods
complaints_with_neighborhood = gpd.sjoin(complaints_gdf, communities_gdf, how='left', op='within')

complaints_df = complaints_with_neighborhood[['COMPLAINT TYPE', 'COMPLAINT YEAR', 'LATITUDE', 'LONGITUDE', 'area_num_1', 'INSPECTOR']]

print(complaints_df.head(15))

  if await self.run_code(code, result, async_=asy):
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:4326

  complaints_with_neighborhood = gpd.sjoin(complaints_gdf, communities_gdf, how='left', op='within')


                               COMPLAINT TYPE  COMPLAINT YEAR   LATITUDE  \
0                             noise complaint            1994  41.882436   
1                    air pollution work order            2008  41.882841   
2                             noise complaint            1996  41.883306   
3                    air pollution work order            1996  41.883341   
4                    air pollution work order            1995  41.909701   
5   service stations/storage tanks work order            2000  41.979658   
6                    air pollution work order            1995  41.868770   
7                    air pollution work order            1994  41.741261   
8                  illegal dumping work order            2000  41.709820   
9                         asbestos work order            1995  41.869520   
10                                      other            1993  41.707960   
11                            noise complaint            1993  41.705268   
12          

In [10]:
communities_gdf = gpd.read_file(geojson_path)
communities_gdf.crs = "EPSG:4326"

# Calculate the centroids
centroids = communities_gdf['geometry'].centroid

# Extract the latitude and longitude and add them as new columns
communities_gdf['latitude_centroid'] = centroids.y
communities_gdf['longitude_centroid'] = centroids.x

communities_gdf = communities_gdf[['area_num_1', 'community', 'latitude_centroid', 'longitude_centroid']]
print(communities_gdf)

   area_num_1        community  latitude_centroid  longitude_centroid
0          35          DOUGLAS          41.835118          -87.618678
1          36          OAKLAND          41.823750          -87.603216
2          37      FULLER PARK          41.809085          -87.632425
3          38  GRAND BOULEVARD          41.812949          -87.617860
4          39          KENWOOD          41.808916          -87.596184
..        ...              ...                ...                 ...
72         74  MOUNT GREENWOOD          41.694879          -87.713192
73         75      MORGAN PARK          41.689730          -87.669054
74         76            OHARE          41.975684          -87.893701
75         77        EDGEWATER          41.986712          -87.663417
76          9      EDISON PARK          42.007613          -87.813781

[77 rows x 4 columns]



  centroids = communities_gdf['geometry'].centroid


In [11]:
# Merge the two dataframes on the 'area_num_1' column
merged_df = complaints_df.merge(communities_gdf, on='area_num_1', how='left')

# Print the merged dataframe
merged_df = merged_df.dropna()
print(merged_df)

                    COMPLAINT TYPE  COMPLAINT YEAR   LATITUDE  LONGITUDE  \
0                  noise complaint            1994  41.882436 -87.626829   
1         air pollution work order            2008  41.882841 -87.662404   
2                  noise complaint            1996  41.883306 -87.627969   
3         air pollution work order            1996  41.883341 -87.652663   
4         air pollution work order            1995  41.909701 -87.653183   
...                            ...             ...        ...        ...   
57541              noise complaint            2022  41.881096 -87.641998   
57542  construction and demolition            2023  41.699623 -87.626664   
57543     air pollution work order            2023  41.713027 -87.557534   
57544              noise complaint            2022  42.017369 -87.668867   
57545  construction and demolition            2023  41.942554 -87.757848   

      area_num_1 INSPECTOR        community  latitude_centroid  \
0             32     

In [12]:
import altair as alt
import pandas as pd

inspector_complaints = merged_df.groupby(['INSPECTOR', 'COMPLAINT YEAR', 'COMPLAINT TYPE', 'community']).size().reset_index(name='COUNT')
df_inspector_counts = merged_df.groupby(['COMPLAINT TYPE', 'INSPECTOR', 'latitude_centroid', 'longitude_centroid']).size().reset_index(name='COUNT')
df_top_inspectors = df_inspector_counts.groupby(['COMPLAINT TYPE', 'latitude_centroid', 'longitude_centroid']).apply(lambda x: x.nlargest(5, 'COUNT')).reset_index(drop=True)

# Define selections
select_point = alt.selection_single(empty='all', on='click', fields=['latitude_centroid', 'longitude_centroid'])
select_complaint_type = alt.selection_single(fields=['COMPLAINT TYPE'], empty='all', on='click')
select_inspector = alt.selection_single(fields=['INSPECTOR'], empty='all', on='click')
select_bar = alt.selection_single(fields=['INSPECTOR'], empty='all', on='click')

# Define the bar chart for the top 10 inspectors
bar_chart = alt.Chart(df_top_inspectors.nlargest(15, 'COUNT')).mark_bar().encode(
    x=alt.X('INSPECTOR:N', title='Inspector'),
    y=alt.Y('COUNT:Q', title='Number of Complaints'),
    color=alt.Color('COMPLAINT TYPE:N', scale=alt.Scale(scheme='category20')),
    tooltip=['COMPLAINT TYPE:N']
).add_selection(
    select_point
).add_selection(
    select_complaint_type
).add_selection(
    select_inspector
).add_selection(
    select_bar  # Add select_bar to the bar chart
).properties(
    width=500,
    height=400,
    title='Top 10 Inspectors for Selected Complaint'
)

# Filter data for the top 4 communities
top_4_communities = inspector_complaints.groupby('community').sum('COUNT').nlargest(4, 'COUNT').reset_index()
inspector_complaints_top_4 = inspector_complaints[inspector_complaints['community'].isin(top_4_communities['community'])]

# # Define the line chart and circle chart
# line_chart = alt.Chart(inspector_complaints_top_4).mark_line().encode(
#     x='COMPLAINT YEAR:N',
#     y='COUNT:Q',
#     # color='INSPECTOR:N',
#     color=alt.Color('community:N', scale=alt.Scale(scheme='category20')),
#     detail='community:N',  # Group lines based on the "community" column
# ).add_selection(
#     select_point
# ).add_selection(
#     select_complaint_type
# ).add_selection(
#     select_inspector
# ).transform_filter(
#     select_bar  # Filter based on the selected bar in the bar chart
# )

circle_chart = alt.Chart(inspector_complaints_top_4).mark_circle(size=120).encode(
    x='COMPLAINT YEAR:N',
    y='COUNT:Q',
    # color='INSPECTOR:N',
    color = alt.Color('community:N', scale=alt.Scale(scheme='category20')),
    tooltip=['INSPECTOR:N', 'COUNT:Q', 'community:N']
).add_selection(
    select_point
).add_selection(
    select_complaint_type
).add_selection(
    select_inspector
).transform_filter(
    select_bar  # Filter based on the selected bar in the bar chart
)

# Combine the line chart and circle chart
# combined_chart = line_chart + circle_chart

combined_chart = circle_chart

# Set properties for the combined chart
combined_chart = combined_chart.properties(
    width=400,
    height=400,
    title='Inspector Complaints by Year (use tool-tip to identify inspector!)'
)

# Concatenate and display the charts side by side
displayed_charts = alt.hconcat(bar_chart, combined_chart)

# Save the charts to files
# displayed_charts.save('displayed_charts.html')

# Display the charts
displayed_charts

# with open("../vega_lite/linked_charts/linked_stacked_bar_scatter/linked_stacked_bar_scatter_raw_files.json", "w") as f:
#     f.write(json.dumps(displayed_charts.to_dict(), indent=4))

# with open("../JSON/linked_stacked_bar_scatter_df_top_inspectors_15.json", "w") as f:
#     f.write(df_top_inspectors.nlargest(15, 'COUNT').to_json(indent=4))

# with open("../JSON/linked_stacked_bar_scatter_inspector_complaints_top_4.json", "w") as f:
#     f.write(inspector_complaints_top_4.to_json(indent=4))


