In [1]:
import altair as alt
from IPython.display import display
# from vega_datasets import data
from shapely.geometry import Point,LineString,Polygon
import geopandas as gpd
import json
import pandas as pd
import matplotlib.pyplot as plt
import random
from shapely.geometry import shape, mapping

In [2]:
communities_geojson = None
communities_gdf = None
chicago_gdf = None
complaints_df = None
inChicago_complaints_gdf = None
inChicago_complaints_json = None

geojson_path = 'Communities-Chicago.geojson'
complaints_path = 'CDPH_Environmental_Complaints.csv'

In [3]:
with open(geojson_path, 'r') as f:
    communities_geojson = json.load(f)


communities_gdf = gpd.read_file(geojson_path)
communities_gdf.crs = "EPSG:4326"
print(communities_gdf)

          community area     shape_area perimeter area_num_1 area_numbe  \
0           DOUGLAS    0  46004621.1581         0         35         35   
1           OAKLAND    0  16913961.0408         0         36         36   
2       FULLER PARK    0  19916704.8692         0         37         37   
3   GRAND BOULEVARD    0  48492503.1554         0         38         38   
4           KENWOOD    0  29071741.9283         0         39         39   
..              ...  ...            ...       ...        ...        ...   
72  MOUNT GREENWOOD    0  75584290.0209         0         74         74   
73      MORGAN PARK    0  91877340.6988         0         75         75   
74            OHARE    0  371835607.687         0         76         76   
75        EDGEWATER    0  48449990.8397         0         77         77   
76      EDISON PARK    0  31636313.7864         0          9          9   

   comarea_id comarea      shape_len  \
0           0       0  31027.0545098   
1           0      

In [4]:
# Merge all the polygons into a single polygon
chicago_gdf = communities_gdf.unary_union

# Create a new GeoDataFrame with the merged polygon
chicago_gdf = gpd.GeoDataFrame(geometry=[chicago_gdf])

In [5]:
# # Create a Matplotlib figure
# fig, ax = plt.subplots()

# # Plot the Chicago GeoJSON
# communities_gdf.plot(ax=ax, color='lightgray', edgecolor='black', linewidth=1)
# # Set axis labels
# ax.set_xlabel('Longitude')
# ax.set_ylabel('Latitude')
# ax.set_aspect('equal')

# # Show the Matplotlib plot
# plt.show()

In [6]:
# # Create a Matplotlib figure
# fig, ax = plt.subplots()

# # Plot the Chicago GeoJSON
# chicago_gdf.plot(ax=ax, color='lightgray', edgecolor='black', linewidth=1)
# # Set axis labels
# ax.set_xlabel('Longitude')
# ax.set_ylabel('Latitude')
# ax.set_aspect('equal')

# # Show the Matplotlib plot
# plt.show()

In [7]:
def categorize_decade(year):
    if 1992 <= year <= 2003:
        return 1
    elif 2003 < year <= 2013:
        return 2
    elif 2013 < year <= 2024:
        return 3

In [8]:
complaints_df = pd.read_csv(complaints_path)

complaints_df['LATITUDE'] = complaints_df['LOCATION'].str.extract(r'POINT \((.*?) (.*?)\)')[1].astype(float)
complaints_df['LONGITUDE'] = complaints_df['LOCATION'].str.extract(r'POINT \((.*?) (.*?)\)')[0].astype(float)
complaints_df['COMPLAINT TYPE'] = complaints_df['COMPLAINT TYPE'].str.lower()

complaints_df['COMPLAINT DATE'] = pd.to_datetime(complaints_df['COMPLAINT DATE'])
complaints_df.dropna(subset=['COMPLAINT DATE'], inplace=True)
complaints_df['DECADE'] = complaints_df['COMPLAINT DATE'].dt.year.apply(categorize_decade)
complaints_required_cols = [ 'COMPLAINT TYPE','LATITUDE','LONGITUDE','DECADE']
#print(complaints_df)
complaints_df = complaints_df[complaints_required_cols].copy()
complaints_df.columns = [ 'COMPLAINT TYPE','LATITUDE','LONGITUDE','DECADE']
print(complaints_df)

                    COMPLAINT TYPE   LATITUDE  LONGITUDE  DECADE
0                  noise complaint  41.882436 -87.626829       1
1         air pollution work order  41.882841 -87.662404       2
2                  noise complaint  41.883306 -87.627969       1
3         air pollution work order  41.883341 -87.652663       1
4         air pollution work order  41.909701 -87.653183       1
...                            ...        ...        ...     ...
57543              noise complaint  41.881096 -87.641998       3
57544  construction and demolition  41.699623 -87.626664       3
57545     air pollution work order  41.713027 -87.557534       3
57546              noise complaint  42.017369 -87.668867       3
57547  construction and demolition  41.942554 -87.757848       3

[57546 rows x 4 columns]


In [9]:
geometry = [Point(xy) for xy in zip(complaints_df['LONGITUDE'], complaints_df['LATITUDE'])]

# Create a GeoDataFrame by specifying the geometry column
complaints_gdf = gpd.GeoDataFrame(complaints_df, geometry=geometry,crs='EPSG:4326')
unique_complaint_types = complaints_gdf['DECADE'].unique()

complaints_points_gdf = complaints_gdf

In [10]:
#filter points only within chicago
chicago_complaints_gdf = gpd.sjoin(complaints_gdf, chicago_gdf, how='right',op='within')
chicago_complaints_gdf = chicago_complaints_gdf.drop(columns=['index_left', 'geometry'])
chicago_complaints_json = json.dumps(chicago_complaints_gdf.to_dict(orient='records')) 
print(chicago_complaints_gdf)

  if await self.run_code(code, result, async_=asy):
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:4326
Right CRS: None

  chicago_complaints_gdf = gpd.sjoin(complaints_gdf, chicago_gdf, how='right',op='within')


                               COMPLAINT TYPE   LATITUDE  LONGITUDE  DECADE
0       toxics hazardous materials work order  41.683467 -87.667399       2
0                             noise complaint  41.687645 -87.667248       2
0                                       other  41.692635 -87.667798       1
0                    air pollution work order  41.692635 -87.667798       1
0                    air pollution work order  41.692635 -87.667798       1
..                                        ...        ...        ...     ...
0                             noise complaint  42.018336 -87.808083       1
0                             noise complaint  42.018418 -87.808081       1
0       toxics hazardous materials work order  42.018506 -87.816695       2
0   service stations/storage tanks work order  42.018520 -87.807279       2
0   service stations/storage tanks work order  42.018520 -87.807279       2

[57464 rows x 4 columns]


In [11]:
community_features = communities_geojson["features"]

# Specify the path where you want to save the JSON file
output_json_path = 'JSON/community_features.json'

# Write the 'features' list to the new JSON file
with open(output_json_path, 'w') as output_file:
    json.dump(community_features, output_file, indent=4)

# -----

# Assuming chicago_complaints_gdf is a GeoDataFrame
chicago_complaints_json = json.dumps(chicago_complaints_gdf.to_dict(orient='records'), indent=4)

# Specify the path where you want to save the JSON file
# output_json_path = 'JSON/chicago_complaints.json'

# Write the JSON string to the new JSON file
# with open(output_json_path, 'w') as output_file:
#     output_file.write(chicago_complaints_json)

In [12]:
spec = {
  "$schema": "https://vega.github.io/schema/vega-lite/v5.json",

    "width": 500,
    "height": 500,
    "title": "Distribution of complaint types across Chicago",
    
    "layer": [
        
        
    {
        "data": {
            # "values":communities_geojson["features"],
            "url": "JSON/community_features.json",
            "format": {"type": "json"}
        },
        

        
          "projection": {"type": "mercator"},
          "mark": "geoshape",
          "encoding": {

           "color": {
                  "field": "properties.sector",
                  "type": "nominal"
                },
              


            "tooltip": {"field": "properties.community", "title": "hello"}
          }
    }, 
          
    {
        "data": {
            # "values": chicago_complaints_json
            "url": "JSON/chicago_complaints.json"
          },
        
        "params": [{
            "name": "complaint_types",
            "select": {"type": "point", "fields": ["COMPLAINT TYPE"]},
            "bind": "legend"
          }],
        
        "mark": "circle",
        "encoding": {
            "color": {"field": "COMPLAINT TYPE", "type": "nominal"},
          "opacity": {
              "condition": {"param": "complaint_types", "value": 1},
              "value": 0.0
            },
            "longitude": {
              "field": "LONGITUDE",
              "type": "quantitative"
            },
            "latitude": {
              "field": "LATITUDE",
              "type": "quantitative"
            },
            "projection": {"type": "mercator"},
            "size": {"value": 50}
        }
    }     
  ]
}
display(alt.display.html_renderer(spec), raw=True)

In [13]:
spec = {
  "$schema": "https://vega.github.io/schema/vega-lite/v5.json",

    "width": 500,
    "height": 500,
    "title": "Distribution of complaint types across decades",
    
    "layer": [
        
        
    {
        "data": {
            # "values":communities_geojson["features"],
            "url": "JSON/community_features.json",
            "format": {"type": "json"}
        },
        

        
          "projection": {"type": "mercator"},
          "mark": "geoshape",
          "encoding": {

           "color": {
                  "field": "properties.sector",
                  "type": "nominal"
                },
              


            "tooltip": {"field": "properties.community", "title": "hello"}
          }
    }, 
          
    {
        "data": {
            # "values": chicago_complaints_json
            "url": "JSON/chicago_complaints.json"
          },
        
          "selection": {
            "DECADE": {
              "type": "single",
              "fields": ["DECADE"],
              "init": {"DECADE": 3},
              "bind": {
                "input": "select",
                "options": [1, 2, 3]
              }
            }
          },
        
        "mark": "circle",
        "encoding": {
            "color": {"field": "COMPLAINT TYPE", "type": "nominal"},
          "opacity": {
              "condition": {"param": "DECADE", "value": 1},
              "value": 0.0
            },
            "longitude": {
              "field": "LONGITUDE",
              "type": "quantitative"
            },
            "latitude": {
              "field": "LATITUDE",
              "type": "quantitative"
            },
            "projection": {"type": "mercator"},
            "size": {"value": 5}
        }
    }     
  ]
}
display(alt.display.html_renderer(spec), raw=True)

In [14]:
#print(complaints_points_gdf)
# print(communities_gdf)


grid_tricount_gdf = gpd.sjoin(complaints_points_gdf, communities_gdf, how='right', op='within')


grid_tricount_required_cols = ['area_num_1', 'community', 'COMPLAINT TYPE','geometry']
grid_tricount_gdf = grid_tricount_gdf[grid_tricount_required_cols].copy()
grid_tricount_gdf.columns = ['area_num_1', 'community', 'COMPLAINT TYPE','geometry']
grouped_grid_tricount_gdf = grid_tricount_gdf.groupby(['area_num_1', 'community']).agg({'geometry': 'first', 'COMPLAINT TYPE': 'count'}).reset_index()
# print(grouped_grid_tricount_gdf)
# print(grid_tricount_gdf)

# Rename columns for clarity
grouped_grid_tricount_gdf.columns = ['area_num_1', 'community', 'geometry', 'Total_Complaint_Types']
grouped_grid_tricount_gdf = grouped_grid_tricount_gdf.set_geometry('geometry')
grouped_grid_tricount_json = grouped_grid_tricount_gdf.to_json()
grouped_grid_tricount_geojson = json.loads(grouped_grid_tricount_json) 

print(grouped_grid_tricount_gdf)



  if await self.run_code(code, result, async_=asy):


   area_num_1        community  \
0           1      ROGERS PARK   
1          10     NORWOOD PARK   
2          11   JEFFERSON PARK   
3          12      FOREST GLEN   
4          13       NORTH PARK   
..        ...              ...   
72         75      MORGAN PARK   
73         76            OHARE   
74         77        EDGEWATER   
75          8  NEAR NORTH SIDE   
76          9      EDISON PARK   

                                             geometry  Total_Complaint_Types  
0   MULTIPOLYGON (((-87.65456 41.99817, -87.65574 ...                    698  
1   MULTIPOLYGON (((-87.78002 41.99741, -87.78049 ...                    590  
2   MULTIPOLYGON (((-87.75264 41.96797, -87.75279 ...                    544  
3   MULTIPOLYGON (((-87.76919 42.00489, -87.76919 ...                    279  
4   MULTIPOLYGON (((-87.70690 41.98308, -87.70688 ...                    345  
..                                                ...                    ...  
72  MULTIPOLYGON (((-87.64215 41.68508

In [15]:
features = grouped_grid_tricount_geojson["features"]

# Specify the path where you want to save the JSON file
grouped_grid_tricount_json_path = 'JSON/grouped_grid_tricount.json'

# # Write the 'features' list to the new JSON file
# with open(grouped_grid_tricount_json_path, 'w') as output_file:
#     json.dump(features, output_file, indent=4)

In [16]:
spec = {
  "$schema": "https://vega.github.io/schema/vega-lite/v5.json",

    "width": 600,
    "height": 500,
    "title": "Complaints by community",
    
    "layer": [
        
        
    {
        "data": {
            # "values":grouped_grid_tricount_geojson["features"],
            "url": "JSON/grouped_grid_tricount.json",
            "format": {"type": "json"}
        },
          "projection": {"type": "mercator"},
          "mark": "geoshape",
          "encoding": {

           "color": {
                  "field": "properties.Total_Complaint_Types",
                  "type": "quantitative",
                   "title":"complaints count per commmunity"
                },
              
            "tooltip": {"field": "properties.community", "title": "hello"}
          }
    }    
  ]
}
display(alt.display.html_renderer(spec), raw=True)