In [1]:
import pandas as pd
import json
import geopandas as gpd
from shapely.geometry import Point
import altair as alt

alt.data_transformers.enable('json')

DataTransformerRegistry.enable('json')

In [2]:
geojson_path = "Communities-Chicago.geojson"
with open(geojson_path, 'r') as f:
    communities_geojson = json.load(f)

In [3]:
# Load and preprocess CSV data
complaints_path = 'CDPH_Environmental_Complaints.csv'
complaints_df = pd.read_csv(complaints_path)

# Lowercase the 'COMPLAINT TYPE' column
complaints_df['COMPLAINT TYPE'] = complaints_df['COMPLAINT TYPE'].str.lower()

# Convert 'COMPLAINT DATE' to a datetime object and categorize decades
complaints_df['COMPLAINT DATE'] = pd.to_datetime(complaints_df['COMPLAINT DATE'])
complaints_df.dropna(subset=['COMPLAINT DATE'], inplace=True)
# Extract the year from 'COMPLAINT DATE' and store it in a new column 'COMPLAINT YEAR'
complaints_df['COMPLAINT YEAR'] = complaints_df['COMPLAINT DATE'].dt.year

# Extract latitude and longitude from the 'LOCATION' column
complaints_df['LATITUDE'] = complaints_df['LOCATION'].str.extract(r'POINT \((.*?) (.*?)\)')[1].astype(float)
complaints_df['LONGITUDE'] = complaints_df['LOCATION'].str.extract(r'POINT \((.*?) (.*?)\)')[0].astype(float)

# Load community geometries
communities_gdf = gpd.read_file(geojson_path)

# Create a GeoDataFrame with point geometries for complaints
geometry = [Point(xy) for xy in zip(complaints_df['LONGITUDE'], complaints_df['LATITUDE'])]
complaints_gdf = gpd.GeoDataFrame(complaints_df, geometry=geometry)

# Perform a spatial join to associate complaints with neighborhoods
complaints_with_neighborhood = gpd.sjoin(complaints_gdf, communities_gdf, how='left', op='within')

complaints_df = complaints_with_neighborhood[['COMPLAINT TYPE', 'COMPLAINT YEAR', 'LATITUDE', 'LONGITUDE', 'area_num_1', 'INSPECTOR', 'DIRECTION']]

print(complaints_df.head(15))

  if await self.run_code(code, result, async_=asy):
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:4326

  complaints_with_neighborhood = gpd.sjoin(complaints_gdf, communities_gdf, how='left', op='within')


                               COMPLAINT TYPE  COMPLAINT YEAR   LATITUDE  \
0                             noise complaint            1994  41.882436   
1                    air pollution work order            2008  41.882841   
2                             noise complaint            1996  41.883306   
3                    air pollution work order            1996  41.883341   
4                    air pollution work order            1995  41.909701   
5   service stations/storage tanks work order            2000  41.979658   
6                    air pollution work order            1995  41.868770   
7                    air pollution work order            1994  41.741261   
8                  illegal dumping work order            2000  41.709820   
9                         asbestos work order            1995  41.869520   
10                                      other            1993  41.707960   
11                            noise complaint            1993  41.705268   
12          

In [4]:
communities_gdf = gpd.read_file(geojson_path)
communities_gdf.crs = "EPSG:4326"

# Calculate the centroids
centroids = communities_gdf['geometry'].centroid

# Extract the latitude and longitude and add them as new columns
communities_gdf['latitude_centroid'] = centroids.y
communities_gdf['longitude_centroid'] = centroids.x

communities_gdf = communities_gdf[['area_num_1', 'community', 'latitude_centroid', 'longitude_centroid']]
print(communities_gdf)

   area_num_1        community  latitude_centroid  longitude_centroid
0          35          DOUGLAS          41.835118          -87.618678
1          36          OAKLAND          41.823750          -87.603216
2          37      FULLER PARK          41.809085          -87.632425
3          38  GRAND BOULEVARD          41.812949          -87.617860
4          39          KENWOOD          41.808916          -87.596184
..        ...              ...                ...                 ...
72         74  MOUNT GREENWOOD          41.694879          -87.713192
73         75      MORGAN PARK          41.689730          -87.669054
74         76            OHARE          41.975684          -87.893701
75         77        EDGEWATER          41.986712          -87.663417
76          9      EDISON PARK          42.007613          -87.813781

[77 rows x 4 columns]



  centroids = communities_gdf['geometry'].centroid


In [5]:
# Merge the two dataframes on the 'area_num_1' column
merged_df = complaints_df.merge(communities_gdf, on='area_num_1', how='left')

# Print the merged dataframe
merged_df = merged_df.dropna()
print(merged_df.head(25))

                               COMPLAINT TYPE  COMPLAINT YEAR   LATITUDE  \
0                             noise complaint            1994  41.882436   
1                    air pollution work order            2008  41.882841   
2                             noise complaint            1996  41.883306   
3                    air pollution work order            1996  41.883341   
4                    air pollution work order            1995  41.909701   
5   service stations/storage tanks work order            2000  41.979658   
6                    air pollution work order            1995  41.868770   
7                    air pollution work order            1994  41.741261   
8                  illegal dumping work order            2000  41.709820   
9                         asbestos work order            1995  41.869520   
10                                      other            1993  41.707960   
11                            noise complaint            1993  41.705268   
12          

In [6]:
# # Filter the data for N, S, E, W directions
# selected_directions = ['N', 'E', 'S', 'W']
# filtered_directions = merged_df[merged_df['DIRECTION'].isin(selected_directions)]

# # Aggregate count of complaints for each direction
# direction_counts = filtered_directions.groupby('DIRECTION').size().reset_index(name='Count')

# # Create a new column for the direction labels
# direction_counts['Direction'] = direction_counts['DIRECTION']

# # Define colors for each direction
# direction_colors = {'N': '#3498db', 'S': '#e74c3c', 'E': '#2ecc71', 'W': '#f39c12'}

# # Define the tooltips with directional arrows
# direction_tooltips = {
#     'N': 'North (↑)',
#     'E': 'East (→)',
#     'S': 'South (↓)',
#     'W': 'West (←)'
# }
# direction_counts['Tooltip'] = direction_counts['Direction'].map(direction_tooltips)

# # Define the order of the directions
# direction_order = {'N': 0, 'E': 1, 'S': 2, 'W': 3}

# # Add a new column for the order
# direction_counts['Order'] = direction_counts['Direction'].map(direction_order)

# # Define the pie chart with equal angles and varying widths
# pie_chart = alt.Chart(direction_counts).mark_arc().encode(
#     alt.Color('Direction:N', scale=alt.Scale(domain=list(direction_colors.keys()), range=list(direction_colors.values()))),
#     alt.Size('Count:Q'),
#     tooltip=['Tooltip:N', 'Count:Q'],
#     theta='Count:Q',  # Use count for the angle
#     order='Order'  # Use the order encoding
# ).properties(
#     width=300,
#     height=300,
#     title='Pie Chart of Complaints by Direction (N, S, E, W)'
# )

# # Display the pie chart
# pie_chart


In [7]:
# import altair as alt

# # Your data and preprocessing code here...

# # Filter the data for N, S, E, W directions
# selected_directions = ['N', 'E', 'S', 'W']
# filtered_directions = merged_df[merged_df['DIRECTION'].isin(selected_directions)]

# # Aggregate count of complaints for each direction
# direction_counts = filtered_directions.groupby('DIRECTION').size().reset_index(name='Count')

# # Create a new column for the direction labels
# direction_counts['Direction'] = direction_counts['DIRECTION']

# # Define colors for each direction
# direction_colors = {'N': '#3498db', 'S': '#e74c3c', 'E': '#2ecc71', 'W': '#f39c12'}

# # Define the tooltips with directional arrows
# direction_tooltips = {
#     'N': 'North (↑)',
#     'E': 'East (→)',
#     'S': 'South (↓)',
#     'W': 'West (←)'
# }
# direction_counts['Tooltip'] = direction_counts['Direction'].map(direction_tooltips)

# # Define the order of the directions
# direction_order = {'N': 0, 'E': 1, 'S': 2, 'W': 3}

# # Add a new column for the order
# direction_counts['Order'] = direction_counts['Direction'].map(direction_order)

# # Create a selection
# click = alt.selection_single(on='click', fields=['Direction'], empty='all')

# # Define the pie chart with equal angles and varying widths
# pie_chart = alt.Chart(direction_counts).mark_arc().encode(
#     alt.Color('Direction:N', scale=alt.Scale(domain=list(direction_colors.keys()), range=list(direction_colors.values()))),
#     alt.Size('Count:Q'),
#     tooltip=['Tooltip:N', 'Count:Q'],
#     theta='Count:Q',  # Use count for the angle
#     order='Order'  # Use the order encoding
# ).properties(
#     width=600,
#     height=400,
#     title='Pie Chart of Complaints by Direction (N, S, E, W)'
# ).add_selection(
#     click
# )

# # Define the base bar chart
# base_bar_chart = alt.Chart(direction_counts).mark_bar().encode(
#     x='count():Q',
#     y=alt.Y('community:N', title='Community'),
#     color=alt.Color('count():Q', title='Number of Complaints', scale=alt.Scale(scheme='viridis')),
#     tooltip=['community:N', 'count():Q']
# ).transform_filter(
#     click
# ).properties(
#     width=600,
#     height=400,
#     title='Complaints by Community'
# )

# # Combine the charts
# combined_chart = pie_chart | base_bar_chart

# # Display the chart
# combined_chart.display()


In [8]:
import altair as alt

# Create a selection
click = alt.selection_single(on='click', fields=['DIRECTION'], empty='all')

# Define the pie chart with equal angles and varying widths
pie_chart = alt.Chart(merged_df).mark_arc().encode(
    alt.Color('DIRECTION:N', scale=alt.Scale(scheme='plasma')),
    alt.Size('count():Q'),
    tooltip=['DIRECTION:N', 'count():Q'],
    theta='count():Q',  # Use count for the angle
).properties(
    width=600,
    height=400,
    title='Pie Chart of Complaints by Direction (N, S, E, W)'
).add_selection(
    click
)

# Define the base bar chart
base_bar_chart = alt.Chart(merged_df).mark_bar().encode(
    x='count():Q',
    y=alt.Y('community:N', title='Community'),
    color=alt.Color('count():Q', title='Number of Complaints', scale=alt.Scale(scheme='viridis')),
    tooltip=['community:N', 'count():Q']
).transform_filter(
    click
).properties(
    width=600,
    height=800,
    title='Complaints by Community'
)

# Combine the charts
combined_chart = pie_chart | base_bar_chart

# Display the chart
combined_chart.display()

with open("../vega_lite/linked_charts/linked_pie_horizontal_bar/linked_pie_horizontal_bar_raw_files.json", "w") as f:
    f.write(json.dumps(combined_chart.to_dict(), indent=4))

# with open("../JSON/linked_pie_horizontal_bar_merged_df.json", "w") as f:
#     f.write(merged_df.to_json(indent=4))

# with open("../JSON/linked_pie_horizontal_bar_inspector_complaints_top_4.json", "w") as f:
#     f.write(inspector_complaints_top_4.to_json(indent=4))

