In [2]:
import json
import os

import pandas as pd

from src.choropleth_map_animation import ChoroplethMapAnimation
from src.heatmap_animation import HeatmapAnimation

# --- Inputs -----------------------------------------------------------------
# Location of the MODZCTA lookup table (a CSV with "MODZCTA" and "ZCTA" columns).
# The same path is also handed to the choropleth animations further down.
shapefile_path = "./data/cityofnewyork/modzcta.csv"

# 2024 service requests.
df = pd.read_parquet("./data/cityofnewyork/service_requests_2024.parquet")

# Read both code columns as strings so they are not parsed as numbers.
zcta_df = pd.read_csv(shapefile_path, dtype={"MODZCTA": str, "ZCTA": str})

# --- ZCTA -> MODZCTA lookup -------------------------------------------------
# A single MODZCTA row may list several ZCTAs separated by ", "; split that
# list and emit one row per individual ZCTA before building the dictionary.
zcta_lists = zcta_df["ZCTA"].str.split(", ")
zcta_long = zcta_df.assign(ZCTA=zcta_lists).explode("ZCTA")
zcta_mapping = zcta_long.set_index("ZCTA")["MODZCTA"].to_dict()

# Tag every request with its MODZCTA; zips absent from the lookup become NaN.
df["MODZCTA"] = df["incident_zip"].map(zcta_mapping)

# Parse created_date once and reuse the parsed series for the derived fields.
created_at = pd.to_datetime(df["created_date"])
df["created_date"] = created_at

# Hour of day and calendar month, used as the animation time axes below.
df["hour"] = created_at.dt.hour
df["month"] = created_at.dt.month

In [None]:
# All city-wide animations are written to one folder; create it up front.
overall_output_path = "./output/event_maps/2024"
os.makedirs(overall_output_path, exist_ok=True)

# (column in df, wording used in the label) for each time axis, in render order.
time_axes = (("month", "Month"), ("hour", "Hour"))

# Choropleth maps first (month, then hour) ...
for date_field, axis_title in time_axes:
    ChoroplethMapAnimation(
        df,
        shapefile_path,
        date_field=date_field,
        label=f"2024 NYC Service Requests Choropleth Map by {axis_title}",
        output_path=overall_output_path,
    ).create_animation()

# ... followed by the heatmaps (month, then hour).
for date_field, axis_title in time_axes:
    HeatmapAnimation(
        df,
        date_field=date_field,
        label=f"2024 NYC Service Requests Heatmap by {axis_title}",
        output_path=overall_output_path,
    ).create_animation()

In [4]:
# Categories assigned by the LLM, keyed by (agency, complaint_type, descriptor).
llm_output = pd.read_csv("./output/llm_categorize_output_2024.csv")

# Attach the LLM labels to every service request in a single pipeline:
#   1. left-join on the three key columns (clashing LLM columns get "_llm"),
#   2. give the label columns explicit "llm_" names,
#   3. discard requests that did not receive both a category and a subcategory.
joined_data = (
    df.merge(
        llm_output,
        how="left",
        on=["agency", "complaint_type", "descriptor"],
        suffixes=("", "_llm"),
    )
    .rename(columns={"category": "llm_category", "subcategory": "llm_subcategory"})
    .dropna(subset=["llm_category", "llm_subcategory"])
)

# Load the list of valid subcategories from the category definition file.
valid_category_file_path = "./data/categories.json"
# Decode explicitly as UTF-8: without an encoding, open() falls back to the
# platform locale, which can mis-decode or reject non-ASCII JSON on some systems.
with open(valid_category_file_path, encoding="utf-8") as file:
    categories_data = json.load(file)

# The file holds a "categories" list; each entry has a "subcategories" list
# whose items carry their name under the "category" key. Flatten those names.
valid_subcategories = [
    subcategory["category"]
    for category in categories_data["categories"]
    for subcategory in category["subcategories"]
]

print("Valid subcategories loaded from categories.json.")
print(f"Valid subcategories: {valid_subcategories}")

# Generate heatmap and choropleth animations for each valid subcategory.
# For every subcategory the render order is: heatmap by hour, heatmap by month,
# choropleth by hour, choropleth by month.
for subcategory in valid_subcategories:
    subcategory_data = joined_data[joined_data["llm_subcategory"] == subcategory]

    # Nothing to draw when no request was labelled with this subcategory;
    # skip it rather than handing an empty frame to the animation classes.
    if subcategory_data.empty:
        print(f"No service requests found for subcategory: {subcategory}; skipping.")
        continue

    # Folder name: lower-case, with spaces and slashes replaced by underscores.
    subcategory_folder = subcategory.lower().replace(" ", "_").replace("/", "_")
    subcategory_output_path = f"./output/event_maps/2024/{subcategory_folder}"
    os.makedirs(subcategory_output_path, exist_ok=True)

    print(f"Starting to creat animations for subcategory: {subcategory}")

    # (column in the data, wording used in the label) for each time axis.
    time_axes = (("hour", "Hour"), ("month", "Month"))

    # Heatmap animations by hour, then by month
    for date_field, axis_title in time_axes:
        HeatmapAnimation(
            subcategory_data,
            date_field=date_field,
            label=f"2024 NYC {subcategory} Service Requests Heatmap by {axis_title}",
            output_path=subcategory_output_path,
        ).create_animation()

    # Choropleth map animations by hour, then by month
    for date_field, axis_title in time_axes:
        ChoroplethMapAnimation(
            subcategory_data,
            shapefile_path,
            date_field=date_field,
            label=f"2024 NYC {subcategory} Service Requests Choropleth Map by {axis_title}",
            output_path=subcategory_output_path,
        ).create_animation()

    print("Done!")

Valid subcategories loaded from categories.json.
Valid subcategories: ['Noise & Disturbances', 'Parking', 'Homeless Assistance', 'Non-Emergency Police Matters', 'Building & Utilities', 'Street & Sidewalk Conditions', 'Waste Management & Sanitation', 'Animals & Pests', 'Consumer Complaints', 'Transportation Services', 'Government Requests & Inquiries', 'Parks & Community', 'Hazardous Materials & Safety', 'Water Quality & Leaks']
Starting to creat animations for subcategory: Noise & Disturbances
Done!
Starting to creat animations for subcategory: Parking
Done!
Starting to creat animations for subcategory: Homeless Assistance
Done!
Starting to creat animations for subcategory: Non-Emergency Police Matters
Done!
Starting to creat animations for subcategory: Building & Utilities
Done!
Starting to creat animations for subcategory: Street & Sidewalk Conditions
Done!
Starting to creat animations for subcategory: Waste Management & Sanitation
Done!
Starting to creat animations for subcategory: 