In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the 311 snapshot CSV into a DataFrame and preview its first five rows.
df_nyc = pd.read_csv(filepath_or_buffer='data/data_snapshot_for_gdv.csv')
df_nyc.head(n=5)

# Which neighborhood gets help last?

In [None]:
# For each neighborhood take the median of `median_resolution_time_hours`,
# then order the table so the slowest neighborhoods come first.
last_to_help = (
    df_nyc
    .groupby('neighborhood')['median_resolution_time_hours']
    .median()
    .to_frame()
    .sort_values(by='median_resolution_time_hours', ascending=False)
)
last_to_help

In [None]:
import requests, certifi
import geopandas as gpd
import matplotlib.pyplot as plt
from pathlib import Path

# Source of the UHF42 boundaries (raw GeoJSON hosted on GitHub).
# Swap this URL for the DOH / EpiQuery file if that source is preferred.
uhf_url = "https://raw.githubusercontent.com/nychealth/coronavirus-data/master/Geography-resources/UHF_resources/UHF42.geo.json"

# Local file the downloaded GeoJSON is saved to (relative to the working directory).
out_path = Path("UHF42.geojson")

# Fetch the file, validating TLS against the certifi CA bundle, and fail
# loudly on any HTTP error status before anything is written to disk.
resp = requests.get(url=uhf_url, verify=certifi.where(), timeout=60)
resp.raise_for_status()
with out_path.open("wb") as geojson_file:
    geojson_file.write(resp.content)

# Load the saved boundaries, then reproject them to EPSG:3857 for plotting.
g_uhf_raw = gpd.read_file(out_path)
g_uhf = g_uhf_raw.to_crs(3857)

In [None]:
# PRECISE MAPPING: attach the `last_to_help` resolution times to the UHF geodata
import pandas as pd
import numpy as np

# Area names that are looked up in the geodata's GEONAME column.
# Each name maps to itself, i.e. the complaint data and the geodata are
# expected to spell these areas identically (including 'Crotona -Tremont').
_uhf_area_names = [
    'Williamsburg - Bushwick',
    'Northeast Bronx',
    'East Flatbush - Flatbush',
    'Bedford Stuyvesant - Crown Heights',
    'Pelham - Throgs Neck',
    'Bensonhurst - Bay Ridge',
    'Downtown - Heights - Park Slope',
    'Coney Island - Sheepshead Bay',
    'Central Harlem - Morningside Heights',
    'Hunts Point - Mott Haven',
    'Southeast Queens',
    'Southwest Queens',
    'West Queens',
    'Rockaways',
    'Long Island City - Astoria',
    'Flushing - Clearview',
    'Bayside - Little Neck',
    'Fresh Meadows',
    'Jamaica',
    'Ridgewood - Forest Hills',
    'Upper East Side',
    'Upper West Side',
    'Washington Heights',
    'Gramercy Park - Murray Hill',
    'Greenwich Village - SoHo',
    'Lower East Side',
    'Chelsea - Clinton',
    'Union Square - Lower East Side',
    'Kingsbridge - Riverdale',
    'High Bridge - Morrisania',
    'Southwest Bronx',
    'Crotona -Tremont',
    'Fordham - Bronx Pk',
    'East New York',
    'Greenpoint',
    'Sunset Park',
    'Borough Park',
    'East Harlem',
    'Stapleton - St. George',
    'South Beach - Tottenville',
    'Willowbrook',
    'Port Richmond',
]

# Data name -> geodata name (identity mapping built from the list above).
uhf_name_mapping = dict(zip(_uhf_area_names, _uhf_area_names))

# Geodata name -> data name, used to translate GEONAME values back.
reverse_mapping = dict(zip(uhf_name_mapping.values(), uhf_name_mapping.keys()))

# Neighborhood -> median resolution time, taken from the `last_to_help` table.
_resolution_by_area = last_to_help['median_resolution_time_hours']
last_to_help_dict = _resolution_by_area.to_dict()

# Work on a copy so the original geodata frame stays untouched.
g_uhf_with_data = g_uhf.copy()

# GEONAME values missing from the mapping become NaN in `data_name`, and
# names missing from `last_to_help` become NaN in `resolution_time`.
_geo_names = g_uhf_with_data['GEONAME']
g_uhf_with_data['data_name'] = _geo_names.map(reverse_mapping)
g_uhf_with_data['resolution_time'] = g_uhf_with_data['data_name'].map(last_to_help_dict)

In [None]:
# FINAL CHOROPLETH MAP: UHF areas colored by 311 problem resolution time,
# next to a ranking of the ten slowest areas.
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import pandas as pd

# NOTE: `resolution_time` is the per-neighborhood median of
# `median_resolution_time_hours`, so the axis/legend labels say "Median"
# (they previously said "Average", which misdescribed the statistic).

# Create combined visualization
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 12), dpi=150)

# LEFT MAP: choropleth of UHF areas
# Split the areas by whether a resolution time could be joined onto them.
has_data = g_uhf_with_data['resolution_time'].notna()
areas_with_data = g_uhf_with_data[has_data]
areas_without_data = g_uhf_with_data[~has_data]

# Areas with data - colored by resolution time
if len(areas_with_data) > 0:
    areas_with_data.plot(ax=ax1, column='resolution_time', cmap='Reds',
                         edgecolor='white', linewidth=0.8, legend=True,
                         legend_kwds={'label': 'Median resolution time (hours)',
                                      'shrink': 0.8, 'aspect': 30})

# Areas without data - neutral gray so they remain visible on the map
if len(areas_without_data) > 0:
    areas_without_data.plot(ax=ax1, color='lightgray', edgecolor='white',
                            linewidth=0.8, alpha=0.6)

ax1.set_title('NYC: 311 Problem Resolution Time by UHF Areas\n(Precise Mapping)',
              fontsize=16, fontweight='bold', pad=20)
ax1.set_axis_off()
ax1.set_aspect("equal")

# RIGHT CHART: top-10 slowest areas
top_10_worst = areas_with_data.nlargest(10, 'resolution_time')

if len(top_10_worst) > 0:
    y_pos = range(len(top_10_worst))
    resolution_times = top_10_worst['resolution_time'].values
    # Truncate long area names so the tick labels stay readable.
    area_names = [name[:25] + '...' if len(name) > 25 else name
                  for name in top_10_worst['GEONAME'].values]

    # Build the normalizer once and color every bar by its resolution time.
    norm = plt.Normalize(vmin=resolution_times.min(),
                         vmax=resolution_times.max())
    colors_bars = plt.cm.Reds(norm(resolution_times))

    bars = ax2.barh(y_pos, resolution_times, color=colors_bars,
                    edgecolor='black', linewidth=0.5, alpha=0.8)

    # Value labels: anchored at the bar end and shifted by a fixed number of
    # points, so the gap does not depend on the scale of the data.
    for bar, hours in zip(bars, resolution_times):
        ax2.annotate(f'{hours:.1f}h',
                     xy=(bar.get_width(), bar.get_y() + bar.get_height() / 2),
                     xytext=(4, 0), textcoords='offset points',
                     ha='left', va='center', fontweight='bold')

    ax2.set_yticks(y_pos)
    ax2.set_yticklabels(area_names, fontsize=10)
    ax2.invert_yaxis()  # Worst on top
else:
    # Nothing could be joined: show a note instead of failing on an empty array.
    ax2.text(0.5, 0.5, 'No UHF areas matched the resolution-time data',
             transform=ax2.transAxes, ha='center', va='center', fontsize=12)

ax2.set_xlabel('Median resolution time (hours)', fontsize=12, fontweight='bold')
ax2.set_title('Top-10 worst UHF areas\n(according to problem resolution time)',
              fontsize=14, fontweight='bold')
ax2.grid(axis='x', alpha=0.3, linestyle='--')

# General heading, including how many areas received data
fig.suptitle(f'Analysis of 311 NYC Service Effectiveness by UHF Areas\n'
             f'(Mapped {len(areas_with_data)} out of {len(g_uhf_with_data)} areas)',
             fontsize=18, fontweight='bold', y=0.95)

plt.tight_layout()
plt.show()

# Which complaints take too long to resolve?

In [None]:
# Median of `median_resolution_time_hours` per complaint type, keeping only
# the ten complaint types with the largest values (slowest first).
last_to_solve = (
    df_nyc
    .groupby('complaint_type')['median_resolution_time_hours']
    .median()
    .to_frame()
    .sort_values(by='median_resolution_time_hours', ascending=False)
    .head(10)
)
last_to_solve

In [None]:
# Bar chart of the ten complaint types in `last_to_solve`, drawn on an
# explicit Axes so that no call depends on pyplot's "current figure" state.
fig, ax = plt.subplots(figsize=(12, 8))
bars = ax.bar(range(len(last_to_solve)), last_to_solve['median_resolution_time_hours'],
              color='steelblue', alpha=0.8)

# Value labels on top of the bars. The gap is expressed in points rather than
# as a fixed number of hours, so it looks the same whatever the data scale is.
for bar in bars:
    height = bar.get_height()
    ax.annotate(f'{height:.0f}h',
                xy=(bar.get_x() + bar.get_width() / 2., height),
                xytext=(0, 3), textcoords='offset points',
                ha='center', va='bottom', fontsize=10)

# Customize the chart
ax.set_title('Top 10 Complaint Types with Longest Resolution Times',
             fontsize=16, fontweight='bold', pad=20)
ax.set_xlabel('Complaint Type', fontsize=12, fontweight='bold')
ax.set_ylabel('Median Resolution Time (Hours)', fontsize=12, fontweight='bold')

# One tick per bar, labelled with the complaint type and rotated for readability
ax.set_xticks(range(len(last_to_solve)))
ax.set_xticklabels(last_to_solve.index, rotation=45, ha='right', fontsize=10)
ax.tick_params(axis='y', labelsize=10)

# Horizontal grid lines make the bar heights easier to compare
ax.grid(axis='y', alpha=0.3, linestyle='--')

# Adjust layout to prevent label cutoff
fig.tight_layout()

# Display the chart
plt.show()

# What is the biggest pain in the city?

In [None]:
# Per complaint type: total complaint count and median of the reported
# median resolution times; keep the ten types with the most complaints.
top_city_pains = (
    df_nyc
    .groupby('complaint_type')
    .agg(
        complaint_count=('complaint_count', 'sum'),
        median_resolution_time_hours=('median_resolution_time_hours', 'median'),
    )
    .sort_values(by='complaint_count', ascending=False)
    .head(10)
)
top_city_pains

In [None]:
# Two-panel figure for the ten most frequent complaint types:
# left = complaint volume (bar color encodes resolution time),
# right = median resolution time.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8))

complaint_counts = top_city_pains['complaint_count']
resolution_hours = top_city_pains['median_resolution_time_hours']
bar_positions = range(len(top_city_pains))

# Chart 1: Complaint Count (horizontal bars colored by resolution time)
# Dark colors for long resolution times, light colors for short resolution times.
# Named `bar_colors` so the name `colors` is not rebound; an earlier cell
# imports `matplotlib.colors` under that name.
bar_colors = plt.cm.YlOrRd((resolution_hours / resolution_hours.max()).to_numpy())
bars1 = ax1.barh(bar_positions, complaint_counts,
                 color=bar_colors, alpha=0.8, edgecolor='black', linewidth=0.5)

# Value labels: offset in points from the bar end, independent of data scale
for position, count in zip(bar_positions, complaint_counts):
    ax1.annotate(f'{count:,}', xy=(count, position),
                 xytext=(4, 0), textcoords='offset points',
                 va='center', ha='left', fontsize=9)

ax1.set_yticks(bar_positions)
ax1.set_yticklabels(top_city_pains.index, fontsize=10)
ax1.set_xlabel('Number of Complaints', fontsize=12, fontweight='bold')
ax1.set_title('Top 10 Complaint Types by Volume\n(Color = Resolution Time)', fontsize=14, fontweight='bold')
ax1.grid(axis='x', alpha=0.3, linestyle='--')
ax1.invert_yaxis()  # Most frequent complaint type on top

# Chart 2: Resolution Time (horizontal bars), same row order as chart 1
bars2 = ax2.barh(bar_positions, resolution_hours,
                 color='coral', alpha=0.8, edgecolor='black', linewidth=0.5)

# Value labels, using the same point-based offset as chart 1
for position, hours in zip(bar_positions, resolution_hours):
    ax2.annotate(f'{hours:.1f}h', xy=(hours, position),
                 xytext=(4, 0), textcoords='offset points',
                 va='center', ha='left', fontsize=9)

ax2.set_yticks(bar_positions)
ax2.set_yticklabels(top_city_pains.index, fontsize=10)
ax2.set_xlabel('Median Resolution Time (Hours)', fontsize=12, fontweight='bold')
ax2.set_title('Resolution Time for Each Complaint Type', fontsize=14, fontweight='bold')
ax2.grid(axis='x', alpha=0.3, linestyle='--')
ax2.invert_yaxis()  # Keep rows aligned with chart 1

# Add overall title
fig.suptitle('NYC 311 Complaints Analysis: Volume vs Resolution Time', fontsize=16, fontweight='bold', y=0.98)

plt.tight_layout()
plt.show()