In [None]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium import plugins

# Read the processed crime GeoJSON into a GeoDataFrame
crimes_gdf = gpd.read_file("../data/processed/crimes_with_districts.geojson")

print("=== SPATIAL ANALYSIS ===")
print(f"Total crimes: {crimes_gdf.shape[0]}")

# --- 1. Ten districts with the most records (value_counts sorts descending) ---
district_counts = crimes_gdf['dist_num'].value_counts().iloc[:10]
print("\nTop 10 Districts by Crime Count:")
print(district_counts)

# --- 2. Two side-by-side bar charts summarising the district breakdown ---
fig, axes = plt.subplots(1, 2, figsize=(15, 6))
ax_counts, ax_types = axes

# Left panel: record count for each of the ten districts above
district_counts.plot.bar(ax=ax_counts)
ax_counts.set(
    title='Crimes by Police District',
    xlabel='District Number',
    ylabel='Crime Count',
)

# Right panel: stacked primary_type counts, restricted to the five busiest districts
top_districts = district_counts.index[:5]
in_top_districts = crimes_gdf['dist_num'].isin(top_districts)
district_crime_types = (
    crimes_gdf.loc[in_top_districts]
    .groupby(['dist_num', 'primary_type'])
    .size()
    .unstack(fill_value=0)
)
district_crime_types.plot.bar(stacked=True, ax=ax_types)
ax_types.set_title('Crime Types by Top 5 Districts')
# Anchor the legend outside the axes so it does not cover the bars
ax_types.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

fig.tight_layout()
# bbox_inches='tight' keeps the outside legend inside the saved image
fig.savefig('../data/exports/district_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

# 3. Interactive map with Folium
def create_crime_heatmap(crimes_sample, output_file):
    """Build an interactive Folium heatmap of crime locations and save it as HTML.

    The map is centred on Chicago and receives one heat point per row of
    ``crimes_sample`` that has both coordinates. When the police district
    boundary file can be loaded, the district outlines are drawn on top in
    blue; otherwise the map is saved without them.

    Args:
        crimes_sample: DataFrame-like object with ``latitude`` and
            ``longitude`` columns. Rows missing either value are skipped.
        output_file: Path the HTML map is written to.
    """
    # Center on Chicago
    m = folium.Map(location=[41.8781, -87.6298], zoom_start=11)

    # Extract the coordinates column-wise rather than row by row, and drop
    # incomplete rows first: HeatMap rejects NaN coordinates.
    coords = crimes_sample[['latitude', 'longitude']].dropna()
    heat_data = coords.values.tolist()

    # Add heatmap
    plugins.HeatMap(heat_data, radius=15, blur=10).add_to(m)

    # The boundary overlay is best-effort: any ordinary failure (missing file,
    # unreadable data, serialisation problem) is reported and skipped, while
    # KeyboardInterrupt / SystemExit are no longer swallowed.
    try:
        districts = gpd.read_file("../data/external/chicago_police_districts.geojson")
        folium.GeoJson(districts, style_function=lambda x: {
            'fillColor': 'transparent',
            'color': 'blue',
            'weight': 2
        }).add_to(m)
    except Exception as exc:
        print(f"District boundaries not available: {exc}")

    # Save map
    m.save(output_file)
    print(f"Interactive map saved to {output_file}")

# Feed the interactive map at most 5000 randomly drawn records (for performance)
crimes_sample = crimes_gdf.sample(n=min(5000, len(crimes_gdf)))
create_crime_heatmap(crimes_sample, "../data/exports/chicago_crime_heatmap.html")

# --- 4. Temporal-spatial view: record counts per (hour, district) pair ---
hourly_spatial = (
    crimes_gdf.groupby(['hour', 'dist_num'])
    .size()
    .reset_index(name='count')
)
# Reshape to an hour x district grid; combinations with no records become 0
hourly_pivot = hourly_spatial.pivot(index='hour', columns='dist_num', values='count')
hourly_pivot = hourly_pivot.fillna(0)

plt.figure(figsize=(12, 8))
heat_ax = sns.heatmap(hourly_pivot, cmap='YlOrRd', cbar_kws=dict(label='Crime Count'))
# Replace the axis labels seaborn derives from the pivot's index/column names
heat_ax.set_title('Crime Patterns: Hour of Day vs Police District')
heat_ax.set_xlabel('Police District')
heat_ax.set_ylabel('Hour of Day')
plt.tight_layout()
plt.savefig('../data/exports/hourly_district_heatmap.png', dpi=300)
plt.show()

print("Spatial analysis completed!")