In [None]:
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium import plugins
import sqlite3
import json

# Load optimal parameters
# JSON text is UTF-8 by specification, so decode it explicitly rather than
# relying on the platform's default encoding.
with open("../data/experiments/optimal_parameters.json", 'r', encoding="utf-8") as f:
    optimal_params = json.load(f)

print("=== CHICAGO CRIME HOTSPOT ANALYSIS ===")
print(f"Using optimal parameters: {optimal_params}")

# Load crime data
# The connection is only needed for this one query, so it is released as soon
# as the read finishes (even when the query raises) instead of staying open
# for the rest of the script. `conn` stays bound, and closing an already
# closed sqlite3 connection again is a harmless no-op.
conn = sqlite3.connect("../data/processed/chicago_crimes.db")
query = "SELECT * FROM crimes"
try:
    crimes_df = pd.read_sql_query(query, conn)
finally:
    conn.close()

print(f"Loaded {len(crimes_df)} crime records")

# Load hotspots: polygon geometries (GeoJSON) and the per-hotspot statistics
# table (CSV) that the analysis sections below read.
hotspots_gdf = gpd.read_file("../data/processed/chicago_crime_hotspots.geojson")
hotspot_analysis = pd.read_csv("../data/processed/hotspot_analysis.csv")

print(f"Identified {len(hotspots_gdf)} crime hotspots")

# === ANALYSIS 1: Hotspot Characteristics ===
print("\n=== HOTSPOT CHARACTERISTICS ===")


def _parse_dict_cell(cell):
    """Return the dict held in a ``hotspot_analysis`` cell, or ``{}`` if absent.

    Cells read back from the CSV arrive as the string form of a dict and are
    evaluated; values that are already dicts pass through unchanged. Anything
    else (for example the NaN pandas uses for an empty cell) yields an empty
    dict so callers can iterate without special-casing missing data.
    """
    if isinstance(cell, str):
        # NOTE(review): eval() executes whatever expression the CSV contains.
        # That is only acceptable while the file is produced by our own
        # pipeline. Prefer storing JSON (json.loads) or switching to
        # ast.literal_eval once it is confirmed that the stored text contains
        # plain literals only.
        cell = eval(cell)
    return cell if isinstance(cell, dict) else {}


# Top hotspots by crime density
top_hotspots = hotspot_analysis.sort_values('crimes_per_sq_km', ascending=False).head(10)
print("Top 10 Hotspots by Crime Density:")
print(top_hotspots[['hotspot_id', 'crime_count', 'area_sq_km', 'crimes_per_sq_km']])

# 2x2 overview figure: size distribution, size vs. count, crime types, peak hours
plt.figure(figsize=(12, 8))

# Hotspot size distribution
plt.subplot(2, 2, 1)
plt.hist(hotspots_gdf['area_sq_km'], bins=15, alpha=0.7, color='skyblue')
plt.title('Distribution of Hotspot Sizes')
plt.xlabel('Area (km²)')
plt.ylabel('Number of Hotspots')

plt.subplot(2, 2, 2)
plt.scatter(hotspot_analysis['area_sq_km'], hotspot_analysis['crime_count'], alpha=0.7)
plt.title('Hotspot Size vs Crime Count')
plt.xlabel('Area (km²)')
plt.ylabel('Crime Count')

# Crime type analysis in hotspots
plt.subplot(2, 2, 3)
crime_type_data = []
for cell in hotspot_analysis['top_crime_types']:
    for crime_type, count in _parse_dict_cell(cell).items():
        crime_type_data.append({'crime_type': crime_type, 'count': count})

# Only aggregate and plot when something was collected: an empty DataFrame has
# no 'crime_type' column to group by, and an empty Series cannot be bar-plotted.
if crime_type_data:
    crime_type_df = pd.DataFrame(crime_type_data)
    crime_type_summary = (
        crime_type_df.groupby('crime_type')['count']
        .sum()
        .sort_values(ascending=False)
        .head(8)
    )
    crime_type_summary.plot(kind='bar')
plt.title('Crime Types in Hotspots')
plt.xticks(rotation=45)
plt.ylabel('Total Count')

# Temporal patterns
plt.subplot(2, 2, 4)
temporal_data = []
for cell in hotspot_analysis['temporal_patterns']:
    peak_hour = _parse_dict_cell(cell).get('peak_hour')
    # pd.notna rejects both None and NaN; a NaN would otherwise break the
    # histogram below.
    if pd.notna(peak_hour):
        temporal_data.append(peak_hour)

if temporal_data:
    # Explicit edges at -0.5, 0.5, ..., 23.5 give exactly one bin per hour of
    # the day. A bare `bins=24` would instead spread 24 bins over whatever
    # min..max range the data happens to cover.
    plt.hist(temporal_data, bins=np.arange(25) - 0.5, alpha=0.7, color='lightcoral')
plt.title('Peak Hours in Hotspots')
plt.xlabel('Hour of Day')
plt.ylabel('Number of Hotspots')

plt.tight_layout()
plt.savefig('../data/exports/hotspot_characteristics.png', dpi=300, bbox_inches='tight')
plt.show()

# === ANALYSIS 2: Geographic Distribution ===
# Console banner marking the start of the map-building section.
print("\n=== GEOGRAPHIC ANALYSIS ===")

# Create comprehensive map
def create_comprehensive_hotspot_map():
    """Build, save and return the interactive hotspot map.

    The map is centred on Chicago and stacks these layers:

    * a heatmap of up to 10,000 randomly sampled crime locations,
    * one polygon per hotspot, filled by crime density, with a details popup,
    * police district outlines, when the optional GeoJSON file can be read,
    * a layer control.

    Reads the module-level ``crimes_df``, ``hotspots_gdf`` and
    ``hotspot_analysis`` and writes the result to
    ``../data/exports/comprehensive_hotspot_map.html``.

    Returns:
        The ``folium.Map`` that was saved.
    """
    # Center on Chicago
    m = folium.Map(location=[41.8781, -87.6298], zoom_start=11, tiles='OpenStreetMap')

    # Add crime heatmap (sample for performance).
    # Rows without coordinates are dropped first: they cannot be drawn and
    # would otherwise use up part of the sample budget.
    located_crimes = crimes_df.dropna(subset=['latitude', 'longitude'])
    crimes_sample = located_crimes.sample(min(10000, len(located_crimes)))
    # Column slice instead of a per-row Python loop; yields [[lat, lon], ...].
    heat_data = crimes_sample[['latitude', 'longitude']].to_numpy().tolist()

    heatmap_layer = plugins.HeatMap(heat_data, radius=10, blur=15, max_zoom=1, name='Crime Heatmap')
    heatmap_layer.add_to(m)

    # Add hotspot boundaries
    hotspot_layer = folium.FeatureGroup(name='Crime Hotspots')
    for _, hotspot in hotspots_gdf.iterrows():
        # Get hotspot analysis data; hotspots without a matching row are skipped.
        analysis_row = hotspot_analysis[hotspot_analysis['hotspot_id'] == hotspot['id']]
        if analysis_row.empty:
            continue
        analysis = analysis_row.iloc[0]

        # Create popup with details. The last line shows the whole
        # `temporal_patterns` value, so it is labelled as such rather than as
        # a single percentage.
        popup_html = f"""
            <b>Hotspot #{hotspot['id']}</b><br>
            Crime Count: {analysis['crime_count']}<br>
            Area: {hotspot['area_sq_km']:.2f} km²<br>
            Density: {analysis['crimes_per_sq_km']:.1f} crimes/km²<br>
            Temporal Patterns: {analysis['temporal_patterns']}
            """

        # Color based on density (crimes per km²)
        density = analysis['crimes_per_sq_km']
        if density > 100:
            color = 'red'
        elif density > 50:
            color = 'orange'
        else:
            color = 'yellow'

        folium.GeoJson(
            hotspot['geometry'].__geo_interface__,
            # `color=color` binds the current value as a default argument;
            # a plain closure would see only the last loop iteration's color.
            style_function=lambda x, color=color: {
                'fillColor': color,
                'color': 'black',
                'weight': 2,
                'fillOpacity': 0.6
            },
            popup=folium.Popup(popup_html, max_width=300)
        ).add_to(hotspot_layer)

    hotspot_layer.add_to(m)

    # Add district boundaries if available. This layer is best-effort, so a
    # failure is reported and the map is still produced. Catching Exception
    # rather than using a bare `except:` lets KeyboardInterrupt and SystemExit
    # propagate.
    try:
        districts = gpd.read_file("../data/external/chicago_police_districts.geojson")
        district_layer = folium.FeatureGroup(name='Police Districts')

        folium.GeoJson(
            districts,
            style_function=lambda x: {
                'fillColor': 'transparent',
                'color': 'blue',
                'weight': 1,
                'dashArray': '5, 5'
            }
        ).add_to(district_layer)

        district_layer.add_to(m)
    except Exception as exc:
        print(f"District boundaries not available: {exc}")

    # Add layer control
    folium.LayerControl().add_to(m)

    # Save map
    map_path = "../data/exports/comprehensive_hotspot_map.html"
    m.save(map_path)
    print(f"Comprehensive hotspot map saved to {map_path}")

    return m

# Create the comprehensive map
# The call also writes the HTML file; the returned folium.Map is kept in
# `comprehensive_map`.
comprehensive_map = create_comprehensive_hotspot_map()

# === ANALYSIS 3: Effectiveness Metrics ===
print("\n=== HOTSPOT EFFECTIVENESS ===")

# Calculate overall effectiveness.
# The pandas sums are NumPy scalars; converting them to built-in int/float
# keeps the metrics dict made of plain Python values, which `json` encodes as
# numbers instead of needing a fallback conversion.
total_crimes = len(crimes_df)
crimes_in_hotspots = int(hotspot_analysis['crime_count'].sum())
total_hotspot_area = float(hotspots_gdf['area_sq_km'].sum())

# Chicago total area (approximate)
chicago_total_area = 606  # km²
hotspot_coverage = (total_hotspot_area / chicago_total_area) * 100

# Both ratios are undefined when their denominator is zero (no crimes loaded,
# or no hotspot area); report "n/a" rather than printing nan/inf or raising.
if total_crimes:
    coverage_percentage = f"{(crimes_in_hotspots / total_crimes) * 100:.1f}%"
else:
    coverage_percentage = "n/a"

if total_hotspot_area:
    efficiency = f"{crimes_in_hotspots / total_hotspot_area:.1f} crimes/km²"
else:
    efficiency = "n/a"

effectiveness_metrics = {
    'Total Crimes Analyzed': total_crimes,
    'Crimes in Hotspots': crimes_in_hotspots,
    'Coverage Percentage': coverage_percentage,
    'Total Hotspot Area': f"{total_hotspot_area:.1f} km²",
    'City Coverage': f"{hotspot_coverage:.1f}%",
    'Efficiency': efficiency,
}

print("Hotspot Detection Effectiveness:")
for metric, value in effectiveness_metrics.items():
    print(f"- {metric}: {value}")

# === ANALYSIS 4: Recommendations ===
print("\n=== RECOMMENDATIONS FOR POLICE DEPLOYMENT ===")


def _json_default(value):
    """Convert a value that ``json`` cannot encode natively.

    NumPy scalars and arrays become their built-in Python equivalents so that
    numbers stay numbers in the output file. Anything else falls back to its
    string form, as a blanket ``default=str`` would.
    """
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    return str(value)


# Top priority hotspots: highest density first, crime count as tie-breaker
priority_hotspots = hotspot_analysis.sort_values(
    ['crimes_per_sq_km', 'crime_count'],
    ascending=[False, False]
).head(5)

print("Top 5 Priority Hotspots for Increased Patrol:")
# Iterate the three needed columns directly: each column keeps its own dtype,
# whereas iterrows() builds one Series per row and can upcast integer ids and
# counts to floats.
for hotspot_id, crime_count, density in zip(
    priority_hotspots['hotspot_id'],
    priority_hotspots['crime_count'],
    priority_hotspots['crimes_per_sq_km'],
):
    print(f"- Hotspot #{hotspot_id}: {crime_count} crimes, "
          f"{density:.1f} crimes/km²")

# Save final results summary
results_summary = {
    'analysis_date': pd.Timestamp.now().isoformat(),
    'total_crimes_analyzed': int(total_crimes),
    'hotspots_identified': int(len(hotspots_gdf)),
    'effectiveness_metrics': effectiveness_metrics,
    'top_priority_hotspots': priority_hotspots['hotspot_id'].tolist(),
    'parameters_used': optimal_params
}

with open('../data/exports/hotspot_analysis_summary.json', 'w', encoding='utf-8') as f:
    json.dump(results_summary, f, indent=2, default=_json_default)

print("\nHotspot analysis completed!")
print("Results saved to: hotspot_analysis_summary.json")

# Release the SQLite connection opened when the crime data was loaded.
conn.close()