In [6]:
# Predict Crimes - Forecast Only
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import timedelta
from sklearn.metrics import classification_report, roc_auc_score

# 1. Load model and reference dataset
model = joblib.load("crime_predictor_model.pkl")
df = pd.read_pickle("df_reference.pkl")
print("✅ Loaded model and historical reference dataset")

# 2. Feature setup
features = [
    'Hour', 'DayOfWeek', 'Month', 'IsWeekend',
    'temp', 'rhum', 'prcp', 'snow', 'wspd',
    'RecentArrests', 'RepeatOffenderSignal'
]

# 3. Predict crimes for next 12 hours
print("📡 Predicting next 12 hours of potential crimes...")
last_hour = df['DateHour'].max()
future_hours = pd.date_range(start=last_hour + pd.Timedelta(hours=1), periods=12, freq='h')
locations = df[['LatGrid', 'LonGrid']].drop_duplicates()
sampled_locations = locations.sample(n=100, random_state=42)

forecast_grid = pd.MultiIndex.from_product(
    [future_hours, sampled_locations['LatGrid'], sampled_locations['LonGrid']],
    names=['DateHour', 'LatGrid', 'LonGrid']
)
forecast_df = pd.DataFrame(index=forecast_grid).reset_index()

forecast_df['Hour'] = forecast_df['DateHour'].dt.hour
forecast_df['DayOfWeek'] = forecast_df['DateHour'].dt.dayofweek
forecast_df['Month'] = forecast_df['DateHour'].dt.month
forecast_df['IsWeekend'] = forecast_df['DayOfWeek'].isin([5, 6]).astype(int)

# Recreate hot location map using RecentArrests & RepeatOffenderSignal proxies only
if 'CrimeOccurred' in df.columns:
    hot_locations = df[df['CrimeOccurred'] == 1].groupby(['LatGrid', 'LonGrid']).size().reset_index(name='crime_count')
else:
    print("⚠️ 'CrimeOccurred' not in reference dataset — using overall crime density instead.")
    hot_locations = df.groupby(['LatGrid', 'LonGrid']).size().reset_index(name='crime_count')

forecast_df = forecast_df.merge(hot_locations, on=['LatGrid', 'LonGrid'], how='left')
forecast_df['crime_count'] = forecast_df['crime_count'].fillna(0)
forecast_df['RecentArrests'] = forecast_df['crime_count'] * np.random.uniform(0.1, 0.5)
forecast_df['RepeatOffenderSignal'] = forecast_df['crime_count'] * np.random.uniform(0.3, 1.2)

# Fill weather columns with dummy baseline values (required by model)
weather_cols = ['temp', 'rhum', 'prcp', 'snow', 'wspd']
for col in weather_cols:
    forecast_df[col] = np.random.normal(loc=0.0, scale=1.0, size=len(forecast_df)).astype(np.float32)

forecast_X = forecast_df[features]
forecast_df['PredictedCrimeProb'] = model.predict_proba(forecast_X)[:, -1]
top_preds = forecast_df.sort_values('PredictedCrimeProb', ascending=False).head(10)

# 4. Output
print("\n🔮 Top 10 Predicted Crime Hotspots:")
print(top_preds.reset_index(drop=True)[['DateHour', 'LatGrid', 'LonGrid', 'PredictedCrimeProb']])


✅ Loaded model and historical reference dataset
📡 Predicting next 12 hours of potential crimes...
⚠️ 'CrimeOccurred' not in reference dataset — using overall crime density instead.


ValueError: The feature names should match those that were passed during fit.
Feature names seen at fit time, yet now missing:
- prcp
- rhum
- temp
- wspd
