In [21]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# -------------------------------
# Step 1: Load train & test data
# -------------------------------
train_df = pd.read_csv("/Users/Intel/Downloads/DatasetsHexAssigned/trainassigned.csv")
test_df = pd.read_csv("/Users/Intel/Downloads/DatasetsHexAssigned/testassigned.csv")

# Ensure consistency
df = pd.concat([train_df, test_df])
df["date"] = pd.to_datetime(df["date"])
df["cell_id"] = df["cell_id"].astype(str)
df["crime_count"] = df["VICTIM_COUNT"]

# Aggregate by cell_id and date
daily_crime = df.groupby(["cell_id", "date"])["crime_count"].sum().reset_index()

# -----------------------------------
# Step 2: Add lag + rolling features
# -----------------------------------
def add_features(group):
    group = group.sort_values("date")
    group["lag_1"] = group["crime_count"].shift(1)
    group["rolling_7"] = group["crime_count"].rolling(window=7).mean()
    return group

daily_crime = daily_crime.groupby("cell_id").apply(add_features).dropna().reset_index(drop=True)

# -----------------------------------
# Step 3: Scaling
# -----------------------------------
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X = scaler_X.fit_transform(daily_crime[["lag_1", "rolling_7"]])
y = scaler_y.fit_transform(daily_crime[["crime_count"]])
meta = daily_crime[["date", "cell_id", "crime_count"]].reset_index(drop=True)

# Split based on dates
split_date = test_df["date"].min()
X_train = X[daily_crime["date"] < split_date]
X_test = X[daily_crime["date"] >= split_date]
y_train = y[daily_crime["date"] < split_date]
y_test = y[daily_crime["date"] >= split_date]
meta_test = meta[daily_crime["date"] >= split_date].reset_index(drop=True)

# -----------------------------------
# Step 4: PyTorch Dataset
# -----------------------------------
class CrimeDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)

    def __len__(self): return len(self.X)
    def __getitem__(self, idx): return self.X[idx], self.y[idx]

train_loader = DataLoader(CrimeDataset(X_train, y_train), batch_size=32, shuffle=True)
test_loader = DataLoader(CrimeDataset(X_test, y_test), batch_size=32, shuffle=False)

# -----------------------------------
# Step 5: LSTM Model
# -----------------------------------
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        x = x.unsqueeze(1)  # Add sequence dim
        out, _ = self.lstm(x)
        return self.fc(out[:, -1, :])

model = LSTMModel(input_size=2, hidden_size=32)
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# -----------------------------------
# Step 6: Train
# -----------------------------------
for epoch in range(10):
    model.train()
    total_loss = 0
    for xb, yb in train_loader:
        preds = model(xb).squeeze()
        loss = loss_fn(preds, yb.squeeze())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")

# -----------------------------------
# Step 7: Predict & Evaluate
# -----------------------------------
model.eval()
all_preds, all_actuals = [], []

with torch.no_grad():
    for xb, yb in test_loader:
        pred = model(xb).squeeze().numpy()
        all_preds.extend(pred)
        all_actuals.extend(yb.squeeze().numpy())

# Inverse transform
pred_inv = scaler_y.inverse_transform(np.array(all_preds).reshape(-1, 1)).flatten()
actual_inv = scaler_y.inverse_transform(np.array(all_actuals).reshape(-1, 1)).flatten()

# Save
meta_test = meta_test.copy()
meta_test["predicted_crime_count"] = pred_inv
meta_test.rename(columns={"crime_count": "actual_crime_count"}, inplace=True)
meta_test.to_csv("lstm_forecast_per_cell.csv", index=False)

# Metrics
rmse = np.sqrt(mean_squared_error(actual_inv, pred_inv))
mae = mean_absolute_error(actual_inv, pred_inv)
r2 = r2_score(actual_inv, pred_inv)
mape = np.mean(np.abs((actual_inv - pred_inv) / (actual_inv + 1e-8))) * 100

print(f"\n✅ RMSE: {rmse:.4f}")
print(f"✅ MAE: {mae:.4f}")
print(f"✅ R² Score: {r2:.4f}")
print(f"✅ MAPE: {mape:.2f}%")
print("📁 Saved as 'lstm_forecast_per_cell.csv'")


Epoch 1, Loss: 0.6205
Epoch 2, Loss: 0.5889
Epoch 3, Loss: 0.5866
Epoch 4, Loss: 0.5818
Epoch 5, Loss: 0.5812
Epoch 6, Loss: 0.5811
Epoch 7, Loss: 0.5801
Epoch 8, Loss: 0.5775
Epoch 9, Loss: 0.5766
Epoch 10, Loss: 0.5760

✅ RMSE: 1.1107
✅ MAE: 0.6147
✅ R² Score: 0.4179
✅ MAPE: 35.61%
📁 Saved as 'lstm_forecast_per_cell.csv'
