The following code is taken from my original experimentation in a Jupyter notebook (Google Colab).
(It probably won't work as-is outside of that environment.)

In [None]:
# Mount Google Drive at /content/drive; the data files read and the model
# artifacts written by the later cells all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [2]:
import pandas as pd
import json
import re

from datetime import datetime

# Raw incident export and the "general location" -> coordinates lookup, both stored on Drive.
crimes_excel = pd.read_excel('/content/drive/MyDrive/Colab Notebooks/CivSentinel/Data/iowa_city_raw_6_29_2025.xlsx')
with open('/content/drive/MyDrive/Colab Notebooks/CivSentinel/Data/iowa_city_location_mapping.json') as f:
    crime_mapping_dict = json.load(f)

# Matches a 'YYYY-MM-DD HH:MM:SS' timestamp inside the stringified "Date Reported" cell.
DATE_REGEX = r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}'

# A row is only usable when all of these cells are populated.
REQUIRED_COLUMNS = ['Associated ID', 'Date Reported', 'Date/Time Occurred', 'General Location']
CLEANED_COLUMNS = ['associated_id', 'general_location', 'natures_of_crime', 'date', 'latitude', 'longitude']

# Collect plain dicts and build the frame once at the end: concatenating a
# one-row DataFrame per iteration is quadratic and leaves every row with index 0.
cleaned_records = []

for _, row in crimes_excel.dropna(subset=REQUIRED_COLUMNS).iterrows():
  date = re.search(DATE_REGEX, str(row['Date Reported']))
  if date is None:
    # re.search returns None when nothing matches; skip the row instead of
    # crashing on date.group(0).
    continue

  # str() guards against non-string cells (e.g. numeric location codes).
  general_location = str(row['General Location']).lower().strip()

  # Unknown locations and locations mapped to null in the JSON carry no coordinates.
  location_info = crime_mapping_dict.get(general_location)
  if not isinstance(location_info, dict):
    continue

  cleaned_records.append({
      'associated_id': row['Associated ID'],
      'general_location': general_location,
      'natures_of_crime': row['Nature of Crime(s)'],
      'date': datetime.strptime(date.group(0), '%Y-%m-%d %H:%M:%S'),
      'latitude': location_info['coordinates'][0],
      'longitude': location_info['coordinates'][1],
  })

# Passing the column list keeps the schema stable even when no row survives the filters.
cleaned_data = pd.DataFrame(cleaned_records, columns=CLEANED_COLUMNS)

ModuleNotFoundError: No module named 'pandas'

In [None]:
!pip install h3

import torch
import h3
from h3 import LatLngPoly

# H3 resolution shared by the grid, the per-incident cell lookup and the graph.
H3_RESOLUTION = 9

coords_latitude = cleaned_data['latitude'].tolist()
coords_longitude = cleaned_data['longitude'].tolist()

max_lat, min_lat = max(coords_latitude), min(coords_latitude)
max_lon, min_lon = max(coords_longitude), min(coords_longitude)

# Axis-aligned bounding box around every geocoded incident.
polygon = LatLngPoly(
  [
    (min_lat, min_lon),
    (min_lat, max_lon),
    (max_lat, max_lon),
    (max_lat, min_lon),
    (min_lat, min_lon)
  ]
)

# Cell containing each incident.
cleaned_data['h3_index'] = [
    h3.latlng_to_cell(lat, lon, H3_RESOLUTION)
    for lat, lon in zip(coords_latitude, coords_longitude)
]

# polygon_to_cells only returns cells whose centre lies inside the polygon, so
# incidents sitting on the edge of the bounding box can fall in a cell that is
# not returned. Append every observed cell so no incident is silently dropped
# by the reindex below. dict.fromkeys de-duplicates while preserving order,
# which keeps node ids contiguous.
all_hexes = list(dict.fromkeys([
    *h3.polygon_to_cells(polygon, H3_RESOLUTION),
    *cleaned_data['h3_index'],
]))

# Bucket incidents by calendar day. Without normalising, any time-of-day in
# 'date' would make the grouped columns miss the daily range and be zeroed out.
crime_days = pd.to_datetime(cleaned_data['date']).dt.normalize()

earliest_date = crime_days.min()
latest_date = crime_days.max()

full_range = pd.date_range(start=earliest_date, end=latest_date, freq="D")

# Rows: H3 cells (in all_hexes order); columns: every day in the observed range.
crime_freq = cleaned_data.groupby(['h3_index', crime_days]).size().unstack(fill_value=0)
crime_freq = crime_freq.reindex(columns=full_range, fill_value=0)
crime_freq = crime_freq.reindex(all_hexes, fill_value=0)

# Node id of a cell == its row position in crime_freq.
hex_to_id = {cell: node_id for node_id, cell in enumerate(all_hexes)}

# Directed edge for every pair of adjacent cells that are both in the grid.
# Membership is checked against the dict (O(1)) rather than the list.
graph_edges = [
    [hex_to_id[cell], hex_to_id[neighbor]]
    for cell in all_hexes
    for neighbor in h3.grid_ring(cell, 1)
    if neighbor in hex_to_id
]

# reshape(-1, 2) keeps the result at shape (2, 0) even when there are no edges.
graph_edges = torch.tensor(graph_edges, dtype=torch.long).reshape(-1, 2).t().contiguous()

In [None]:
def create_rolling_windows(crime_freq, window_size=30, horizon=2, start_day=None, end_day=None):
  """Slice a (rows x days) count table into sliding (past, future) pairs.

  Args:
    crime_freq: DataFrame whose columns are the time axis.
    window_size: number of consecutive columns in each "past" slice.
    horizon: number of columns immediately after the past slice that form
      the "future" slice.
    start_day: optional column label; columns before it are discarded.
    end_day: optional column label; columns after it are discarded.

  Returns:
    A list of ``(past, future)`` tuples of arrays sliced from
    ``crime_freq.values``; ``past`` has ``window_size`` columns and ``future``
    has ``horizon`` columns. The list is empty when the (restricted) table has
    fewer than ``window_size + horizon`` columns.
  """
  frame = crime_freq
  if start_day is not None:
    frame = frame.loc[:, start_day:]
  if end_day is not None:
    frame = frame.loc[:, :end_day]

  counts = frame.values
  span = window_size + horizon
  last_start = frame.shape[1] - span

  # One window per valid start column, advancing one column at a time.
  return [
      (counts[:, first:first + window_size], counts[:, first + window_size:first + span])
      for first in range(last_start + 1)
  ]

# Chronological train / validation / test split: (first day, last day) of each
# segment, in that order. Every segment is windowed independently.
split_bounds = (
    ('2023-01-01', '2025-01-04'),
    ('2025-01-05', '2025-04-02'),
    ('2025-04-03', '2025-06-29'),
)

train_set, val_set, test_set = [
    create_rolling_windows(crime_freq, window_size=30, horizon=2, start_day=first_day, end_day=last_day)
    for first_day, last_day in split_bounds
]

In [None]:
!pip install torch-geometric

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch_geometric.nn import GCNConv


class STGNN(nn.Module):
  """GRU encoder followed by two graph convolutions and a linear read-out.

  Each row of the input is encoded independently by the GRU; the final hidden
  state of every row is then passed through two GCNConv layers over
  ``edge_index`` and projected to ``horizon`` outputs per row.
  """

  def __init__(self, num_nodes, window_size, horizon, hidden_dim=64):
    """Build the layers.

    Args:
      num_nodes: stored on the instance; not read by ``forward``.
      window_size: stored on the instance; not read by ``forward``.
      horizon: number of values predicted per row (output width of ``fc``).
      hidden_dim: width of the GRU state and of both graph convolutions.
    """
    super().__init__()
    self.num_nodes, self.window_size = num_nodes, window_size
    self.horizon, self.hidden_dim = horizon, hidden_dim

    # Temporal encoder: one scalar feature per time step.
    self.gru = nn.GRU(input_size=1, hidden_size=hidden_dim, batch_first=True)

    # Spatial mixing over the graph.
    self.gc1 = GCNConv(hidden_dim, hidden_dim)
    self.gc2 = GCNConv(hidden_dim, hidden_dim)

    # Per-row read-out.
    self.fc = nn.Linear(hidden_dim, horizon)

  def forward(self, x, edge_index):
    """Predict ``horizon`` values for every row of ``x``.

    Args:
      x: 2-D tensor; each row is fed to the GRU as one sequence of scalars
        (the callers in this notebook pass one row per graph node).
      edge_index: edge tensor handed unchanged to both GCNConv layers.

    Returns:
      Tensor with one row per input row and ``horizon`` columns.
    """
    # Add the trailing feature axis the GRU expects (input_size=1).
    sequences = x.unsqueeze(-1)

    hidden_states, _ = self.gru(sequences)
    # Keep only the representation after the last time step.
    node_embedding = hidden_states[:, -1, :]

    for conv in (self.gc1, self.gc2):
      node_embedding = F.relu(conv(node_embedding, edge_index))

    return self.fc(node_embedding)

# Hyperparameters. window_size and horizon must match the values used when the
# rolling windows were created.
window_size = 30
hidden_dim = 32
horizon = 2
lr = 0.001
epochs = 50
batch_size = 32

# Seed before the model is built so weight init and batch order are repeatable.
SEED = 42
torch.manual_seed(SEED)

# Each stacked tensor is (num_windows, num_nodes, window_size) for X and
# (num_windows, num_nodes, horizon) for y.
X_train = torch.stack([torch.tensor(pw, dtype=torch.float) for pw, _ in train_set])
y_train = torch.stack([torch.tensor(fw, dtype=torch.float) for _, fw in train_set])

X_val = torch.stack([torch.tensor(pw, dtype=torch.float) for pw, _ in val_set])
y_val = torch.stack([torch.tensor(fw, dtype=torch.float) for _, fw in val_set])

X_test = torch.stack([torch.tensor(pw, dtype=torch.float) for pw, _ in test_set])
y_test = torch.stack([torch.tensor(fw, dtype=torch.float) for _, fw in test_set])

train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Evaluation loaders are not shuffled: order does not affect the metric and a
# fixed order keeps the reported numbers reproducible.
val_dataset = TensorDataset(X_val, y_val)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

test_dataset = TensorDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Keyword arguments on purpose: the constructor order is
# (num_nodes, window_size, horizon, hidden_dim), so passing
# (window_size, hidden_dim, horizon) positionally silently ignored hidden_dim.
model = STGNN(
    num_nodes=X_train.shape[1],
    window_size=window_size,
    horizon=horizon,
    hidden_dim=hidden_dim,
)
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

for epoch in range(epochs):
    # Re-enter training mode; the validation pass below switches to eval mode.
    model.train()
    total_loss = 0.0
    for x_batch, y_batch in train_loader:
        optimizer.zero_grad()
        # The model handles one graph snapshot at a time, so run it per sample.
        preds = torch.stack([model(x, graph_edges) for x in x_batch])
        loss = loss_fn(preds, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    avg_train_loss = total_loss / len(train_loader)

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for x_batch, y_batch in val_loader:
            preds = torch.stack([model(x, graph_edges) for x in x_batch])
            loss = loss_fn(preds, y_batch)
            # Weight by batch size so a short final batch is not over-counted.
            val_loss += loss.item() * x_batch.size(0)
    avg_val_loss = val_loss / len(val_dataset)

    print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}")

In [None]:
# Rebuild the test tensors here so this cell can be re-run on its own after training.
X_test = torch.stack([torch.tensor(pw, dtype=torch.float) for pw, _ in test_set])
y_test = torch.stack([torch.tensor(fw, dtype=torch.float) for _, fw in test_set])

test_dataset = TensorDataset(X_test, y_test)
# No shuffling for evaluation: a fixed order makes the reported loss reproducible.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

model.eval()
test_loss = 0.0
with torch.no_grad():
    for x_batch, y_batch in test_loader:
        preds = torch.stack([model(x, graph_edges) for x in x_batch])
        loss = loss_fn(preds, y_batch)
        # Weight each batch by its size so a short final batch does not skew
        # the average; the result is the mean squared error over all test windows.
        test_loss += loss.item() * x_batch.size(0)
test_loss /= len(test_dataset)

print(test_loss)

In [None]:
# Persist the trained model object and the edge tensor it was trained with,
# each to its own file on Drive.
for artifact, destination in (
    (model, '/content/drive/MyDrive/Colab Notebooks/CivSentinel/STGNN_Model'),
    (graph_edges, '/content/drive/MyDrive/Colab Notebooks/CivSentinel/STGNN_Model_Graph'),
):
    torch.save(artifact, destination)