In [1]:
# Import required libraries
import pandas as pd
import numpy as np
import torch
from torch_geometric.data import Data
import h3
from scipy.sparse import lil_matrix
from torch_geometric.utils import from_scipy_sparse_matrix
import torch.nn as nn
from torch_geometric.nn import GCNConv
from torch.utils.data import DataLoader, Dataset

# Debug Helper Function
def debug(message, variable=None):
    print(f"[DEBUG] {message}")
    if variable is not None:
        print(variable)

# Load the data
data = pd.read_csv('dataset/sales_4.csv')

# Convert date column to datetime
data['date'] = pd.to_datetime(data['date'])


In [4]:

# Select top N stores
top_n = 100
store_lifetime_product_count = data.groupby('store_id')['product_count'].sum()
top_n_stores = store_lifetime_product_count.sort_values(ascending=False).head(top_n).index
filtered_data = data[data['store_id'].isin(top_n_stores)]

# Aggregate data to weekly demand per store
filtered_data['week'] = filtered_data['date'].dt.to_period('W').apply(lambda r: r.start_time)
weekly_data = filtered_data.groupby(['store_id', 'week']).agg({
    'product_count': 'sum',
    'latitude': 'first',
    'longitude': 'first',
    'locality_type': 'first'
}).reset_index()

# Normalize demand and encode locality type
weekly_data['product_count'] = (weekly_data['product_count'] - weekly_data['product_count'].mean()) / weekly_data['product_count'].std()
locality_mapping = {'Diamond': 0, 'Gold': 1, 'Silver': 2}
weekly_data['locality_type'] = weekly_data['locality_type'].map(locality_mapping)

# Time-series pivot
time_series = weekly_data.pivot(index='store_id', columns='week', values='product_count').fillna(0)

# Create adjacency matrix using H3 indexing
resolution = 3
weekly_data['h3_index'] = weekly_data.apply(
    lambda row: h3.latlng_to_cell(row['latitude'], row['longitude'], resolution), axis=1
)
h3_to_store_map = weekly_data.groupby('h3_index')['store_id'].apply(list).to_dict()
h3_neighbors = {h: h3.grid_disk(h, 1) for h in h3_to_store_map.keys()}

num_stores = len(weekly_data['store_id'].unique())
adj_matrix = lil_matrix((num_stores, num_stores))
store_to_idx = {store_id: idx for idx, store_id in enumerate(weekly_data['store_id'].unique())}

for h3_index, neighbors in h3_neighbors.items():
    stores_in_hex = h3_to_store_map.get(h3_index, [])
    for neighbor in neighbors:
        neighbor_stores = h3_to_store_map.get(neighbor, [])
        for s1 in stores_in_hex:
            for s2 in neighbor_stores:
                idx1 = store_to_idx[s1]
                idx2 = store_to_idx[s2]
                adj_matrix[idx1, idx2] = 1

adj_sparse = adj_matrix.tocsr()
edge_index, edge_weight = from_scipy_sparse_matrix(adj_sparse)

# Prepare node features
locality_encoded = weekly_data.groupby('store_id')['locality_type'].first()
locality_encoded = pd.get_dummies(locality_encoded, prefix='locality_type')
locality_tensor = torch.tensor(locality_encoded.values, dtype=torch.float)
node_features = torch.tensor(time_series.values, dtype=torch.float)
node_features = torch.cat([node_features, locality_tensor], dim=1)

debug("Node feature shape before GCN", x.shape)
debug("Edge index shape", edge_index.shape)
debug("Edge weight shape", edge_weight.shape)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data['week'] = filtered_data['date'].dt.to_period('W').apply(lambda r: r.start_time)


[DEBUG] Node feature shape before GCN
torch.Size([16, 100, 8])
[DEBUG] Edge index shape
torch.Size([2, 294])
[DEBUG] Edge weight shape
torch.Size([294])


In [12]:
class STGNN(nn.Module):
    def __init__(self, in_channels, spatial_out, temporal_out, forecast_steps):
        super(STGNN, self).__init__()
        self.gcn = GCNConv(in_channels, spatial_out)
        self.temporal_conv = nn.Conv1d(spatial_out, temporal_out, kernel_size=3, padding=1)
        self.fc = nn.Linear(temporal_out, forecast_steps)

    def forward(self, x, edge_index, edge_weight):
        batch_size, num_nodes, in_channels = x.shape
        debug("Input shape to GCN", x.shape)

        # Reshape input for GCNConv to (batch_size * num_nodes, in_channels)
        x = x.view(-1, in_channels)  # Flatten batch and nodes into one dimension
        debug("Shape after reshaping for GCN", x.shape)

        # Spatial convolution using GCNConv
        x = self.gcn(x, edge_index, edge_weight)
        x = torch.relu(x)
        debug("Shape after GCN", x.shape)

        # Reshape back to (batch_size, num_nodes, spatial_out)
        x = x.view(batch_size, num_nodes, -1)  # Reshape after GCN to original batch and nodes
        debug("Shape after reshaping for temporal conv", x.shape)

        # Temporal convolution (expecting shape: (batch_size, spatial_out, time_steps))
        x = x.permute(0, 2, 1)  # Change shape to (batch_size, spatial_out, num_nodes) for Conv1d
        debug("Shape before temporal convolution", x.shape)

        x = self.temporal_conv(x)
        x = torch.relu(x)

        # Flatten and apply the final fully connected layer
        x = x.mean(dim=-1)  # Take mean across the temporal dimension (num_nodes)
        x = self.fc(x)
        debug("Shape after fully connected layer", x.shape)
        return x


# Dataset Class
class TimeSeriesDataset(Dataset):
    def __init__(self, data, time_steps, forecast_steps):
        self.data = data
        self.time_steps = time_steps
        self.forecast_steps = forecast_steps

    def __len__(self):
        return self.data.shape[1] - self.time_steps - self.forecast_steps

    def __getitem__(self, idx):
        # x will have shape (num_nodes, time_steps)
        x = self.data[:, idx:idx + self.time_steps]  # Select time steps
        # y will have shape (num_nodes, forecast_steps)
        y = self.data[:, idx + self.time_steps:idx + self.time_steps + self.forecast_steps]
        return x.clone().detach(), y.clone().detach()

# Training setup
time_steps = 8
forecast_steps = 1
spatial_out = 32
temporal_out = 64
epochs = 10
batch_size = 16
learning_rate = 0.001

dataset = TimeSeriesDataset(node_features, time_steps, forecast_steps)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

model = STGNN(
    in_channels=node_features.shape[1],  # Match input feature dimension (e.g., 8)
    spatial_out=spatial_out,
    temporal_out=temporal_out,
    forecast_steps=forecast_steps
)


criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


# Training Loop
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for x, y in dataloader:
        optimizer.zero_grad()

        # Forward pass: input shape is (batch_size, num_nodes, time_steps)
        out = model(x, edge_index, edge_weight)  # Output shape: (batch_size, forecast_steps)

        # Reshape `y` to match `out` for loss calculation
        y = y.view(out.shape)  # Ensure `y` has the same shape as `out`

        # Compute loss
        loss = criterion(out, y)

        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(dataloader)}")



[DEBUG] Input shape to GCN
torch.Size([16, 100, 8])
[DEBUG] Shape after reshaping for GCN
torch.Size([1600, 8])


RuntimeError: mat1 and mat2 shapes cannot be multiplied (1600x8 and 208x32)