In [None]:
# Feature engineering for 'newAdmissions': per-area lag features plus calendar features.

# shift(lag) moves values by `lag` *rows* inside each area, so it only yields "the value `lag`
# observations earlier" when every area's rows are in chronological order. Sort explicitly
# instead of relying on whatever order the upstream merge produced.
# NOTE(review): a lag of N rows equals N days only if each area has one row per day — confirm.
merged_data_corrected = merged_data_corrected.sort_values(['areaName', 'date'])

lag_steps = [1, 7, 14]  # 1 day, 1 week, and 2 weeks lag
lag_columns = [f'newAdmissions_lag{lag}' for lag in lag_steps]
for lag, lag_column in zip(lag_steps, lag_columns):
    merged_data_corrected[lag_column] = merged_data_corrected.groupby('areaName')['newAdmissions'].shift(lag)

# Creating temporal features
merged_data_corrected['day_of_week'] = merged_data_corrected['date'].dt.dayofweek  # Monday=0, Sunday=6
merged_data_corrected['month'] = merged_data_corrected['date'].dt.month  # January=1, December=12
merged_data_corrected['week_of_year'] = merged_data_corrected['date'].dt.isocalendar().week

# No dedicated spatial features (population density, healthcare infrastructure, ...) are built
# here; the existing 'population' column is the only spatial proxy. Additional spatial columns
# can be merged into the dataframe and used directly.

# Drop only the rows that lack a value in a column built or used above (the first rows of each
# area have no lagged value). A blanket dropna() would also discard rows because of gaps in
# unrelated columns. Rebinding instead of inplace=True keeps the step explicit.
temporal_columns = ['day_of_week', 'month', 'week_of_year']
required_columns = ['newAdmissions'] + lag_columns + temporal_columns
merged_data_corrected = merged_data_corrected.dropna(subset=required_columns)

# Displaying the updated dataframe to verify the new features
merged_data_corrected[['date', 'areaName', 'newAdmissions', *lag_columns, *temporal_columns]].head()


In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
import numpy as np

class TimeseriesDataset(Dataset):
    """Sliding-window dataset over the rows of a DataFrame.

    Item ``idx`` is a pair ``(x, y)``:

    * ``x`` - the standardised ``features`` columns of rows
      ``idx .. idx + sequence_length - 1``, as a float tensor of shape
      ``(sequence_length, len(features))``;
    * ``y`` - the unscaled ``target`` value of the row that lies ``target_delay``
      rows after the last row of that window, as a scalar float tensor.

    Args:
        data: DataFrame holding the feature and target columns.
        sequence_length: Number of consecutive rows per input window.
        target_delay: Offset, in rows, from the last window row to the target row.
        features: Names of the input columns. ``None`` means no columns (the
            scaler cannot be fitted without at least one feature).
        target: Name of the column to predict.

    NOTE(review): windows are cut from consecutive rows of ``data`` as stored; nothing
    here keeps a window inside a single group (e.g. one area), and the scaler is
    fitted on every row of ``data``, i.e. before any train/validation/test split.
    """

    def __init__(self, data, sequence_length=30, target_delay=1, features=None, target='newAdmissions'):
        self.data = data
        self.sequence_length = sequence_length
        self.target_delay = target_delay
        # Copy into a fresh list: avoids sharing a mutable default between instances.
        self.features = list(features) if features is not None else []
        self.target = target
        self.scaler = StandardScaler()
        # Fit on a plain array because __getitem__ also transforms plain arrays; fitting on a
        # DataFrame would make scikit-learn warn about missing feature names on every item.
        self.scaler.fit(self.data[self.features].to_numpy(dtype=float))

    def __len__(self):
        """Number of complete (window, target) pairs; 0 when ``data`` is too short."""
        return max(0, len(self.data) - self.sequence_length - self.target_delay + 1)

    def __getitem__(self, idx):
        """Return ``(x, y)`` for window ``idx``; negative indices count from the end.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        n_items = len(self)
        if idx < 0:
            idx = idx + n_items
        if not 0 <= idx < n_items:
            raise IndexError(f"index out of range for dataset with {n_items} items")

        window_end = idx + self.sequence_length
        window = self.data.iloc[idx:window_end]
        x = self.scaler.transform(window[self.features].to_numpy(dtype=float))
        # The target row sits target_delay rows after the last row of the window.
        y = self.data[self.target].iloc[window_end + self.target_delay - 1]
        return torch.tensor(x, dtype=torch.float), torch.tensor(y, dtype=torch.float)

# Specify features and target
features = ['newAdmissions_lag1', 'newAdmissions_lag7', 'newAdmissions_lag14', 'day_of_week', 'month', 'week_of_year']
target = 'newAdmissions'

# Create the dataset
sequence_length = 30  # Example sequence length
target_delay = 1  # Predicting the next day
dataset = TimeseriesDataset(merged_data_corrected, sequence_length, target_delay, features, target)

# Splitting the data (70% train, 15% validation, 15% test); the test split takes the remainder
# so the three sizes always add up to len(dataset).
train_size = int(len(dataset) * 0.7)
val_size = int(len(dataset) * 0.15)
test_size = len(dataset) - train_size - val_size

# Seed the split so that Restart & Run All reproduces the same train/val/test membership;
# without a generator random_split draws from the global RNG and changes on every run.
# NOTE(review): neighbouring items of TimeseriesDataset are windows that overlap in all but one
# row, so a random split places near-duplicate windows in different subsets. Consider a
# chronological split if the validation/test scores are meant to reflect true forecasting.
SPLIT_SEED = 42
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [train_size, val_size, test_size], generator=split_generator
)

# Create DataLoaders (no per-epoch reshuffling; each subset is iterated in its split order)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [None]:
import torch
from torch import nn
import torch_geometric.nn as geom_nn

class GraphCNNLSTM(nn.Module):
    """Graph convolution followed by an LSTM and a linear read-out.

    Each time step is represented by the graph-convolution embeddings of all
    ``num_nodes`` nodes, flattened into one vector of size
    ``gcn_hidden_size * num_nodes``; the LSTM runs over the sequence of these
    vectors and the linear layer maps its last output to ``output_size`` values.

    Args:
        num_node_features: Number of input features per node.
        num_nodes: Number of nodes in one graph snapshot.
        lstm_hidden_size: Hidden size of the LSTM.
        output_size: Number of values predicted per sequence.
        gcn_hidden_size: Output channels of the graph convolution (default 16).
    """

    def __init__(self, num_node_features, num_nodes, lstm_hidden_size, output_size, gcn_hidden_size=16):
        super().__init__()
        # Graph convolutional layer
        self.graph_conv = geom_nn.GCNConv(num_node_features, gcn_hidden_size)
        # LSTM layer: one time step = the embeddings of all nodes, flattened
        self.lstm = nn.LSTM(input_size=gcn_hidden_size * num_nodes, hidden_size=lstm_hidden_size, batch_first=True)
        # Fully connected output layer
        self.fc = nn.Linear(lstm_hidden_size, output_size)

    def forward(self, x, edge_index, batch_data):
        """Run the spatial and temporal stages and return ``[batch, output_size]``.

        Args:
            x: Node features of every snapshot stacked along dim 0, shape
                ``[batch * seq_len * num_nodes, num_node_features]``, ordered by
                sample, then time step, then node. A single snapshot
                ``[num_nodes, num_node_features]`` is the batch=1, seq_len=1 case.
            edge_index: Graph connectivity for the stacked node set, shape
                ``[2, num_edges]``.
            batch_data: Tensor whose leading dimension is the batch size; only
                ``batch_data.size(0)`` is read.

        NOTE(review): the layout expected for ``x`` is inferred from the LSTM's
        ``input_size``; confirm it matches how the caller stacks snapshots.
        """
        # Spatial processing with graph convolution
        node_embeddings = self.graph_conv(x, edge_index)

        # Regroup node embeddings into [batch, seq_len, gcn_hidden_size * num_nodes]. The LSTM
        # must consume this tensor: feeding it anything else leaves the graph convolution
        # outside the computation graph, so it would never influence the output or be trained.
        batch_size = batch_data.size(0)
        lstm_input = node_embeddings.reshape(batch_size, -1, self.lstm.input_size)

        # Temporal processing with LSTM
        lstm_out, _ = self.lstm(lstm_input)

        # Output layer: use the LSTM output at the last time step
        return self.fc(lstm_out[:, -1, :])


In [None]:
def inverse_transform_data(scaler, data, feature_order):
    """Undo a fitted scaler's transformation for a subset of its columns.

    A zero-filled array with ``scaler.n_features_in_`` columns is built, column
    ``i`` of it is filled with ``data[:, feature_order[i]]``, and the result is
    passed through ``scaler.inverse_transform``.

    Args:
        scaler: Fitted object exposing ``n_features_in_`` and ``inverse_transform``.
        data: 2-D array of scaled values, one row per sample.
        feature_order: For each leading placeholder column, the index of the
            ``data`` column to copy into it.

    Returns:
        Array of shape ``(data.shape[0], scaler.n_features_in_)``. Columns are in
        placeholder order, not in the column order of ``data``; columns beyond
        ``len(feature_order)`` are the inverse transform of zeros.
    """
    n_rows = data.shape[0]
    n_scaler_columns = scaler.n_features_in_
    padded = np.zeros((n_rows, n_scaler_columns))

    # Copy each requested data column into its slot of the scaler-shaped array.
    for slot, source_column in enumerate(feature_order):
        padded[:, slot] = data[:, source_column]

    return scaler.inverse_transform(padded)

def plot_actual_vs_predicted(time_points, actual, predicted, labels, title, filename=None):
    """Plot actual (solid) and predicted (dashed) curves, one pair per label.

    Args:
        time_points: x-axis values shared by all curves.
        actual: 2-D array; column ``i`` is the actual series for ``labels[i]``.
        predicted: 2-D array; column ``i`` is the predicted series for ``labels[i]``.
        labels: Names of the plotted series, in column order.
        title: Figure title.
        filename: Optional path; when given, the figure is also written there.
    """
    fig, ax = plt.subplots(figsize=(15, 8))

    for i, label in enumerate(labels):
        ax.plot(time_points, actual[:, i], label=f'{label} Actual', linewidth=2)
        ax.plot(time_points, predicted[:, i], '--', label=f'{label} Predicted', linewidth=2)

    ax.set_xlabel("Days since: 2020-04-01")
    ax.set_ylabel("Population")
    ax.set_title(title)
    ax.legend()
    ax.grid(True)

    # Save before showing: once plt.show() has displayed the figure it may be closed, and a
    # subsequent plt.savefig() would then write a new, empty figure to disk.
    if filename:
        fig.savefig(filename)

    plt.show()

# Extract the time points from t_data for plotting
time_points = t_data.cpu().detach().numpy()

# Actual and predicted values, one column per compartment in the order S, I, R
actual_SIR = np.array([S_actual, I_actual, R_actual]).T
predicted_SIR = np.array([S_pred, I_pred, R_pred]).T

# Column of the (S, I, R) arrays that goes into scaler column 0, 1, 2 respectively.
scaler_column_order = [2, 1, 0]
# inverse_transform_data returns its result in scaler column order: output column i holds the
# input column scaler_column_order[i], i.e. (R, I, S) here. Map the columns back to (S, I, R)
# so that each curve is plotted under its own label instead of S and R being swapped.
restore_sir_order = np.argsort(scaler_column_order)

# Inverse transform the actual and predicted values; results have columns S, I, R
actual_SIR_transformed = inverse_transform_data(transformer, actual_SIR, scaler_column_order)[:, restore_sir_order]
predicted_SIR_transformed = inverse_transform_data(transformer, predicted_SIR, scaler_column_order)[:, restore_sir_order]

# Plot actual vs predicted
labels = ['Susceptible', 'Infected', 'Recovered']
plot_actual_vs_predicted(time_points, actual_SIR_transformed, predicted_SIR_transformed, labels, "SIR Model Predictions vs. Actual Data", filename="../../images/sir_model_predictions_inverse_transformed.pdf")