# 2nd ST-GCN Example dividing train and test

SEOYEON CHOI  
2023-01-17

> Split the WikiMathsDatasetLoader data into train and test sets and fit an ST-GCN on the training part.

# import

In [1]:
import rpy2
import rpy2.robjects as ro 
from rpy2.robjects.vectors import FloatVector 
from rpy2.robjects.packages import importr

import torch
import numpy as np
from tqdm import tqdm

import torch.nn.functional as F
from torch_geometric_temporal.nn.recurrent import GConvGRU

import matplotlib.pyplot as plt
import pandas as pd

import time

from scipy.interpolate import interp1d

In [2]:
class RecurrentGCN(torch.nn.Module):
    """A GConvGRU layer followed by a ReLU and a linear read-out to one value.

    Args:
        node_features: passed to GConvGRU as its first argument (input feature size).
        filters: passed to GConvGRU as its second argument and used as the
            input width of the final linear layer.
    """

    def __init__(self, node_features, filters):
        super().__init__()
        # The third positional argument (2) is forwarded to GConvGRU unchanged.
        self.recurrent = GConvGRU(node_features, filters, 2)
        self.linear = torch.nn.Linear(in_features=filters, out_features=1)

    def forward(self, x, edge_index, edge_weight):
        """Run the recurrent layer on one snapshot, then ReLU and the linear map."""
        hidden = self.recurrent(x, edge_index, edge_weight)
        return self.linear(F.relu(hidden))

# Data

In [110]:
from torch_geometric_temporal.dataset import WikiMathsDatasetLoader
from torch_geometric_temporal.signal import temporal_signal_split

In [111]:
# Create the loader object for the WikiMaths dataset.
loader = WikiMathsDatasetLoader()

In [112]:
# Build the temporal signal with lags=1 (presumably one past value per node as the
# input feature -- confirm against the loader documentation).
dataset = loader.get_dataset(lags=1)

In [113]:
# Split the snapshot sequence into a training part and a test part (train_ratio=0.8).
train_dataset, test_dataset = temporal_signal_split(dataset, train_ratio=0.8)

## Train

In [114]:
# Collect [index, snapshot] pairs for every training snapshot.
# NOTE: the loop variable `time` rebinds the `time` module imported at the top of the
# notebook, and later cells read its final value. After the loop it holds the LAST
# index (len(data_train) - 1), not the number of snapshots.
data_train=[]
for time, snapshot in enumerate(train_dataset):
    data_train.append([time,snapshot])

In [115]:
# Inspect the shapes of x, y, edge_index and edge_attr on the first training snapshot.
data_train[0][1].x.shape,data_train[0][1].y.shape,data_train[0][1].edge_index.shape,data_train[0][1].edge_attr.shape

In [116]:
# Show the last index produced by enumerate(train_dataset) in the loop above.
time

In [117]:
# Length of the training window used below. `time` is the last index yielded by
# enumerate(train_dataset), i.e. len(data_train) - 1, so the final training snapshot
# is not counted. NOTE(review): confirm that dropping it is intended.
T_train = time
# Number of nodes, read from the first training snapshot. This must use `data_train`:
# no variable named `data` is defined in this notebook, so `data[0][1]` raises
# NameError on a fresh kernel.
N = len(data_train[0][1].x)

In [118]:
# Take the graph structure from the first training snapshot and reuse it everywhere
# below (assumes the edges and weights are the same for all snapshots -- TODO confirm).
edge_index = data_train[0][1].edge_index
edge_attr = data_train[0][1].edge_attr

In [119]:
# Feature tensors of the first `time` training snapshots, in order.
x_train = [data_train[idx][1].x for idx in range(time)]

In [120]:
# Stack the per-snapshot feature tensors along a new leading time axis in one call.
# This replaces growing a tensor with torch.cat inside a loop (which re-copies all
# earlier data on every iteration) and reshaping with a hard-coded node count of 1068.
# list(...) lets the cell be re-run after x_train has already become a tensor.
x_train = torch.stack(list(x_train))
x_train.shape

In [121]:
# Target tensors of the first `time` training snapshots, in order.
y_train = [data_train[idx][1].y for idx in range(time)]

In [122]:
# Stack the per-snapshot targets along a new leading time axis in one call, instead of
# repeated torch.cat in a loop followed by a reshape with a hard-coded node count.
# list(...) lets the cell be re-run after y_train has already become a tensor.
y_train = torch.stack(list(y_train))
y_train.shape

In [123]:
# Check the shapes of the assembled training inputs and targets.
x_train.shape, y_train.shape

## Test

In [124]:
# Collect [index, snapshot] pairs for every test snapshot.
# NOTE: this rebinds the notebook-level `time`; after the loop it holds the LAST test
# index (len(data_test) - 1), and the cells below read that value.
data_test=[]
for time, snapshot in enumerate(test_dataset):
    data_test.append([time,snapshot])

In [125]:
# Inspect the shapes of x, y, edge_index and edge_attr on the first test snapshot.
data_test[0][1].x.shape,data_test[0][1].y.shape,data_test[0][1].edge_index.shape,data_test[0][1].edge_attr.shape

In [126]:
# Show the last index produced by enumerate(test_dataset) in the loop above.
time

In [127]:
# Length of the test window used below. `time` is the last enumerate() index, so this
# is len(data_test) - 1 and the final test snapshot is not counted.
# NOTE(review): confirm that dropping it is intended.
T_test = time

In [128]:
# Feature tensors of the first `time` test snapshots, in order.
x_test = [data_test[idx][1].x for idx in range(time)]

In [129]:
# Stack the per-snapshot feature tensors along a new leading time axis in one call,
# instead of repeated torch.cat in a loop followed by a reshape with a hard-coded
# node count of 1068.
# list(...) lets the cell be re-run after x_test has already become a tensor.
x_test = torch.stack(list(x_test))
x_test.shape

In [130]:
# Target tensors of the first `time` test snapshots, in order.
y_test = [data_test[idx][1].y for idx in range(time)]

In [131]:
# Stack the per-snapshot targets along a new leading time axis in one call, instead of
# repeated torch.cat in a loop followed by a reshape with a hard-coded node count.
# list(...) lets the cell be re-run after y_test has already become a tensor.
y_test = torch.stack(list(y_test))
y_test.shape

In [132]:
# Check the shapes of the assembled test inputs and targets.
x_test.shape, y_test.shape

# Randomly Missing Values

In [189]:
# Flatten x_train to 2-D with N columns (one row per time step when each node has a
# single feature). This rebinds x_train, so later cells see the 2-D version.
x_train = x_train.reshape(-1,N)

In [190]:
# Fix NumPy's global RNG so the same rows are selected on every run.
np.random.seed(90)
# Despite its name, `seed_number` is not a seed: it holds 290 distinct row indices of
# x_train, sampled without replacement, that will be marked as missing.
seed_number = np.random.choice(len(x_train),290,replace=False)

In [191]:
# Overwrite the selected rows with NaN in place. Indexing the first axis selects whole
# rows, so every column of a chosen row becomes missing. x_train is modified directly;
# the clean values are no longer available after this cell.
x_train[seed_number] = float('nan')

# 1) Missing Value - Mean

In [136]:
# Work on a copy so the NaN-containing x_train stays unchanged for the
# linear-interpolation section further down.
x_train_mean = x_train.clone()

In [140]:
# Impute missing entries column by column with that column's mean.
df = pd.DataFrame(x_train_mean.tolist())
mean_value = df.mean() # mean of every column (a Series with one entry per column)
df = df.fillna(mean_value) # fill each column's NaNs with that column's mean

In [143]:
# Convert the imputed DataFrame back to a torch tensor.
x_train_mean = torch.Tensor(df.values)

## ST-GCN

In [146]:
# Restore the (T_train, N, 1) layout: the model is fed one (N, 1) slice at a time.
mean_f_train = x_train_mean.reshape(T_train,N,1).float()

In [169]:
# Build (input, target) pairs in which the target is the value T_test steps after the
# input: inputs are the first T_train - T_test steps, targets start T_test steps in.
# The offsets are derived from T_train and T_test instead of the hard-coded 438 / 145,
# so they stay consistent if the split or the dataset changes.
mean_X = mean_f_train[:T_train - T_test,:,:]
mean_y = mean_f_train[T_test:,:,:]

In [170]:
# Inputs and targets must have the same leading length for the zip() in the training loop.
mean_X.shape,mean_y.shape

In [153]:
# Train a fresh model on the mean-imputed series.
model = RecurrentGCN(node_features=1, filters=4)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

model.train()

for epoch in tqdm(range(50)):
    # One optimiser step per (input, target) pair. The pairs are iterated directly:
    # an enumerate() index named `time` would be unused here and would overwrite the
    # notebook-level `time` variable on every pass.
    for xt, yt in zip(mean_X, mean_y):
        y_hat = model(xt, edge_index, edge_attr)
        cost = torch.mean((y_hat-yt)**2)  # mean squared error for this step
        cost.backward()
        optimizer.step()
        optimizer.zero_grad()

100%|██████████| 50/50 [04:17<00:00,  5.15s/it]

In [210]:
# Inputs held back from training: the last T_test steps of the training window.
# The start index is derived from T_train and T_test instead of the hard-coded 438.
mean_X_fore = mean_f_train[T_train - T_test:,:]

In [212]:
# Forecast from the held-back inputs. Inference does not need gradients, so switch the
# model to eval mode and disable autograd tracking instead of building a graph for
# every forward pass and detaching afterwards.
model.eval()
with torch.no_grad():
    mean_fhat = torch.stack([model(xt, edge_index, edge_attr) for xt in mean_X_fore]).numpy()

In [213]:
# The forecast inputs and the test features should cover the same number of steps.
mean_X_fore.shape,x_test.shape

# 2) Missing Value - Linear Interpolation

In [192]:
# Fill the NaN entries of x_train by linear interpolation, then set anything that is
# still missing afterwards to 0.
df = (
    pd.DataFrame(x_train.tolist())
    .interpolate(method='linear')
    .fillna(0)
)

In [196]:
# Convert the interpolated DataFrame back to a tensor in (T_train, N, 1) layout.
x_train_linear = torch.Tensor(df.values).reshape(T_train,N,1)

## ST-GCN

In [197]:
# Independent copy of the interpolated series used for training and forecasting below.
linear_f_train = x_train_linear.clone()

In [199]:
# Build (input, target) pairs in which the target is the value T_test steps after the
# input: inputs are the first T_train - T_test steps, targets start T_test steps in.
# The offsets are derived from T_train and T_test instead of the hard-coded 438 / 145.
linear_X = linear_f_train[:T_train - T_test,:,:]
linear_y = linear_f_train[T_test:,:,:]

In [200]:
# Train a fresh model on the linearly interpolated series. This rebinds `model` and
# `optimizer`, replacing the ones trained on the mean-imputed data.
model = RecurrentGCN(node_features=1, filters=4)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

model.train()

for epoch in tqdm(range(50)):
    # One optimiser step per (input, target) pair. The pairs are iterated directly:
    # an enumerate() index named `time` would be unused here and would overwrite the
    # notebook-level `time` variable on every pass.
    for xt, yt in zip(linear_X, linear_y):
        y_hat = model(xt, edge_index, edge_attr)
        cost = torch.mean((y_hat-yt)**2)  # mean squared error for this step
        cost.backward()
        optimizer.step()
        optimizer.zero_grad()

100%|██████████| 50/50 [04:20<00:00,  5.22s/it]

In [237]:
# Inputs held back from training: the last T_test steps of the training window.
# The start index is derived from T_train and T_test instead of the hard-coded 438.
linear_X_fore = linear_f_train[T_train - T_test:,:]

In [238]:
# Check the shape of the held-back forecast inputs.
linear_X_fore.shape

In [239]:
# Forecast from the held-back inputs. Inference does not need gradients, so switch the
# model to eval mode and disable autograd tracking instead of building a graph for
# every forward pass and detaching afterwards.
model.eval()
with torch.no_grad():
    linear_fhat = torch.stack([model(xt, edge_index, edge_attr) for xt in linear_X_fore]).numpy()

In [240]:
# The forecast inputs and the test features should cover the same number of steps.
linear_X_fore.shape,x_test.shape

# Comparison

MSE

In [235]:
# MSE of the mean-imputation forecast against x_test: squared error per entry,
# averaged down each of the N columns.
mean_pred = pd.DataFrame(mean_fhat.reshape(T_test, N))
mean_true = pd.DataFrame(x_test.reshape(T_test, N))
((mean_pred - mean_true) ** 2).mean()

In [241]:
# MSE of the linear-interpolation forecast against x_test: squared error per entry,
# averaged down each of the N columns.
linear_pred = pd.DataFrame(linear_fhat.reshape(T_test, N))
linear_true = pd.DataFrame(x_test.reshape(T_test, N))
((linear_pred - linear_true) ** 2).mean()