> ST-GCN Dataset PedalMeDatasetLoader

# import

In [99]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import animation

# torch
import torch
import torch.nn.functional as F
from torch_geometric_temporal.nn.recurrent import GConvGRU

# scipy 
from scipy.interpolate import interp1d

# utils
import time
import pickle
from tqdm import tqdm

# rpy2
import rpy2
import rpy2.robjects as ro 
from rpy2.robjects.vectors import FloatVector 
from rpy2.robjects.packages import importr

import copy

import rpy2.robjects.numpy2ri as rpyn
import rpy2.robjects as robjects

In [100]:
class RecurrentGCN(torch.nn.Module):
    """One GConvGRU layer followed by a linear read-out to a single value per node.

    Args:
        node_features: size of the per-node input feature vector.
        filters: number of output channels of the recurrent layer.
    """

    def __init__(self, node_features, filters):
        super().__init__()
        # The third GConvGRU argument (Chebyshev filter size K) is fixed to 2.
        self.recurrent = GConvGRU(node_features, filters, 2)
        self.linear = torch.nn.Linear(filters, 1)

    def forward(self, x, edge_index, edge_weight):
        """Return the linear read-out of the ReLU-activated recurrent output."""
        hidden = F.relu(self.recurrent(x, edge_index, edge_weight))
        return self.linear(hidden)

# my functions

In [101]:
def load_data(fname):
    """Unpickle and return the object stored in the file at ``fname``.

    NOTE: unpickling can execute arbitrary code; only load files you trust.
    """
    with open(fname, 'rb') as infile:
        return pickle.load(infile)

In [102]:
def save_data(data_dict, fname):
    """Pickle ``data_dict`` into the file ``fname`` (opened in 'wb' mode, so it is overwritten)."""
    with open(fname, 'wb') as sink:
        pickle.dump(data_dict, sink)

In [103]:
def plot(f,*args,t=None,h=2.5,**kwargs):
    """Draw every column of ``f`` in its own subplot, stacked vertically.

    Args:
        f: 2-D array-like exposing ``.shape`` and ``f[:, n]`` indexing; rows are
            plotted along the x axis, one subplot per column.
        *args, **kwargs: forwarded unchanged to ``Axes.plot``.
        t: x-axis values; defaults to ``range(f.shape[0])``.
        h: figure height allotted to each subplot.

    Returns:
        The matplotlib Figure (already closed in pyplot, see below).
    """
    T, N = f.shape
    # `is None` instead of `== None`: an array-valued `t` would otherwise be compared
    # element-wise and make the `if` raise "truth value is ambiguous".
    if t is None:
        t = range(T)
    fig = plt.figure()
    # squeeze=False always yields a 2-D axes array, so N == 1 is indexed like any other case.
    axes = fig.subplots(N, 1, squeeze=False)
    for n in range(N):
        axes[n, 0].plot(t, f[:, n], *args, **kwargs)
        axes[n, 0].set_title('node='+str(n))
    fig.set_figheight(N*h)
    fig.tight_layout()
    # Close this figure in pyplot; the caller shows it through the returned object
    # (presumably to avoid duplicate notebook output).
    plt.close(fig)
    return fig

In [104]:
def plot_add(fig,f,*args,t=None,**kwargs):
    """Overlay the columns of ``f`` on the existing axes of ``fig``.

    Column ``n`` of ``f`` is drawn on the n-th axes returned by ``fig.get_axes()``.

    Args:
        fig: figure whose axes receive the new lines.
        f: 2-D array-like exposing ``.shape`` and ``f[:, n]`` indexing.
        *args, **kwargs: forwarded unchanged to ``Axes.plot``.
        t: x-axis values; defaults to ``range(f.shape[0])``.

    Returns:
        The same ``fig`` object.
    """
    T = f.shape[0]
    N = f.shape[1]
    # `is None` instead of `== None`: an array-valued `t` would otherwise be compared
    # element-wise and make the `if` raise "truth value is ambiguous".
    if t is None:
        t = range(T)
    axes = fig.get_axes()
    for n in range(N):
        axes[n].plot(t, f[:, n], *args, **kwargs)
    return fig

In [105]:
def make_Psi(T):
    """Return the eigenvector matrix of the normalized Laplacian of a T-node path graph.

    The graph links every index to its immediate neighbours (|i - j| == 1). The
    columns of the result are the eigenvectors returned by ``np.linalg.eigh``.
    """
    # Adjacency: ones on the first super- and sub-diagonal, zeros elsewhere.
    adjacency = np.zeros((T,T))
    for i in range(T-1):
        adjacency[i,i+1] = 1
        adjacency[i+1,i] = 1
    degree = np.array(adjacency.sum(axis=1))
    inv_sqrt_degree = np.diag(1/np.sqrt(degree))
    # Symmetrically normalized Laplacian D^{-1/2} (D - W) D^{-1/2}.
    laplacian = np.array(inv_sqrt_degree @ (np.diag(degree)-adjacency) @ inv_sqrt_degree)
    _, Psi = np.linalg.eigh(laplacian)
    return Psi

In [106]:
# Bind the `ebayesthresh` function of the R package EbayesThresh (loaded through rpy2).
ebayesthresh = importr('EbayesThresh').ebayesthresh

In [107]:
def trim(f):
    """Threshold a signal in the eigenbasis produced by ``make_Psi``.

    ``f`` is treated as (time, column); a 1-D input becomes a single column.
    Each column is projected onto the basis, its coefficients are passed through
    the R function ``ebayesthresh``, and the result is mapped back.
    """
    f = np.array(f)
    if f.ndim == 1:
        f = f.reshape(-1,1)
    T, N = f.shape
    Psi = make_Psi(T)
    coeffs = Psi.T @ f  # forward transform onto the eigenvector basis
    shrunk_columns = []
    for k in range(N):
        shrunk_columns.append(ebayesthresh(FloatVector(coeffs[:,k])))
    coeffs_threshed = np.stack(shrunk_columns, axis=1)
    return Psi @ coeffs_threshed  # inverse transform

In [108]:
def update_from_freq_domain(signal, missing_index):
    """Overwrite the listed entries of ``signal`` with their ``trim``-smoothed values.

    Args:
        signal: 2-D (time, column) array-like; it is copied, not modified in place.
        missing_index: per-column row indices, ``missing_index[c]`` for column ``c``.

    Returns:
        A NumPy array equal to ``signal`` except at the listed positions.
    """
    updated = np.array(signal)
    _, n_cols = updated.shape
    smoothed = trim(updated)
    for c in range(n_cols):
        rows = missing_index[c]
        updated[rows,c] = smoothed[rows,c]
    return updated

# Data

In [109]:
from torch_geometric_temporal.dataset import  PedalMeDatasetLoader
from torch_geometric_temporal.signal import temporal_signal_split

In [110]:
# Loader object for the PedalMe dataset from torch_geometric_temporal.
loader = PedalMeDatasetLoader()

In [111]:
# Build the temporal signal with lags=4; each snapshot's x then has 4 columns per node
# (see the shapes printed further down).
dataset = loader.get_dataset(lags=4)

In [112]:
# Split the snapshot sequence with train_ratio=0.8 into a training and a test signal.
train_dataset, test_dataset = temporal_signal_split(dataset, train_ratio=0.8)

## Train

In [115]:
train_dataset.x

AttributeError: 'StaticGraphTemporalSignal' object has no attribute 'x'

In [77]:
# Materialize the training signal as a list of [index, snapshot] pairs.
# NOTE(review): the loop variable `time` shadows the `time` module imported at the top and
# is read by later cells as the number of training steps. After the loop it holds the LAST
# index (len(data_train) - 1), so cells that use range(time) leave out the final snapshot —
# confirm this is intended.
data_train=[]
for time, snapshot in enumerate(train_dataset):
    data_train.append([time,snapshot])

In [78]:
data_train[0][1].x.shape,data_train[0][1].y.shape,data_train[0][1].edge_index.shape,data_train[0][1].edge_attr.shape

(torch.Size([15, 4]),
 torch.Size([15]),
 torch.Size([2, 225]),
 torch.Size([225]))

In [79]:
time

23

In [80]:
# Number of training steps used downstream; `time` is the last index left behind by the
# enumerate loop over train_dataset.
T_train = time
# Number of graph nodes: rows of the first snapshot's feature matrix.
N = len(data_train[0][1].x)

In [81]:
# Graph connectivity and edge weights, taken from the first training snapshot.
edge_index = data_train[0][1].edge_index
edge_attr = data_train[0][1].edge_attr

In [82]:
# Feature tensors of the first `time` training snapshots, in order.
x_train = [data_train[step][1].x for step in range(time)]

In [83]:
# Join the per-snapshot feature matrices with one torch.cat call instead of growing a
# tensor inside a loop, which re-copies everything accumulated so far on every pass.
# NOTE: assumes the snapshot tensors are already floating point (the old loop started
# from an empty float tensor).
data_tensor = torch.cat(x_train, dim=0)
# Back to one (node, lag) slice per time step; N replaces the hard-coded node count 15.
x_train = data_tensor.reshape(time, N, -1)
x_train.shape

torch.Size([23, 15, 4])

In [84]:
# Target tensors of the first `time` training snapshots, in order.
y_train = [data_train[step][1].y for step in range(time)]

In [85]:
# Join the per-snapshot target vectors with one torch.cat call instead of growing a
# tensor inside a loop, which re-copies everything accumulated so far on every pass.
# NOTE: assumes the snapshot tensors are already floating point (the old loop started
# from an empty float tensor).
data_tensor = torch.cat(y_train, dim=0)
# One row of node targets per time step; N replaces the hard-coded node count 15.
y_train = data_tensor.reshape(time, N)
y_train.shape

torch.Size([23, 15])

In [86]:
x_train.shape, y_train.shape

(torch.Size([23, 15, 4]), torch.Size([23, 15]))

## Test

In [87]:
# Materialize the test signal as a list of [index, snapshot] pairs.
# NOTE(review): this rebinds the global `time` (which also shadows the `time` module) to
# the LAST test index, i.e. len(data_test) - 1; the following cells use it as the number
# of test steps, so the final test snapshot is left out — confirm this is intended.
data_test=[]
for time, snapshot in enumerate(test_dataset):
    data_test.append([time,snapshot])

In [88]:
data_test[0][1].x.shape,data_test[0][1].y.shape,data_test[0][1].edge_index.shape,data_test[0][1].edge_attr.shape

(torch.Size([15, 4]),
 torch.Size([15]),
 torch.Size([2, 225]),
 torch.Size([225]))

In [89]:
time

6

In [28]:
# Number of test steps used downstream; `time` is the last index left behind by the
# enumerate loop over test_dataset.
T_test = time

In [29]:
# Feature tensors of the first `time` test snapshots, in order.
x_test = [data_test[step][1].x for step in range(time)]

In [30]:
# Join the per-snapshot feature matrices with one torch.cat call instead of growing a
# tensor inside a loop, which re-copies everything accumulated so far on every pass.
# NOTE: assumes the snapshot tensors are already floating point (the old loop started
# from an empty float tensor).
data_tensor = torch.cat(x_test, dim=0)
# Back to one (node, lag) slice per time step; N replaces the hard-coded node count 15.
x_test = data_tensor.reshape(time, N, -1)
x_test.shape

torch.Size([6, 15, 4])

In [31]:
# Target tensors of the first `time` test snapshots, in order.
y_test = [data_test[step][1].y for step in range(time)]

In [32]:
# Join the per-snapshot target vectors with one torch.cat call instead of growing a
# tensor inside a loop, which re-copies everything accumulated so far on every pass.
# NOTE: assumes the snapshot tensors are already floating point (the old loop started
# from an empty float tensor).
data_tensor = torch.cat(y_test, dim=0)
# One row of node targets per time step; N replaces the hard-coded node count 15.
y_test = data_tensor.reshape(time, N)
y_test.shape

torch.Size([6, 15])

In [33]:
x_test.shape, y_test.shape

(torch.Size([6, 15, 4]), torch.Size([6, 15]))

# data 정리 

`-` 데이터정리 

In [34]:
T_test,T_train,N

(6, 23, 15)

In [35]:
E = edge_index;E

tensor([[ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,
          1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  2,
          2,  2,  2,  2,  2,  2,  2,  2,  2,  3,  3,  3,  3,  3,  3,  3,  3,  3,
          3,  3,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
          4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
          6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,  7,
          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,  8,
          8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9,  9,
          9,  9,  9,  9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
         10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
         12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13,
         13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14,
         14, 14, 14, 14, 14,

In [36]:
edge_index = E

In [37]:
edge_attr

tensor([1.0000, 0.4255, 0.1574, 0.2065, 0.5641, 0.5186, 0.4004, 0.0267, 0.2247,
        0.5012, 0.6450, 0.3018, 0.2209, 0.2913, 0.2953, 0.4255, 1.0000, 0.0675,
        0.2620, 0.3781, 0.3043, 0.4957, 0.0126, 0.4736, 0.2147, 0.6440, 0.1405,
        0.1200, 0.2369, 0.5157, 0.1574, 0.0675, 1.0000, 0.0580, 0.1245, 0.1816,
        0.0840, 0.1300, 0.0394, 0.3139, 0.1045, 0.2730, 0.2654, 0.0848, 0.0490,
        0.2065, 0.2620, 0.0580, 1.0000, 0.1279, 0.2995, 0.4941, 0.0193, 0.3641,
        0.1441, 0.2718, 0.0660, 0.1816, 0.0693, 0.1373, 0.5641, 0.3781, 0.1245,
        0.1279, 1.0000, 0.2931, 0.2577, 0.0182, 0.1796, 0.3608, 0.4670, 0.3473,
        0.1312, 0.5161, 0.3760, 0.5186, 0.3043, 0.1816, 0.2995, 0.2931, 1.0000,
        0.4593, 0.0411, 0.2104, 0.4788, 0.4550, 0.2095, 0.3927, 0.1517, 0.1748,
        0.4004, 0.4957, 0.0840, 0.4941, 0.2577, 0.4593, 1.0000, 0.0201, 0.4531,
        0.2433, 0.5497, 0.1222, 0.2012, 0.1401, 0.2557, 0.0267, 0.0126, 0.1300,
        0.0193, 0.0182, 0.0411, 0.0201, 

`-` train / test 

In [38]:
# Flatten the lagged features into one (row, node) matrix: lag column 0 of every snapshot
# except the last, followed by all lag columns of the last snapshot (transposed so each
# lag becomes a row). Presumably this recovers the raw per-node series the lags were cut from.
x_train_f = torch.concat([x_train[:-1,:,0], x_train[-1,:,:].T])

In [39]:
x_train.shape,x_train_f.shape, x_test.shape,y_train.shape,y_test.shape,T_train,T_test

(torch.Size([23, 15, 4]),
 torch.Size([26, 15]),
 torch.Size([6, 15, 4]),
 torch.Size([23, 15]),
 torch.Size([6, 15]),
 23,
 6)

# Random Missing Values

In [40]:
class Missing:
    """Mask random entries of a 2-D (row, column) signal and impute them.

    Args:
        df: 2-D array or tensor exposing ``.shape``, ``len()``, ``.tolist()`` and
            ``[rows, col]`` indexing. It is never modified; work happens on copies.

    Attributes set by the methods:
        missing: copy of ``df`` with the masked entries set to NaN (``miss``).
        number: list with one array of masked row indices per column (``miss``).
        train_mean: NaNs replaced by the column mean of the observed values (``first_mean``).
        train_linear: NaNs filled by linear interpolation, remaining ones by 0 (``second_linear``).
    """

    def __init__(self,df):
        self.df = df
        # Take the column count from the data instead of a notebook-global N.
        self.N = df.shape[1]
        self.number = []

    def miss(self,percent=0.5):
        """Set ``int(len(df) * percent)`` randomly chosen rows of every column to NaN."""
        self.missing = copy.deepcopy(self.df)
        self.percent = percent
        # Start from an empty list so a repeated call does not leave the indices of an
        # earlier call at the positions that first_mean / callers read.
        self.number = []
        n_rows = int(len(self.df))
        n_missing = int(n_rows*self.percent)
        for i in range(self.N):
            # Rows are drawn from 0 .. n_rows-2, so the last row is never masked.
            self.number.append(np.random.choice(n_rows-1, n_missing, replace=False))
            self.missing[self.number[i],i] = float('nan')

    def first_mean(self):
        """Fill the masked entries of each column with that column's mean over observed values."""
        self.train_mean = np.array(copy.deepcopy(self.missing))
        for i in range(self.N):
            self.train_mean[self.number[i],i] = np.nanmean(self.missing[:,i])

    def second_linear(self):
        """Fill the masked entries by column-wise linear interpolation.

        NaNs that interpolation leaves in place (e.g. at the start of a column) become 0.
        """
        frame = pd.DataFrame(self.missing.tolist())
        frame.interpolate(method='linear', inplace=True)
        frame = frame.fillna(0)
        self.train_linear = np.array(frame).reshape(int(len(self.df)), self.N)

In [41]:
# Column layout of the result tables filled by the *_Missing experiment classes.
col = ['Dataset','iteration', 'method', 'missingrate', 'missingtype', 'lag', 'number_of_filters', 'interpolation','MSE_train', 'MSE_test']

# Missing rates to sweep: 0.0, 0.1, ..., 0.9.
rate = [i/10 for i in range(10)]

# Class code by Method

## STGCN

In [98]:
class STGCN_Missing:
    """Repeated STGCN experiment on a randomly masked, linearly interpolated training signal.

    Reads the notebook globals x_train_f, x_test, y_train, y_test, T_train, edge_index,
    edge_attr and col, and uses the Missing and RecurrentGCN classes.

    Args:
        Dataset, Method, Missingtype, Interpolation: labels copied into the result rows.
        df: result table that new rows are appended to (kept in ``self.df``).
        iterable: number of repetitions run by ``iter``.
        Missingrate: fraction of rows masked per column (passed to ``Missing.miss``).
        lag: number of lagged values used as node features.
        Number_of_filters: number of filters of the RecurrentGCN.
    """

    def __init__(self,Dataset, df, iterable, Method, Missingrate, Missingtype, lag, Number_of_filters, Interpolation):
        self.Dataset = Dataset
        self.df = df
        self.iterable = iterable
        self.Method = Method
        self.Missingrate = Missingrate
        self.Missingtype = Missingtype
        self.lag = lag
        self.Number_of_filters = Number_of_filters
        self.Interpolation = Interpolation

    def iter(self):
        """Run ``self.iterable`` repetitions, appending one result row per repetition to ``self.df``."""
        self.XX = x_test
        self.yy = y_test

        self.real_y = y_train
        for i in range(self.iterable):

            _zero = Missing(x_train_f)
            _zero.miss(percent = self.Missingrate)
            _zero.second_linear()

            interpolated_signal = _zero.train_linear

            # Lagged inputs: X[t, :, j] = signal[t + j, :].
            X = torch.tensor(np.stack([interpolated_signal[j:(T_train+j),:] for j in range(self.lag)],axis = -1)).float()
            # Targets get a trailing axis so each yt is (node, 1), the shape of the network
            # output. With a flat (node,) target, `yt_hat - yt` broadcasts to a
            # (node, node) matrix and the loss averages over all node pairs.
            y = torch.tensor(interpolated_signal[self.lag:,:,None]).float()

            net = RecurrentGCN(node_features=self.lag, filters=self.Number_of_filters)
            optimizer = torch.optim.Adam(net.parameters(), lr=0.01)
            net.train()
            for _ in range(50):
                # zip stops at the shorter sequence, so inputs without a target are skipped.
                for xt, yt in zip(X, y):
                    yt_hat = net(xt, edge_index, edge_attr)
                    cost = torch.mean((yt_hat-yt)**2)
                    cost.backward()
                    optimizer.step()
                    optimizer.zero_grad()

            yhat = torch.stack([net(xt, edge_index, edge_attr) for xt in X]).detach().numpy()
            yyhat = torch.stack([net(xt, edge_index, edge_attr) for xt in self.XX]).detach().numpy()

            train_mse_total_stgcn = (((self.real_y-yhat.squeeze()).squeeze())**2).mean()
            test_mse_total_stgcn = (((self.yy-yyhat.squeeze()).squeeze())**2).mean()

            # One-row result table; explicit one-element lists replace the implicit
            # 1-tuples that the trailing commas used to create.
            df_row = pd.DataFrame({
                'Dataset': [self.Dataset],
                'iteration': [i+1], # 1,2,3,...
                'method': [self.Method], # 'stgcn','estgcn','gnar'
                'missingrate': [self.Missingrate],
                'missingtype': [self.Missingtype], # None, 'randomly' and 'block'
                'lag': [self.lag],
                'number_of_filters': [self.Number_of_filters],
                'interpolation': [self.Interpolation], # None, 'mean', 'linear'
                'MSE_train': [train_mse_total_stgcn.tolist()],
                'MSE_test': [test_mse_total_stgcn.tolist()],
            }, columns=col)

            self.df = pd.concat([self.df,df_row])

## Enhancement of STGCN

In [43]:
class ESTGCN_Missing:
    """Repeated "enhanced" STGCN experiment that re-imputes the missing entries every epoch.

    Reads the notebook globals x_train_f, x_test, y_train, y_test, T_train, N, edge_index,
    edge_attr and col, and uses Missing, RecurrentGCN and update_from_freq_domain.

    Args:
        Dataset, Method, Missingtype, Interpolation: labels copied into the result rows.
        df: result table that new rows are appended to (kept in ``self.df``).
        iterable: number of repetitions run by ``iter``.
        Missingrate: fraction of rows masked per column (passed to ``Missing.miss``).
        lag: number of lagged values used as node features.
        Number_of_filters: number of filters of the RecurrentGCN.
    """
    def __init__(self,Dataset, df, iterable, Method, Missingrate, Missingtype, lag, Number_of_filters, Interpolation):
        self.Dataset = Dataset
        self.df = df
        self.iterable = iterable
        self.Method = Method
        self.Missingrate = Missingrate
        self.Missingtype = Missingtype
        self.lag = lag
        self.Number_of_filters = Number_of_filters
        self.Interpolation = Interpolation
        # NOTE(review): repeated assignment, same value as above.
        self.iterable = iterable
    def iter(self):
        """Run ``self.iterable`` repetitions, appending one result row per repetition to ``self.df``."""
        self.XX = x_test
        self.yy = y_test

        self.real_y = y_train
        for i in range(self.iterable):
    
            # Mask the training signal at random and fill the gaps by linear interpolation.
            _zero = Missing(x_train_f)
            _zero.miss(percent = self.Missingrate)
            _zero.second_linear()

            missing_index = _zero.number
            interpolated_signal = _zero.train_linear

            net = RecurrentGCN(node_features=self.lag, filters=self.Number_of_filters)
            optimizer = torch.optim.Adam(net.parameters(), lr=0.01)
            net.train()
            signal = interpolated_signal.copy()
            for epoch in range(50):
                # Replace the originally masked entries with their spectrally thresholded values.
                signal = update_from_freq_domain(signal,missing_index)
                # Rebuild lagged inputs and targets from the current signal. The slice length
                # grows with `epoch` because one row is appended to `signal` at the end of
                # every epoch (see below). The comprehension's `i` has its own scope and
                # does not overwrite the repetition counter `i`.
                X = torch.tensor(np.stack([signal[i:(T_train+epoch+i),:] for i in range(self.lag)],axis = -1)).reshape(-1,N,self.lag).float()
                y = torch.tensor(signal).reshape(-1,N,1).float()[self.lag:,:,:]
                # zip stops at the shorter sequence, so inputs without a target are skipped.
                for time, (xt,yt) in enumerate(zip(X,y)):        
                    yt_hat = net(xt, edge_index, edge_attr)
                    cost = torch.mean((yt_hat-yt)**2)
                    cost.backward()
                    optimizer.step()
                    optimizer.zero_grad()
                # Reassemble the signal from X and append the last prediction as a new row.
                signal = torch.concat([X[:-1,:,0], X[-1,:,:].T, yt_hat.detach().reshape(1,-1)]).squeeze()

            yhat = torch.stack([net(xt, edge_index, edge_attr) for xt in X]).detach().numpy()
            yyhat = torch.stack([net(xt, edge_index, edge_attr) for xt in self.XX]).detach().numpy()

            # X has grown past T_train rows by now; only the first T_train predictions are scored.
            train_mse_total_estgcn = (((self.real_y-yhat[:T_train,:].squeeze()).squeeze())**2).mean()
            test_mse_total_estgcn = (((self.yy-yyhat.squeeze()).squeeze())**2).mean()

            # The trailing commas below turn each value into a 1-tuple, i.e. a one-row column.
            df_row = pd.DataFrame(columns=col)
            df_row['Dataset'] = self.Dataset,
            df_row['iteration'] = i+1, # 1,2,3,...,10 
            df_row['method'] = self.Method, # 'stgcn','estgcn','gnar' 
            df_row['missingrate'] = self.Missingrate, # 0.0, 0.2, 0.4, 0.6, 0.8 
            df_row['missingtype'] = self.Missingtype,  # None, 'randomly' and 'block' 
            df_row['lag'] = self.lag, # 1,2,3,4 ... 
            df_row['number_of_filters'] = self.Number_of_filters, # 16,24,32, ... 
            df_row['interpolation'] = self.Interpolation, # None, 'mean', 'linear'
            df_row['MSE_train'] = train_mse_total_estgcn.tolist()
            df_row['MSE_test'] = test_mse_total_estgcn.tolist()

            self.df = pd.concat([self.df,df_row])

## GNAR

In [44]:
%load_ext rpy2.ipython

In [45]:
%%R
library(GNAR)
library(igraph)

R[write to console]: Loading required package: igraph

R[write to console]: 
Attaching package: ‘igraph’


R[write to console]: The following objects are masked from ‘package:stats’:

    decompose, spectrum


R[write to console]: The following object is masked from ‘package:base’:

    union


R[write to console]: Loading required package: wordcloud

R[write to console]: Loading required package: RColorBrewer



In [46]:
GNAR = importr('GNAR') # import GNAR 
igraph = importr('igraph') # import igraph 

In [47]:
w=np.zeros((N,N))

In [48]:
# Put a 1 into the dense matrix w for every (source, target) pair listed in edge_index.
for src, dst in zip(edge_index[0], edge_index[1]):
    w[src,dst] = 1

In [49]:
# Hand the dense adjacency matrix to R as an N x N matrix.
# NOTE(review): w is a 2-D NumPy array; confirm that FloatVector flattens it in the order
# r.matrix expects (R fills matrices column by column).
m = robjects.r.matrix(FloatVector(w), nrow = N, ncol = N)

In [50]:
class GNAR_Missing:
    """Repeated GNAR (R package, via rpy2) experiment on a masked, linearly interpolated signal.

    Reads the notebook globals x_train_f, y_test, T_train, T_test, N, m and col, and uses
    the Missing class and the imported R package GNAR.

    Args:
        Dataset, Method, Missingtype, Number_of_filters, Interpolation: labels copied
            into the result rows.
        df: result table that new rows are appended to (kept in ``self.df``).
        iterable: number of repetitions run by ``iter``.
        Missingrate: fraction of rows masked per column (passed to ``Missing.miss``).
        lag: number of lagged copies stacked into X.
    """
    def __init__(self,Dataset, df, iterable, Method, Missingrate, Missingtype, lag, Number_of_filters, Interpolation):
        self.Dataset = Dataset
        self.df = df
        self.iterable = iterable
        self.Method = Method
        self.Missingrate = Missingrate
        self.Missingtype = Missingtype
        self.lag = lag
        self.Number_of_filters = Number_of_filters
        self.Interpolation = Interpolation
        # NOTE(review): repeated assignment, same value as above.
        self.iterable = iterable
    def iter(self):
        """Run ``self.iterable`` repetitions, appending one result row per repetition to ``self.df``."""
        # NOTE(review): y_test is built as a tensor earlier in the notebook; torch.tensor()
        # on a tensor copies it and PyTorch recommends clone().detach() instead.
        self.yy = torch.tensor(y_test).float()
        for i in range(self.iterable):

            # Mask the training signal at random and fill the gaps by linear interpolation.
            _zero = Missing(x_train_f)
            _zero.miss(percent = self.Missingrate)
            _zero.second_linear()

            # NOTE(review): missing_index is not used in this method.
            missing_index = _zero.number
            interpolated_signal = _zero.train_linear

            # Lagged copies stacked on the last axis. The comprehension's `i` has its own
            # scope and does not overwrite the repetition counter `i`.
            X = torch.tensor(np.stack([interpolated_signal[i:(T_train+i),:] for i in range(self.lag)],axis = -1)).float()

            # NOTE(review): X carries a trailing lag axis, yet it is handed to r.matrix with
            # nrow=T_train, ncol=N — confirm which elements actually end up in `vts`.
            # alphaOrder / betaOrder are fixed to 4 and four ones, independent of self.lag.
            answer = GNAR.GNARfit(vts=robjects.r.matrix(rpyn.numpy2rpy(np.array(X).squeeze()), nrow = T_train, ncol = N),net = GNAR.matrixtoGNAR(m), alphaOrder = 4, betaOrder = FloatVector([1, 1, 1, 1]))             
            predict = GNAR.predict_GNARfit(answer,n_ahead=T_test)


            # Train error is the mean squared residual of the fit; test error compares the
            # T_test-step-ahead prediction with y_test.
            train_mse_total_gnar = ((pd.DataFrame(GNAR.residuals_GNARfit(answer)).values.reshape(-1,N))**2).mean()
            test_mse_total_gnar = ((self.yy - pd.DataFrame(predict).values.reshape(-1,N))**2).mean()

            # The trailing commas below turn each value into a 1-tuple, i.e. a one-row column.
            df_row = pd.DataFrame(columns=col)
            df_row['Dataset'] = self.Dataset,
            df_row['iteration'] = i+1, # 1,2,3,...,10 
            df_row['method'] = self.Method, # 'stgcn','estgcn','gnar' 
            df_row['missingrate'] = self.Missingrate, # 0.0, 0.2, 0.4, 0.6, 0.8 
            df_row['missingtype'] = self.Missingtype,  # None, 'randomly' and 'block' 
            df_row['lag'] = self.lag, # 1,2,3,4 ... 
            df_row['number_of_filters'] = self.Number_of_filters, # 16,24,32, ... 
            df_row['interpolation'] = self.Interpolation, # None, 'mean', 'linear'
            df_row['MSE_train'] = train_mse_total_gnar.tolist()
            df_row['MSE_test'] = test_mse_total_gnar.tolist()

            self.df = pd.concat([self.df,df_row])

## STGCN 

In [51]:
# Settings for the STGCN sweep. Dataset, Method, Missingtype and Interpolation are only
# recorded as labels in the result table (STGCN_Missing always masks at random and always
# interpolates linearly); lag and Number_of_filters configure the model, and iterable is
# the number of repetitions per missing rate.
Dataset = 'PedalMeDatasetLoader'
Method = 'stgcn' # 'stgcn','estgcn','gnar' 
Missingtype = 'randomly'  # None, 'randomly' and 'block' 
lag = 4 # 1,2,3,4 ... 
Number_of_filters = 4 # 16,24,32, ... 
Interpolation = 'Linear' # None, 'mean', 'linear'
iterable = 100

In [52]:
df_stgcn= pd.DataFrame(columns=col)

In [None]:
# Run the STGCN experiment once per missing rate and stack the per-rate result tables.
for Missingrate in rate:
    df = pd.DataFrame(columns=col)  # fresh, empty table for this rate
    stgcn = STGCN_Missing(Dataset,df, iterable,Method, Missingrate, Missingtype, lag, Number_of_filters, Interpolation)
    stgcn.iter()
    df_add = stgcn.df.copy()
    df_stgcn = pd.concat([df_stgcn,df_add],axis=0)

In [None]:
save_data(df_stgcn, './data/Pedal_stgcn_randomly_by_rate.pkl')

## Enhancement of STGCN

In [None]:
# Settings for the ESTGCN sweep. Dataset, Method, Missingtype and Interpolation are only
# recorded as labels in the result table (ESTGCN_Missing always masks at random and starts
# from a linear interpolation); lag and Number_of_filters configure the model, and
# iterable is the number of repetitions per missing rate.
Dataset = 'PedalMeDatasetLoader'
Method = 'estgcn' # 'stgcn','estgcn','gnar' 
Missingtype = 'randomly'  # None, 'randomly' and 'block' 
lag = 4 # 1,2,3,4 ... 
Number_of_filters = 4 # 16,24,32, ... 
Interpolation = 'Linear' # None, 'mean', 'linear'
iterable = 100

In [None]:
df_estgcn = pd.DataFrame(columns=col)

In [None]:
# Run the ESTGCN experiment once per missing rate and stack the per-rate result tables.
for Missingrate in rate:
    df = pd.DataFrame(columns=col)  # fresh, empty table for this rate
    estgcn = ESTGCN_Missing(Dataset,df, iterable,Method, Missingrate, Missingtype, lag, Number_of_filters, Interpolation)
    estgcn.iter()
    df_add = estgcn.df.copy()
    df_estgcn = pd.concat([df_estgcn,df_add],axis=0)

In [None]:
save_data(df_estgcn, './data/Pedal_estgcn_randomly_by_rate.pkl')

## GNAR

In [None]:
# Settings for the GNAR sweep. Dataset, Method, Missingtype, Number_of_filters and
# Interpolation are only recorded as labels in the result table; lag sets how many lagged
# copies GNAR_Missing stacks (its alphaOrder is fixed to 4 inside the class), and iterable
# is the number of repetitions per missing rate.
Dataset = 'PedalMeDatasetLoader'
Method = 'gnar' # 'stgcn','estgcn','gnar' 
Missingtype = 'randomly'  # None, 'randomly' and 'block' 
lag = 4 # 1,2,3,4 ... 
Number_of_filters = None # 16,24,32, ... 
Interpolation = 'Linear' # None, 'mean', 'linear'
iterable = 100

In [None]:
df_gnar = pd.DataFrame(columns=col)

In [None]:
# Run the GNAR experiment once per missing rate and stack the per-rate result tables.
for Missingrate in rate:
    df = pd.DataFrame(columns=col)  # fresh, empty table for this rate
    gnar = GNAR_Missing(Dataset, df, iterable,Method, Missingrate, Missingtype, lag, Number_of_filters, Interpolation)
    gnar.iter()
    df_add = gnar.df.copy()
    df_gnar = pd.concat([df_gnar,df_add],axis=0)

In [None]:
save_data(df_gnar, './data/Pedal_gnar_randomly_by_rate.pkl')