# Static Graph Predictions

> Indented block


Mostly copied from: https://github.com/mims-harvard/graphml-tutorials/blob/master/01-intro/gnn-intro.ipynb

In [1]:
#import sys; sys.path.insert(0, '/content/drive/Shareddrives/DL_525/pgeo_packages')
# The (commented-out) line above adds the packages needed to run PyTorch Geometric to the import path.

import sys, os
import numpy as np
from collections import Counter
import warnings

import torch 
import torch.nn as nn
import torch.optim as optim
from torch.nn import Linear
import torch.nn.functional as F

from torch_geometric.data import DataLoader, Dataset
from torch_geometric.datasets import Planetoid, Entities
from torch_geometric.nn import GCNConv, RGCNConv, GATConv, SAGEConv, JumpingKnowledge, GINConv, DeepGraphInfomax, global_mean_pool
from torch_geometric.utils.convert import from_networkx, to_networkx

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

# To load in the data from pickles:
import pickle, os
import pandas as pd
import networkx as nx

import matplotlib.pyplot as plt

from tqdm import tqdm

In [2]:
from torch_geometric_temporal.dataset import ChickenpoxDatasetLoader
from torch_geometric_temporal.signal import temporal_signal_split
from torch_geometric_temporal.nn.recurrent import DCRNN

# Loader for the Chickenpox dataset shipped with torch_geometric_temporal.
loader = ChickenpoxDatasetLoader()

# Time lag - how many weeks to look back. The same value is passed to every
# model constructor below as its input feature count.
node_features = 8
# Third constructor argument of the first DCRNN / GConvGRU layer in every model
# defined below (presumably the filter size K - confirm against the library docs).
filter_size = 4
dataset = loader.get_dataset(node_features)

# Split the snapshots into train and test parts with train_ratio=0.8
# (presumably the first 80% of the time steps go to training - confirm).
train_dataset, test_dataset = temporal_signal_split(dataset, train_ratio=0.8)

In [3]:
import torch
import torch.nn.functional as F
from torch_geometric_temporal.nn.recurrent import DCRNN

class RecurrentGCN(torch.nn.Module):
    """One DCRNN layer followed by a linear read-out.

    The DCRNN layer maps ``node_features`` input channels to 32 channels and
    receives the module-level ``filter_size`` as its third constructor
    argument. The linear head maps those 32 channels to a single output.
    """

    def __init__(self, node_features):
        super().__init__()
        self.recurrent = DCRNN(node_features, 32, filter_size)
        self.linear = torch.nn.Linear(32, 1)

    def forward(self, x, edge_index, edge_weight):
        """Apply DCRNN -> ReLU -> Linear and return the result.

        Only ``x``, ``edge_index`` and ``edge_weight`` are forwarded to the
        recurrent layer; no hidden state is passed in explicitly.
        """
        activated = F.relu(self.recurrent(x, edge_index, edge_weight))
        return self.linear(activated)

In [4]:
class RecurrentGCN2(torch.nn.Module):
    """Two stacked DCRNN layers followed by a linear read-out.

    The first layer maps ``node_features`` channels to 32 and receives the
    module-level ``filter_size`` as its third constructor argument; the second
    layer maps 32 to 32 with a third argument of 1. The linear head maps the
    32 channels to a single output.
    """

    def __init__(self, node_features):
        super().__init__()
        self.recurrent = DCRNN(node_features, 32, filter_size)
        self.recurrent2 = DCRNN(32, 32, 1)
        self.linear = torch.nn.Linear(32, 1)

    def forward(self, x, edge_index, edge_weight):
        """Run each DCRNN layer with a ReLU after it, then the linear head."""
        out = x
        for layer in (self.recurrent, self.recurrent2):
            out = F.relu(layer(out, edge_index, edge_weight))
        return self.linear(out)

In [5]:
class RecurrentGCN3(torch.nn.Module):
    """Three stacked DCRNN layers followed by a linear read-out.

    The first layer maps ``node_features`` channels to 32 and receives the
    module-level ``filter_size`` as its third constructor argument; the second
    and third layers map 32 to 32 with a third argument of 1. The linear head
    maps the 32 channels to a single output.
    """

    def __init__(self, node_features):
        super().__init__()
        self.recurrent = DCRNN(node_features, 32, filter_size)
        self.recurrent2 = DCRNN(32, 32, 1)
        self.recurrent3 = DCRNN(32, 32, 1)
        self.linear = torch.nn.Linear(32, 1)

    def forward(self, x, edge_index, edge_weight):
        """Run each DCRNN layer with a ReLU after it, then the linear head."""
        out = x
        for layer in (self.recurrent, self.recurrent2, self.recurrent3):
            out = F.relu(layer(out, edge_index, edge_weight))
        return self.linear(out)

In [6]:
from torch_geometric_temporal.nn.recurrent import GConvGRU

class GCGRU(torch.nn.Module):
    """One GConvGRU layer followed by a linear read-out.

    The GConvGRU layer maps ``node_features`` input channels to 32 channels
    and receives the module-level ``filter_size`` as its third constructor
    argument. The linear head maps those 32 channels to a single output.
    """

    def __init__(self, node_features):
        super().__init__()
        self.recurrent = GConvGRU(node_features, 32, filter_size)
        self.linear = torch.nn.Linear(32, 1)

    def forward(self, x, edge_index, edge_weight):
        """Apply GConvGRU -> ReLU -> Linear and return the result.

        Only ``x``, ``edge_index`` and ``edge_weight`` are forwarded to the
        recurrent layer; no hidden state is passed in explicitly.
        """
        activated = F.relu(self.recurrent(x, edge_index, edge_weight))
        return self.linear(activated)

In [7]:
class GCGRU2(torch.nn.Module):
    """Two stacked GConvGRU layers followed by a linear read-out.

    The first layer maps ``node_features`` channels to 32 and receives the
    module-level ``filter_size`` as its third constructor argument; the second
    layer maps 32 to 32 with a third argument of 1. The linear head maps the
    32 channels to a single output.
    """

    def __init__(self, node_features):
        super().__init__()
        self.recurrent1 = GConvGRU(node_features, 32, filter_size)
        self.recurrent2 = GConvGRU(32, 32, 1)
        self.linear = torch.nn.Linear(32, 1)

    def forward(self, x, edge_index, edge_weight):
        """Run each GConvGRU layer with a ReLU after it, then the linear head."""
        out = x
        for layer in (self.recurrent1, self.recurrent2):
            out = F.relu(layer(out, edge_index, edge_weight))
        return self.linear(out)

In [8]:
class GCGRU3(torch.nn.Module):
    """Three stacked GConvGRU layers followed by a linear read-out.

    The first layer maps ``node_features`` channels to 32 and receives the
    module-level ``filter_size`` as its third constructor argument; the second
    and third layers map 32 to 32 with a third argument of 1. The linear head
    maps the 32 channels to a single output.
    """

    def __init__(self, node_features):
        super().__init__()
        self.recurrent1 = GConvGRU(node_features, 32, filter_size)
        self.recurrent2 = GConvGRU(32, 32, 1)
        self.recurrent3 = GConvGRU(32, 32, 1)
        self.linear = torch.nn.Linear(32, 1)

    def forward(self, x, edge_index, edge_weight):
        """Run each GConvGRU layer with a ReLU after it, then the linear head."""
        out = x
        for layer in (self.recurrent1, self.recurrent2, self.recurrent3):
            out = F.relu(layer(out, edge_index, edge_weight))
        return self.linear(out)

In [9]:
def MSEloss(y_hat, y):
    """Mean squared error between predictions and targets.

    Args:
        y_hat (torch.Tensor): predictions
        y (torch.Tensor): targets

    Returns:
        torch.Tensor: 0-dim tensor holding the mean of the squared differences
    """
    # A (N, 1) prediction minus a (N,) target broadcasts to an (N, N) matrix,
    # which would average over every prediction/target pair instead of
    # element-wise. When the two shapes differ only by singleton dimensions,
    # align the prediction with the target first. Genuinely different shapes
    # are left alone so they broadcast (or fail) exactly as before.
    if y_hat.shape != y.shape and y_hat.squeeze().shape == y.squeeze().shape:
        y_hat = y_hat.reshape(y.shape)
    return torch.mean((y_hat - y) ** 2)

In [10]:
def test_eval(model, test_dataset):
    """
    Evaluate a model on every snapshot of the dataset passed.

    The model is switched to eval mode and left in that mode on return.
    No gradients are tracked during evaluation.

    Args:
        model (Pytorch model): model
        test_dataset (Iterator): dataset iterator; each snapshot must expose
            ``x``, ``edge_index``, ``edge_attr`` and ``y``

    Returns:
        r2 (float): R^2 score averaged over the snapshots
        cost (torch.Tensor): 0-dim, gradient-free tensor with the MSE loss
            averaged over the snapshots

    Raises:
        ValueError: if the dataset yields no snapshots
    """
    model.eval()
    cost = 0
    r2 = 0
    n_snapshots = 0
    # Evaluation never back-propagates, so skip building autograd graphs.
    with torch.no_grad():
        for snapshot in test_dataset:
            y = snapshot.y
            y_hat = model(snapshot.x, snapshot.edge_index, snapshot.edge_attr)
            # A (N, 1) prediction against a (N,) target would broadcast to
            # (N, N); align shapes that differ only by singleton dimensions.
            if y_hat.shape != y.shape and y_hat.squeeze().shape == y.squeeze().shape:
                y_hat = y_hat.reshape(y.shape)
            cost = cost + torch.mean((y_hat - y) ** 2)
            # Move both tensors to host memory before handing them to sklearn.
            r2 += r2_score(y.detach().cpu().numpy(), y_hat.detach().cpu().numpy())
            n_snapshots += 1
    if n_snapshots == 0:
        raise ValueError("test_eval received a dataset with no snapshots")
    return r2 / n_snapshots, cost / n_snapshots

In [11]:
def train_loop(model, epochs, train_dataset, test_dataset, optimizer):
    """
    Train the model and evaluate it on both datasets after every epoch.

    Each epoch averages the MSE loss over all training snapshots and takes
    one optimizer step on that average.

    Args:
        model (pytorch model): model
        epochs (int): number of epochs
        train_dataset (Iterator): PyG data iterator
        test_dataset (Iterator): PyG data iterator
        optimizer (torch optimizer): Pytorch model optimizer
    Returns:
        train_loss (list): per-epoch training loss (0-dim tensors, detached
            from the autograd graph)
        test_loss (list): per-epoch test loss as returned by test_eval
        train_acc (list): per-epoch mean R^2 on the training set
        test_acc (list): per-epoch mean R^2 on the test set
    Raises:
        ValueError: if train_dataset yields no snapshots
    """
    model.train()
    train_loss = []
    test_loss = []
    train_acc = []
    test_acc = []

    for _ in tqdm(range(epochs)):
        # test_eval() switches the model to eval mode at the end of every
        # epoch, so training mode has to be re-enabled here.
        model.train()
        cost = 0
        n_snapshots = 0
        for snapshot in train_dataset:
            y_hat = model(snapshot.x, snapshot.edge_index, snapshot.edge_attr)
            cost = cost + MSEloss(y_hat, snapshot.y)
            n_snapshots += 1
        if n_snapshots == 0:
            raise ValueError("train_loop received a training dataset with no snapshots")
        cost = cost / n_snapshots
        cost.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Keep only the value: storing the attached tensor would keep every
        # epoch's autograd graph alive for the whole run.
        train_loss.append(cost.detach())
        te_acc, te_loss = test_eval(model, test_dataset)  # Get accuracy after the epoch
        tr_acc, _ = test_eval(model, train_dataset)

        train_acc.append(tr_acc)
        test_acc.append(te_acc)
        test_loss.append(te_loss)

    return train_loss, test_loss, train_acc, test_acc

In [12]:
def _loss_to_numpy(value):
    """Return a loss value as a NumPy scalar array; non-tensors pass through unchanged."""
    if isinstance(value, torch.Tensor):
        return value.detach().cpu().numpy()
    return value


def plot(model, epochs, train_dataset, test_dataset, optimizer, ml_name="blah"):
    """
    Controller for training, evaluation at each time step, and plotting loss and R^2 curves

    Writes three files named after ``ml_name``: ``<ml_name>.csv`` with the
    per-epoch metrics, ``<ml_name>_r2.png`` and ``<ml_name>_loss.png``.

    Args:
        model (pytorch model): model
        epochs (int): number of epochs
        train_dataset (Iterator): PyG data iterator
        test_dataset (Iterator): PyG data iterator
        optimizer (torch optimizer): Pytorch model optimizer
        ml_name (str, optional): name for plots and saving CSVs and PNGs
    """
    tr_loss, te_loss, tr_acc, te_acc = train_loop(model, epochs, train_dataset, test_dataset, optimizer)
    x = list(range(len(tr_loss)))
    xt = list(range(len(te_loss)))

    # Loss values arrive as tensors that may still carry autograd history.
    # Convert both series once, so the CSV holds numbers rather than tensor
    # reprs and Matplotlib can turn them into arrays.
    tr_loss_values = [_loss_to_numpy(v) for v in tr_loss]
    te_loss_values = [_loss_to_numpy(v) for v in te_loss]

    results = pd.DataFrame({'Train_Acc': tr_acc, 'Test_Acc': te_acc, 'Train_Loss': tr_loss_values, 'Test_Loss': te_loss_values})
    # Model name on the first row only; the slice keeps the column length
    # valid when there are zero epochs.
    results['model'] = ([ml_name] + [None] * (len(tr_loss) - 1))[:len(tr_loss)]
    results.to_csv(ml_name+'.csv', index = False)

    fig, ax1 = plt.subplots()
    ax1.plot(x, tr_acc, label = 'Train R^2')
    ax1.plot(x, te_acc, label = 'Test R^2')
    r2_title = 'Coef. of Determination Scores '+ml_name
    ax1.set_title(r2_title)
    ax1.legend()
    fig.savefig(ml_name+'_r2.png')
    plt.close(fig)

    fig, ax2 = plt.subplots()
    ax2.plot(xt, tr_loss_values, label = 'Train MSE loss')
    ax2.plot(xt, te_loss_values, label = 'Test MSE loss')
    mse_title = 'MSE loss '+ml_name
    ax2.set_title(mse_title)
    ax2.legend()
    fig.savefig(ml_name+'_loss.png')
    plt.close(fig)

In [13]:
# Train the single-layer DCRNN model for 200 epochs and save its CSV and plots.
model = RecurrentGCN(node_features)
optimizer = optim.Adam(model.parameters(), lr=0.01)

plot(
    model=model,
    epochs=200,
    train_dataset=train_dataset,
    test_dataset=test_dataset,
    optimizer=optimizer,
    ml_name='dcrnn_1_layer',
)

100%|██████████| 200/200 [10:24<00:00,  3.12s/it]


In [13]:
# Train the two-layer DCRNN model for 200 epochs and save its CSV and plots.
model = RecurrentGCN2(node_features)
optimizer = optim.Adam(model.parameters(), lr=0.01)

plot(
    model=model,
    epochs=200,
    train_dataset=train_dataset,
    test_dataset=test_dataset,
    optimizer=optimizer,
    ml_name='dcrnn_2_layer',
)

100%|██████████| 200/200 [13:22<00:00,  4.01s/it]


In [13]:
# Train the three-layer DCRNN model for 170 epochs and save its CSV and plots.
model = RecurrentGCN3(node_features)
optimizer = optim.Adam(model.parameters(), lr=0.01)

plot(
    model=model,
    epochs=170,
    train_dataset=train_dataset,
    test_dataset=test_dataset,
    optimizer=optimizer,
    ml_name='dcrnn_3_layer',
)

100%|██████████| 170/170 [13:53<00:00,  4.91s/it]


In [13]:
# Train the single-layer GConvGRU model for 200 epochs and save its CSV and plots.
model = GCGRU(node_features)
optimizer = optim.Adam(model.parameters(), lr=0.01)

plot(
    model=model,
    epochs=200,
    train_dataset=train_dataset,
    test_dataset=test_dataset,
    optimizer=optimizer,
    ml_name='gcgru_1_layer',
)

100%|██████████| 200/200 [12:08<00:00,  3.64s/it]


In [13]:
# Train the two-layer GConvGRU model for 195 epochs and save its CSV and plots.
model = GCGRU2(node_features)
optimizer = optim.Adam(model.parameters(), lr=0.01)

plot(
    model=model,
    epochs=195,
    train_dataset=train_dataset,
    test_dataset=test_dataset,
    optimizer=optimizer,
    ml_name='gcgru_2_layer',
)

100%|██████████| 195/195 [16:31<00:00,  5.08s/it]


In [13]:
# Train the three-layer GConvGRU model for 150 epochs and save its CSV and plots.
model = GCGRU3(node_features)
optimizer = optim.Adam(model.parameters(), lr=0.01)

plot(
    model=model,
    epochs=150,
    train_dataset=train_dataset,
    test_dataset=test_dataset,
    optimizer=optimizer,
    ml_name='gcgru_3_layer',
)

100%|██████████| 150/150 [17:09<00:00,  6.86s/it]
