In [1]:
import os
import sys
# Put the parent directory of the notebook's working directory on sys.path
# so that the `src` package imported in the following cells can be resolved.
module_path = os.path.abspath(os.pardir)
already_registered = module_path in sys.path
if not already_registered:
    sys.path.append(module_path)

In [2]:
from pathlib import Path
from src.gnn_dataset import CityGraphDataset
import time
import numpy as np
from torch_geometric.data import Dataset, Data, DataLoader

In [3]:
import json
from tqdm.notebook import tqdm
from torch_geometric.data import Batch

def create_submission(base_dir, city, output_folder, dataset, pretrained_model):
    """Predict every test slot of a city and write one HDF5 file per test date.

    Args:
        base_dir: Directory containing the ``city`` folder, or ``None`` to use
            ``city`` itself as the input directory.
        city: City name; used as the folder name and as the prefix of the
            ``{city}_testing_5.h5`` input file.
        output_folder: Root of the submission; results are written to
            ``{output_folder}/{city}/{date}_test.h5``.
        dataset: Object providing ``get_training_data``, ``city_static``,
            ``edges``, ``convert_graph_minibatch_y_to_image`` and
            ``forward_steps`` (a ``CityGraphDataset`` in this notebook).
        pretrained_model: Model exposing ``to``, ``freeze`` and ``eval`` and
            callable on a ``torch_geometric`` ``Data`` object.

    Returns:
        None. The predictions are written to disk.
    """
    # Imported here so the function does not rely on a later notebook cell:
    # `torch` is only imported several cells below and `h5py` is not imported
    # anywhere else in the notebook.
    import h5py
    import torch

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    pretrained_model.to(device)
    pretrained_model.freeze()
    pretrained_model.eval()

    input_path = city if base_dir is None else base_dir + '/' + city
    output_path = output_folder + '/' + city
    os.makedirs(output_path, exist_ok=True)

    test_slots = get_test_slots(input_path)

    # The input file is the same for every date, so it is opened only once.
    with h5py.File(f'{input_path}/{city}_testing_5.h5', 'r') as test_file:
        for date, frame in tqdm(test_slots.items()):
            all_data = test_file[f'{date}_test.h5']
            predictions = []

            for i, slot in enumerate(frame):
                slice_id = slot + 12
                x = dataset.get_training_data(all_data[i], dataset.city_static, 12, slice_id)
                data = Data(x=x, edge_index=dataset.edges)
                with torch.no_grad():
                    out = pretrained_model(data.to(device))
                # A fresh canvas per slot: whether the converter writes into
                # the tensor it is given is not visible from here.
                zeros_image = torch.zeros([1, 12, 495, 436, 8], dtype=torch.float)
                out = dataset.convert_graph_minibatch_y_to_image(out.to('cpu'), zeros_image)
                # Keep only the last forward step, then quantise to 0..255 bytes.
                out = out[:, dataset.forward_steps - 1, :, :, :]
                out = torch.clamp(out.round(), min=0, max=255).byte()
                predictions.append(out)

            if not predictions:
                # A date without slots has nothing to write. Skipping avoids
                # reusing the previous date's predictions.
                continue

            sub = torch.cat(predictions, 0)
            with h5py.File(f'{output_path}/{date}_test.h5', 'w') as out_file:
                out_file.create_dataset('array', data=sub.numpy(), compression="gzip")

def get_test_slots(file_dir):
    """Load ``test_slots.json`` from ``file_dir`` and flatten it into one dict.

    The JSON document is iterated as a sequence of mappings; their key/value
    pairs are merged in order, so a key that appears again later overwrites
    the earlier value.
    """
    slots_file = f'{file_dir}/test_slots.json'
    with open(slots_file, 'r') as handle:
        entries = json.load(handle)

    merged = {}
    for entry in entries:
        for key, value in entry.items():
            merged[key] = value
    return merged

In [4]:
# Sanity check: list the files in the notebook's current working directory.
!ls

create_graph_dataset.ipynb train.ipynb


In [5]:
# DataLoader settings: one sample per batch, fixed order, no worker processes.
params = dict(batch_size=1, shuffle=False, num_workers=0)

# NOTE: despite its name, `training_set` is built with mode='validation'.
training_set = CityGraphDataset(
    '../data/raw',
    '../data/processed',
    'BERLIN',
    np.array([5, 10, 15, 30, 45, 60]),
    mode='validation',
    overlap=False,
    normalise='Active',
    full_val=True,
)
training_generator = DataLoader(training_set, **params)

../data/raw/BERLIN
Normalising by: Active


In [6]:
# Pull the first batch from the loader for manual inspection.
ite = iter(training_generator)
d = next(ite)

In [8]:
from src.models_graph_ensnet import Graph_ensnet

In [9]:
import torch
from torch import nn
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
#from pytorch_lightning.callbacks import LearningRateLogger
from torch.nn import functional as F
from torch_geometric.nn import GCNConv, NNConv, Set2Set, EdgeConv, GatedGraphConv, GATConv, PNAConv, SAGEConv, SGConv, PointConv, ChebConv
from torch_geometric.nn import GraphUNet, global_mean_pool, InstanceNorm, LayerNorm
from torch.nn import Sequential, Linear, ReLU, GRU, Tanh, Sigmoid, LeakyReLU, ELU
from torch_geometric.utils import degree
import os

class LightningGCN(pl.LightningModule):
    """Lightning module that trains a ``Graph_ensnet`` on ``CityGraphDataset`` graphs.

    The module builds its own training and validation datasets and exposes
    them through the ``*_dataloader`` hooks, so ``trainer.fit(model)`` needs
    no explicit loaders.

    Args:
        base_dir: Second positional argument of ``CityGraphDataset`` (the
            notebook passes the processed-data directory).
        city: City name forwarded to ``CityGraphDataset``.
        forward_mins: Forecast horizons forwarded to ``CityGraphDataset``.
        learning_rate: Learning rate handed to the Adam optimiser.
        overlap: ``overlap`` flag forwarded to ``CityGraphDataset``.
    """

    def __init__(self, base_dir, city, forward_mins, learning_rate, overlap):
        super().__init__()
        self.learning_rate = learning_rate
        self.loss_fn = nn.MSELoss()

        self.batch_size = 2
        self.pca_static = False
        # Normalisation mode forwarded to the datasets. Other values tried
        # earlier in this notebook: None, 'noZeros', 'lmdas'.
        self.normalise = 'Active'

        # When True, validation/test batches are 5-tuples and the loss is
        # computed on reconstructed images (see ``_step_loss``).
        self.full_val = True

        # Rows of [day, timestamp, loss] appended by ``test_step``. It must
        # exist before testing starts, otherwise ``test_step`` raises
        # AttributeError.
        self.val_store = []

        self.train_data = CityGraphDataset(
            '../data/raw', base_dir, city, forward_mins,
            overlap=overlap, normalise=self.normalise, pca_static=self.pca_static)
        self.val_data = CityGraphDataset(
            '../data/raw', base_dir, city, forward_mins, mode='validation',
            overlap=overlap, normalise=self.normalise, full_val=self.full_val,
            pca_static=self.pca_static)

        self.net = Graph_ensnet(
            self.train_data.num_node_features, self.train_data[0].y.shape[-1],
            nh=100, K=4, K_mix=2, depth=4)

    def forward(self, data):
        """Run the wrapped network on a graph (batch) and return its output."""
        return self.net(data)

    def _step_loss(self, batch):
        """Compute the evaluation loss shared by validation and test steps.

        Returns:
            ``(loss, day, timestamp)``; ``day`` and ``timestamp`` are ``None``
            when ``full_val`` is disabled.
        """
        if self.full_val:
            val_graph, y_label_image, zeros_image, day, timestamp = batch
            out = self.forward(val_graph)
            out = self.val_data.convert_graph_minibatch_y_to_image(out, zeros_image)
            out = torch.clamp(out.round(), min=0, max=255)
            # Only the last forward step is scored.
            last_step = self.val_data.forward_steps - 1
            out = out[:, last_step, :, :, :]
            y_label_image = y_label_image[:, last_step, :, :, :]
            return self.loss_fn(out, y_label_image), day, timestamp

        out = torch.clamp(self.forward(batch).round(), min=0, max=255)
        return self.loss_fn(out, batch.y), None, None

    def training_step(self, train_batch, batch_idx):
        """Return the MSE between the network output and ``train_batch.y``."""
        out = self.forward(train_batch)
        loss = self.loss_fn(out, train_batch.y)
        logs = {"train_loss": loss}
        return {"loss": loss, "log": logs, "progress_bar": logs}

    def validation_step(self, val_batch, batch_idx):
        """Return the validation loss on rounded predictions clamped to 0..255."""
        loss, _, _ = self._step_loss(val_batch)
        logs = {"val_loss": loss}
        return {"val_loss": loss, "log": logs}

    def test_step(self, val_batch, batch_idx):
        """Like ``validation_step``, and record per-sample results in ``val_store``."""
        loss, day, timestamp = self._step_loss(val_batch)
        if self.full_val:
            # [0]: the test loader uses batch_size=1, so take the only element.
            self.val_store.append([
                day.cpu().numpy()[0].astype(float),
                timestamp.cpu().numpy()[0].astype(float),
                loss.detach().cpu().numpy(),
            ])
        logs = {"test_loss": loss}
        return {"test_loss": loss, "log": logs}

    def training_epoch_end(self, outputs):
        """Print and log the mean training loss of the epoch."""
        avg_loss = torch.stack([x['loss'] for x in outputs]).mean()
        print(f'Average Training Loss: {avg_loss}')
        tensorboard_logs = {'avg_train_loss': avg_loss}
        return {'avg_train_loss': avg_loss, 'log': tensorboard_logs}

    def validation_epoch_end(self, outputs):
        """Print and log the mean validation loss of the epoch."""
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        print(f'Average Val Loss: {avg_loss}')
        tensorboard_logs = {'avg_val_loss': avg_loss}
        return {'avg_val_loss': avg_loss, 'log': tensorboard_logs}

    def test_epoch_end(self, outputs):
        """Print and log the mean test loss."""
        avg_loss = torch.stack([x['test_loss'] for x in outputs]).mean()
        print(f'Average Test Loss: {avg_loss}')
        tensorboard_logs = {'avg_test_loss': avg_loss}
        return {'avg_test_loss': avg_loss, 'log': tensorboard_logs}

    def train_dataloader(self):
        """Shuffled loader over the training dataset."""
        return DataLoader(self.train_data, shuffle=True, batch_size=self.batch_size, num_workers=8)

    def val_dataloader(self):
        """Loader over the validation dataset."""
        return DataLoader(self.val_data, batch_size=self.batch_size, num_workers=2)

    def test_dataloader(self):
        """Loader over the validation dataset with batch size 1.

        ``test_step`` indexes ``[0]`` into ``day`` and ``timestamp``, so it
        relies on one sample per batch.
        """
        return DataLoader(self.val_data, batch_size=1, num_workers=2)

    def configure_optimizers(self):
        """Adam with weight decay plus cosine warm restarts, stepped per batch."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate, weight_decay=0.0001)
        # Restart the cosine cycle every 4000 scheduler steps with a constant
        # cycle length (T_mult=1).
        scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, 4000, 1)
        # No 'monitor' entry: the previous value ('val_recall') named a metric
        # this module never logs, and the scheduler needs no monitored metric.
        schedulers = [
            {'scheduler': scheduler,
             'interval': 'step',
             'frequency': 1},
        ]
        return [optimizer], schedulers

    def cyclical_lr(self, stepsize, min_lr=3e-4, max_lr=3e-3):
        """Build a triangular cyclical learning-rate function.

        The returned callable maps an iteration index ``it`` to a value that
        rises linearly from ``min_lr`` to ``max_lr`` over ``stepsize``
        iterations and falls back over the next ``stepsize``. It is not used
        by ``configure_optimizers``.
        """
        # Imported locally because the notebook never imports `math`.
        import math

        def scaler(cycle):
            # Constant amplitude; replace to get a non-triangular schedule.
            return 1.

        def relative(it, stepsize):
            # Position within the current cycle, mapped to [0, 1].
            cycle = math.floor(1 + it / (2 * stepsize))
            x = abs(it / stepsize - 2 * cycle + 1)
            return max(0, (1 - x)) * scaler(cycle)

        def lr_lambda(it):
            return min_lr + (max_lr - min_lr) * relative(it, stepsize)

        return lr_lambda

In [10]:
# train
# Training configuration for the Berlin model.
overlap = False
base_dir = '../data/processed'
city = 'BERLIN'

# Forecast horizons are 5 to 60 minutes; the initial learning rate is 1e-2.
model = LightningGCN(
    base_dir,
    city,
    forward_mins=np.array([5, 10, 15, 30, 45, 60]),
    learning_rate=1e-2,
    overlap=overlap,
)

../data/raw/BERLIN
Normalising by: Active
../data/raw/BERLIN
Normalising by: Active


In [None]:
#checkpoint = torch.load('/content/drive/My Drive/t4c/lightning_logs/version_22/checkpoints/epoch=6.ckpt', map_location=lambda storage, loc: storage)
#model.load_state_dict(checkpoint['state_dict'])

In [None]:
# resume from a specific checkpoint
#trainer = pl.Trainer(max_epochs=64, gpus=1,default_root_dir='/content/drive/My Drive/t4c/', resume_from_checkpoint='/content/drive/My Drive/t4c/lightning_logs/version_65/checkpoints/epoch=9.ckpt')

In [11]:
# Build the Lightning trainer, capped at 20 epochs with default device settings.
# The commented-out lines are alternative configurations that were tried
# (GPU runs, a Google Drive log directory, mixed precision, LR finder, profiler).
#lr_logger = LearningRateLogger(logging_interval='step')
#trainer = pl.Trainer(max_epochs=10, gpus=1,limit_train_batches=0.05, default_root_dir='/content/drive/My Drive/t4c/')
#trainer = pl.Trainer(max_epochs=80, gpus=1, default_root_dir='/content/drive/My Drive/t4c/')
trainer = pl.Trainer(max_epochs=20)
#trainer = pl.Trainer(max_epochs=20, gpus=1, precision=16, profiler=True, auto_lr_find=True)
#trainer = pl.Trainer(max_epochs=2, gpus=1, profiler=True)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores


In [None]:
#lr_finder = trainer.tuner.lr_find(model, min_lr=1e-03)
# Plot with
#fig = lr_finder.plot(suggest=True)
#fig.show()

In [13]:
# Train the model. No loaders are passed here; LightningGCN defines its own
# train/val dataloader hooks and optimiser configuration.
trainer.fit(model)

In [None]:
# Write the submission files for `city` using the trained model.
# NOTE(review): with base_dir=None the inputs are read from a `BERLIN/` folder
# relative to the working directory, while the datasets above load from
# '../data/raw'. Confirm the test files really live there.
# NOTE(review): the output folder is an absolute Google Drive (Colab) path;
# adjust it when running elsewhere.
create_submission(None, city, '/content/drive/My Drive/t4c/submission', model.val_data, model)