In [1]:
import sys
import copy
import random
import itertools
import numpy   as np
import pandas  as pd
import os.path as osp
from   glob import glob

import invisible_cities.io.dst_io as dio

import torch
from   torch_geometric.data import Data, Dataset
from   torch_geometric.data.makedirs import makedirs

In [20]:
class Dataset(Dataset):
    '''
    On-disk graph dataset built from labelled raw files.

    Raw files are looked up in ``raw_dir`` with the pattern ``*_<tag>.h5`` and each
    one is converted (through ``graphDataset``) into a ``data_<N>_<tag>.pt`` file
    stored in ``processed_dir``, where ``N`` is the number found in the raw file name.

    Parameters
    ----------
    root : str
        Root folder of the dataset (the base class derives raw_dir / processed_dir from it).
    tag : str
        Suffix used to select the raw files and to name the processed ones.
    transform, pre_transform, pre_filter :
        Forwarded untouched to the base class constructor.
    directed, simplify_segclass : bool
        Forwarded to ``graphDataset`` when a raw file is processed.
    '''
    def __init__(self, root, tag = '0nubb', transform=None, pre_transform=None, pre_filter=None, directed = False, simplify_segclass = False):
        # These attributes are read by the properties / process() below, so they are
        # set before the base-class constructor runs (the PyG base class is expected
        # to query them, and possibly call process(), during its own __init__).
        # Sorting key: the number N in names such as <prefix>_N_<tag>.<ext>
        self.sort = lambda x: int(x.split('_')[-2])
        self.tag = tag
        self.directed = directed
        self.simplify_segclass = simplify_segclass
        super().__init__(root, transform, pre_transform, pre_filter)

    @property
    def raw_file_names(self):
        '''
        Returns a list of the raw files in order (supposing they are beersheba labelled files that have the structure beersheba_label_N_tag.h5)
        '''
        # osp.basename instead of split('/') so the result does not depend on the path separator
        rfiles = [osp.basename(path) for path in glob(self.raw_dir + '/*_{}.h5'.format(self.tag))]
        return sorted(rfiles, key = self.sort)

    @property
    def processed_file_names(self):
        '''
        Returns a list of the processed files in order (supposing they are stored tensors with the structure data_N_tag.pt)
        '''
        pfiles = [osp.basename(path) for path in glob(self.processed_dir + '/data_*_{}.pt'.format(self.tag))]
        return sorted(pfiles, key = self.sort)

    def process(self):
        '''
        Converts every raw file that has no processed counterpart yet and saves the
        result as data_N_tag.pt inside processed_dir.
        '''
        makedirs(self.processed_dir)
        # A set gives constant-time membership tests for the "already done" check
        already_processed = {self.sort(name) for name in self.processed_file_names}
        for raw_path in self.raw_paths:
            idx = self.sort(raw_path)
            if idx in already_processed:
                # to avoid processing already processed files
                continue
            data = graphDataset(raw_path, directed=self.directed, simplify_segclass=self.simplify_segclass)

            # NOTE: pre_filter and pre_transform are intentionally not applied here;
            # the object returned by graphDataset is stored as it is.
            torch.save(data, osp.join(self.processed_dir, f'data_{idx}_{self.tag}.pt'))

    def len(self):
        '''
        Number of processed files currently present on disk.
        '''
        return len(self.processed_file_names)

    def get(self, idx):
        '''
        Loads the idx-th processed file (position in the number-sorted file list).

        The position is translated into a file name through processed_file_names, so
        the file numbers do not need to start at 0 nor be contiguous. When they are
        exactly 0..len-1 this returns the same file as loading data_<idx>_<tag>.pt.
        '''
        file_name = self.processed_file_names[idx]
        data = torch.load(osp.join(self.processed_dir, file_name))
        return data

    def join(self):
        '''
        Loads every processed file and concatenates their contents into a single list.
        '''
        dataset = []
        for processed_path in self.processed_paths:
            # each processed file is expected to hold an iterable of graphs
            dataset.extend(torch.load(processed_path))
        return dataset

In [4]:
from torch_geometric.nn import GENConv, DeepGCNLayer
from torch.nn import BatchNorm1d, CrossEntropyLoss
import torch.nn.functional as F
from torch_geometric.loader import DataLoader

I'm going to create a GCN with skip connections using the DeepGCNLayer. I'll use the 'res+' structure, as it seems to perform better, and for the convolution I'll use the GENeralized Graph Convolution (GENConv), created and tested by the same authors as the skip-connection structure.

Then, the normalization between layers will be just batch norm, and the activation will be ReLU.

More precisely, it is the DyResGen network from https://arxiv.org/pdf/2006.07739.pdf.

![Alt text](skip_connections.png)

In [77]:
class DeeperGCN(torch.nn.Module):
    """
    Node-level classifier made of a linear encoder, a stack of 'res+'
    DeepGCNLayer blocks (GENConv + BatchNorm1d + ReLU) and a linear head.

    Parameters
    ----------
    input_dim  : number of input features per node
    hidden_dim : width used by the encoder output and by every graph layer
    output_dim : number of values produced per node (one per class)
    num_layers : number of DeepGCNLayer blocks
    dropout    : dropout probability, used inside the blocks and before the head
    """
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers,
                 dropout):
        super().__init__()
        self.dropout = dropout
        # Container for the DeepGCNLayer blocks (filled below)
        self.layers = torch.nn.ModuleList()

        # 1. Linear encoder: input_dim -> hidden_dim
        self.encoder = torch.nn.Linear(input_dim, hidden_dim)

        # 2. num_layers blocks with skip connections built around GENConv
        for _ in range(num_layers):
            block = DeepGCNLayer(
                GENConv(hidden_dim, hidden_dim, aggr='softmax',
                        t=1.0, learn_t=True, num_layers=2, norm='layer'),
                torch.nn.BatchNorm1d(hidden_dim),
                torch.nn.ReLU(inplace=True),
                block='res+',
                dropout=dropout,
                ckpt_grad=False,
            )
            self.layers.append(block)

        # 3. Linear head: hidden_dim -> output_dim (one score per class and node)
        self.lin = torch.nn.Linear(hidden_dim, output_dim)

    def reset_parameters(self):
        """Re-initialise the graph blocks first, then the encoder and the head."""
        for module in (*self.layers, self.encoder, self.lin):
            module.reset_parameters()

    def forward(self, x, edge_index):
        """
        Returns the per-node class scores for node features x and connectivity edge_index.
        """
        first_block = self.layers[0]

        hidden = self.encoder(x)
        # Only the convolution of the first block is applied here; its norm and
        # activation are applied after the remaining blocks (see below).
        hidden = first_block.conv(hidden, edge_index)

        for block in self.layers[1:]:
            hidden = block(hidden, edge_index)

        hidden = first_block.act(first_block.norm(hidden))
        hidden = F.dropout(hidden, p=self.dropout, training=self.training)
        return self.lin(hidden)
    

In [71]:
def train(model, loader, device, optimizer, loss_fn):
    """
    Runs one training epoch over `loader`.

    Parameters
    ----------
    model     : torch module called as model(x, edge_index), returning (N, C) scores
    loader    : iterable of batches exposing .to(device), .x, .edge_index and .y
    device    : device the batches are moved to
    optimizer : optimizer already bound to the model parameters
    loss_fn   : callable taking the (N, C) scores and the (N,) integer targets

    Returns
    -------
    float : loss of the LAST batch of the epoch (not the epoch average),
            or 0.0 when the loader yields no batches.
    """
    # Tell the model it's going to train
    model.train()
    loss = None

    for batch in loader:
        batch = batch.to(device)

        optimizer.zero_grad()

        # Call the module itself rather than .forward so registered hooks are honoured
        out = model(batch.x.float(), batch.edge_index)

        # Labels are stored as [1, C] while the network outputs positions [0, C-1],
        # hence the shift by one. They are cast to long (required for class-index
        # targets) without leaving the device, and flattened from (N, 1) to (N).
        label = batch.y.long().reshape(-1) - 1

        loss = loss_fn(out, label)

        # Back propagation followed by the parameter update
        loss.backward()
        optimizer.step()

    # An empty loader leaves `loss` unset; report 0.0 instead of failing on .item()
    return loss.item() if loss is not None else 0.0

In [72]:
def accuracy(pred, true):
    """
    Fraction of positions where `pred` equals `true`.

    Both inputs are converted with np.asarray, so lists and arrays are accepted.
    Raises ValueError when the shapes differ (avoids silent broadcasting, e.g.
    (N, 1) against (N,)). Returns NaN when there is nothing to compare.
    """
    pred = np.asarray(pred)
    true = np.asarray(true)
    if pred.shape != true.shape:
        raise ValueError(f'pred and true must have the same shape, got {pred.shape} and {true.shape}')
    if pred.size == 0:
        return float('nan')
    # Mean of the boolean matches == number of hits / number of elements
    acc = np.mean(pred == true)
    return acc

In [73]:
def IoU(pred, true, nclass = 3):
    """
        Intersection over union is a metric for semantic segmentation.
        It returns a IoU value for each class of our input tensors/arrays.

        pred, true : integer class labels in [0, nclass), same number of elements
        nclass     : number of classes (length of the returned array)

        A machine epsilon is added to numerator and denominator, so a class that is
        absent from both pred and true gets IoU = 1 instead of a division by zero.
        Raises ValueError if the inputs differ in size or hold labels outside
        [0, nclass) (negative labels would otherwise wrap around to the last classes).
    """
    eps = sys.float_info.epsilon
    pred_idx = np.asarray(pred).reshape(-1).astype(np.int64)
    true_idx = np.asarray(true).reshape(-1).astype(np.int64)

    if pred_idx.shape != true_idx.shape:
        raise ValueError(f'pred and true must have the same number of elements, got {pred_idx.size} and {true_idx.size}')
    for name, labels in (('pred', pred_idx), ('true', true_idx)):
        if labels.size and (labels.min() < 0 or labels.max() >= nclass):
            raise ValueError(f'{name} contains labels outside [0, {nclass})')

    # Confusion matrix with rows = true class, columns = predicted class.
    # Each (true, pred) pair is encoded as one flat index and counted in a single pass.
    flat = true_idx * nclass + pred_idx
    confusion_matrix = np.bincount(flat, minlength=nclass * nclass).reshape(nclass, nclass).astype(float)

    intersection = np.diag(confusion_matrix)
    # |pred == c| + |true == c| - |both == c|
    union = confusion_matrix.sum(axis=0) + confusion_matrix.sum(axis=1) - intersection
    return (intersection + eps) / (union + eps)

In [74]:
def eval(model, loader, device):
    """
    Evaluates `model` on every batch of `loader`.

    Predictions are the argmax of the model output per node; labels are shifted by
    one to match the prediction positions. Both are then collapsed with a fixed
    7 -> 4 class mapping (neighbour classes 3, 4, 5 are identified with 0, 1, 2 and
    class 6 becomes 3) before computing the metrics.

    Returns
    -------
    acc    : value returned by accuracy(y_pred, y_true)
    iou    : value returned by IoU(y_pred, y_true, nclass=4)
    y_pred : 1-D numpy array with the mapped predicted classes
    y_true : 1-D numpy array with the mapped true classes

    Raises ValueError if the loader yields no batches and KeyError if a label or a
    prediction falls outside the 7 classes covered by the mapping.
    """
    # Set the model to evaluate
    model.eval()
    y_true = []
    y_pred = []

    # No gradients are needed anywhere in the evaluation loop
    with torch.no_grad():
        for batch in loader:
            batch = batch.to(device)

            # Call the module itself rather than .forward so registered hooks are honoured
            out = model(batch.x.type(torch.float), batch.edge_index)

            # For each node pick the class with the highest score
            y_pred.append(out.argmax(dim=-1).reshape(-1).cpu())
            # Labels are shifted by 1 to match the prediction positions
            y_true.append(batch.y.reshape(-1).cpu() - 1)

    if not y_pred:
        raise ValueError('eval received a loader without batches')

    # Concatenate the per-batch results and move to numpy
    y_pred = torch.cat(y_pred).numpy()
    y_true = torch.cat(y_true).numpy()

    # Identify the neighbor segclass with their original segclass to compare each node
    label_map = {0:0, 1:1, 2:2, 3:0, 4:1, 5:2, 6:3}
    lookup = np.array([label_map[key] for key in range(len(label_map))])

    def _remap(labels, name):
        # Vectorised label_map lookup; the range check keeps the KeyError a plain
        # dictionary lookup would raise (and prevents negative-index wrap-around).
        labels = labels.astype(np.int64)
        if labels.size and (labels.min() < 0 or labels.max() >= len(lookup)):
            raise KeyError(f'{name} contains classes outside [0, {len(lookup)})')
        return lookup[labels]

    y_pred = _remap(y_pred, 'y_pred')
    y_true = _remap(y_true, 'y_true')

    # Accuracy is kept for reference, but IoU is the more meaningful metric for segmentation
    acc = accuracy(y_pred, y_true)
    iou = IoU(y_pred, y_true, nclass=4)
    return acc, iou, y_pred, y_true

In [5]:
def create_idx_split(dataset, train_perc, seed=None):
    """
    Randomly splits the positions 0..len(dataset)-1 into train / valid / test.

    Parameters
    ----------
    dataset    : any sized object; only len(dataset) is used
    train_perc : fraction (between 0 and 1) assigned to training; the remainder is
                 shared equally between validation and test
    seed       : optional seed for a reproducible split. With the default None the
                 global `random` module state is used.

    Returns
    -------
    dict with keys 'train', 'valid' and 'test', each a sorted 1-D integer tensor of
    indices. The three sets are disjoint and together cover every index.
    """
    if not 0 <= train_perc <= 1:
        raise ValueError(f'train_perc must be within [0, 1], got {train_perc}')

    n_items = len(dataset)
    # Shuffle a plain list: random.shuffle is written for mutable sequences, and the
    # permutation it produces only depends on the length and the generator state.
    order = list(range(n_items))
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(order)
    indices = np.array(order, dtype=np.int64)

    valid_perc = (1 - train_perc) / 2
    # Slice boundaries (the "+ 1" is kept from the original sizing rule)
    train_end = int((n_items + 1) * train_perc)
    valid_end = int((n_items + 1) * (train_perc + valid_perc))

    train_data = torch.tensor(np.sort(indices[:train_end]))
    valid_data = torch.tensor(np.sort(indices[train_end:valid_end]))
    test_data = torch.tensor(np.sort(indices[valid_end:]))
    idx_split = {'train':train_data, 'valid':valid_data, 'test':test_data}
    return idx_split

In [9]:
# Run on the GPU when torch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print('Device: {}'.format(device))

Device: cuda


In [78]:
# Hyper-parameters shared by the network definition and the training loop
args = {
    'device': device,      # where model and batches live
    'nclass': 7,           # number of output classes per node
    'num_layers': 25,      # number of DeepGCNLayer blocks
    'hidden_dim': 30,      # width of the hidden node features
    'dropout': 0.1,        # dropout probability
    'lr': 0.001,           # learning rate for the optimizer
    'epochs': 100,         # number of training epochs
    'batch_size': 50,      # graphs per batch in every loader
}

In [11]:
# Load every processed graph into one list, split the positions into
# train / valid / test and wrap each subset in its own data loader.
# NOTE: the data location is an absolute, cluster-specific path.
file_path = '/mnt/lustre/scratch/nlsas/home/usc/ie/mpm/NEXT100/labelled_data/0nubb/554mm_voxels/'

dataset = Dataset(file_path, '0nubb').join()
idx_split = create_idx_split(dataset, 0.8)

# Materialise the graphs belonging to each subset
train_graphs = [dataset[i] for i in idx_split['train']]
valid_graphs = [dataset[i] for i in idx_split['valid']]
test_graphs = [dataset[i] for i in idx_split['test']]

# Only the training loader shuffles its graphs
train_loader = DataLoader(train_graphs, batch_size=args['batch_size'], shuffle=True, num_workers=0)
valid_loader = DataLoader(valid_graphs, batch_size=args['batch_size'], shuffle=False, num_workers=0)
test_loader = DataLoader(test_graphs, batch_size=args['batch_size'], shuffle=False, num_workers=0)

In [75]:
def weight_loss(file_names, correct = False):
    """
    Computes normalised inverse-frequency class weights from the `segclass`
    column of the 'DATASET' / 'BeershebaVoxels' table of every file.

    Parameters
    ----------
    file_names : iterable of paths readable by dio.load_dst
    correct    : if truthy, the LAST class gets exactly this weight and the other
                 classes keep their relative proportions, rescaled to sum to
                 (1 - correct). With the default (False) no correction is applied.

    Returns
    -------
    1-D float array with one weight per class (classes are 1-based in the files,
    so position k holds the weight of class k + 1); the weights sum to 1.

    Classes in [1, max class] that never appear get weight 0 instead of turning
    the whole result into inf / NaN. Raises ValueError when no labels are found.
    """
    # Read every file once and concatenate at the end (no repeated Series growth)
    per_file = [
        np.asarray(dio.load_dst(f, 'DATASET', 'BeershebaVoxels').segclass)
        for f in file_names
    ]
    if not per_file:
        raise ValueError('weight_loss needs at least one file')
    seg = np.concatenate(per_file).astype(np.int64)
    if seg.size == 0:
        raise ValueError('no segclass values were found in the given files')

    freq = np.bincount(seg - 1, minlength=seg.max())

    # Inverse frequency, leaving 0 where a class is absent (avoids 1 / 0 -> inf)
    inv_freq = np.zeros(len(freq), dtype=float)
    np.divide(1., freq, out=inv_freq, where=freq > 0)
    inv_freq = inv_freq / inv_freq.sum()

    if correct:
        # Fix the weight of the last class and redistribute the rest proportionally
        redistr = inv_freq[:-1] * (1 - correct) / inv_freq[:-1].sum()
        inv_freq = np.append(redistr, correct)
    return inv_freq

In [76]:
# Class weights for the loss: computed from every raw file, with the last class pinned to 0.1
raw_pattern = f'{file_path}raw/*.h5'
files_for_weights = glob(raw_pattern)
inv_freq = weight_loss(files_for_weights, correct = 0.1)

In [80]:
# Build the network from the shared hyper-parameters and move it to the chosen device
model = DeeperGCN(
    input_dim=dataset[0].num_features,
    hidden_dim=args['hidden_dim'],
    output_dim=args['nclass'],
    num_layers=args['num_layers'],
    dropout=args['dropout'],
)
model = model.to(device)

In [81]:
# Switch for the training cell below: when True the model parameters are reset and
# the model is trained from scratch; when False that cell does nothing.
start_train = True

In [84]:
if start_train:
    # Start the model from scratch (not using a trained model)
    model.reset_parameters()

    # Initiate the optimizer with the model parameters and a learning rate
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])

    # Class-weighted cross entropy; the weights are the inverse class frequencies
    loss_fn = CrossEntropyLoss(weight=torch.Tensor(inv_freq).to(device))

    # Start below any reachable accuracy so the first epoch always stores a model
    # (with a 0 start, best_model would stay None if accuracy never exceeded 0).
    best_model = None
    best_valid_acc = -float('inf')

    # Kept as (currently unfilled) placeholders for per-epoch predictions / labels
    pred = []
    true = []

    # Iterate on the number of epochs
    for epoch in range(1, 1 + args["epochs"]):
        # Train the model with the function
        print('Training...')
        loss = train(model, train_loader, device, optimizer, loss_fn)

        # Evaluate the model on the 3 sets of data
        print('Evaluating...')
        train_acc, train_iou, _, _ = eval(model, train_loader, device)
        val_acc, val_iou, y_pred, y_true = eval(model, valid_loader, device)
        test_acc, test_iou, _, _ = eval(model, test_loader, device)

        # Keep a copy of the model whenever the validation accuracy improves
        if val_acc > best_valid_acc:
            best_valid_acc = val_acc
            best_model = copy.deepcopy(model)

        # Print the important variables for the epoch.
        # Position -2 of the 4-class IoU vector is reported as the "blob" IoU.
        print(f'Epoch: {epoch:02d}, '
              f'Loss: {loss:.4f}, '
              f'Train: {100 * train_acc:.2f}%, '
              f'Valid: {100 * val_acc:.2f}% '
              f'Test: {100 * test_acc:.2f}%')
        print(f'Blob IoU train: {100 * train_iou[-2]:.2f}%, '
              f'Blob IoU valid: {100 * val_iou[-2]:.2f}%, '
              f'Blob IoU test: {100 * test_iou[-2]:.2f}%, ')

Training...
Evaluating...
Epoch: 01, Loss: 1.8163, Train: 4.53%, Valid: 3.59% Test: 5.04%
Blob IoU train: 0.00%, Blob IoU valid: 0.00%, Blob IoU test: 0.00%, 
Training...
Evaluating...
Epoch: 02, Loss: 1.7844, Train: 77.27%, Valid: 77.56% Test: 77.03%
Blob IoU train: 0.00%, Blob IoU valid: 0.00%, Blob IoU test: 0.00%, 
Training...
Evaluating...
Epoch: 03, Loss: 1.7395, Train: 77.27%, Valid: 77.56% Test: 77.03%
Blob IoU train: 0.00%, Blob IoU valid: 0.00%, Blob IoU test: 0.00%, 
Training...
Evaluating...
Epoch: 04, Loss: 1.7838, Train: 77.27%, Valid: 77.57% Test: 77.04%
Blob IoU train: 0.00%, Blob IoU valid: 0.00%, Blob IoU test: 0.00%, 
Training...
Evaluating...
Epoch: 05, Loss: 1.7481, Train: 77.27%, Valid: 77.57% Test: 77.04%
Blob IoU train: 0.00%, Blob IoU valid: 0.00%, Blob IoU test: 0.00%, 
Training...
Evaluating...
Epoch: 06, Loss: 1.6568, Train: 77.27%, Valid: 77.57% Test: 77.04%
Blob IoU train: 0.00%, Blob IoU valid: 0.00%, Blob IoU test: 0.00%, 
Training...
Evaluating...
Epoch

In [85]:
# Re-evaluate the stored best model on the three subsets; only accuracy and IoU are kept
train_acc, train_iou = eval(best_model, train_loader, device)[:2]
valid_acc, valid_iou = eval(best_model, valid_loader, device)[:2]
test_acc, test_iou = eval(best_model, test_loader, device)[:2]

# Overall accuracy per subset
accuracy_report = (f'Best model: '
                   f'Train: {100 * train_acc:.2f}%, '
                   f'Valid: {100 * valid_acc:.2f}% '
                   f'Test: {100 * test_acc:.2f}%')
print(accuracy_report)

# IoU at position -2 of the per-class vector (reported as the blob class)
blob_report = (f'Best model IoU blob: '
               f'Train: {100 * train_iou[-2]:.2f}%, '
               f'Valid: {100 * valid_iou[-2]:.2f}% '
               f'Test: {100 * test_iou[-2]:.2f}%')
print(blob_report)

Best model: Train: 75.77%, Valid: 77.61% Test: 75.85%
Best model IoU blob: Train: 20.19%, Valid: 20.67% Test: 23.68%
