In [1]:
import sys
import copy
import random
import itertools
import numpy   as np
import pandas  as pd
import os.path as osp
from   glob import glob

import invisible_cities.io.dst_io as dio

import torch
from   torch_geometric.data import Data, Dataset
from   torch_geometric.data.makedirs import makedirs

In [2]:
class Dataset(Dataset):
    '''
    On-disk graph dataset built from labelled beersheba files.

    Every raw file matching ``*_<tag>.h5`` inside ``raw_dir`` is converted with
    ``graphDataset`` (defined outside this class) and stored in ``processed_dir``
    as ``data_N_<tag>.pt``, where N is the number found in the raw file name.

    NOTE: the class reuses the name of the torch_geometric ``Dataset`` it inherits
    from, so the imported base class is shadowed once this cell has run.

    Parameters
    ----------
    root : directory handed to the torch_geometric base class.
    tag : suffix identifying which raw/processed files belong to this dataset.
    transform, pre_transform, pre_filter : forwarded to the base class.
        ``process`` itself does not apply ``pre_transform`` / ``pre_filter``.
    directed, simplify_segclass : forwarded to ``graphDataset``.
    '''
    def __init__(self, root, tag = '0nubb', transform=None, pre_transform=None, pre_filter=None, directed = False, simplify_segclass = False):
        # Everything the properties and process() read is set before the parent
        # constructor runs, because that constructor is what triggers processing.
        # The sort key is the integer N in names shaped like <prefix>_N_<tag>.<ext>.
        self.sort = lambda x: int(x.split('_')[-2])
        self.tag = tag
        self.directed = directed
        self.simplify_segclass = simplify_segclass
        super().__init__(root, transform, pre_transform, pre_filter)

    @property
    def raw_file_names(self):
        '''
        Returns the names of the raw files ordered by file number (supposing they are
        beersheba labelled files with the structure beersheba_label_N_tag.h5)
        '''
        pattern = osp.join(self.raw_dir, '*_{}.h5'.format(self.tag))
        return sorted((osp.basename(path) for path in glob(pattern)), key = self.sort)

    @property
    def processed_file_names(self):
        '''
        Returns the names of the processed files ordered by file number (supposing they
        are stored tensors with the structure data_N_tag.pt)
        '''
        pattern = osp.join(self.processed_dir, 'data_*_{}.pt'.format(self.tag))
        return sorted((osp.basename(path) for path in glob(pattern)), key = self.sort)

    def process(self):
        '''
        Converts every raw file that has no processed counterpart yet and saves the result.
        '''
        makedirs(self.processed_dir)
        # A set gives constant-time membership checks for the "already done" test
        already_processed = {self.sort(name) for name in self.processed_file_names}
        for raw_path in self.raw_paths:
            idx = self.sort(raw_path)
            if idx in already_processed:
                # to avoid processing already processed files
                continue
            data = graphDataset(raw_path, directed=self.directed, simplify_segclass=self.simplify_segclass)

            # NOTE: pre_filter / pre_transform are accepted by the constructor but are
            # intentionally not applied here.
            torch.save(data, osp.join(self.processed_dir, f'data_{idx}_{self.tag}.pt'))

    def len(self):
        '''Number of processed files currently on disk.'''
        return len(self.processed_file_names)

    def get(self, idx):
        '''
        Loads the idx-th processed file in file-number order.

        The position is resolved through the sorted list of processed files instead of
        being pasted into the file name, so it also works when the file numbers do not
        start at 0 or are not consecutive.
        '''
        # NOTE(review): recent torch releases changed the torch.load defaults for
        # pickled objects — confirm these files still load with the installed version.
        return torch.load(self.processed_paths[idx])

    def join(self):
        '''
        Loads every processed file and concatenates their contents into one flat list.
        '''
        dataset = []
        for processed_path in self.processed_paths:
            dataset.extend(torch.load(processed_path))
        return dataset

In [3]:
from torch_geometric.nn.models import GraphUNet
from torch.nn import BatchNorm1d, CrossEntropyLoss
import torch.nn.functional as F
from torch_geometric.loader import DataLoader

I'm going to create a UNet structure with GCN layers, graph pooling and unpooling, and skip connections between the parallel steps of the downsampling/upsampling paths. This GraphUNet is already implemented in torch geometric, following the paper https://arxiv.org/abs/1905.05178.

The UNet lets me pass the batch tensor to the forward call, so the different graphs in the same event are now joined by some shared information :)

![Alt text](UNet.png)

![Alt text](gPool.png)
![Alt text](gUnPool.png)

In [3]:
def accuracy(true, pred, **kwrgs):
    '''
    Fraction of positions where ``pred`` equals ``true``.

    Parameters
    ----------
    true, pred : array-likes (numpy arrays, lists or torch tensors) holding the same
        number of elements; both are flattened, so (N, 1) and (N,) inputs are compared
        element by element instead of being broadcast against each other.
    **kwrgs : ignored; accepted so that every metric can be called the same way
        (e.g. with ``nclass=...``).

    Returns
    -------
    The accuracy as a numpy float in [0, 1].

    Raises
    ------
    ValueError : if the inputs do not hold the same number of elements.
    ZeroDivisionError : if the inputs are empty (accuracy is undefined).
    '''
    def _flatten(values):
        # Tensors may live on another device and carry gradients; bring them to host first
        if isinstance(values, torch.Tensor):
            values = values.detach().cpu().numpy()
        return np.asarray(values).ravel()

    true = _flatten(true)
    pred = _flatten(pred)
    if true.size != pred.size:
        raise ValueError(f'true and pred must have the same number of elements, got {true.size} and {pred.size}')
    if true.size == 0:
        raise ZeroDivisionError('accuracy is undefined for empty inputs')
    # Vectorised comparison; the mean of a boolean array is the fraction of matches
    return np.mean(true == pred)

In [29]:
def train_one_epoch(epoch_id, model, loader, device, optimizer, loss_fn, metrics = 'IoU', nclass = 4, model_uses_batch = True):
    '''
    Runs one optimisation pass over ``loader`` and returns the mean loss and metric.

    Parameters
    ----------
    epoch_id : only used in the printed summary line.
    model : network called as ``model(x, edge_index[, batch])``; its output is used as
        an (N, C) score matrix, one row per node.
    loader : sized iterable of batches exposing ``x``, ``edge_index``, ``batch``, ``y``
        and ``to(device)``.
    device : device every batch is moved to.
    optimizer : optimizer stepped once per batch.
    loss_fn : callable ``loss_fn(scores, labels)`` returning a scalar tensor.
    metrics : 'acc' (uses ``accuracy``) or 'IoU' (uses ``IoU``).
    nclass : number of classes after merging; forwarded to the metric and used to size
        the IoU accumulator.
    model_uses_batch : whether ``batch.batch`` is passed to the model.

    Returns
    -------
    (loss_epoch, met_epoch) : loss and metric averaged over the batches.

    Raises
    ------
    ValueError : if ``metrics`` is not one of the supported names.
    '''
    # Resolve the metric before touching the model, so a misspelled name fails at once
    # instead of after the first forward/backward pass.
    if metrics == 'acc':
        metric_fn = accuracy
        met_epoch = 0
    elif metrics == 'IoU':
        metric_fn = IoU
        met_epoch = np.zeros(nclass)
    else:
        raise ValueError(f"Unknown metrics '{metrics}', expected 'acc' or 'IoU'")

    # Classes 3, 4 and 5 (the neighbour classes) are identified with 0, 1 and 2
    # respectively when scoring; class 6 becomes 3.
    label_map = {0:0, 1:1, 2:2, 3:0, 4:1, 5:2, 6:3}

    # Tell the model it's going to train
    model.train()
    loss_epoch = 0

    for batch in loader:
        batch = batch.to(device)
        optimizer.zero_grad()

        # Calling the module (instead of .forward) keeps registered hooks working
        features = batch.x.type(torch.float)
        if model_uses_batch:
            out = model(features, batch.edge_index, batch.batch)
        else:
            out = model(features, batch.edge_index)

        # Labels come as (N, 1) with values in [1, 7] while the network scores classes
        # [0, 6]: flatten to (N,), cast to long (required by the loss) and shift by one.
        # .long() keeps the tensor on the device the batch was moved to.
        label = torch.reshape(batch.y, (-1,)).long() - 1

        loss = loss_fn(out, label)
        # Back propagation, then parameter update
        loss.backward()
        optimizer.step()

        loss_epoch += loss.item()

        # Per-node predicted class and true class as flat numpy arrays for the metric
        pred = out.argmax(dim=-1).reshape(-1).detach().cpu().numpy()
        true = label.detach().cpu().numpy()

        # Identify the neighbour segclass with their original segclass to compare each node
        pred = np.array([label_map[i] for i in pred])
        true = np.array([label_map[i] for i in true])

        met_epoch += metric_fn(true, pred, nclass = nclass)

    # Per-batch averages
    loss_epoch = loss_epoch / len(loader)
    met_epoch  = met_epoch / len(loader)
    epoch_ = f"Train Epoch: {epoch_id}"
    loss_  = f"\t Loss: {loss_epoch:.6f}"
    print(epoch_ + loss_)

    return loss_epoch, met_epoch

For now, we should have a config with the information:

* path: path to the prepared dataset

### TRAIN ####
* model: the model I implemented and want to use (UNet for now, maybe other models later)
* optimizer: the optimizer I chose
* loss_fn: the function I chose to compute the loss
* metrics: acc if doing classification, IoU if doing segmentation
* nclass: will use 3 for MC, and 4 for Beersheba (given that we will take the neighbour classes into account)

In [32]:
def valid_one_epoch(model, loader, device, loss_fn, metrics = 'IoU', nclass = 4, model_uses_batch = True):
    '''
    Evaluates ``model`` over ``loader`` without updating it and returns the mean loss and metric.

    Parameters
    ----------
    model : network called as ``model(x, edge_index[, batch])``; its output is used as
        an (N, C) score matrix, one row per node.
    loader : sized iterable of batches exposing ``x``, ``edge_index``, ``batch``, ``y``
        and ``to(device)``.
    device : device every batch is moved to.
    loss_fn : callable ``loss_fn(scores, labels)`` returning a scalar tensor.
    metrics : 'acc' (uses ``accuracy``) or 'IoU' (uses ``IoU``).
    nclass : number of classes after merging; forwarded to the metric and used to size
        the IoU accumulator.
    model_uses_batch : whether ``batch.batch`` is passed to the model.

    Returns
    -------
    (loss_epoch, met_epoch) : loss and metric averaged over the batches.

    Raises
    ------
    ValueError : if ``metrics`` is not one of the supported names.
    '''
    # Resolve the metric before touching the model, so a misspelled name fails at once
    if metrics == 'acc':
        metric_fn = accuracy
        met_epoch = 0
    elif metrics == 'IoU':
        metric_fn = IoU
        met_epoch = np.zeros(nclass)
    else:
        raise ValueError(f"Unknown metrics '{metrics}', expected 'acc' or 'IoU'")

    # Classes 3, 4 and 5 (the neighbour classes) are identified with 0, 1 and 2
    # respectively when scoring; class 6 becomes 3.
    label_map = {0:0, 1:1, 2:2, 3:0, 4:1, 5:2, 6:3}

    # Set the model to evaluate
    model.eval()
    loss_epoch = 0

    # No gradients are needed for evaluation
    with torch.no_grad():
        for batch in loader:
            batch = batch.to(device)

            # Calling the module (instead of .forward) keeps registered hooks working
            features = batch.x.type(torch.float)
            if model_uses_batch:
                out = model(features, batch.edge_index, batch.batch)
            else:
                out = model(features, batch.edge_index)

            # Labels come as (N, 1) with values in [1, 7] while the network scores
            # classes [0, 6]: flatten to (N,), cast to long and shift by one.
            # .long() keeps the tensor on the device the batch was moved to.
            label = torch.reshape(batch.y, (-1,)).long() - 1

            loss_epoch += loss_fn(out, label).item()

            # For each node the class with the highest score is the prediction
            pred = out.argmax(dim=-1).reshape(-1).cpu().numpy()
            true = label.cpu().numpy()

            pred = np.array([label_map[i] for i in pred])
            true = np.array([label_map[i] for i in true])

            met_epoch += metric_fn(true, pred, nclass = nclass)

    # Per-batch averages
    loss_epoch = loss_epoch / len(loader)
    met_epoch  = met_epoch / len(loader)
    loss_ = f"\t Validation Loss: {loss_epoch:.6f}"
    print(loss_)

    return loss_epoch, met_epoch

In [4]:
def create_idx_split(dataset, train_perc, seed = None):
    '''
    Randomly splits the positions of ``dataset`` into train / valid / test index sets.

    Parameters
    ----------
    dataset : any sized container; only ``len(dataset)`` is used.
    train_perc : fraction (between 0 and 1) assigned to training; the remainder is
        shared equally between validation and test.
    seed : optional seed for a private random generator, making the split reproducible.
        When None the global ``random`` module state is used, as before.

    Returns
    -------
    dict with keys 'train', 'valid' and 'test', each a sorted tensor of indices.

    Raises
    ------
    ValueError : if ``train_perc`` is outside [0, 1].
    '''
    if not 0 <= train_perc <= 1:
        raise ValueError(f'train_perc must be between 0 and 1, got {train_perc}')

    indices = np.arange(len(dataset))
    # A dedicated generator keeps a seeded split independent of the global random state
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(indices)

    valid_perc = (1 - train_perc) / 2
    # Slice boundaries, computed once (same arithmetic as the per-slice expressions)
    scale = len(indices) + 1
    train_end = int(scale * train_perc)
    valid_end = int(scale * (train_perc + valid_perc))

    return {'train': torch.tensor(np.sort(indices[:train_end])),
            'valid': torch.tensor(np.sort(indices[train_end:valid_end])),
            'test' : torch.tensor(np.sort(indices[valid_end:]))}

In [5]:
# Use the GPU when torch can see one, otherwise stay on the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print('Device: {}'.format(device))

Device: cuda


In [6]:
# Settings shared by the network construction and the training cells
args = dict(
    device     = device,
    nclass     = 7,      # number of output classes given to the network
    depth      = 4,      # depth given to the network
    hidden_dim = 20,     # hidden channel size given to the network
    pool_ratio = 0.3,    # pooling ratio given to the network
    lr         = 0.001,  # learning rate given to the optimizer
    epochs     = 100,    # number of passes of the training loop
    batch_size = 50,     # batch size given to the data loaders
)

In [7]:
# Creation of the dataset, index split and data loaders for each case
file_path = '/mnt/lustre/scratch/nlsas/home/usc/ie/mpm/NEXT100/labelled_data/0nubb/554mm_voxels/'

# Seed the generators used by the split (random) and by the loader shuffling (torch),
# so the train/valid/test partition and the reported numbers survive a kernel restart.
split_seed = 0
random.seed(split_seed)
torch.manual_seed(split_seed)

dataset = Dataset(file_path, '0nubb').join()
idx_split = create_idx_split(dataset, 0.8)

# Only the training loader shuffles; validation and test keep a fixed order
train_loader = DataLoader([dataset[i] for i in idx_split['train'].tolist()], batch_size=args['batch_size'], shuffle=True, num_workers=0)
valid_loader = DataLoader([dataset[i] for i in idx_split['valid'].tolist()], batch_size=args['batch_size'], shuffle=False, num_workers=0)
test_loader = DataLoader([dataset[i] for i in idx_split['test'].tolist()], batch_size=args['batch_size'], shuffle=False, num_workers=0)

In [8]:
def weight_loss(file_names, correct = False):
    '''
    Computes normalised inverse-frequency class weights from the ``segclass`` column
    of the 'DATASET/BeershebaVoxels' table of every file.

    Parameters
    ----------
    file_names : iterable of paths readable by ``dio.load_dst``.
    correct : when truthy, the weight given to the last class (the ghost class); the
        remaining ``1 - correct`` is shared among the other classes in proportion to
        their inverse frequency.

    Returns
    -------
    numpy array of weights adding up to 1; entry k belongs to segclass k + 1. Classes
    that never occur get weight 0 instead of producing inf/nan.

    Raises
    ------
    ValueError : if ``file_names`` is empty.
    '''
    file_names = list(file_names)
    if not file_names:
        raise ValueError('weight_loss needs at least one file to read the segclass labels from')

    # Read each column once and concatenate in a single step (Series.append is gone
    # from recent pandas and made the loop quadratic).
    seg = pd.concat([dio.load_dst(f, 'DATASET', 'BeershebaVoxels').segclass for f in file_names])

    # segclass starts at 1, bincount at 0
    freq = np.bincount(seg.to_numpy() - 1)

    # Invert only the populated classes so an absent class cannot poison the normalisation
    inv_freq = np.zeros(len(freq))
    np.divide(1., freq, out=inv_freq, where=freq > 0)
    inv_freq = inv_freq / inv_freq.sum()

    if correct:
        redistr = inv_freq[:-1] * (1 - correct) / inv_freq[:-1].sum()
        inv_freq = np.append(redistr, correct)
    return inv_freq

In [38]:
# Every raw file contributes to the class frequencies; joining the path pieces keeps
# the pattern valid whether or not file_path ends with a separator.
files_for_weights = glob(osp.join(file_path, 'raw', '*.h5'))
# 0.1 is the weight reserved for the last (ghost) class
inv_freq = weight_loss(files_for_weights, correct = 0.1)

In [9]:
# Build the network from the settings above and move it to the selected device.
# The activation is left at the GraphUNet default (relu).
model = GraphUNet(
    in_channels     = dataset[0].num_features,
    hidden_channels = args['hidden_dim'],
    out_channels    = args['nclass'],
    depth           = args['depth'],
    pool_ratios     = args['pool_ratio'],
).to(device)

In [40]:
# Switch for the training cell below: its whole body runs only when this is True
start_train = True

In [41]:
# Training driver: trains for args["epochs"] epochs and keeps a copy of the model with
# the best validation accuracy in `best_model`.
# NOTE(review): `train` and `eval` are not defined in any visible cell (the helpers
# defined above are train_one_epoch / valid_one_epoch), and `eval` would otherwise
# resolve to the Python builtin. This cell presumably relies on definitions from
# another cell or kernel state — confirm before a Restart & Run All.
if start_train:
  # Start from zero the model (not using a trained model)
  model.reset_parameters()

  # Initiate the optimizer with the model parameters and a learning rate
  optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])

  # Pick the loss function; classes are weighted with the inv_freq array computed above
  loss_fn = CrossEntropyLoss(weight=torch.Tensor(inv_freq).to(device)) #torch.nn.NLLLoss(weight=torch.Tensor(inv_freq).to(device)) #

  # Best model so far and the validation accuracy it reached
  best_model = None
  best_valid_acc = 0

  # Iterate on the number of epochs
  # (pred / true are only referenced by the commented-out appends below)
  pred = []
  true = []
  for epoch in range(1, 1 + args["epochs"]):
    # Train the model with the function
    print('Training...')
    loss = train(model, train_loader, device, optimizer, loss_fn)

    #Evaluate the model with the function and for the 3 sets of data
    # NOTE(review): eval is unpacked as (accuracy, per-class IoU, predictions, labels) — confirm
    print('Evaluating...')
    train_acc, train_iou, _, _ = eval(model, train_loader, device)
    val_acc, val_iou, y_pred, y_true = eval(model, valid_loader, device)
    test_acc, test_iou, _, _ = eval(model, test_loader, device)
    #pred.append(y_pred)
    #true.append(y_true)
    # Store the model if the validation accuracy improved
    if val_acc > best_valid_acc:
        best_valid_acc = val_acc
        best_model = copy.deepcopy(model)
    # Print the important variables for epoch
    print(f'Epoch: {epoch:02d}, '
          f'Loss: {loss:.4f}, '
          f'Train: {100 * train_acc:.2f}%, '
          f'Valid: {100 * val_acc:.2f}% '
          f'Test: {100 * test_acc:.2f}%')
    # The second-to-last IoU entry is reported as the blob class
    # NOTE(review): presumably that index is the blob class — confirm against the IoU ordering
    print(f'Blob IoU train: {100 * train_iou[-2]:.2f}%, '
          f'Blob IoU valid: {100 * val_iou[-2]:.2f}%, '
          f'Blob IoU test: {100 * test_iou[-2]:.2f}%, ')

Training...
Evaluating...
Epoch: 01, Loss: 1.9408, Train: 4.63%, Valid: 3.84% Test: 4.00%
Blob IoU train: 0.00%, Blob IoU valid: 0.00%, Blob IoU test: 0.00%, 
Training...
Evaluating...
Epoch: 02, Loss: 1.9336, Train: 4.63%, Valid: 3.84% Test: 4.00%
Blob IoU train: 0.00%, Blob IoU valid: 0.00%, Blob IoU test: 0.00%, 
Training...
Evaluating...
Epoch: 03, Loss: 1.9270, Train: 4.63%, Valid: 3.84% Test: 4.00%
Blob IoU train: 0.00%, Blob IoU valid: 0.00%, Blob IoU test: 0.00%, 
Training...
Evaluating...
Epoch: 04, Loss: 1.9179, Train: 4.63%, Valid: 3.84% Test: 4.00%
Blob IoU train: 0.01%, Blob IoU valid: 0.00%, Blob IoU test: 0.00%, 
Training...
Evaluating...
Epoch: 05, Loss: 1.9080, Train: 4.96%, Valid: 4.25% Test: 4.26%
Blob IoU train: 2.01%, Blob IoU valid: 2.37%, Blob IoU test: 1.63%, 
Training...
Evaluating...
Epoch: 06, Loss: 1.9047, Train: 11.26%, Valid: 10.15% Test: 10.36%
Blob IoU train: 24.35%, Blob IoU valid: 21.82%, Blob IoU test: 23.46%, 
Training...
Evaluating...
Epoch: 07, Los

In [43]:
# Score the model kept by the training cell (best validation accuracy) on the three sets.
# NOTE(review): `eval` is not defined in any visible cell and would otherwise resolve to
# the Python builtin; `best_model` stays None if the training cell was skipped or the
# validation accuracy never rose above 0 — confirm both before re-running.
train_acc, train_iou, _, _ = eval(best_model, train_loader, device)
valid_acc, valid_iou, _, _ = eval(best_model, valid_loader, device)
test_acc, test_iou, _, _  = eval(best_model, test_loader, device)

print(f'Best model: '
      f'Train: {100 * train_acc:.2f}%, '
      f'Valid: {100 * valid_acc:.2f}% '
      f'Test: {100 * test_acc:.2f}%')
# The second-to-last IoU entry is reported as the blob class (presumably — confirm)
print(f'Best model IoU blob: '
      f'Train: {100 * train_iou[-2]:.2f}%, '
      f'Valid: {100 * valid_iou[-2]:.2f}% '
      f'Test: {100 * test_iou[-2]:.2f}%')

Best model: Train: 61.09%, Valid: 61.14% Test: 62.03%
Best model IoU blob: Train: 28.01%, Valid: 25.99% Test: 28.19%
