In [1]:
import comet_ml
from comet_ml import Experiment

import pandas as pd
import numpy as np
from collections import deque
from collections import defaultdict

import torch
import torch.nn as nn
import click
from nets import GraphNN_KNN_v3, EdgeClassifier_v3
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, roc_curve, precision_recall_curve, accuracy_score, average_precision_score
from torch_geometric.data import DataLoader
from preprocessing import preprocess_dataset
from utils import RunningAverageMeter, plot_aucs
from tqdm import tqdm
import networkx as nx

from st_library_clustering_metrics import class_disbalance, class_disbalance__
from st_library_clustering_metrics import estimate_start_xyz, estimate_txty
from sklearn.linear_model import TheilSenRegressor, LinearRegression, HuberRegressor, RANSACRegressor
from sklearn.model_selection import cross_val_predict

import sys
from random import seed
from random import randrange

from sklearn.cluster import MiniBatchKMeans
import hdbscan

from loss import FocalLoss

from time import time

In [None]:
class EarlyStopping_:
    """Early stops the training if validation loss doesn't improve after a given patience.

    The object is called once per evaluation with the current validation loss.
    Whenever the loss improves, both networks are checkpointed to disk; after
    ``patience`` consecutive calls without improvement ``early_stop`` is set to True.
    """

    def __init__(self, patience=7, verbose=False, delta=0):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0          # consecutive calls without improvement
        self.best_score = None    # best (negated) validation loss seen so far
        self.early_stop = False
        # `np.inf` rather than the `np.Inf` alias, which was removed in NumPy 2.0.
        self.val_loss_min = np.inf
        self.delta = delta

    def __call__(self, val_loss, embeder, classifier, experiment_key):
        """Record one validation result and update the stopping state.

        Args:
            val_loss (float): Validation loss of the current evaluation.
            embeder: Module whose ``state_dict()`` is saved on improvement.
            classifier: Module whose ``state_dict()`` is saved on improvement.
            experiment_key (str): Suffix used in the checkpoint file names.
        """
        # Work with the negated loss so that "higher is better".
        score = -val_loss

        if self.best_score is None:
            # First call: always becomes the reference point.
            self.best_score = score
            self.save_checkpoint(val_loss, embeder, classifier, experiment_key)
        elif score < self.best_score + self.delta:
            # Not better than the best score by at least `delta`.
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, embeder, classifier, experiment_key)
            self.counter = 0

    def save_checkpoint(self, val_loss, embeder, classifier, experiment_key):
        '''Saves both models' state dicts when validation loss decreases.

        Files are written to the current working directory as
        ``graph_embedder_<experiment_key>.pt`` and ``edge_classifier_<experiment_key>.pt``,
        overwriting any previous checkpoint with the same key.
        '''
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(embeder.state_dict(), "graph_embedder_{}.pt".format(experiment_key))
        torch.save(classifier.state_dict(), "edge_classifier_{}.pt".format(experiment_key))
        self.val_loss_min = val_loss



In [9]:
def str_to_class(classname: str):
    """
    Resolve a name to the object bound to it in the current module.

    :param classname: str
        attribute name to look up in this module's namespace
    :return: the object stored under ``classname`` (typically a class)
    :raises AttributeError: if the module has no attribute with that name
    """
    this_module = sys.modules[__name__]
    return getattr(this_module, classname)


def predict_one_shower(shower, graph_embedder, edge_classifier):
    """Score every edge of one graph and derive its ground-truth edge labels.

    :param shower: graph object exposing ``y`` (per-node labels) and
        ``edge_index`` (row 0 = source nodes, row 1 = target nodes)
    :param graph_embedder: callable mapping ``shower`` to per-node embeddings
    :param edge_classifier: callable mapping concatenated endpoint embeddings
        to one score per edge
    :return: ``(edge_labels_true, edge_labels_predicted)``, both flattened to 1-D;
        the true label of an edge is whether both endpoints carry the same ``y``
    """
    # TODO: batch training
    node_embeddings = graph_embedder(shower)

    # Read the endpoints only after the embedder has run, as the original code did.
    sources = shower.edge_index[0]
    targets = shower.edge_index[1]

    same_label = (shower.y[sources] == shower.y[targets]).view(-1)

    # Each edge is described by its two endpoint embeddings placed side by side.
    pair_features = torch.cat((node_embeddings[sources], node_embeddings[targets]), dim=1)
    scores = edge_classifier(pair_features).view(-1)

    return same_label, scores

In [4]:
def preprocess_torch_shower_to_nx(shower, graph_embedder, edge_classifier, threshold=0.5):
    """Convert one torch graph into a weighted ``networkx.DiGraph``.

    Nodes are numbered 0..N-1 in the order of ``shower.x`` / ``shower.y`` and carry
    the first five columns of ``x`` as ``SX, SY, SZ, TX, TY`` plus their label as
    ``signal``. Edges are those whose predicted score is strictly greater than
    ``threshold``; their weight is ``-log(score)``.

    :param shower: graph object exposing ``x``, ``y``, ``edge_index`` and ``shower_data``
    :param graph_embedder: network passed through to ``predict_one_shower``
    :param edge_classifier: network passed through to ``predict_one_shower``
    :param threshold: minimum (exclusive) predicted score for an edge to be kept
    :return: ``nx.DiGraph`` with per-label metadata stored in ``G.graph['showers_data']``
    """
    labels = shower.y.cpu().detach().numpy()
    features = shower.x.cpu().detach().numpy()
    labels_torch = shower.y

    # One metadata record per distinct label value.
    showers_data = []
    for shower_id in tqdm(np.unique(labels)):
        # First distinct row of `shower_data` among those selected by this label.
        row = shower.shower_data[labels_torch == shower_id].unique(dim=0).detach().cpu().numpy()[0]
        # Column layout: numtracks, signal, ele_P, ele_SX, ele_SY, ele_SZ, ele_TX, ele_TY
        # ('signal' is taken from the label itself rather than from column 1).
        showers_data.append(
            {
                'numtracks': row[0],
                'signal': shower_id,
                'ele_P': row[2],
                'ele_SX': row[3],
                'ele_SY': row[4],
                'ele_SZ': row[5],
                'ele_TX': row[6],
                'ele_TY': row[7]
            }
        )
    print(len(showers_data))

    nodes_to_add = [
        (
            node_id,
            {
                'features': {
                    'SX': features[node_id, 0],
                    'SY': features[node_id, 1],
                    'SZ': features[node_id, 2],
                    'TX': features[node_id, 3],
                    'TY': features[node_id, 4],
                },
                'signal': labels[node_id]
            }
        )
        for node_id in range(len(labels))
    ]

    # Pure inference: no autograd graph is needed, and skipping it saves memory
    # on graphs with millions of edges.
    with torch.no_grad():
        _, weights = predict_one_shower(shower, graph_embedder=graph_embedder, edge_classifier=edge_classifier)
    weights = weights.detach().cpu().numpy()

    # Build the mask once and apply it to both the edge list and the scores.
    keep = weights > threshold
    edge_index = shower.edge_index.t().detach().cpu().numpy()[keep]
    weights = -np.log(weights[keep])  # TODO: which transformation to use?
    print(len(weights))
    edges_to_add = [(p0, p1, weight) for (p0, p1), weight in zip(edge_index, weights)]

    G = nx.DiGraph()
    G.add_nodes_from(nodes_to_add)
    G.add_weighted_edges_from(edges_to_add)

    G.graph['showers_data'] = showers_data
    return G



## Preprocessing

In [6]:
# Input dataset and Comet.ml destination for this run.
datafile = 'train1.pt'

project_name = 'em-showers-network-training'
work_space = 'ketrint'

# Credentials are deliberately not embedded in the notebook: with no api_key
# argument comet_ml resolves the key from the COMET_API_KEY environment variable
# or its configuration file.
experiment = Experiment(project_name=project_name, workspace=work_space)
# Fall back to the CPU so the notebook still runs on machines without a GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

showers = preprocess_dataset(datafile)

# Number of columns of the first graph's node matrix `x`; later passed to the
# embedder as its `k` argument.
k = showers[0].x.shape[1]
print(k)

10


In [10]:
# Training hyper-parameters.
epochs = 10
learning_rate = 1e-3
dim_out = 10
threshold = 0.9

# Names of the network classes; they are resolved to class objects by name later on.
graph_embedder = 'GraphNN_KNN_v3'
edge_classifier = 'EdgeClassifier_v3'

In [13]:
# Display the loaded list of graphs to inspect their tensor sizes.
showers

[Data(edge_attr=[4574767, 1], edge_index=[2, 4574767], mask=[58, 4574767], pos=[103926, 5], shower_data=[200, 8], x=[103926, 10], y=[103926]),
 Data(edge_attr=[4806582, 1], edge_index=[2, 4806582], mask=[58, 4806582], pos=[108505, 5], shower_data=[200, 8], x=[108505, 10], y=[108505]),
 Data(edge_attr=[4096292, 1], edge_index=[2, 4096292], mask=[58, 4096292], pos=[104220, 5], shower_data=[200, 8], x=[104220, 10], y=[104220])]

In [14]:
# Hold out part of the graphs for evaluation; the fixed seed keeps the split reproducible.
showers_train, showers_test = train_test_split(showers, random_state=1337)

# One graph per batch. Only the training order is shuffled.
train_loader = DataLoader(showers_train, batch_size=1, shuffle=True)
# Evaluation does not benefit from shuffling; a fixed order means the test graphs
# are fed to the metric meters in the same order on every run.
test_loader = DataLoader(showers_test, batch_size=1, shuffle=False)

In [16]:
# Replace the class-name strings with instantiated networks placed on `device`.
# The names are rebound here, so the cell that sets the class-name strings must be
# re-run before this cell is executed a second time.
embedder_cls = str_to_class(graph_embedder)
graph_embedder = embedder_cls(dim_out=dim_out, k=k).to(device)
classifier_cls = str_to_class(edge_classifier)
edge_classifier = classifier_cls(dim_out=dim_out).to(device)

In [17]:
# Loss on the per-edge predictions.
criterion = FocalLoss(gamma=2.)
# A single Adam optimizer updates the embedder and the edge classifier jointly.
optimizer = torch.optim.Adam(
    list(graph_embedder.parameters()) + list(edge_classifier.parameters()),
    lr=learning_rate,
)

# Running meters for the quantities tracked during training and evaluation.
loss_train = RunningAverageMeter()
loss_test = RunningAverageMeter()
roc_auc_test = RunningAverageMeter()
pr_auc_test = RunningAverageMeter()
acc_test = RunningAverageMeter()
# NOTE(review): this rebinds `class_disbalance`, which the import cell also binds to
# a function from st_library_clustering_metrics; that function is no longer
# reachable under this name after this cell runs.
class_disbalance = RunningAverageMeter()

# Credentials are deliberately not embedded in the notebook: with no api_key
# argument comet_ml resolves the key from the COMET_API_KEY environment variable
# or its configuration file.
experiment = Experiment(project_name=project_name, workspace=work_space)
# NOTE(review): patience=100 is larger than the configured number of epochs (10),
# so early stopping cannot trigger with the current settings; confirm this is intended.
early_stopping = EarlyStopping_(patience=100, verbose=True)

In [18]:
# The experiment key names the checkpoint files. Fetch it once up front so it is
# also defined for the checkpoint reload below, independent of the loop body.
experiment_key = experiment.get_key()

for _ in tqdm(range(epochs)):
    # ---- training pass: one optimizer step per graph ----
    for shower in train_loader:
        shower = shower.to(device)
        edge_labels_true, edge_labels_predicted = predict_one_shower(shower,
                                                                     graph_embedder=graph_embedder,
                                                                     edge_classifier=edge_classifier)
        # calculate the batch loss
        loss = criterion(edge_labels_predicted, edge_labels_true.float())
        # Zero gradients, perform a backward pass, and update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        loss_train.update(loss.item())
        # Fraction of edges whose endpoints share a label.
        class_disbalance.update((edge_labels_true.sum().float() / len(edge_labels_true)).item())

    # ---- evaluation pass ----
    y_true_list = []
    y_pred_list = []
    # No gradients are needed for evaluation; disabling autograd avoids keeping the
    # computation graph of every test graph alive on the GPU.
    # NOTE(review): the networks are not switched to eval() here; if they contain
    # dropout or batch-norm layers that should be added. Confirm against `nets`.
    with torch.no_grad():
        for shower in test_loader:
            shower = shower.to(device)
            edge_labels_true, edge_labels_predicted = predict_one_shower(shower,
                                                                         graph_embedder=graph_embedder,
                                                                         edge_classifier=edge_classifier)

            # calculate the batch loss
            loss = criterion(edge_labels_predicted, edge_labels_true.float())
            y_true, y_pred = edge_labels_true.detach().cpu().numpy(), edge_labels_predicted.detach().cpu().numpy()
            y_true_list.append(y_true)
            y_pred_list.append(y_pred)
            acc = accuracy_score(y_true, y_pred.round())
            roc_auc = roc_auc_score(y_true, y_pred)
            pr_auc = average_precision_score(y_true, y_pred)
            loss_test.update(loss.item())
            acc_test.update(acc)
            roc_auc_test.update(roc_auc)
            pr_auc_test.update(pr_auc)
            class_disbalance.update((edge_labels_true.sum().float() / len(edge_labels_true)).item())

    # Checkpoint on improvement and stop once patience is exhausted.
    eval_loss = loss_test.val
    early_stopping(eval_loss, graph_embedder, edge_classifier, experiment_key)

    if early_stopping.early_stop:
        print("Early stopping")
        break

    experiment.log_metric('loss_test', loss_test.val)
    experiment.log_metric('acc_test', acc_test.val)
    experiment.log_metric('roc_auc_test', roc_auc_test.val)
    experiment.log_metric('pr_auc_test', pr_auc_test.val)
    experiment.log_metric('class_disbalance', class_disbalance.val)

    # All test-set labels and predictions of this epoch as flat arrays.
    y_true = np.concatenate(y_true_list)
    y_pred = np.concatenate(y_pred_list)

# load the last checkpoint with the best model
graph_embedder.load_state_dict(torch.load("graph_embedder_{}.pt".format(experiment_key)))
edge_classifier.load_state_dict(torch.load("edge_classifier_{}.pt".format(experiment_key)))

  0%|          | 0/10 [00:00<?, ?it/s]

torch.Size([103926, 10]) torch.Size([2, 4574767]) torch.Size([4574767, 1])
torch.Size([4574767, 10]) 4574767
torch.Size([103926, 10]) torch.Size([2, 4574767]) torch.Size([4574767, 1])
torch.Size([4574767, 10]) 4574767
torch.Size([103926, 10]) torch.Size([2, 4574767]) torch.Size([4574767, 1])
torch.Size([4574767, 10]) 4574767
torch.Size([108505, 10]) torch.Size([2, 4806582]) torch.Size([4806582, 1])
torch.Size([4806582, 10]) 4806582
torch.Size([108505, 10]) torch.Size([2, 4806582]) torch.Size([4806582, 1])
torch.Size([4806582, 10]) 4806582
torch.Size([108505, 10]) torch.Size([2, 4806582]) torch.Size([4806582, 1])
torch.Size([4806582, 10]) 4806582


RuntimeError: CUDA out of memory. Tried to allocate 552.00 MiB (GPU 0; 11.17 GiB total capacity; 5.84 GiB already allocated; 488.81 MiB free; 6.39 GiB reserved in total by PyTorch)