In [8]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from torch import nn, optim
import time
from torch.utils.data import Dataset, DataLoader
import torch
from preprocess import preprocess
from preprocess import weights_to_labels
from GraphBuilder.model_loader import model_loader
from GraphBuilder.model.model import GNNmodel as myModel
from trackml.score import score_event

# Get results

This notebook is used to obtain results from the GNN model and to check its performance.

This notebook is meant to be run inside the Docker container.

In [9]:
from trackml.dataset import load_dataset
# Directory handed to load_dataset() in the next cell. It is an absolute path,
# so it only resolves where the data is available at exactly this location
# (presumably inside the Docker image this notebook runs in — TODO confirm).
PATH_TO_DATA = "/home/data/train_sample"

## input selection:

Generate inputs from a single file:

In [10]:
# Ask for the four per-event tables; `data` is consumed with next(), so only
# the first event it yields is unpacked and used by the rest of the notebook.
data = load_dataset(PATH_TO_DATA, parts=['hits', 'cells', 'truth', 'particles'])
event_id, hits, cells, truth, particles = next(data)
print('load event',event_id)

load event 21100


In [11]:
# Quick look at the column names available in the cells table.
cells.keys()

Index(['hit_id', 'ch0', 'ch1', 'value'], dtype='object')

Filter the events if needed:

In [None]:
def getLayer(volume_id, layer_id):
    """Turn a (volume_id, layer_id) pair into one running layer number.

    For volumes 8, 13 and 17 the result is ``layer_id // 2`` shifted by
    0, 4 and 8 respectively. Every other volume gives ``-1``.
    """
    # volume -> amount added to layer_id // 2
    offsets = {8: 0, 13: 4, 17: 8}
    for volume, offset in offsets.items():
        if volume_id == volume:
            half = layer_id // 2
            # Volume 8 has no shift: hand back the halved value untouched.
            return half + offset if offset else half
    return -1

In [None]:
keys = ['hit_id','x','y','z','particle_id','volume_id','layer_id']
# A track is kept only if it has strictly more hits than this.
MIN_HITS_PER_TRACK = 7


def _keep_barrel_tracks(frame):
    """Drop noise hits and every particle that has a hit outside the barrel.

    `frame` must carry 'particle_id' and 'layer' columns. getLayer() returns
    -1 for volumes it does not map, so a particle whose smallest layer is not
    positive owns at least one such hit and is removed as a whole.
    Returns the remaining rows in their original order.
    """
    # TrackML uses particle_id 0 for hits that belong to no particle (noise).
    frame = frame.loc[frame['particle_id'] != 0]
    lowest_layer = frame.groupby('particle_id')['layer'].transform('min')
    return frame.loc[lowest_layer > 0]


def _keep_long_tracks(frame, min_hits=MIN_HITS_PER_TRACK):
    """Add an 'nhits' column (hits per particle) and keep particles with more than `min_hits`.

    The result is stably sorted by particle_id, so the hits of one particle
    sit next to each other and keep their relative order. An empty `frame`
    simply yields an empty result.
    """
    nhits = frame.groupby('particle_id')['particle_id'].transform('size')
    kept = frame.assign(nhits=nhits)
    kept = kept.loc[kept['nhits'] > min_hits]
    return kept.sort_values('particle_id', kind='stable')


print('evaluate and store event',event_id,' with',hits.shape[0],'hits')

# Truth rows get the detector coordinates they need for the layer lookup.
new_truth = truth.merge(hits[['hit_id','volume_id','layer_id']], on='hit_id')

hits['layer'] = hits.apply(lambda x: getLayer(x['volume_id'],x['layer_id']), axis=1)
new_truth['layer'] = new_truth.apply(lambda x: getLayer(x['volume_id'],x['layer_id']), axis=1)

# filter event - remove noise, use only barrel, keep hits associated to
# tracks with more than MIN_HITS_PER_TRACK hits. The same two steps are
# applied to the hits and to the truth so both describe the same tracks.
new_hits = hits.merge(truth[['hit_id','particle_id']], on='hit_id')
new_hits = _keep_barrel_tracks(new_hits)
new_truth = _keep_barrel_tracks(new_truth)

print('remove tracks oursize the barrel: ',new_hits.shape[0])

new_hits = _keep_long_tracks(new_hits)
new_truth = _keep_long_tracks(new_truth)

print('total hits in the file:' ,new_hits.shape[0], 'truth',new_truth.shape[0])

Run the preprocess code to obtain inputs and labels:

In [None]:
# preprocess() hands back five parallel lists; only element 0 of each is used
# from here on. A copy of new_hits is passed, so new_hits itself stays as built
# by the filtering cell.
# NOTE(review): the meaning of the [-4.6, 4.6] pair is defined inside
# preprocess(); presumably a selection range — TODO confirm.
list_y, list_X, list_Is, list_hits_id, list_labels = preprocess(new_hits.copy(), np.array([-4.6,4.6]))
y, X, Is, hits_id, dummy_labels  = list_y[0], list_X[0], list_Is[0], list_hits_id[0], list_labels[0]
# Cast to float32 before y is compared against the model output.
y = y.astype(np.float32)
# Is is not converted here: `.values` is taken at the fit_predict() call,
# while weights_to_labels() receives Is as returned by preprocess().

Evaluate event using the model:

In [None]:
# Build the loader, attach a GNNmodel instance and read the stored weights.
# NOTE(review): edge_dim / hidden_dim presumably have to match the network the
# weights file was saved from — TODO confirm. The weights path is relative to
# the current working directory.
graph_model = model_loader()
graph_model.set_model(myModel(edge_dim = 8, hidden_dim=64))
graph_model.load_weights('GraphBuilder/data/weights_gnn1.pt')

In [None]:
# Run the model on the preprocessed inputs; Is is passed as its `.values`
# rather than as the object returned by preprocess().
y_pred = graph_model.fit_predict(X, Is.values)

## Plot weights according to their labels:

In [None]:
# One shared set of histogram options so both classes use the same 50 bins
# on [0, 1] and the same logarithmic count axis.
binning = dict(bins=50, range=(0, 1), histtype='bar', log=True)

fig, ax = plt.subplots()
# y is the reference label: below 0.5 counts as fake, above 0.5 as true.
for edge_class, selection in (('fake', y < 0.5), ('true', y > 0.5)):
    ax.hist(y_pred[selection], label=edge_class, alpha=0.7, **binning)
ax.set_xlabel('Model output')
ax.legend(loc=0)
plt.show()

## Weight propagation through the event:

In [None]:
# Turn the predicted weights into one label per hit. dummy_labels (from
# preprocess) is the starting labelling that is passed in.
# NOTE(review): 0.8 is presumably the minimum weight for an edge to be
# followed — TODO confirm against weights_to_labels().
labels = weights_to_labels(X, Is, y_pred, dummy_labels, hits_id, threshold = 0.8)

In [None]:
# Predicted assignment: one row per hit with the label it received.
# column_stack puts both arrays into one 2-D array before the integer cast.
sub = pd.DataFrame(data=np.column_stack((hits_id, labels)),
                   columns=["hit_id", "track_id"]).astype(int)
# Same layout built from the true particle ids. The scoring cell below uses
# new_truth, not this frame.
sub_truth = pd.DataFrame(data=np.column_stack((new_hits.hit_id.values,new_hits.particle_id.values)),
                   columns=["hit_id", "track_id"]).astype(int)

# Tag every predicted row with the event it belongs to.
sub['event_id'] = event_id

Evaluate accuracy of the predictions:

In [None]:
# Score the predicted labelling against the filtered truth, so only the hits
# that survived the filtering cell are part of the reference.
score = score_event(new_truth, sub)
print('accuracy score = ',score)

In [None]:
# Dimensions of the model input produced by preprocess().
X.shape

In [None]:
# Positions at which the reference labels y are non-zero.
y.nonzero()

In [1]:
import pandas as pd
import numpy as np
# Hand-made graph for checking weights_to_labels on a case small enough to
# verify by eye: one row per edge, giving the two node positions it joins and
# a 0/1 reference weight.
# NOTE: this cell rebinds X, Is and y_pred, which the event analysis earlier
# in the notebook also uses; re-run those cells before going back to it.
edges = pd.DataFrame(
    data=np.column_stack((
        [0, 0, 1, 2, 3, 3, 4, 4],
        [3, 4, 4, 4, 5, 6, 6, 7],
        [1, 0, 1, 1, 1, 0, 0, 1],
    )),
    columns=["index_1", "index_2", "weight"],
)
# Placeholder features for the 8 nodes. A locally seeded generator keeps the
# values identical on every run without touching numpy's global random state.
X = np.random.RandomState(0).rand(8, 5)
hit_id = np.arange(1, 9).astype(np.int32)
# Starting labels: every node begins with its own label, 1..8.
labels_test = np.arange(1, 9)
Is = edges[["index_1", "index_2"]]
# One predicted weight per edge, in the same order as the rows of `edges`.
y_pred = np.array([0.9, 0.7, 0.9, 0.8, 0.7, 0.55, 0.2, 0.6])

In [2]:
from preprocess import weights_to_labels
# labels_test is both an argument and the name the result is bound to, so
# running this cell a second time feeds the previous result back in. Re-run
# the setup cell first to start again from the labels 1..8.
labels_test = weights_to_labels(X, Is, y_pred, labels_test, hit_id, threshold = 0.6)

In [3]:
# Labels of the eight toy nodes after the call above.
labels_test

array([1, 2, 2, 1, 1, 1, 7, 2])

In [None]:
# The two index columns of `edges` that were passed to weights_to_labels.
Is

In [4]:
# Full toy edge table, including the reference weight column.
edges

Unnamed: 0,index_1,index_2,weight
0,0,3,1
1,0,4,0
2,1,4,1
3,2,4,1
4,3,5,1
5,3,6,0
6,4,6,0
7,4,7,1


In [7]:
# .loc with a list of labels returns the rows in the order given; a label
# listed twice (4 here) yields its row twice.
edges.loc[[4,4,5]]

Unnamed: 0,index_1,index_2,weight
4,3,5,1
4,3,5,1
5,3,6,0
