In [None]:
# imports
import numpy as np
import pandas as pd
import torch
from torch_geometric.data import Data
from torch_geometric.utils import dense_to_sparse
from torch_geometric.utils import to_networkx
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve
import torch_geometric
import networkx as nx
plt.style.use("dark_paper")
def visualize(h, color):
    z = TSNE(n_components=2).fit_transform(out.detach().cpu().numpy())

    fig,ax = plt.subplots(1,1,figsize=(10,10))
    plt.xticks([])
    plt.yticks([])
    plt.scatter(z[:, 0], z[:, 1], s=70, c=color,cmap="Dark2")
    return fig,ax

In [None]:
def plot_graph(h, color, epoch=None, loss=None):
    fig,ax = plt.subplots(1,1,figsize=(7,7))
    plt.xticks([])
    plt.yticks([])

    if torch.is_tensor(h):
        h = h.detach().cpu().numpy()
        plt.scatter(h[:, 0], h[:, 1], s=140, c=color, cmap="Set2")
        if epoch is not None and loss is not None:
            plt.xlabel(f'Epoch: {epoch}, Loss: {loss.item():.4f}', fontsize=16)
    else:
        nx.draw_networkx(G, pos=nx.spring_layout(G, seed=42), with_labels=False,
                         node_color=color,edge_color="w", cmap="Set2")
    return fig,ax
def _process(data): 
    x=data.loc[:,["E","px","py","pz"]].to_numpy()
    y=data.loc[:,["mask"]].to_numpy()
    data = Data(x=torch.from_numpy(x).float(),
        y=torch.from_numpy(y).float(),
        #edge_index=dense_to_sparse(torch.ones(x.shape[0],x.shape[0]).fill_diagonal_(0))[0]#complete
        edge_index=dense_to_sparse(torch.eye(x.shape[0]))[0]#self only
        #edge_index=dense_to_sparse(torch.ones(x.shape[0],x.shape[0]))[0]#complete graph
        ) 
    return data

In [None]:
train_set = [_process(pd.read_hdf("data/W/evts.h5", key=f"evt{i}")) for i in range(1,2000)] +\
[_process(pd.read_hdf("data/QCD/evts_qcd.h5", key=f"evt{i}")) for i in range(1,2000)]
val_set = [_process(pd.read_hdf("data/W/evts.h5", key=f"evt{i}")) for i in range(2000,2201)]+\
[_process(pd.read_hdf("data/QCD/evts_qcd.h5", key=f"evt{i}")) for i in range(2000,2201)]

In [4]:
data = train_set[0]  # Get the first graph object.

print()
print(data)
print('=============================================================')

# Gather some statistics about the first graph.
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
print(f'Contains isolated nodes: {data.contains_isolated_nodes()}')
print(f'Contains self-loops: {data.contains_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')


Data(edge_index=[2, 302], x=[302, 4], y=[302, 1])
Number of nodes: 302
Number of edges: 302
Average node degree: 1.00
Contains isolated nodes: True
Contains self-loops: True
Is undirected: True


In [44]:
class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(GCN, self).__init__()
        torch.manual_seed(12345)
#         self.conv1 = torch_geometric.nn.GCNConv(4, hidden_channels)
#         self.conv2 = torch_geometric.nn.GCNConv(hidden_channels, hidden_channels)
#         self.conv3 = torch_geometric.nn.GCNConv(hidden_channels, 1)
        self.mlp1 = torch.nn.Sequential(torch.nn.Linear(4*2,hidden_channels))
        self.mlp2 = torch.nn.Sequential(torch.nn.Linear(hidden_channels*2,hidden_channels))
        self.mlp3 = torch.nn.Sequential(torch.nn.Linear(hidden_channels*2,hidden_channels))
        self.mlp4 = torch.nn.Sequential(torch.nn.Linear(hidden_channels*2,hidden_channels))        
        self.mlp5 = torch.nn.Sequential(torch.nn.Linear(hidden_channels*2,1))
        self.conv1 = torch_geometric.nn.DynamicEdgeConv(self.mlp1,k=15,aggr="max")
        self.conv2 = torch_geometric.nn.DynamicEdgeConv(self.mlp2,k=5,aggr="max")
        self.conv3 = torch_geometric.nn.DynamicEdgeConv(self.mlp3,k=5,aggr="max")
        self.conv4 = torch_geometric.nn.DynamicEdgeConv(self.mlp4,k=5,aggr="max")
        self.conv5 = torch_geometric.nn.DynamicEdgeConv(self.mlp5,k=5,aggr="max")
        self.batchnorm = torch.nn.BatchNorm1d(hidden_channels)
    def forward(self, x, edge_index):
#         x = self.conv1(x, edge_index).relu()
#         #x = self.batchnorm(x)
#         x = self.conv2(x, edge_index).relu()
#         #x = F.dropout(x, p=0.5, training=self.training)
#         x = self.conv3(x, edge_index)
        x = self.conv1(x).relu()
        x = self.conv2(x).relu()
        x = self.conv3(x).relu()
        x = self.conv4(x).relu()
        x = self.conv5(x)
        return x

# Complete graph 

In [51]:
targets = torch.from_numpy(np.concatenate([data.y.tolist() for data in train_set]))
ones_weight = targets.sum().item()/len(targets)
weight = 1/torch.tensor([1-ones_weight,ones_weight])/2

In [52]:
def get_weights(targets):
    weight_ = weight[targets.data.view(-1).long()].view_as(targets)
    weight_ = weight_.to(targets.device)
    return weight_
    
def train(data,weighted=True):
    gcn.train()
    weight_ = get_weights(data.y)
    out = gcn(data.x,data.edge_index)#[data.train_mask]
    out = torch.sigmoid(out)
    loss = criterion(out,
                     data.y#[data.train_mask]
                    )
    if weighted:
        loss = (loss*weight_).mean()
    else:
        loss = loss.mean()
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    return loss.item()

In [53]:
def val(data,weighted=True):
    gcn.eval()
    weight_ = get_weights(data.y)
    with torch.no_grad():
        out = gcn(data.x,data.edge_index)
        out = torch.sigmoid(out)
        loss = criterion(out,data.y)
        if weighted:
            loss = (loss*weight_).mean()
        else:
            loss = loss.mean()
    return loss.item()

In [57]:
#single graph
gcn = GCN(512)
gcn.to(device)
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(gcn.parameters(),weight_decay=5e-4,lr=1e-4)
epochs = 500
for epoch in range(1,epochs):
    loss = train(train_set[0].to(device))
    if epoch%50==1:
        print(f"Epoch {epoch:^3d}/{epochs} | Loss {loss:^4.4f}")

Epoch  1 /500 | Loss 0.8895
Epoch 51 /500 | Loss 0.4374
Epoch 101/500 | Loss 0.2438
Epoch 151/500 | Loss 0.1079
Epoch 201/500 | Loss 0.0757
Epoch 251/500 | Loss 0.0471
Epoch 301/500 | Loss 0.0292
Epoch 351/500 | Loss 0.0173
Epoch 401/500 | Loss 0.0116
Epoch 451/500 | Loss 0.0073


In [None]:
gcn

In [58]:
gcn = GCN(512)
gcn.to(device)
criterion = torch.nn.BCELoss(reduction="none")
optimizer = torch.optim.Adam(gcn.parameters(),weight_decay=5e-4,lr=1e-4)
epochs = 500
samples = 200
losses = []
val_losses = []
for epoch in range(1,epochs):
    loss = 0
    for data in train_set[:samples]:
        loss += train(data.to(device))
    loss /= samples
    losses.append(loss)
    if epoch%50==1:
        loss_val = 0 
        for data in val_set[:samples]:
            loss_val+= val(data.to(device))
        loss_val /= samples
        val_losses.append(loss_val)
        print(f"Epoch {epoch:^3d}/{epochs} | Loss {loss:^4.4f}| Val loss {loss_val:^4.4f}")

Epoch  1 /500 | Loss 0.3901| Val loss 0.3849
Epoch 51 /500 | Loss 0.3030| Val loss 0.4026
Epoch 101/500 | Loss 0.2076| Val loss 0.6491
Epoch 151/500 | Loss 0.0929| Val loss 1.2550
Epoch 201/500 | Loss 0.0370| Val loss 1.8134
Epoch 251/500 | Loss 0.0176| Val loss 1.9532
Epoch 301/500 | Loss 0.0323| Val loss 1.5949
Epoch 351/500 | Loss 0.0521| Val loss 1.3552
Epoch 401/500 | Loss 0.0198| Val loss 1.6480
Epoch 451/500 | Loss 0.0153| Val loss 1.5032


In [59]:
pred = (np.concatenate([torch.sigmoid(gcn(data.x,data.edge_index)).detach().flatten().tolist() 
                        for data in train_set[:10]]
                      )>.5).astype(int)
target = np.concatenate([data.y.flatten().tolist() for data in train_set[:10]])
print(classification_report(target,pred))

              precision    recall  f1-score   support

         0.0       1.00      0.99      1.00      3406
         1.0       0.95      1.00      0.97       414

    accuracy                           0.99      3820
   macro avg       0.97      1.00      0.98      3820
weighted avg       0.99      0.99      0.99      3820



In [60]:
pred = (np.concatenate([torch.sigmoid(gcn(data.x,data.edge_index)).detach().flatten().tolist() 
                        for data in val_set[:20]]
                      )>.5).astype(int)
target = np.concatenate([data.y.flatten().tolist() for data in val_set[:20]])
print(classification_report(target,pred))

              precision    recall  f1-score   support

         0.0       0.92      0.93      0.92     37386
         1.0       0.23      0.22      0.22      3706

    accuracy                           0.86     41092
   macro avg       0.58      0.57      0.57     41092
weighted avg       0.86      0.86      0.86     41092



In [61]:
confusion_matrix(target,pred,)

array([[34657,  2729],
       [ 2900,   806]])

# Pt cloud

In [32]:
class eye_GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()
        torch.manual_seed(12345)
        self.conv1 = torch_geometric.nn.GCNConv(4, hidden_channels,add_self_loops=False)
        self.conv2 = torch_geometric.nn.GCNConv(hidden_channels, hidden_channels,add_self_loops=False)
        self.conv3 = torch_geometric.nn.GCNConv(hidden_channels, hidden_channels,add_self_loops=False)
        self.conv4 = torch_geometric.nn.GCNConv(hidden_channels, hidden_channels,add_self_loops=False)
        self.conv5 = torch_geometric.nn.GCNConv(hidden_channels, 1,add_self_loops=False)
        self.batchnorm = torch.nn.BatchNorm1d(hidden_channels)
    def forward(self, x, edge_index):
#         x = self.conv1(x, edge_index).relu()
#         #x = self.batchnorm(x)
#         x = self.conv2(x, edge_index).relu()
#         #x = F.dropout(x, p=0.5, training=self.training)
#         x = self.conv3(x, edge_index)
        x = self.conv1(x,edge_index).relu()
        x = self.conv2(x,edge_index).relu()
        x = self.conv3(x,edge_index).relu()
        x = self.conv4(x,edge_index).relu()
        x = self.conv5(x,edge_index)
        return x

In [36]:
#single graph
gcn = eye_GCN(256)
gcn.to(device)
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(gcn.parameters(),weight_decay=5e-4,lr=1e-2)
epochs = 500
for epoch in range(1,epochs):
    loss = train(train_set[0])
    if epoch%50==1:
        print(f"Epoch {epoch:^3d}/{epochs} | Loss {loss:^4.4f}")

Epoch  1 /500 | Loss 0.8501
Epoch 51 /500 | Loss 0.2819
Epoch 101/500 | Loss 0.1226
Epoch 151/500 | Loss 0.0663
Epoch 201/500 | Loss 1.2177
Epoch 251/500 | Loss 0.1757
Epoch 301/500 | Loss 0.1056
Epoch 351/500 | Loss 0.1041
Epoch 401/500 | Loss 0.0778
Epoch 451/500 | Loss 0.0296


In [41]:
gcn = eye_GCN(512)
gcn.to(device)
criterion = torch.nn.BCELoss(reduction="none")
optimizer = torch.optim.Adam(gcn.parameters(),weight_decay=5e-4,lr=1e-3)
epochs = 1500
samples = 2000
losses = []
val_losses = []
for epoch in range(1,epochs):
    loss = 0
    for data in train_set[:samples]:
        loss += train(data.to(device))
    loss /= samples
    losses.append(loss)
    if epoch%50==1:
        loss_val = 0 
        for data in val_set[:samples]:
            loss_val+= val(data.to(device))
        loss_val /= samples
        val_losses.append(loss_val)
        print(f"Epoch {epoch:^3d}/{epochs} | Loss {loss:^4.4f}| Val loss {loss_val:^4.4f}")

Epoch  1 /1500 | Loss 0.0603| Val loss 0.0377
Epoch 51 /1500 | Loss 0.0318| Val loss 0.0328
Epoch 101/1500 | Loss 0.0315| Val loss 0.0325
Epoch 151/1500 | Loss 0.0316| Val loss 0.0332
Epoch 201/1500 | Loss 0.0318| Val loss 0.0332
Epoch 451/1500 | Loss 0.0310| Val loss 0.0325
Epoch 501/1500 | Loss 0.0311| Val loss 0.0325
Epoch 551/1500 | Loss 0.0312| Val loss 0.0326
Epoch 601/1500 | Loss 0.0310| Val loss 0.0325
Epoch 651/1500 | Loss 0.0313| Val loss 0.0326
Epoch 701/1500 | Loss 0.0310| Val loss 0.0326
Epoch 751/1500 | Loss 0.0312| Val loss 0.0325
Epoch 801/1500 | Loss 0.0311| Val loss 0.0331
Epoch 851/1500 | Loss 0.0310| Val loss 0.0325
Epoch 901/1500 | Loss 0.0309| Val loss 0.0326
Epoch 951/1500 | Loss 0.0308| Val loss 0.0328
Epoch 1001/1500 | Loss 0.0309| Val loss 0.0324
Epoch 1051/1500 | Loss 0.0310| Val loss 0.0329
Epoch 1101/1500 | Loss 0.0308| Val loss 0.0331
Epoch 1151/1500 | Loss 0.0308| Val loss 0.0328
Epoch 1201/1500 | Loss 0.0313| Val loss 0.0327
Epoch 1251/1500 | Loss 0.0308

In [42]:
pred = (np.concatenate([torch.sigmoid(gcn(data.x,data.edge_index)).detach().flatten().tolist() 
                        for data in val_set[:100]]
                      )>.5).astype(int)
target = np.concatenate([data.y.flatten().tolist() for data in val_set[:100]])
print(classification_report(target,pred))

              precision    recall  f1-score   support

         0.0       0.96      0.49      0.65     37386
         1.0       0.13      0.77      0.22      3706

    accuracy                           0.52     41092
   macro avg       0.54      0.63      0.44     41092
weighted avg       0.88      0.52      0.61     41092



In [43]:
confusion_matrix(target,pred,)

array([[18500, 18886],
       [  854,  2852]])

# Old
trained for 1500 epochs without weights

In [82]:
epochs = 1000
for epoch in range(1,epochs):
    for data in train_set:
        loss = train(data)
    if epoch%50==1:
        print(f"Epoch {epoch:^3d}/{epochs} | Loss {loss:^4.4f}")

Epoch  1 /1000 | Loss 0.2497
Epoch 51 /1000 | Loss 0.2832
Epoch 101/1000 | Loss 0.2369
Epoch 151/1000 | Loss 0.3024
Epoch 201/1000 | Loss 0.3346
Epoch 251/1000 | Loss 0.2763
Epoch 301/1000 | Loss 0.1760
Epoch 351/1000 | Loss 0.1935
Epoch 401/1000 | Loss 0.1592
Epoch 451/1000 | Loss 0.1584
Epoch 501/1000 | Loss 0.1971
Epoch 551/1000 | Loss 0.1928
Epoch 601/1000 | Loss 0.1564
Epoch 651/1000 | Loss 0.1390
Epoch 701/1000 | Loss 0.1664
Epoch 751/1000 | Loss 0.1535
Epoch 801/1000 | Loss 0.2994
Epoch 851/1000 | Loss 0.2604
Epoch 901/1000 | Loss 0.2284
Epoch 951/1000 | Loss 0.2221


In [87]:
data = train_set[8]
print(*zip(torch.sigmoid(gcn(data.x,data.edge_index)).detach().flatten().tolist(),
      data.y.flatten().tolist()), sep="\n")

(0.1462511122226715, 0.0)
(2.850352454650726e-24, 0.0)
(0.991333544254303, 1.0)
(0.8665170669555664, 1.0)
(0.8194782733917236, 1.0)
(0.4785202741622925, 1.0)
(0.6124110221862793, 1.0)
(0.42588767409324646, 1.0)
(0.9294651746749878, 1.0)
(0.9990184307098389, 1.0)
(0.7726152539253235, 1.0)
(0.13509753346443176, 1.0)
(0.9999765157699585, 1.0)
(0.21755670011043549, 1.0)
(0.1605711728334427, 0.0)
(0.006053730845451355, 0.0)
(2.066202343547696e-29, 0.0)
(0.06368965655565262, 0.0)
(0.023447738960385323, 0.0)
(4.5762932797345415e-15, 0.0)
(0.13979420065879822, 0.0)
(0.00016625090211164206, 0.0)
(0.01608888804912567, 0.0)
(0.21124699711799622, 0.0)
(0.23464731872081757, 0.0)
(0.15603823959827423, 0.0)
(0.051417045295238495, 0.0)
(0.0005795211181975901, 0.0)
(8.600208219189387e-12, 0.0)
(0.0, 0.0)
(0.0, 0.0)
(0.0, 0.0)
(0.0024768030270934105, 0.0)
(0.001288735307753086, 0.0)
(6.665423361185767e-17, 0.0)
(6.021772801492505e-26, 0.0)
(0.0, 0.0)
(0.0, 0.0)
(0.0, 0.0)
(0.0, 0.0)
(0.0, 0.0)
(0.0, 0.0

In [96]:
pred = (np.concatenate([torch.sigmoid(gcn(data.x,data.edge_index)).detach().flatten().tolist() 
                        for data in train_set[:100]]
                      )>.5).astype(int)
target = np.concatenate([data.y.flatten().tolist() for data in train_set[:100]])
print(classification_report(target,pred))

              precision    recall  f1-score   support

         0.0       0.92      0.97      0.94     35914
         1.0       0.34      0.17      0.23      3688

    accuracy                           0.89     39602
   macro avg       0.63      0.57      0.58     39602
weighted avg       0.87      0.89      0.88     39602



In [94]:
pred = (np.concatenate([torch.sigmoid(gcn(data.x,data.edge_index)).detach().flatten().tolist() 
                        for data in val_set[:2]]
                      )>.5).astype(int)
target = np.concatenate([data.y.flatten().tolist() for data in val_set[:2]])
print(classification_report(target,pred))

              precision    recall  f1-score   support

         0.0       0.89      0.97      0.93       536
         1.0       0.10      0.03      0.05        67

    accuracy                           0.86       603
   macro avg       0.49      0.50      0.49       603
weighted avg       0.80      0.86      0.83       603

