First attempt of graph network. Running on CPU.

In [1]:
import numpy as np
import itertools
import os, sys

import torch
import torch.nn as nn
import torch.nn.functional  as F

from progressBar import ProgressBar

## Get the dataset

In [2]:
from torch.utils import data

In [3]:
class Dataset(data.Dataset):
    def __init__(self, names = 'SM', 
                 template='../data/20190621_5part_PtOrder/{}_lepFilter_13TeV.npy'):
        if names == 'SM':
            self.SM_names = ['Wlnu', 'qcd', 'ttbar']
            names = self.SM_names
        self.names = names
        
        for i, n in enumerate(names):
            ins = np.load(template.format(n)).astype(np.float32)
            out = i*np.ones(ins.shape[0]).astype(np.int8)
            
            print(n, ':', str(ins.shape[0]))
            
            if i == 0:
                self.inputs = ins
                self.outputs = out
            else:
                self.inputs = np.concatenate((self.inputs, ins))
                self.outputs = np.concatenate((self.outputs, out))
    
    def __len__(self):
        return self.inputs.shape[0]
    
    def __getitem__(self, idx):
        return self.inputs[idx], self.outputs[idx]

In [4]:
dataset = {}
# Use to do validation split
# torch.utils.data.random_split(dataset, lengths)
dataset['train'] = Dataset()

Wlnu : 10000
qcd : 10000
ttbar : 10000


## Define the model

In [5]:
class ClfGraphNet(nn.Module):
    def __init__(self, N_nodes, N_features, dim_hidden, De, Do, dim_output, verbose = False):
        super(ClfGraphNet, self).__init__()
        self.verbose = verbose
        
        self.p = N_features
        self.No = N_nodes
        self.De = De
        self.Do = Do
        
        self.Rr, self.Rs = self.buildEdgesMatrixes()
        self.fr = self.build_fr(dim_hidden)
        self.fo = self.build_fo(dim_hidden)
        self.PhiC = self.build_PhiC(dim_output)
        
        self.onGPU = False
        
        
    def buildEdgesMatrixes(self):
        ### Assume fully connected graph
        Ne = self.No * (self.No - 1)
        self.Ne = Ne
        Rr = torch.zeros(self.No, Ne)
        Rs = torch.zeros(self.No, Ne)
        receiver_sender_list = [i for i in itertools.product(range(self.No), range(self.No)) if i[0]!=i[1]]
        for i, (r, s) in enumerate(receiver_sender_list):
            Rr[r, i] = 1
            Rs[s, i] = 1
        return Rr, Rs
            
    def build_fr(self, dim_hidden):
        fr = nn.Sequential(
                            nn.Linear(2 * self.p, dim_hidden),
                            nn.ReLU(),
                            nn.Linear(dim_hidden, int(dim_hidden/2)),
                            nn.ReLU(),
                            nn.Linear(int(dim_hidden/2), self.De),
                            nn.ReLU(),
                          )
        return fr
        
    def build_fo(self, dim_hidden):
        fo = nn.Sequential(
                            nn.Linear(self.p + self.De, dim_hidden),
                            nn.ReLU(),
                            nn.Linear(dim_hidden, int(dim_hidden/2)),
                            nn.ReLU(),
                            nn.Linear(int(dim_hidden/2), self.Do),
                            nn.ReLU(),
                          )    
        return fo

    def build_PhiC(self, dim_output):
        self.PhiC_layer = nn.Linear(self.Do, dim_output)
        
        def PhiC(x):
            return F.sigmoid(self.PhiC_layer(x))
        
        return PhiC
         
    def INlayer(self, x):
        x = torch.transpose(x, 1, 2).contiguous()
        Orr = torch.matmul(x, self.Rr)
        Ors = torch.matmul(x, self.Rs)
        B = torch.cat([Orr, Ors], dim=1)
        
        ### First MLP ###
        E = self.fr(B.view(-1, 2 * self.p))
        del B
        
        E = E.view(-1, self.Ne, self.De)
        E = torch.transpose(E, 1, 2).contiguous()
        Ebar = torch.matmul(E, torch.transpose(self.Rr, 0, 1).contiguous())
        del E
        
        C = torch.cat([x, Ebar], 1)
        del Ebar
        
        C = torch.transpose(C, 1, 2).contiguous()
        O = self.fo(C.view(-1, self.p + self.De))
        del C
        O = torch.sigmoid(O.view(-1, self.No, self.Do))
        return O
        
    def forward(self, x):
        O = self.INlayer(x)
        
        # now sum over No nodes to obtain the Do latent quantities
        O = torch.sum(O, 1)
        y = self.PhiC(O)
        return y
    
    def useGPU(self):
        if torch.cuda.is_available():
            print('Current device: {} ({} available)'.format(torch.cuda.current_device(), 
                                                             torch.cuda.device_count()))
            
            for o in self.__dict__.values():
                if o.__class__ == torch.Tensor:
                    o.cuda()
            
            self.onGPU = True
        else: 
            print('GPU not available')
            

In [6]:
# Does not work for the moment
# def weights_init(M):
#     nn.init.xavier_normal(M.weight.data)

In [7]:
model = ClfGraphNet(N_nodes=dataset['train'].inputs.shape[1],
                    N_features=dataset['train'].inputs.shape[2],
                    dim_hidden=10,
                    De=3,
                    Do=7,
                    dim_output=len(dataset['train'].names)
                   )
print(model)

ClfGraphNet(
  (fr): Sequential(
    (0): Linear(in_features=10, out_features=10, bias=True)
    (1): ReLU()
    (2): Linear(in_features=10, out_features=5, bias=True)
    (3): ReLU()
    (4): Linear(in_features=5, out_features=3, bias=True)
    (5): ReLU()
  )
  (fo): Sequential(
    (0): Linear(in_features=8, out_features=10, bias=True)
    (1): ReLU()
    (2): Linear(in_features=10, out_features=5, bias=True)
    (3): ReLU()
    (4): Linear(in_features=5, out_features=7, bias=True)
    (5): ReLU()
  )
  (PhiC_layer): Linear(in_features=7, out_features=3, bias=True)
)


In [8]:
trainablePars = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(trainablePars)

394


In [9]:
# print(model.fr_layers[0].weight)
# model.apply(weights_init)
# print(model.fr_layers[0].weight)

## Loss

In [10]:
criterion = nn.CrossEntropyLoss()

## Training

In [11]:
training_params = {'batch_size': 64,
                   'shuffle': True,
                   'num_workers': 0
                  }

max_epochs = 100

In [12]:
optimizer = torch.optim.Adam(model.parameters(), lr = 0.0001)

In [13]:
optimizer.zero_grad()
data_train_iter = data.DataLoader(dataset['train'], **training_params)

train_loss = []
for epoch in range(max_epochs):
    batch_loss = []
    pb = ProgressBar(len(data_train_iter), percentPrecision=5, headLabel='Ep {} '.format(epoch))
    for local_x, local_y in data_train_iter:
        if model.onGPU:
            local_x = local_x.cuda()
            local_y = local_y.cuda()
        
        y_pred = model(local_x)
        loss = criterion(y_pred, local_y.long())
        batch_loss.append(loss.item())
        
        tail_label = 'Loss: {:2.2e}'.format(loss.item())
        if len(train_loss) > 0:
            tail_label += ' ({:2.2e})'.format(loss.item() - train_loss[-1][-1])
        pb.show(len(batch_loss)-1, tail_label=tail_label)
        loss.backward()
        optimizer.step()

    printout = 'Loss: {:2.2e}'.format(batch_loss[-1])
    if len(train_loss) > 0:
        printout += ' - Delta: {:2.2e}'.format(batch_loss[-1] - train_loss[-1][-1])
    if len(train_loss) > 1:
        d2L_de2 = batch_loss[-1] - 2*train_loss[-1][-1] + train_loss[-2][-1]
        printout +=' - Trend: {:2.2e}'.format(d2L_de2)
    print(printout+'\n')
    train_loss.append(batch_loss)

Ep 0 [#-------------------]  9% - ETA:    1 s   Loss: 1.14e+00



Ep 0 [####################]  100% - Tot. time: 1.3 s            
Loss: 1.08e+00
Ep 1 [####################]  100% - Tot. time: 1.3 s                        
Loss: 1.06e+00 - Delta: -2.37e-02
Ep 2 [####################]  100% - Tot. time: 1.2 s                        
Loss: 1.01e+00 - Delta: -4.72e-02 - Trend: -2.35e-02
Ep 3 [####################]  100% - Tot. time: 1.2 s                        
Loss: 1.01e+00 - Delta: -1.90e-03 - Trend: 4.53e-02
Ep 4 [####################]  100% - Tot. time: 1.2 s                        
Loss: 9.74e-01 - Delta: -3.85e-02 - Trend: -3.66e-02
Ep 5 [####################]  100% - Tot. time: 1.4 s                        
Loss: 9.63e-01 - Delta: -1.01e-02 - Trend: 2.84e-02
Ep 6 [####################]  100% - Tot. time: 1.3 s                        
Loss: 9.63e-01 - Delta: -3.20e-04 - Trend: 9.81e-03
Ep 7 [####################]  100% - Tot. time: 1.3 s                        
Loss: 9.02e-01 - Delta: -6.14e-02 - Trend: -6.11e-02
Ep 8 [####################]  100

Ep 64 [####################]  100% - Tot. time: 1.3 s                        
Loss: 8.93e-01 - Delta: 2.00e-02 - Trend: -1.63e-02
Ep 65 [####################]  100% - Tot. time: 1.3 s                        
Loss: 8.50e-01 - Delta: -4.30e-02 - Trend: -6.30e-02
Ep 66 [####################]  100% - Tot. time: 1.3 s                        
Loss: 7.95e-01 - Delta: -5.48e-02 - Trend: -1.18e-02
Ep 67 [####################]  100% - Tot. time: 1.3 s                       
Loss: 8.79e-01 - Delta: 8.34e-02 - Trend: 1.38e-01
Ep 68 [####################]  100% - Tot. time: 1.3 s                        
Loss: 8.48e-01 - Delta: -3.02e-02 - Trend: -1.14e-01
Ep 69 [####################]  100% - Tot. time: 1.3 s                        
Loss: 8.79e-01 - Delta: 3.06e-02 - Trend: 6.09e-02
Ep 70 [####################]  100% - Tot. time: 1.3 s                        
Loss: 9.16e-01 - Delta: 3.69e-02 - Trend: 6.28e-03
Ep 71 [####################]  100% - Tot. time: 1.3 s                        
Loss: 8.56e-0