First attempt of graph network. Running on CPU.

In [1]:
import numpy as np
import itertools
import os, sys

import torch
import torch.nn as nn
import torch.nn.functional  as F

## Get the dataset

In [2]:
from torch.utils import data

In [3]:
class Dataset(data.Dataset):
    def __init__(self, names = 'SM', template='../data/20190619_10part_PtOrder/{}_lepFilter_13TeV.npy'):
        if names == 'SM':
            self.SM_names = ['Wlnu', 'qcd', 'ttbar']
            names = self.SM_names
        
        for i, n in enumerate(names):
            ins = np.load(template.format(n)).astype(np.float32)
            out = np.zeros((ins.shape[0], len(names)))
            out[:,i] = 1
            
            print(n, ':', str(ins.shape[0]))
            
            if i == 0:
                self.inputs = ins
                self.outputs = out
            else:
                self.inputs = np.concatenate((self.inputs, ins))
                self.outputs = np.concatenate((self.outputs, out))
    
    def __len__(self):
        return self.inputs.shape[0]
    
    def __getitem__(self, idx):
        return self.inputs[idx], self.outputs[idx]

In [4]:
dataset = {}
dataset['train'] = Dataset()

Wlnu : 200
qcd : 200
ttbar : 200


## Define the model

In [5]:
class ClfGraphNet(nn.Module):
    def __init__(self, N_nodes, N_features, dim_hidden, De, Do, dim_output, verbose = False):
        super(ClfGraphNet, self).__init__()
        self.verbose = verbose
        
        self.p = N_features
        self.No = N_nodes
        self.De = De
        self.Do = Do
        
        self.Rr, self.Rs = self.buildEdgesMatrixes()
        self.fr = self.build_fr(dim_hidden)
        self.fo = self.build_fo(dim_hidden)
        self.PhiC = self.build_PhiC(dim_output)
        
        self.onGPU = False
        
        
    def buildEdgesMatrixes(self):
        ### Assume fully connected graph
        Ne = self.No * (self.No - 1)
        self.Ne = Ne
        Rr = torch.zeros(self.No, Ne)
        Rs = torch.zeros(self.No, Ne)
        receiver_sender_list = [i for i in itertools.product(range(self.No), range(self.No)) if i[0]!=i[1]]
        for i, (r, s) in enumerate(receiver_sender_list):
            Rr[r, i] = 1
            Rs[s, i] = 1
        return Rr, Rs
            
    def build_fr(self, dim_hidden):
        self.fr_layers = [nn.Linear(2 * self.p, dim_hidden),
                          nn.Linear(dim_hidden, int(dim_hidden/2)),
                          nn.Linear(int(dim_hidden/2), self.De)
                         ]
        
        def fr(x):
            aux = x
            for l in self.fr_layers:
                aux = F.relu(aux)
            return aux
        
        return fr
        
    def build_fo(self, dim_hidden):
        self.fo_layers = [nn.Linear(self.p + self.De, dim_hidden),
                          nn.Linear(dim_hidden, int(dim_hidden/2)),
                          nn.Linear(int(dim_hidden/2), self.Do)
                         ]
        
        def fo(x):
            aux = x
            for l in self.fr_layers:
                aux = F.relu(aux)
            return aux
        
        return fo

    def build_PhiC(self, dim_output):
        self.PhiC_layer = nn.Linear(self.Do, dim_output)
        
        def PhiC(x):
            return F.sigmoid(self.PhiC_layer(x))
        
        return PhiC
         
    def INlayer(self, x):
        Orr = torch.matmul(x, self.Rr)
        Ors = torch.matmul(x, self.Rs)
        B = torch.cat([Orr, Ors], dim=1)
        
        ### First MLP ###
        B = torch.transpose(B, 1, 2).contiguous()
        E = self.fr(B.view(-1, 2 * P)).view(-1, self.Ne, self.De)
        del B
        
        E = torch.transpose(E, 1, 2).contiguous()
        Ebar = torch.matmul(E, torch.transpose(self.Rr, 0, 1).contiguous())
        del E
        
        C = torch.cat([x, Ebar], 1)
        del Ebar
        
        C = torch.transpose(C, 1, 2).contiguous()
        C = self.fo(C.view(-1, self.p + self.De))
        O = nn.functional.sigmoid(C.view(-1, self.No, self.Do))
        del C
        return O
        
    def forward(self, x):
        O = self.INlayer(x)
        
        # now sum over No nodes to obtain the Do latent quantities
        O = torch.sum(O, 1)
        return self.PhiC(O)
    
    def useGPU(self):
        if torch.cuda.is_available():
            print('Current device: {} ({} available)'.format(torch.cuda.current_device(), 
                                                             torch.cuda.device_count()))
            
            for o in self.__dict__.values():
                if o.__class__ == torch.Tensor:
                    o.cuda()
            
            self.onGPU = True
        else: 
            print('GPU not available')
            

In [6]:
# Does not work for the moment
# def weights_init(M):
#     nn.init.xavier_normal(M.weight.data)

In [7]:
model = ClfGraphNet(N_nodes=dataset['train'].inputs.shape[1],
                    N_features=dataset['train'].inputs.shape[2],
                    dim_hidden=10,
                    De=5,
                    Do=7,
                    dim_output=dataset['train'].outputs.shape[1]
                   )

In [8]:
# print(model.fr_layers[0].weight)
# model.apply(weights_init)
# print(model.fr_layers[0].weight)

## Loss

In [9]:
criterion = nn.CrossEntropyLoss()

## Training

In [10]:
training_params = {'batch_size': 64,
                   'shuffle': True,
                   'num_workers': 0
                  }

max_epochs = 100

In [11]:
optimizer = torch.optim.Adam(model.parameters(), lr = 0.0001)

In [12]:
optimizer.zero_grad()

train_loss = []
for epoch in range(max_epochs):
    
    batch_loss = []
    for local_x, local_y in data.DataLoader(dataset['train'], **training_params):
        if model.onGPU:
            local_x = local_x.cuda()
            local_y = local_y.cuda()
        
        loss = criterion(model(local_x), local_y)
        batch_loss.append(loss.data[0])
        loss.backward()
        
        optimizer.step()
    train_loss.append(batch_loss)

RuntimeError: size mismatch, m1: [640 x 5], m2: [10 x 90] at /Users/distiller/project/conda/conda-bld/pytorch_1556653464916/work/aten/src/TH/generic/THTensorMath.cpp:961