## Task 3

In [41]:
!pip install --upgrade pip
!pip install torch-geometric
!pip install torch-sparse==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.4.0.html
!pip install torch-cluster==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.4.0.html
!pip install torch-scatter==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.4.0.html
!pip install torch-spline-conv==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.4.0.html
!pip install energyflow


Requirement already up-to-date: pip in /usr/local/lib/python3.6/dist-packages (20.0.2)
Looking in links: https://pytorch-geometric.com/whl/torch-1.4.0.html
Collecting torch-sparse==latest+cu101
  Using cached https://s3.eu-central-1.amazonaws.com/pytorch-geometric.com/whl/torch-1.4.0/torch_sparse-latest%2Bcu101-cp36-cp36m-linux_x86_64.whl (15.2 MB)
Installing collected packages: torch-sparse
  Attempting uninstall: torch-sparse
    Found existing installation: torch-sparse 0.6.1
    Uninstalling torch-sparse-0.6.1:
      Successfully uninstalled torch-sparse-0.6.1
Successfully installed torch-sparse-0.6.1


Looking in links: https://pytorch-geometric.com/whl/torch-1.4.0.html
Collecting torch-cluster==latest+cu101
  Using cached https://s3.eu-central-1.amazonaws.com/pytorch-geometric.com/whl/torch-1.4.0/torch_cluster-latest%2Bcu101-cp36-cp36m-linux_x86_64.whl (14.5 MB)
Installing collected packages: torch-cluster
  Attempting uninstall: torch-cluster
    Found existing installation: torch-cluster 1.5.3
    Uninstalling torch-cluster-1.5.3:
      Successfully uninstalled torch-cluster-1.5.3
Successfully installed torch-cluster-1.5.3


Looking in links: https://pytorch-geometric.com/whl/torch-1.4.0.html
Collecting torch-scatter==latest+cu101
  Using cached https://s3.eu-central-1.amazonaws.com/pytorch-geometric.com/whl/torch-1.4.0/torch_scatter-latest%2Bcu101-cp36-cp36m-linux_x86_64.whl (10.6 MB)
Installing collected packages: torch-scatter
  Attempting uninstall: torch-scatter
    Found existing installation: torch-scatter 2.0.4
    Uninstalling torch-scatter-2.0.4:
      Successfully uninstalled torch-scatter-2.0.4
Successfully installed torch-scatter-2.0.4


Looking in links: https://pytorch-geometric.com/whl/torch-1.4.0.html
Collecting torch-spline-conv==latest+cu101
  Using cached https://s3.eu-central-1.amazonaws.com/pytorch-geometric.com/whl/torch-1.4.0/torch_spline_conv-latest%2Bcu101-cp36-cp36m-linux_x86_64.whl (5.1 MB)
Installing collected packages: torch-spline-conv
  Attempting uninstall: torch-spline-conv
    Found existing installation: torch-spline-conv 1.2.0
    Uninstalling torch-spline-conv-1.2.0:
      Successfully uninstalled torch-spline-conv-1.2.0
Successfully installed torch-spline-conv-1.2.0




In [0]:
import numpy as np
import torch
from torch_geometric.data import Data, DataLoader
from torch.nn import Sequential as Seq, Linear, ReLU
from torch_geometric.nn import MessagePassing, knn_graph
import torch_geometric.transforms as T
import energyflow
from sklearn.preprocessing import normalize

#### Loading Data

In [0]:
x, y = energyflow.qg_jets.load(num_data=8*10**5, generator='pythia', pad=True, with_bc=False, cache_dir='~/.energyflow')

In [0]:
data = np.reshape(x, (x.shape[0], x.shape[1]*x.shape[2]))
label = np.array(y, dtype='int')

#### Edge Convolutional Layer

In [0]:
class EdgeConv(MessagePassing):
    def __init__(self, in_channels, out_channels):
        super(EdgeConv, self).__init__(aggr='max') #  "Max" aggregation.
        self.mlp = Seq(Linear(2 * in_channels, out_channels),
                       ReLU(),
                       Linear(out_channels, out_channels))

    def forward(self, x, edge_index):
        # x has shape [N, in_channels]
        # edge_index has shape [2, E]

        return self.propagate(edge_index, size=(x.size(0), x.size(0)), x=x)

    def message(self, x_i, x_j):
        # x_i has shape [E, in_channels]
        # x_j has shape [E, in_channels]

        tmp = torch.cat([x_i, x_j - x_i], dim=1)  # tmp has shape [E, 2 * in_channels]
        return self.mlp(tmp)

    def update(self, aggr_out):
        # aggr_out has shape [N, out_channels]

        return aggr_out
    
class DynamicEdgeConv(EdgeConv):
    def __init__(self, in_channels, out_channels, k=30):
        super(DynamicEdgeConv, self).__init__(in_channels, out_channels)
        self.k = k

    def forward(self, x, batch=None):
        edge_index = knn_graph(x, self.k, batch, loop=False, flow=self.flow)
        return super(DynamicEdgeConv, self).forward(x, edge_index)


### Model

In [0]:
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = DynamicEdgeConv(data.shape[1], 128)
        self.conv2 = DynamicEdgeConv(128, 256)
        self.conv3 = DynamicEdgeConv(256, 128)
        self.conv4 = DynamicEdgeConv(128, 32)
        self.conv5 = DynamicEdgeConv(32, 2)
        self.relu = torch.nn.ReLU()
        self.dropout = torch.nn.Dropout(0.3)
        self.softmax = torch.nn.Softmax(dim=1)
        
    def forward(self, data):
        x = data.x

        x = self.conv1(x)
        x = self.dropout(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.dropout(x)
        x = self.relu(x)
        x = self.conv3(x)
        x = self.dropout(x)
        x = self.relu(x)
        x = self.conv4(x)
        x = self.dropout(x)
        x = self.relu(x)
        x = self.conv5(x)

        return self.softmax(x)

#### Creating mini-batches

In [0]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
raw_data = []
batch_size=100
t_size = 2*10**5
n = 10**5
t_data = normalize(data[:t_size], axis=0)
for i in range(int(t_data.shape[0]/batch_size)):
    x = torch.from_numpy(t_data[i*batch_size:(i+1)*batch_size]).float()
    y = torch.from_numpy(label[i*batch_size:(i+1)*batch_size])
    raw_data.append(Data(x=x, y=y))


### Training Model

In [9]:
model = Net().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0004)
loss_func = torch.nn.CrossEntropyLoss()

# model.train()
epochLoss = []
num_epochs = 75
for epoch in range(num_epochs):
    total_loss = 0; cntr = 0
    for batch in raw_data:
        batch = batch.to(device)
        optimizer.zero_grad()
        out = model(batch)
        loss = loss_func(out, batch.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        cntr += 1
        if cntr%500 == 0:
            print(cntr, total_loss/cntr, end=" ")
    print()
    print ('Epoch [%d/%d], Loss: %.4f' 
                   %(epoch+1, num_epochs, total_loss/cntr))
    epochLoss.append(total_loss/cntr)


500 0.6089878984093666 1000 0.5737396126091481 1500 0.5621184630990028 2000 0.5547156317234039 
Epoch [1/75], Loss: 0.5547
500 0.5325500974655152 1000 0.5331735238730907 1500 0.5334095007777214 2000 0.5323453467339277 
Epoch [2/75], Loss: 0.5323
500 0.5308255921006203 1000 0.5311992216408252 1500 0.5314167400995891 2000 0.5303901437222958 
Epoch [3/75], Loss: 0.5304
500 0.5281738523840904 1000 0.5287449304163456 1500 0.5290461152990659 2000 0.5281871314197779 
Epoch [4/75], Loss: 0.5282
500 0.5266513560414314 1000 0.5275274778306485 1500 0.527858126560847 2000 0.526847633779049 
Epoch [5/75], Loss: 0.5268
500 0.5255070666074753 1000 0.5262863366007805 1500 0.5266648045579593 2000 0.5257789367139339 
Epoch [6/75], Loss: 0.5258
500 0.5244067690372467 1000 0.5252837683260441 1500 0.5258563279906908 2000 0.5249658707827329 
Epoch [7/75], Loss: 0.5250
500 0.5238183180689812 1000 0.5248047896027564 1500 0.5249836120406787 2000 0.524130471482873 
Epoch [8/75], Loss: 0.5241
500 0.5223714443445

In [0]:
torch.save(model.state_dict(), './model1.pt')

#### Validating model

In [0]:
v_data = normalize(data[3*t_size:3*t_size+n], axis=0)

def gen_data(temp_data, k):
  raw_data = []
  for i in range(int(temp_data.shape[0]/batch_size)):
      x = torch.from_numpy(temp_data[i*batch_size:(i+1)*batch_size]).float()
      y = torch.from_numpy(label[k+i*batch_size:k+(i+1)*batch_size])
      raw_data.append(Data(x=x, y=y))
  return raw_data

val_data = gen_data(v_data, 3*t_size)
pred = np.zeros(len(val_data*batch_size), dtype='int')
i=0
for batch in val_data:
  out = model(batch.to(device))
  out = out.detach().cpu().numpy()
  pred[i*batch_size:(i+1)*batch_size] = np.argmax(out, axis=1) 
  i += 1
np.savez_compressed('./predicted1', pred1=pred)


In [0]:
pred = [[], [], []]
with np.load('predicted1.npz') as file:
    pred[0].append(file['pred1'])
with np.load('predicted2.npz') as file:
    pred[1].append(file['pred2'])
with np.load('predicted3.npz') as file:
    pred[2].append(file['pred3'])

#### Computing weight for each model

In [14]:
count = np.zeros(3)
for i in range(n):
  for j in range(3):
    if pred[j][0][i] == label[3*t_size+i]:
      count[j] += 1
count = count/n
print(count)

[0.76512 0.75535 0.76752]


#### Loading all models

In [7]:

model1 = Net().to(device)
model1.load_state_dict(torch.load('./model1.pt'))
model1.eval()

model2 = Net().to(device)
model2.load_state_dict(torch.load('./model2.pt'))
model2.eval()

model3 = Net().to(device)
model3.load_state_dict(torch.load('./model3.pt'))
model3.eval()

Net(
  (conv1): DynamicEdgeConv(
    (mlp): Sequential(
      (0): Linear(in_features=1120, out_features=128, bias=True)
      (1): ReLU()
      (2): Linear(in_features=128, out_features=128, bias=True)
    )
  )
  (conv2): DynamicEdgeConv(
    (mlp): Sequential(
      (0): Linear(in_features=256, out_features=256, bias=True)
      (1): ReLU()
      (2): Linear(in_features=256, out_features=256, bias=True)
    )
  )
  (conv3): DynamicEdgeConv(
    (mlp): Sequential(
      (0): Linear(in_features=512, out_features=128, bias=True)
      (1): ReLU()
      (2): Linear(in_features=128, out_features=128, bias=True)
    )
  )
  (conv4): DynamicEdgeConv(
    (mlp): Sequential(
      (0): Linear(in_features=256, out_features=32, bias=True)
      (1): ReLU()
      (2): Linear(in_features=32, out_features=32, bias=True)
    )
  )
  (conv5): DynamicEdgeConv(
    (mlp): Sequential(
      (0): Linear(in_features=64, out_features=2, bias=True)
      (1): ReLU()
      (2): Linear(in_features=2, out_fe

In [0]:
te_data = normalize(data[3*t_size+n:], axis=0)

def gen_data(temp_data, k):
  raw_data = []
  for i in range(int(temp_data.shape[0]/batch_size)):
      x = torch.from_numpy(temp_data[i*batch_size:(i+1)*batch_size]).float()
      y = torch.from_numpy(label[k+i*batch_size:k+(i+1)*batch_size])
      raw_data.append(Data(x=x, y=y))
  return raw_data

test_data = gen_data(te_data, 3*t_size)
pred = np.zeros(len(test_data*batch_size), dtype='int')


### Computing Accuracy of model

In [10]:
def predict(model):
  i=0
  pred = np.zeros(n, dtype='int')
  for batch in test_data:
    out = model(batch.to(device))
    out = out.detach().cpu().numpy()
    pred[i*batch_size:(i+1)*batch_size] = np.argmax(out, axis=1) 
    i += 1
  return pred

def find_accuracy(pred):
  count = 0
  for i in range(n):
    if pred[i] != label[3*t_size+n+i]:
      count += 1
  return 100*(1 - count/n)

pred1 = predict(model1)
print("Accuracy of model1 =", find_accuracy(pred1))
pred2 = predict(model2)
print("Accuracy of model2 =", find_accuracy(pred2))
pred3 = predict(model3)
print("Accuracy of model3 =", find_accuracy(pred3))

Accuracy of model1 = 76.519
Accuracy of model2 = 75.374
Accuracy of model3 = 76.538


### Final prediction  (Bagging approach)
Final label is predicted by taking the average of weighted sum of the predictions made by each model. 

In [15]:
final_label = np.round((count[0]*pred1 + count[1]*pred2 + count[2]*pred3)/3)  
print("Accuracy of overall model =", find_accuracy(final_label))

Accuracy of overall model = 77.568
