# Download and install required libraries

In [None]:
# Print the installed PyTorch version and the CUDA version reported by torch.version.cuda.
# The install cells below select wheels by torch version and a cpu / cu101 suffix.
!python -c "import torch; print(torch.__version__)"
!python -c "import torch; print(torch.version.cuda)"

In [None]:
# ### For CPU ###

# ! pip -qq install torch==1.5.1+cpu torchvision==0.6.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
# ! pip -qq install torch-scatter==latest+cpu -f https://pytorch-geometric.com/whl/torch-1.5.0.html
# ! pip -qq install torch-sparse==latest+cpu -f https://pytorch-geometric.com/whl/torch-1.5.0.html
# ! pip -qq install torch-cluster==latest+cpu -f https://pytorch-geometric.com/whl/torch-1.5.0.html
# ! pip -qq install torch-spline-conv==latest+cpu -f https://pytorch-geometric.com/whl/torch-1.5.0.html
# ! pip -qq install torch-geometric

In [None]:
## For GPU ###
# Installs the PyTorch Geometric companion packages from the torch-1.5.0 wheel
# index using the `+cu101` builds, then installs torch-geometric itself.
# NOTE(review): torch-geometric is not version-pinned here — confirm that the
# release pip resolves is compatible with the torch 1.5.0 wheels requested above.

! pip -qq install torch-scatter==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.5.0.html
! pip -qq install torch-sparse==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.5.0.html
! pip -qq install torch-cluster==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.5.0.html
! pip -qq install torch-spline-conv==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.5.0.html
! pip -qq install torch-geometric

# Import Libraries

In [None]:
from typing import Union, Tuple
from torch_geometric.typing import OptPairTensor, Adj, Size

from torch import Tensor
from torch.nn import Linear
import torch.nn.functional as F
from torch_sparse import SparseTensor, matmul
from torch_geometric.nn.conv import MessagePassing

In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
import os
import time
import random
from tqdm.notebook import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from joblib import Parallel, delayed

from sklearn.preprocessing import StandardScaler, MinMaxScaler

import torch
import torch.nn.functional as F
import torch_geometric.nn as gnn
from torch_geometric.nn import MessagePassing
from torch_geometric.data import Dataset, Data, DataLoader
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp
from torch_geometric.utils import softmax

# Pre-processing

In [None]:
# Load the trigger CSV and discard its 'Unnamed: 0' column.
df = pd.read_csv('../input/cmsdata/CMS_trigger.csv').drop(columns = 'Unnamed: 0')
# Drop every column whose name contains '_1'
# (presumably the station-1 measurements — TODO confirm against the dataset schema).
df = df.drop(columns = [i for i in df.columns if '_1' in i])
# 'non_hits' is the row-wise sum of all remaining columns whose name contains 'mask';
# only rows where that sum is exactly 0 are kept.
df['non_hits'] = df[[i for i in df.columns if 'mask' in i]].sum(axis=1)
df = df[df['non_hits']==0].reset_index(drop=True)

# Magnitude of q/pt, i.e. the charge sign is discarded.
df['1/pT'] = df['q/pt'].abs()
def label(a):
    """Map a pT value to its class index.

    Classes (upper bounds inclusive): 0 for a <= 10, 1 for (10, 30],
    2 for (30, 100], 3 for a > 100. A value that satisfies none of the
    comparisons (e.g. NaN) falls through and yields None.
    """
    for class_id, upper_bound in enumerate((10, 30, 100)):
        if a <= upper_bound:
            return class_id
    if a > 100:
        return 3

# pT is the reciprocal of the |q/pt| column computed above; its class index comes from label().
df['pT'] = 1 / df['1/pT']
df['pT_classes'] = df['pT'].apply(label)

# Twelve input columns grouped by quantity: Phi_*, then Theta_*, then Front_*,
# each with the suffixes 0, 2, 3, 4 (same order as the original concatenation).
features = [
    quantity + '_' + str(suffix)
    for quantity in ('Phi', 'Theta', 'Front')
    for suffix in (0, 2, 3, 4)
]
labels_1 = ['pT']
labels_2 = ['pT_classes']

# Standardise the feature columns in place.
# NOTE(review): the scaler is fitted on every row before the fold split below,
# so validation/test rows contribute to the mean and variance.
scaler_1 = StandardScaler()
df[features] = scaler_1.fit_transform(df[features])

In [None]:
# Shuffle all row positions with a fixed seed (242) so the split is repeatable,
# then cut them into 10 near-equal folds: shuffled_list ends up as a list of 10 index arrays.
shuffled_list = [row for row in range(len(df))]
random.Random(242).shuffle(shuffled_list)
shuffled_list = np.array_split(np.asarray(shuffled_list), 10)

In [None]:
# Connectivity shared by every sample: a 4-node chain 0-1-2-3 with each link
# listed in both directions. The transpose turns the 6 (src, dst) pairs into shape (2, 6).
edge_index = torch.tensor([(0,1),(1,2),(2,3),(3,2),(2,1),(1,0)], dtype=torch.long).T
# Feature matrix and pT targets as NumPy arrays; process_data indexes them by row position.
X_data = df[features].to_numpy()
Y_data = df[labels_1].to_numpy()
def process_data(i):
    """Build the graph sample for row position ``i``.

    The row of ``X_data`` is reshaped to (-1, 4) and transposed, so with the 12
    feature columns each of the 4 nodes receives 3 features. The target is the
    matching row of ``Y_data``. Every sample reuses the module-level ``edge_index``.
    """
    node_features = torch.tensor(X_data[i].reshape(-1, 4).T, dtype=torch.float)
    target = torch.tensor(Y_data[i], dtype=torch.float)
    return Data(x=node_features, y=target, edge_index=edge_index)

# PyTorch Geometric Dataset Class

In [None]:
class TriggerDataset(Dataset):
    """PyTorch Geometric dataset that builds graph samples on demand.

    Nothing is downloaded or cached: ``download`` and ``process`` are no-ops and
    ``get`` calls ``process_data`` for the requested row every time.

    Args:
        root: Root directory forwarded to the base ``Dataset``.
        indexes: Sequence of row positions (into ``X_data`` / ``Y_data``) exposed
            by this dataset. ``None`` (the default) selects every row.
        transform: Forwarded to the base ``Dataset``.
        pre_transform: Forwarded to the base ``Dataset``.
    """

    def __init__(self, root, indexes=None, transform=None, pre_transform=None):
        super(TriggerDataset, self).__init__(root, transform, pre_transform)
        # Resolve the default at call time: a list built in the signature would be
        # created once at class definition, shared by all instances, and tied to
        # whatever the global `df` happened to be at that moment.
        if indexes is None:
            indexes = list(range(len(X_data)))
        self.indexes = indexes
        self.length = len(self.indexes)

    @property
    def raw_file_names(self):
        # NOTE(review): looks like a placeholder name — no such file is created in this notebook.
        return ['vgc']

    @property
    def processed_file_names(self):
        # NOTE(review): looks like a placeholder name — no such file is created in this notebook.
        return ['vghv']

    def download(self):
        """No-op: the data comes from in-memory arrays."""
        return None

    def process(self):
        """No-op: samples are built in ``get``."""
        return None

    def len(self):
        """Number of samples, i.e. the number of selected row positions."""
        return self.length

    def get(self, idx):
        """Return the graph for the ``idx``-th selected row."""
        return process_data(self.indexes[idx])

# Custom Message Passing Layer (MPL)

In [None]:
class MPL(MessagePassing):
    """Message-passing layer with attention-weighted messages and a gated update.

    Per edge, the message is ``relu(mlp1([x_i, x_j - x_i]))`` scaled by a softmax
    weight derived from ``x_i`` and the message itself. Messages are summed per
    node (``aggr='add'``). The node update mixes the aggregated message with
    ``relu(mlp2(x))`` through two independent sigmoid gates.

    Args:
        in_channels: Feature size of the incoming node embeddings.
        out_channels: Feature size of the produced node embeddings.
    """

    def __init__(self, in_channels, out_channels):
        super(MPL, self).__init__(aggr='add')
        self.mlp1 = torch.nn.Linear(in_channels*2, out_channels)   # edge message
        self.mlp2 = torch.nn.Linear(in_channels, out_channels)     # node self-transform
        self.mlp3 = torch.nn.Linear(2*out_channels, 1)             # gate applied to the message
        self.mlp4 = torch.nn.Linear(2*out_channels, 1)             # gate applied to the node state
        self.mlp5 = torch.nn.Linear(in_channels,16)                # attention branch on x_i
        self.mlp6 = torch.nn.Linear(out_channels,16)               # attention branch on the message
        self.mlp7 = torch.nn.Linear(16,1)                          # attention score

    def forward(self, x, edge_index):
        """Return the updated node embeddings, one row of ``out_channels`` per node."""
        msg = self.propagate(edge_index, x=x)
        x = F.relu(self.mlp2(x))
        # Both gates read the same concatenation, so build it once.
        gate_input = torch.cat([x, msg], dim=1)
        # torch.sigmoid replaces the deprecated F.sigmoid (same computation).
        w1 = torch.sigmoid(self.mlp3(gate_input))
        w2 = torch.sigmoid(self.mlp4(gate_input))
        return w1*msg + w2*x

    def message(self, x_i, x_j, edge_index):
        """Compute the attention-weighted message for every edge."""
        msg = F.relu(self.mlp1(torch.cat([x_i, x_j-x_i], dim=1)))
        # torch.tanh replaces the deprecated F.tanh (same computation).
        w1 = torch.tanh(self.mlp5(x_i))
        w2 = torch.tanh(self.mlp6(msg))
        w = self.mlp7(w1*w2)
        # NOTE(review): the softmax groups edges by edge_index[0]; with the default
        # propagation flow x_i is gathered via edge_index[1] — confirm that
        # normalising over row 0 is the intended grouping.
        w = softmax(w, edge_index[0])
        return msg*w

# GNN Architecture

In [None]:
class MPNN(torch.nn.Module):
    """Graph regressor: four MPL layers, two attention read-outs, and an MLP head.

    Node features go through conv1 (3 -> 128) and conv2 (128 -> 64), are pooled
    per graph, then through conv3 and conv4 (64 -> 64) and are pooled again. The
    two 64-wide graph vectors are concatenated and reduced by lin1..lin4 to one
    value per graph. ``lin5``..``lin8`` are constructed but not used in ``forward``.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the same order as before so parameter
        # registration and random initialisation are unchanged.
        self.conv1 = MPL(3, 128)
        self.conv2 = MPL(128, 64)
        self.conv3 = MPL(64, 64)
        self.conv4 = MPL(64, 64)
        self.lin1 = torch.nn.Linear(128, 128)
        self.lin2 = torch.nn.Linear(128, 16)
        self.lin3 = torch.nn.Linear(16, 16)
        self.lin4 = torch.nn.Linear(16, 1)
        self.lin5 = torch.nn.Linear(128, 128)
        self.lin6 = torch.nn.Linear(128, 16)
        self.lin7 = torch.nn.Linear(16, 16)
        self.lin8 = torch.nn.Linear(16, 1)
        self.global_att_pool1 = gnn.GlobalAttention(torch.nn.Sequential(torch.nn.Linear(64, 1)))
        self.global_att_pool2 = gnn.GlobalAttention(torch.nn.Sequential(torch.nn.Linear(64, 1)))

    def forward(self, data):
        """Return one prediction per graph in the batch (trailing dim squeezed)."""
        hidden = data.x
        edges = data.edge_index
        graph_ids = data.batch

        # First message-passing stage and its graph-level read-out.
        hidden = F.relu(self.conv1(hidden, edges))
        hidden = F.relu(self.conv2(hidden, edges))
        pooled_mid = self.global_att_pool1(hidden, graph_ids)

        # Second stage and read-out.
        hidden = F.relu(self.conv3(hidden, edges))
        hidden = F.relu(self.conv4(hidden, edges))
        pooled_end = self.global_att_pool2(hidden, graph_ids)

        # Regression head: ReLU after lin1 and lin2 only.
        head = torch.cat([pooled_mid, pooled_end], dim=1)
        for layer in (self.lin1, self.lin2):
            head = F.relu(layer(head))
        return self.lin4(self.lin3(head)).squeeze(1)

# Training Parameters

In [None]:
# Mini-batch size used by the DataLoaders.
batch_size = 512
# Maximum number of training epochs per fold.
epochs = 10
# True: wrap the training loop in a tqdm bar; False: iterate plainly and plot the loss curves.
progress_bar = True

# Custom Loss

In [None]:
def mse(outputs, labels):
    """Weighted mean of the squared relative error ``((outputs - labels) / labels) ** 2``.

    The per-sample weight depends on the label value:
      * ``labels < 80``         -> ``labels``
      * ``80 <= labels < 160``  -> ``2.4 * labels``
      * ``labels >= 160``       -> ``10``

    Args:
        outputs: Tensor of predictions.
        labels: Tensor of targets, broadcast-compatible with ``outputs``.

    Returns:
        Scalar tensor holding the mean weighted error.
    """
    # Comparison masks are created on the same device as `labels`, so no global
    # `device` is needed and tensors are not re-wrapped with torch.tensor().
    low = (labels < 80).float()
    mid = ((labels >= 80) & (labels < 160)).float()
    high = (labels >= 160).float()
    weights = low*labels + mid*labels*2.4 + high*10
    error = weights*(((outputs-labels)/labels)**2)
    return torch.mean(error)

# Training function

In [None]:
# Smoke test: build the training loader for the default split (fold 1 held out for
# validation, fold 2 for testing, the other eight folds for training) and print
# the first collated batch.
val_batch=1
test_batch=2
train_loader = DataLoader(TriggerDataset('./',np.concatenate([shuffled_list[j] for j in range(10) if j not in (val_batch, test_batch)])), batch_size=batch_size, shuffle=True, num_workers = 4) 
for i in train_loader:
    # Only the first batch is needed to inspect the batch structure.
    print(i)
    break

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def train_fn(val_batch=1, test_batch=2):
    """Train one fold, keep the best checkpoint, and write its test-fold predictions.

    The model is fitted with plain ``MSELoss`` against ``1 / data.y`` (the target
    stored in each graph is pT, so the network regresses 1/pT). After every epoch
    the validation loss drives ``ReduceLROnPlateau``, and the weights with the
    lowest validation loss are saved to ``model.pth``. The saved weights are then
    reloaded and evaluated on the test fold. The result is written to
    ``OOF_preds_<val_batch>.csv`` with columns ``true_value`` (pT), ``preds1``
    (reciprocal of the network output) and ``row`` (row position).

    Args:
        val_batch: Index into ``shuffled_list`` of the validation fold.
        test_batch: Index into ``shuffled_list`` of the test fold.

    Returns:
        Always 0.
    """
    # Local criterion; this intentionally shadows any module-level `mse`.
    mse = torch.nn.MSELoss()

    train_rows = np.concatenate(
        [shuffled_list[j] for j in range(len(shuffled_list)) if j not in (val_batch, test_batch)]
    )
    train_loader = DataLoader(TriggerDataset('./', train_rows), batch_size=batch_size, shuffle=True, num_workers=4)
    val_loader = DataLoader(TriggerDataset('./', shuffled_list[val_batch]), batch_size=batch_size)
    test_loader = DataLoader(TriggerDataset('./', shuffled_list[test_batch]), batch_size=batch_size)

    model = MPNN().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, verbose=True, patience=1, factor=0.5)

    m_train_loss = []
    m_val_loss = []
    min_val_loss = float('inf')
    for epoch in range(epochs):
        train_loss = 0
        val_loss = 0
        pbar = tqdm(train_loader) if progress_bar else train_loader

        model.train()
        for data in pbar:
            data = data.to(device)
            optimizer.zero_grad()
            outputs = model(data)
            labels = data.y
            loss = mse(outputs, 1/labels)
            loss.backward()
            optimizer.step()
            if progress_bar:
                pbar.set_description('MSELoss: '+str(loss.cpu().detach().numpy()))
            # Running mean of the per-batch losses.
            train_loss += loss.cpu().detach()/len(train_loader)

        # Validation: no gradients are needed, so skip graph construction entirely.
        model.eval()
        with torch.no_grad():
            for data in val_loader:
                data = data.to(device)
                outputs = model(data)
                labels = data.y
                loss = mse(outputs, 1/labels)
                val_loss += loss.cpu()/len(val_loader)

        # Checkpoint whenever the validation loss improves.
        if val_loss.numpy() < min_val_loss:
            min_val_loss = val_loss.numpy()
            torch.save(model.state_dict(), 'model.pth')
        lr_scheduler.step(val_loss)
        print('Epoch: ', str(epoch+1)+'/'+str(epochs),'| Training MSELoss: ', train_loss.numpy(), '| Validation MSELoss: ', val_loss.numpy())
        m_train_loss.append(train_loss.numpy())
        m_val_loss.append(val_loss.numpy())
        # Early stop: only after epoch 20, when none of the last 7 validation
        # losses is within 1e-4 of the best one.
        if epoch>20 and min(m_val_loss[-7:])>min_val_loss+0.0001:
            break

    if not progress_bar:
        plt.plot(range(1,len(m_val_loss)+1), m_val_loss, label='val_loss')
        plt.plot(range(1,len(m_train_loss)+1), m_train_loss, label='train_loss')
        plt.xlabel('epoch')
        plt.ylabel('Loss')
        plt.legend()
        plt.show()

    # Evaluate the best checkpoint (not the last-epoch weights) on the test fold.
    model = MPNN().to(device)
    model.load_state_dict(torch.load('model.pth'))
    model.eval()
    test_loss = 0
    true = []
    preds1 = []
    with torch.no_grad():
        for data in test_loader:
            data = data.to(device)
            outputs = model(data)
            labels = data.y
            true.extend(labels.cpu().numpy())
            preds1.extend(outputs.cpu().numpy())
            loss = mse(outputs, 1/labels)
            test_loss += loss/len(test_loader)
    print('Test MSELoss: ', test_loss.detach().cpu().numpy())

    OOF_preds = pd.DataFrame()
    OOF_preds['true_value'] = true
    # The network predicts 1/pT; invert to report pT.
    OOF_preds['preds1'] = [1/p for p in preds1]
    OOF_preds['row'] = shuffled_list[test_batch]
    OOF_preds.to_csv('OOF_preds_'+str(val_batch)+'.csv')
    return 0

In [None]:
## Train two folds: validation fold i for i in {2, 3}, with the next fold
## (i+1, wrapping modulo 10) held out as the test fold.

for i in range(2,4):
    train_fn(val_batch=i, test_batch=(i+1)%10)

In [None]:
# Merge every per-fold file whose name contains 'OOF_preds_' into one frame and
# write it to OOF_preds.csv. This rebinds `df`, replacing the preprocessed input frame.
# NOTE(review): train_fn writes its files with a relative path, while this cell reads
# from the absolute /kaggle/working — the two only agree if that is the working directory.
files = os.listdir('/kaggle/working')
df = pd.concat([pd.read_csv('/kaggle/working/'+i).drop(columns = ['Unnamed: 0']) for i in files if 'OOF_preds_' in i])
df.to_csv('OOF_preds.csv')

# Results

In [None]:
# Reload the merged GNN out-of-fold predictions and order them by the 'row' column.
df = pd.read_csv('OOF_preds.csv').drop(columns = ['Unnamed: 0'])
df = df.sort_values(by = 'row').reset_index(drop = True)
# Alias the columns to the names read by the MAE helper and the classification report.
df['True_pT'] = df['true_value']
df['Predicted_pT'] = df['preds1']

In [None]:
# Load the comparison predictions (plotted as 'FCNN' below), ordered by the 'row' column.
df_cnn = pd.read_csv('../input/cms-example/results/OOF_preds.csv')
df_cnn = df_cnn.sort_values(by = 'row').reset_index(drop = True)
# Same column aliases as for the GNN frame; this file stores its predictions in 'preds'.
df_cnn['True_pT'] = df_cnn['true_value']
df_cnn['Predicted_pT'] = df_cnn['preds']

In [None]:
# Display the GNN out-of-fold frame as the cell output.
df

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae

def MAE(df):
    """Mean absolute pT error in overlapping windows of true pT.

    For every ``i`` in ``range(4, 300)`` with ``dx = 0.5``, the rows whose
    ``True_pT`` lies in ``[(i-1)*dx, (i+1)*dx]`` (both ends inclusive) are
    selected, and the mean absolute error between ``True_pT`` and
    ``Predicted_pT`` is computed. A window for which the metric cannot be
    computed contributes 0.

    Args:
        df: DataFrame with ``True_pT`` and ``Predicted_pT`` columns.

    Returns:
        List with the first 236 window errors.
    """
    dx = 0.5
    MAE1 = []
    for i in range(int(2/dx),int(150/dx)):
        P = df[(df['True_pT']>=(i-1)*dx)&(df['True_pT']<=(i+1)*dx)]
        try:
            p = mae(P['True_pT'],P['Predicted_pT'])
        except ValueError:
            # Input validation failed (e.g. the window holds no rows): record 0
            # for this window. Unrelated errors are no longer swallowed.
            p = 0
        MAE1.append(p)
    return MAE1[:236]

In [None]:
# Plot the windowed MAE of both models. The x values i*dx for i in 4..239 give
# 236 points, matching the 236 entries MAE() returns.
dx = 0.5
MAE1 = MAE(df)
plt.plot([i*dx for i in range(4,240)],MAE1,label = 'GNN')
plt.plot([i*dx for i in range(4,240)],MAE(df_cnn),label = 'FCNN')
plt.legend()
plt.show()

In [None]:
# Single-number summary: sum of the first 196 windowed MAE values of the GNN.
print(sum(MAE1[:196]))

In [None]:
def pT_classes(x):
    """Return 'Above 25 GeV' when ``x >= 25``, otherwise 'Below 25 GeV'."""
    return 'Above 25 GeV' if x >= 25 else 'Below 25 GeV'

# Two-class report at the 25 GeV threshold: true pT bin vs. predicted pT bin.
print(classification_report(df['True_pT'].apply(pT_classes), df['Predicted_pT'].apply(pT_classes)))

In [None]:
# Second evaluation pass: same file, but the prediction is taken as the reciprocal of 'preds2'.
# NOTE(review): train_fn in this notebook writes only 'true_value', 'preds1' and 'row'
# to the OOF files, so 'preds2' raises KeyError unless OOF_preds.csv comes from
# another run — confirm which column is intended here.
df = pd.read_csv('OOF_preds.csv').drop(columns = ['Unnamed: 0'])
df = df.sort_values(by = 'row').reset_index(drop = True)
df['True_pT'] = df['true_value']
df['Predicted_pT'] = 1/df['preds2']

In [None]:
# Reload the comparison predictions (plotted as 'FCNN' below), ordered by the 'row' column.
df_cnn = pd.read_csv('../input/cms-example/results/OOF_preds.csv')
df_cnn = df_cnn.sort_values(by = 'row').reset_index(drop = True)
# Alias the columns to the names read by the MAE helper; predictions are stored in 'preds'.
df_cnn['True_pT'] = df_cnn['true_value']
df_cnn['Predicted_pT'] = df_cnn['preds']

In [None]:
# Display the re-loaded GNN frame as the cell output.
df

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae

def MAE(df):
    """Mean absolute pT error in overlapping windows of true pT.

    This re-defines the ``MAE`` helper from an earlier cell of the notebook.

    For every ``i`` in ``range(4, 300)`` with ``dx = 0.5``, the rows whose
    ``True_pT`` lies in ``[(i-1)*dx, (i+1)*dx]`` (both ends inclusive) are
    selected, and the mean absolute error between ``True_pT`` and
    ``Predicted_pT`` is computed. A window for which the metric cannot be
    computed contributes 0.

    Args:
        df: DataFrame with ``True_pT`` and ``Predicted_pT`` columns.

    Returns:
        List with the first 236 window errors.
    """
    dx = 0.5
    MAE1 = []
    for i in range(int(2/dx),int(150/dx)):
        P = df[(df['True_pT']>=(i-1)*dx)&(df['True_pT']<=(i+1)*dx)]
        try:
            p = mae(P['True_pT'],P['Predicted_pT'])
        except ValueError:
            # Input validation failed (e.g. the window holds no rows): record 0
            # for this window. Unrelated errors are no longer swallowed.
            p = 0
        MAE1.append(p)
    return MAE1[:236]

In [None]:
# Plot the windowed MAE of both models. The x values i*dx for i in 4..239 give
# 236 points, matching the 236 entries MAE() returns.
dx = 0.5
MAE1 = MAE(df)
plt.plot([i*dx for i in range(4,240)],MAE1,label = 'GNN')
plt.plot([i*dx for i in range(4,240)],MAE(df_cnn),label = 'FCNN')
plt.legend()
plt.show()

In [None]:
# Single-number summary: sum of the first 196 windowed MAE values of the GNN.
print(sum(MAE1[:196]))

In [None]:
def pT_classes(x):
    """Return 'Above 25 GeV' when ``x >= 25``, otherwise 'Below 25 GeV'."""
    return 'Above 25 GeV' if x >= 25 else 'Below 25 GeV'

# Two-class report at the 25 GeV threshold: true pT bin vs. predicted pT bin.
print(classification_report(df['True_pT'].apply(pT_classes), df['Predicted_pT'].apply(pT_classes)))