# NeuralNets for b-tagging

In [15]:
%load_ext autoreload
%autoreload 2
from bob import *
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import torch
from torch.autograd import Variable
from torch import nn
from torch.nn import functional as F

from tqdm import tqdm

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Load dataframe

In [4]:
# Path of the pickled pandas DataFrame read by pd.read_pickle in the loading cell.
inputFileName = 'Hybrid_25_July_bugfixed_fullStat.pkl'

# Subsample the dataset for fast execution
# (when True, the loading cell keeps only the first 5% of the rows).
subsampleFlag = True
# When True, the input/target tensors and the model are moved to the GPU with .cuda().
gpuFlag = False

In [31]:
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
tree = pd.read_pickle(inputFileName)
features = select_features(tree, to_remove=[])

# Add flag for missing values in SV1.
# This must be computed before the -100 placeholders are overwritten below.
tree['nan_flag'] = tree['jet_sv1_sig3d'] == -100
features.append('nan_flag')

if subsampleFlag:
    # Explicit copy: the in-place edits below must act on an independent frame,
    # not on a slice of the full one (avoids SettingWithCopyWarning).
    tree = tree.head(int(tree.shape[0]*0.05)).copy()
    num_boost_round=100
else:
    num_boost_round=1000

# Replace missing values with NaNs so they do not bias the mean/std below
d = dict.fromkeys([-100, -1, -99, -1000], np.nan)
tree.replace(d, inplace=True)

# Normalization (zero mean, unit standard deviation per feature column).
# NOTE(review): statistics are computed on the full frame, i.e. before the
# train/test split -- confirm this is intended.
tree[features] = tree[features].apply(lambda x: (x-x.mean())/x.std(), axis=0)

# After normalization, missing entries are set to 0 (the column mean).
tree.replace(np.nan, 0, inplace=True)

# Remap the class labels 5 -> 2 and 4 -> 1 so that they can be used as class
# indices. Assign the result back instead of calling
# `tree[col].replace(..., inplace=True)`: that form mutates a temporary Series
# and leaves the frame unchanged when pandas Copy-on-Write is active.
tree['jet_LabDr_HadF'] = tree['jet_LabDr_HadF'].replace({5: 2, 4: 1})

In [32]:
train, test = train_test_splitting(tree)
# Keep a whole number of 10000-row chunks for training and at most 400000 rows
# for testing. `.copy()` makes both frames independent of the split output, so
# the column assignment below does not raise SettingWithCopyWarning.
train = train.head(train.shape[0]//10000*10000).copy()
test = test.head(400000).copy()
# Normalise the weights to sum to 1: they are used as sampling probabilities
# by np.random.choice in the training loop.
train['weights'] = train['weights'] / train['weights'].sum()

In [33]:
# Drop the reference to the full frame; the cells below only use `train` and `test`.
del tree

In [34]:
# Training features as float32 and class labels as int64 (the integer dtype
# that nn.CrossEntropyLoss expects for class-index targets).
# `np.int` was removed in NumPy 1.24, so the fixed-width np.int64 is used; the
# deprecated `Variable` wrapper is dropped because tensors carry autograd state.
train_input = torch.from_numpy(train[features].values.astype(np.float32))
train_target = torch.from_numpy(train['jet_LabDr_HadF'].values.astype(np.int64))

In [35]:
# Test features as float32 and class labels as int64.
# `volatile=True` has had no effect since PyTorch 0.4 (it only emits a
# warning); gradient tracking is disabled by running inference inside
# `with torch.no_grad():` instead. `np.int` was removed in NumPy 1.24.
test_input = torch.from_numpy(test[features].values.astype(np.float32))
test_target = torch.from_numpy(test['jet_LabDr_HadF'].values.astype(np.int64))

  """Entry point for launching an IPython kernel.


In [36]:
# Move all four tensors to the GPU when requested in the configuration cell.
if gpuFlag:
    train_input = train_input.cuda()
    train_target = train_target.cuda()
    test_input = test_input.cuda()
    test_target = test_target.cuda()

# Define the network

As a class (subclassing `nn.Module`)

In [37]:
class Net(nn.Module):
    """Fully connected classifier with two hidden layers.

    Each hidden layer is Linear -> ReLU -> BatchNorm1d; the output layer is a
    plain Linear producing one raw score (logit) per class, suitable for
    nn.CrossEntropyLoss.

    Args:
        n_features: number of input features per sample (default 25).
        n_hidden: width of both hidden layers (default 100).
        n_classes: number of output classes (default 3).
    """

    def __init__(self, n_features=25, n_hidden=100, n_classes=3):
        super(Net, self).__init__()

        # Attribute names are kept unchanged so that saved state_dicts still load.
        self.fc1 = nn.Linear(n_features, n_hidden)
        self.fc1_bn = nn.BatchNorm1d(n_hidden)
        self.fc2 = nn.Linear(n_hidden, n_hidden)
        self.fc2_bn = nn.BatchNorm1d(n_hidden)
        self.fc3 = nn.Linear(n_hidden, n_classes)

    def forward(self, x):
        """Return the class logits for a batch `x` of shape (batch, n_features)."""
        # Batch normalisation is applied after the activation in both hidden layers.
        x = self.fc1_bn(F.relu(self.fc1(x)))
        x = self.fc2_bn(F.relu(self.fc2(x)))

        x = self.fc3(x)
        return x
    
# Instantiate the network and the multi-class cross-entropy loss.
model = Net()
criterion = nn.CrossEntropyLoss()

Alternatively, using the `nn.Sequential` container

In [29]:
# Alternative architecture: 25 inputs, three hidden stages of widths 128/64/32
# (each Linear -> ReLU -> Dropout(0.75) -> BatchNorm1d) and a 3-way output layer.
layers = []
n_in = 25
for n_out in (128, 64, 32):
    layers.extend([
        nn.Linear(n_in, n_out),
        nn.ReLU(),
        nn.Dropout(0.75),
        nn.BatchNorm1d(n_out),
    ])
    n_in = n_out
layers.append(nn.Linear(n_in, 3))
# Uncomment to train this architecture instead of the Net class:
#model = nn.Sequential(*layers)
#criterion = nn.CrossEntropyLoss()

Training

In [None]:
if gpuFlag:
    model.cuda()

learning_rate = .001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Multiply the learning rate by 0.95 after every epoch.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.95)

mini_batch_size = 10000
generalization_loss = []

# Sampling probabilities are the same for every batch, so extract them once.
sampling_probs = train['weights'].values

for e in tqdm(range(80)):
    sum_loss = 0
    model.train()
    # We do this with mini-batches
    for batch in range(240):
        # Choose indices of minibatch considering weights (drawn with replacement)
        idxs = np.random.choice(train_input.shape[0], size=mini_batch_size, p=sampling_probs)
        output = model(train_input[idxs])
        loss = criterion(output, train_target[idxs])
        sum_loss = sum_loss + loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Validation loss on the first mini_batch_size test rows.
    # no_grad() avoids building the autograd graph during evaluation, and
    # slicing (unlike narrow) does not raise when fewer rows are available.
    model.eval()
    with torch.no_grad():
        output = model(test_input[:mini_batch_size])
        loss = criterion(output, test_target[:mini_batch_size])
    generalization_loss.append(loss.item())

    scheduler.step()

# Summed training loss of the last epoch only (printed once, after the loop).
print(e, sum_loss)
plt.plot(generalization_loss);
plt.grid()
plt.show()

 15%|████████████                                                                    | 12/80 [11:43<1:06:28, 58.65s/it]

Save the model

In [None]:
#torch.save(model.state_dict(), 'mymodel.pt')

Load the model

In [None]:
#model.load_state_dict(torch.load('mymodel.pt'))

In [None]:
# Class probabilities for the whole test set.
# - model.eval(): BatchNorm must use its running statistics, also when the
#   model was just restored with load_state_dict (modules start in train mode).
# - torch.no_grad(): no autograd graph is needed for inference.
# - test_input is used in full instead of narrow(0, 0, 400000), which raises
#   when the test set has fewer than 400000 rows (it never has more, because
#   the test frame is truncated with head(400000)).
model.eval()
with torch.no_grad():
    test_pred = F.softmax(model(test_input), dim=1)

In [None]:
# Bring the predictions back to host memory (if needed) and expose them as a NumPy array.
test_pred = (test_pred.cpu() if gpuFlag else test_pred).data.numpy()

In [None]:
# Restore the original label coding (2 -> 5, 1 -> 4) expected by the ROC helpers.
# The remapped column is assigned through `assign`, which returns a new frame:
# `test[col].replace(..., inplace=True)` mutates a temporary Series and leaves
# the frame unchanged when pandas Copy-on-Write is active.
test = test.assign(jet_LabDr_HadF=test['jet_LabDr_HadF'].replace({2: 5, 1: 4}))

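Define the discriminant from the 3 network outputs: $D = \log\dfrac{p_2}{f\,p_1 + (1-f)\,p_0}$, where $p_k$ is the softmax output for class $k$ (class 2 is the label originally coded 5, class 1 the label originally coded 4).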

In [None]:
# Weight of the class-1 score in the denominator of the log-likelihood ratio.
f = 0.1
# Column k of test_pred is the softmax score of class k.
p_cls0 = test_pred[:, 0]
p_cls1 = test_pred[:, 1]
p_cls2 = test_pred[:, 2]
# Log ratio of the class-2 score to the f-weighted mix of the other two classes.
ll = np.log(p_cls2 / (f * p_cls1 + (1 - f) * p_cls0))

In [None]:
# ROC curves of the DNN discriminant, once per background selector ('c' and 'l').
true_labels = test['jet_LabDr_HadF'].values
fpr_c_dnn, tpr_c_dnn = compute_roc(true_labels, ll, 'c')
fpr_l_dnn, tpr_l_dnn = compute_roc(true_labels, ll, 'l')

In [None]:
# Same ROC curves for the reference score stored in the 'jet_mv2c10' column.
true_labels = test['jet_LabDr_HadF'].values
mv2_scores = test['jet_mv2c10'].values
fpr_c_mv, tpr_c_mv = compute_roc(true_labels, mv2_scores, 'c')
fpr_l_mv, tpr_l_mv = compute_roc(true_labels, mv2_scores, 'l')

In [None]:
# Ratio of the DNN rejection (1/fpr) to the MV2 rejection at the same efficiency.
# The MV2 rejection is linearly interpolated onto the DNN efficiency points.
# `scipy.interpolate.spline` was removed in SciPy 1.3.0; with order=1 it was a
# piecewise-linear interpolation, which np.interp provides directly.
# NOTE(review): np.interp requires the tpr_*_mv arrays to be non-decreasing and
# clamps (does not extrapolate) outside their range -- confirm with compute_roc.
# The first point of every curve is skipped ([1:]), as in the plotting cell.
mv_light_rejection = np.interp(tpr_l_dnn[1:], tpr_l_mv[1:], 1/fpr_l_mv[1:])
mv_c_rejection = np.interp(tpr_c_dnn[1:], tpr_c_mv[1:], 1/fpr_c_mv[1:])

rate_light = 1 /fpr_l_dnn[1:] / mv_light_rejection
rate_c = 1 /fpr_c_dnn[1:] / mv_c_rejection

# Infinite ratios (zero false-positive rate) are masked so they are not drawn.
rate_light[rate_light==np.inf] = np.nan
rate_c[rate_c==np.inf] = np.nan

In [None]:
# Two stacked panels sharing the efficiency axis: rejection curves on top
# (4/5 of the height) and the `rate_*` arrays, plotted as 1/rate, below.
fig = plt.figure(figsize=(10, 7))
gs = GridSpec(5, 1)
ax1 = fig.add_subplot(gs[0:4, 0])
ax2 = fig.add_subplot(gs[4, 0])

# --- top panel: rejection (1/fpr) versus efficiency, log scale ---
ax1.set_ylabel("light / c rejection")
rejection_curves = [
    (tpr_l_mv, fpr_l_mv, 'light MV2', 'orangered'),
    (tpr_c_mv, fpr_c_mv, 'c MV2', 'dodgerblue'),
    (tpr_l_dnn, fpr_l_dnn, 'light dnn', 'brown'),
    (tpr_c_dnn, fpr_c_dnn, 'c dnn', 'navy'),
]
for curve_tpr, curve_fpr, curve_label, curve_colour in rejection_curves:
    ax1.semilogy(curve_tpr, 1/curve_fpr, label=curve_label, c=curve_colour)

# The x tick labels are only shown on the bottom panel.
plt.setp(ax1.get_xticklabels(), visible=False)
ax1.set_xlim([0.55, 1])
ax1.set_ylim([1, 1e3])
ax1.grid()
ax1.legend()

# --- bottom panel: 1/rate for each background, first curve point skipped ---
ratio_curves = [
    (tpr_l_dnn, rate_light, 'r', 'l-jets'),
    (tpr_c_dnn, rate_c, 'b', 'c-jets'),
]
for curve_tpr, curve_rate, curve_colour, curve_label in ratio_curves:
    ax2.plot(curve_tpr[1:], 1/curve_rate, c=curve_colour, lw=.8,  label=curve_label)

# Reference line at 1.
ax2.axhline(y=1, color='black', linestyle='-.', lw=.5)
ax2.grid()
ax2.set_xlabel("b-efficiency")
ax2.set_ylabel("rate")
ax2.set_xlim([0.55, 1])
ax2.set_ylim([0.5, 1.5])
ax2.legend(fontsize = 'x-small')

# NOTE(review): savefig assumes the 'figures' directory already exists.
plt.savefig('figures/dnn_mv2_efficiency_vs_rejection.eps', format='eps')
plt.show()

In [None]:
# Call the rejection_pt helper with the MV2 score and the DNN discriminant
# (15 cuts), then save and show the current figure.
mv2_scores = test['jet_mv2c10'].values
rejection_pt(test, mv2_scores, ll, num_cuts=15)
plt.savefig('figures/dnn_mv2_rejection_vs_pt.eps', format='eps')
plt.show()