In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
from torch.utils.data import Dataset, DataLoader
import math
import sys
import cmath

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from torchvision import datasets
import torchvision.transforms as transforms

import time

In [None]:
class JetTrainData(Dataset):
    """Training set: 21 kinematic features per event with a binary label.

    Features are read from a fixed ``.npy`` file.  Labels are not stored in
    the file; they are rebuilt from the fixed per-process row counts listed
    in ``__init__`` (first block = signal ``hh`` -> 1, every other block -> 0),
    so the file rows must be ordered exactly that way.
    """

    def __init__(self):
        """Load the feature matrix and build the matching label vector.

        Raises:
            ValueError: if the feature file holds fewer rows than labels.
        """
        self.x = torch.from_numpy( np.load('/data/github/data/diHiggs_21kin_train_data.npy').astype(np.float32) )
        #                                               hh               tt             tw             tth            ttv            llbj           tatabb
        # np.int64 instead of the removed ``np.int`` alias (gone since NumPy 1.24);
        # int64 is also the integer type torch's classification losses expect.
        self.y = torch.from_numpy( np.concatenate((np.ones(19000), np.zeros(9900), np.zeros(5500), np.zeros(200), np.zeros(250), np.zeros(1000), np.zeros(60))).astype(np.int64) )
        # Derived from the labels (19000+9900+5500+200+250+1000+60 = 35910)
        # so the length can never drift from the label layout above.
        self.n_samples = len(self.y)
        if self.x.shape[0] < self.n_samples:
            raise ValueError(
                'training feature file has {} rows but {} labels are defined'.format(
                    self.x.shape[0], self.n_samples))

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of labelled samples."""
        return self.n_samples

    
class JetTestData(Dataset):
    """Test set: 21 kinematic features per event with a binary label.

    Features are read from a fixed ``.npy`` file.  Labels are not stored in
    the file; they are rebuilt from the fixed per-process row counts listed
    in ``__init__`` (first block = signal ``hh`` -> 1, every other block -> 0),
    so the file rows must be ordered exactly that way.
    """

    def __init__(self):
        """Load the feature matrix and build the matching label vector.

        Raises:
            ValueError: if the feature file holds fewer rows than labels.
        """
        self.x = torch.from_numpy( np.load('/data/github/data/diHiggs_21kin_test_data.npy').astype(np.float32) )
        #                                               hh              tt              tw            tth            ttv            llbj          tatabb
        # np.int64 instead of the removed ``np.int`` alias (gone since NumPy 1.24);
        # int64 is also the integer type torch's classification losses expect.
        self.y = torch.from_numpy( np.concatenate((np.ones(5000), np.zeros(19000), np.zeros(800), np.zeros(100), np.zeros(100), np.zeros(240), np.zeros(10))).astype(np.int64) )
        # Derived from the labels (5000+19000+800+100+100+240+10 = 25250)
        # so the length can never drift from the label layout above.
        self.n_samples = len(self.y)
        if self.x.shape[0] < self.n_samples:
            raise ValueError(
                'test feature file has {} rows but {} labels are defined'.format(
                    self.x.shape[0], self.n_samples))

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of labelled samples."""
        return self.n_samples

In [None]:
# Loader settings shared by the training and the test pipeline.
batch_size = 20
num_workers = 0  # load batches in the main process

# Both datasets read their feature files when they are constructed.
train_data = JetTrainData()
test_data = JetTestData()

# Training batches are reshuffled on every pass over the data.
train_loader = DataLoader(
    train_data,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
)
# The test loader keeps the dataset order (shuffle=False).
test_loader = DataLoader(
    test_data,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
)

In [None]:
class Fully_Connected_Layer(nn.Module):
    """Fully connected classifier: 21 inputs -> 2 class probabilities.

    Eight hidden ``Linear`` layers, each followed by an in-place ReLU, feed a
    final ``Linear(300, 2)`` whose output is normalised with a softmax over
    dim 1, so ``forward`` returns probabilities rather than logits.
    """

    def __init__(self):
        super().__init__()
        # Layer widths from the input through the last hidden layer.
        widths = (21, 2400, 2400, 1200, 1200, 600, 600, 300, 300)

        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.ReLU(inplace=True))
        # Two-way output head, normalised across the class dimension.
        stack.append(nn.Linear(widths[-1], 2))
        stack.append(nn.Softmax(dim=1))

        self.fc = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the whole stack and return the softmax output."""
        return self.fc(x)

In [None]:
# Use the GPU when CUDA is available, otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Build the network, then move its parameters to the selected device.
model = Fully_Connected_Layer()
model = model.to(device)

# Adam over all model parameters, with L2 weight decay.
optimizer = optim.Adam(params=model.parameters(), lr=1e-4, weight_decay=5e-4)

In [None]:
def train(model, train_loader, device, optimizer, epoch=None):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: network whose forward pass returns per-class *probabilities*
            (the notebook's model ends in ``Softmax(dim=1)``).
        train_loader: DataLoader yielding ``(features, integer_labels)``.
        device: torch device the batches are moved to.
        optimizer: optimiser bound to ``model``'s parameters.
        epoch: epoch number shown in the progress line.  When omitted, the
            module-level ``epoch`` variable is used if it exists, which keeps
            existing ``train(model, loader, device, optimizer)`` calls working.

    Returns:
        ``(mean batch loss, accuracy)`` where accuracy is the fraction of the
        dataset classified correctly during this pass.
    """
    if epoch is None:
        # Fall back to the notebook's loop counter instead of failing with a
        # NameError when the function is called outside the epoch loop.
        epoch = globals().get('epoch')

    model.train()
    batch_losses = []
    correct = 0

    for data, target in train_loader:
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()

        output = model(data)

        # The model already applies softmax, and F.cross_entropy would apply
        # log-softmax a second time.  Negative log-likelihood on the log of the
        # probabilities is the intended cross-entropy; the clamp keeps log()
        # finite if a probability underflows to zero.
        loss = F.nll_loss(torch.log(output.clamp_min(1e-12)), target)
        correct += (output.argmax(dim=1) == target).sum().item()

        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    # Computed once after the loop so it is defined even for an empty loader.
    n_samples = len(train_loader.dataset)
    accuracy = correct / n_samples if n_samples else 0.0
    loss = np.mean(batch_losses) if batch_losses else float('nan')

    print(f'\n Train Epoch: {epoch} \tLoss: {loss:.6f}')

    return loss, accuracy

In [None]:
def test(model, test_loader, device):
    
    DNN_score = open('/data/github/result/Fully_Connected_Layer_DNN_score.TXT', 'a')    
   
    model.eval()
    loss_val = []
    correct = 0
    with torch.no_grad():
        for batch_i, (data, target) in enumerate(test_loader):
            data, target = data.to(device), target.to(device)
  
            output = model(data)
            loss = F.cross_entropy(output, target)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
            accuracy = correct / len(test_loader.dataset)
            if batch_i < int(len(test_data)/batch_size):
                for n in range(0,batch_size):
                    hh=math.sqrt((output[n][1]**2))
                    DNN_score.write(str(hh))
                    DNN_score.write('\n')                   
                       
            if int(len(test_data)/batch_size) == batch_i:
                for n in range (0,int((len(test_data)/batch_size-int(len(test_data)/batch_size))*batch_size)):
                    hh=math.sqrt((output[n][1]**2))
                    DNN_score.write(str(hh))
                    DNN_score.write('\n')
            
            loss_val.append(loss.item())
            
    loss = np.mean(loss_val) 

    print(f'\nTest set: Loss: {loss:.6f}, Accuracy: {correct}/{len(test_loader.dataset)}({(100. * correct / len(test_loader.dataset)):.2f}%)\n')
    print('----------------------------------------------------')
    
    return loss, accuracy

In [None]:
def Like(s, b, u, n):
    """Poisson probability of ``n*s + b`` events for a mean of ``u*s + b``.

    The value is assembled in log space as
    ``N*log(mu) - lgamma(N + 1) - mu`` with ``N = n*s + b`` and
    ``mu = u*s + b``, then exponentiated.
    """
    observed = n*s+b
    expected = u*s+b
    log_value = observed*math.log(expected)-math.lgamma(observed+1)-expected
    return math.e**log_value

def IndiLikeRatioDis(NS1, NB1):
    """Likelihood-ratio significance for ``NS1`` signal on ``NB1`` background.

    Computes ``sqrt(-2 * log(L0 / L1))`` where ``L0`` and ``L1`` are the
    Poisson probabilities of observing ``NS1 + NB1`` events for a mean of
    ``NB1`` and of ``NS1 + NB1`` respectively.  The factorial term cancels in
    the ratio, leaving the closed form ``2 * ((s + b) * log(1 + s/b) - s)``,
    which is evaluated directly: exponentiating the two likelihoods first
    underflows to zero for large counts and makes ``log`` fail.

    Args:
        NS1: expected signal count.
        NB1: expected background count; must be positive.

    Returns:
        The significance as a non-negative float.

    Raises:
        ValueError: if ``NB1`` is not positive, or if ``NS1 + NB1`` is not
            positive (the logarithm is undefined in both cases).
    """
    if NB1 <= 0:
        raise ValueError('background count must be positive, got {!r}'.format(NB1))
    # log1p keeps precision when the signal is tiny compared with the background.
    q0 = 2.0 * ((NS1 + NB1) * math.log1p(NS1 / NB1) - NS1)
    # Rounding can leave q0 a hair below zero when NS1 is ~0; clamp before sqrt.
    return math.sqrt(max(q0, 0.0))

In [None]:
# Start the run from an empty score file: test() opens this same path in
# append mode and adds its scores on every call.  Opening with 'w' truncates
# the file; the ``with`` block closes the handle immediately instead of
# leaving it open for the rest of the session.
with open('/data/github/result/Fully_Connected_Layer_DNN_score.TXT', 'w') as DNN_score:
    pass

# Per-epoch history of the training and test metrics.
Train_Loss = []
Train_Accuracy = []
Test_Loss = []
Test_Accuracy = []

epochs = 1      # number of training epochs to run
best_sig = 0    # best significance found so far, over all epochs

def _count_above(scores, thresholds):
    """Count, for every threshold, the scores strictly greater than it.

    Sorting once and binary-searching each threshold yields the same integer
    counts as comparing every score with every threshold, without the nested
    Python loops.
    """
    ordered = np.sort(scores[~np.isnan(scores)])  # NaN never satisfies `>`
    return ordered.size - np.searchsorted(ordered, thresholds, side='right')


def _cross_section_above(xsec, scores, thresholds):
    """Return ``xsec`` times the fraction of ``scores`` above each threshold."""
    return xsec * _count_above(scores, thresholds).astype(float) / float(len(scores))


def _expected_events(xsec):
    """Scale a selected cross section to an event count, rounded to 3 decimals.

    NOTE(review): 3000 and (0.8**2/0.7**2) are kept exactly as in the
    original analysis; presumably a luminosity and an efficiency rescaling,
    which should be confirmed with the analysis authors.
    """
    return round(float(3000*xsec*(0.8**2/0.7**2)), 3)


SCORE_FILE = '/data/github/result/Fully_Connected_Layer_DNN_score.TXT'
ROC_FILE_PREFIX = '/data/github/result/Fully_Connected_Layer_ROC.TXT'

# Plot styling is the same for every epoch, so it is configured once.
plt.rc('text', usetex=True)
plt.rc('font', family='Times New Roman')  # was misspelled 'Time New Roman'

logs = False
axislabels = [ r'$DNN $']
Yaxislabels = [ r'$DNN $']
Bmax = 1
Bmin = 0

# Threshold scan: n_thresholds cuts starting at Ival and covering a range of
# 0.1.  The count was previously stored in a variable called ``nn``, which
# overwrote the ``torch.nn`` module alias for the rest of the session.
n_thresholds = 10000
Ival = 0.9

# Per-process normalisations applied to the selection efficiencies.
# NOTE(review): presumably cross sections (with correction factors for tt,
# tth, ttv); units are not stated anywhere in this notebook.
Xreco_Sig = 0.0214964
Xreco_tt = 120.907 * 1.596
Xreco_tw = 4.38354
Xreco_tth = 0.15258 * 1.27
Xreco_ttv = 0.157968 * 1.54
Xreco_llbj = 1.22936
Xreco_tatabb = 0.011392

for epoch in range(1, epochs + 1):
    start = time.time()
    print('########### Training epoch {} start ###########'.format(epoch))

    train_loss, train_accuracy = train(model, train_loader, device, optimizer)
    test_loss, test_accuracy = test(model, test_loader, device)

    Train_Loss.append(train_loss)
    Train_Accuracy.append(train_accuracy)
    Test_Loss.append(test_loss)
    Test_Accuracy.append(test_accuracy)

    # test() appends one score per test sample on every call, so the scores
    # of this epoch are the epoch-th block of len(test_data) lines.
    Results = np.atleast_1d(np.loadtxt(SCORE_FILE))
    n_test = len(test_data)
    first = n_test * (epoch - 1)
    last = n_test * epoch
    if Results.size < last:
        raise ValueError(
            'score file holds {} values but epoch {} needs {}'.format(Results.size, epoch, last))

    # Block sizes follow the label layout of the test set
    # (hh, tt, tw, tth, ttv, llbj); tatabb takes whatever remains.
    edges = [first]
    for block_size in (5000, 19000, 800, 100, 100, 240):
        edges.append(edges[-1] + block_size)
    edges.append(last)
    (Results_hh_1, Results_tt_1, Results_tw_1, Results_tth_1,
     Results_ttv_1, Results_llbj_1, Results_tatabb_1) = [
        Results[lo:hi] for lo, hi in zip(edges[:-1], edges[1:])]
    if min(hi - lo for lo, hi in zip(edges[:-1], edges[1:])) <= 0:
        raise ValueError('test set is too small for the fixed per-process layout')

    # ---- Score distributions, one normalised histogram per process ----
    plt.xlim(0, 1)
    bins = np.linspace(Bmin, Bmax,  25)
    histograms = [
        (Results_hh_1, 'black', r'$h \; h$'),
        (Results_tt_1, 'blue', r'$t \; \overline{t}$'),
        (Results_tw_1, 'red', r'$t \; w$'),
        (Results_tth_1, None, r'$t \; \overline{t} \; h$'),
        (Results_ttv_1, None, r'$t \; \overline{t} \; v$'),
        (Results_llbj_1, None, r'$l \; l \; b \; j$'),
        (Results_tatabb_1, None, r'$\tau \; \tau \; b \; b$'),
    ]
    for scores, colour, label in histograms:
        # colour=None lets matplotlib pick the next colour of its cycle.
        plt.hist(scores, bins = bins, alpha=1, density=True, histtype='step', align = 'mid', linewidth = 1.5, log=logs, color=colour, label=label)
    plt.legend(loc=9,fontsize = 10)
    plt.xlabel(axislabels[0], fontsize = 20)
    plt.ylabel(r'$\rm{(1/\sigma) \; d \sigma / d }$' + Yaxislabels[0]    , fontsize = 20)
    plt.show()

    # ---- Threshold scan ----
    thresholds = Ival + (0.1 * np.arange(n_thresholds)) / float(n_thresholds)

    XSig_box = _cross_section_above(Xreco_Sig, Results_hh_1, thresholds)
    Xbkg_tt_box = _cross_section_above(Xreco_tt, Results_tt_1, thresholds)
    Xbkg_tw_box = _cross_section_above(Xreco_tw, Results_tw_1, thresholds)
    Xbkg_tth_box = _cross_section_above(Xreco_tth, Results_tth_1, thresholds)
    Xbkg_ttv_box = _cross_section_above(Xreco_ttv, Results_ttv_1, thresholds)
    Xbkg_llbj_box = _cross_section_above(Xreco_llbj, Results_llbj_1, thresholds)
    Xbkg_tatabb_box = _cross_section_above(Xreco_tatabb, Results_tatabb_1, thresholds)
    # Summed left to right in this fixed order so the totals are reproducible.
    Xbkg_box = (Xbkg_tt_box + Xbkg_tw_box + Xbkg_tth_box
                + Xbkg_ttv_box + Xbkg_llbj_box + Xbkg_tatabb_box)

    # Plain Python floats from here on: round() and str() then behave exactly
    # as they do for built-in floats.
    XSig_box = XSig_box.tolist()
    Xbkg_box = Xbkg_box.tolist()
    Xbkg_tt_box = Xbkg_tt_box.tolist()
    Xbkg_tw_box = Xbkg_tw_box.tolist()
    Xbkg_tth_box = Xbkg_tth_box.tolist()
    Xbkg_ttv_box = Xbkg_ttv_box.tolist()
    Xbkg_llbj_box = Xbkg_llbj_box.tolist()
    Xbkg_tatabb_box = Xbkg_tatabb_box.tolist()

    # ---- ROC table: one line per threshold, stopping at the first cut that
    # removes all background ----
    roc_rows = []
    with open(ROC_FILE_PREFIX + str(epoch), 'w') as roc_file:
        for j in range(len(XSig_box)):
            if Xbkg_box[j] == 0:
                break

            Nsig = _expected_events(XSig_box[j])
            Nbkg = _expected_events(Xbkg_box[j])
            Nbkg_tt = _expected_events(Xbkg_tt_box[j])
            Nbkg_tw = _expected_events(Xbkg_tw_box[j])
            Nbkg_tth = _expected_events(Xbkg_tth_box[j])
            Nbkg_ttv = _expected_events(Xbkg_ttv_box[j])
            Nbkg_llbj = _expected_events(Xbkg_llbj_box[j])
            Nbkg_tatabb = _expected_events(Xbkg_tatabb_box[j])

            SobSqrtB = round(float(IndiLikeRatioDis(float(Nsig), float(Nbkg))), 3)

            row = (Nsig, Nbkg, SobSqrtB, Nbkg_tt, Nbkg_tw, Nbkg_tth, Nbkg_ttv, Nbkg_llbj, Nbkg_tatabb)
            roc_file.write(' '.join(str(value) for value in row))
            roc_file.write('\n')
            roc_rows.append(row)

    # Same numbers as the file just written, without re-parsing it; the
    # reshape keeps the table two-dimensional for zero or one row.
    ROC_Results = np.array(roc_rows, dtype=float).reshape(-1, 9)

    if ROC_Results.shape[0] == 0:
        # No threshold left any background, so there is nothing to maximise.
        print('\nNo threshold with non-zero background; significance not evaluated for this epoch.\n')
    else:
        SB = ROC_Results[:, 2].tolist()
        hh = ROC_Results[:, 0].tolist()
        tt = ROC_Results[:, 3].tolist()
        tw = ROC_Results[:, 4].tolist()
        tth = ROC_Results[:, 5].tolist()
        ttv = ROC_Results[:, 6].tolist()
        llbj = ROC_Results[:, 7].tolist()
        tatabb = ROC_Results[:, 8].tolist()

        plt.plot(hh,SB, color='r', label='Significance')
        plt.xlabel(r'$ N_s $', fontsize=20)
        plt.ylabel(r'Significance', fontsize=20)
        plt.legend(loc='best', fontsize=15)
        plt.show()

        # Report the working point with the highest significance.
        j = SB.index(max(SB))
        print('\nsignificance: {:.3f} hh: {:.3f} tt: {:.3f} tw: {:.3f} tth: {:.3f} ttv: {:.3f} llbj: {:.3f} tatabb: {:.3f} \n'.format(SB[j], hh[j], tt[j], tw[j], tth[j], ttv[j], llbj[j], tatabb[j]))

        sig = max(SB)
        best_sig = max(best_sig,sig)

    end = time.time()

    print('* Best Significance : {:.3f} *'.format(best_sig))

    print('Epoch time: {:.2f} mins'.format((end-start)/60))
    print('='*69)