In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from matplotlib import pyplot as plt
from matplotlib import colors as mcolors
from sklearn.metrics import roc_curve
from scipy import interpolate

In [2]:
import sys
import logging

# Send notebook output and IPython log records to a log file on disk.
# The file is opened in append mode ("a+") and is not closed anywhere in this cell.
nblog = open("nb_2.log", "a+")
# Attach the log file as the `echo` attribute of both standard streams.
# NOTE(review): presumably relies on ipykernel's output streams copying
# everything they write to `echo` -- confirm against the installed ipykernel.
sys.stdout.echo = nblog
sys.stderr.echo = nblog

# Point the first handler of the IPython application logger at the same file
# and set that logger to INFO level.
get_ipython().log.handlers[0].stream = nblog
get_ipython().log.setLevel(logging.INFO)

# Notebook magic; the recorded cell output reports autosaving every 5 seconds.
%autosave 5

Autosaving every 5 seconds


In [3]:
# All relative file names used by np.load / np.save below resolve against this directory.
os.chdir(path=r'/home/manhducnmd/pp_dijet/Results_full')

In [4]:
# Seed NumPy's global random generator so NumPy-side randomness is repeatable.
np.random.seed(seed=42)

In [5]:
# Seed TensorFlow's global random generator (weight initialisation, fit-time shuffling).
tf.random.set_seed(seed=42)

In [6]:
def CWoLA(p_i):
    """Build the two-jet CNN classifier used for the CWoLA training.

    Each input is batch-normalised separately, the two normalised images are
    stacked as two channels of a single image, and the result is passed
    through a stack of convolution / max-pooling layers followed by three
    dense layers and a single sigmoid output.

    Parameters
    ----------
    p_i : int
        Side length, in pixels, of each square single-channel jet image.
        The last convolution uses 'valid' padding with a 3x3 kernel after
        three 2x2 poolings, so ``p_i // 8`` must be at least 3.

    Returns
    -------
    keras.Model
        Uncompiled model with inputs named "jet_1" and "jet_2", each of shape
        ``(p_i, p_i, 1)``, and one sigmoid output per sample.
    """
    input_1 = keras.layers.Input(shape=(p_i, p_i, 1), name="jet_1")
    input_2 = keras.layers.Input(shape=(p_i, p_i, 1), name="jet_2")

    jet_1_norm = keras.layers.BatchNormalization()(input_1)
    jet_2_norm = keras.layers.BatchNormalization()(input_2)

    # Stack the two jets along the channel axis. Concatenating on axis 0 would
    # merge them along the batch dimension and break the one-label-per-sample
    # alignment. The first convolution must consume this stacked tensor;
    # previously it read `input_1` directly, so "jet_2" and both
    # normalisation layers never reached the output.
    x = keras.layers.Concatenate(axis=-1)([jet_1_norm, jet_2_norm])

    # NOTE(review): the convolution layers have no activation argument, so the
    # only non-linearity in the convolutional stack is the max pooling --
    # confirm this is intended.
    x = keras.layers.Conv2D(64, (5, 5), padding='same')(x)
    x = keras.layers.MaxPool2D(pool_size=(2, 2))(x)

    x = keras.layers.Conv2D(64, (5, 5), padding='same')(x)
    x = keras.layers.MaxPool2D(pool_size=(2, 2))(x)

    x = keras.layers.Conv2D(128, (3, 3), padding='same')(x)
    x = keras.layers.MaxPool2D(pool_size=(2, 2))(x)

    x = keras.layers.Conv2D(128, (3, 3))(x)
    x = keras.layers.Flatten()(x)

    x = keras.layers.Dense(128, activation='relu')(x)
    x = keras.layers.Dense(128, activation='relu')(x)
    x = keras.layers.Dense(128, activation='relu')(x)

    x = keras.layers.Dense(1, activation='sigmoid')(x)

    return keras.Model(
        inputs=[input_1, input_2],
        outputs=x,
    )

In [None]:
# Scan over the number of signal events injected into the signal region (SR).
# For every injection size, train 10 independent CWoLA classifiers (SR vs. SB)
# and record the signal efficiency (TPR) on a pure signal-vs-background test
# set at false-positive rates of 0.001, 0.01 and 0.1.
n_train_sr_bkg = 25000

mean_results = []  # one entry per injection size: mean efficiencies over the 10 trainings
std_results = []   # one entry per injection size: std of efficiencies over the 10 trainings
for p_1 in [25]:
    bkg_sr_jet_1 = np.load(f'background_images_sr_{p_1}_jet_1.npy')
    bkg_sr_jet_2 = np.load(f'background_images_sr_{p_1}_jet_2.npy')

    bkg_sb_jet_1 = np.load(f'background_images_sb_{p_1}_jet_1.npy')
    bkg_sb_jet_2 = np.load(f'background_images_sb_{p_1}_jet_2.npy')

    # Keep the SB/SR ratio of the training background equal to the ratio of
    # the loaded samples.
    sb_sr_bkg = np.shape(bkg_sb_jet_1)[0]/np.shape(bkg_sr_jet_2)[0]
    n_train_sb_bkg = int(np.round(n_train_sr_bkg*sb_sr_bkg))

    n_test_sr_bkg = 20000
    # Background, SR: the first n_train_sr_bkg events train, the next n_test_sr_bkg test.
    bkg_train_sr_1 = bkg_sr_jet_1[0:n_train_sr_bkg]
    bkg_test_1 = bkg_sr_jet_1[n_train_sr_bkg:n_train_sr_bkg+n_test_sr_bkg]
    bkg_train_sr_2 = bkg_sr_jet_2[0:n_train_sr_bkg]
    bkg_test_2 = bkg_sr_jet_2[n_train_sr_bkg:n_train_sr_bkg+n_test_sr_bkg]
    # Background, SB: training only.
    bkg_train_sb_1 = bkg_sb_jet_1[0:n_train_sb_bkg]
    bkg_train_sb_2 = bkg_sb_jet_2[0:n_train_sb_bkg]

    signal_sr_jet_1 = np.load(f'dd10_sr_{p_1}_jet_1.npy')
    signal_sr_jet_2 = np.load(f'dd10_sr_{p_1}_jet_2.npy')

    signal_sb_jet_1 = np.load(f'dd10_sb_{p_1}_jet_1.npy')
    signal_sb_jet_2 = np.load(f'dd10_sb_{p_1}_jet_2.npy')

    sb_sr_signal = np.shape(signal_sb_jet_1)[0]/np.shape(signal_sr_jet_1)[0]
    for n_train_sr_signal in [50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 600, 700, 800, 900, 1000, 1100, 1200]:
        # Signal leaking into the SB scales with the SB/SR ratio of the signal sample.
        n_train_sb_signal = int(np.round(n_train_sr_signal*sb_sr_signal))
        n_test_sr_signal = 20000

        # Signal, SR: the first n_train_sr_signal events train, the next n_test_sr_signal test.
        signal_train_sr_1 = signal_sr_jet_1[0:n_train_sr_signal]
        signal_test_1 = signal_sr_jet_1[n_train_sr_signal:n_train_sr_signal+n_test_sr_signal]
        signal_train_sr_2 = signal_sr_jet_2[0:n_train_sr_signal]
        signal_test_2 = signal_sr_jet_2[n_train_sr_signal:n_train_sr_signal+n_test_sr_signal]

        # Signal, SB: training only.
        signal_train_sb_1 = signal_sb_jet_1[0:n_train_sb_signal]
        signal_train_sb_2 = signal_sb_jet_2[0:n_train_sb_signal]

        # Training labels: every SR event is 1 and every SB event is 0,
        # regardless of whether it is signal or background.
        train_sr_1 = np.concatenate((signal_train_sr_1, bkg_train_sr_1))
        train_sr_2 = np.concatenate((signal_train_sr_2, bkg_train_sr_2))
        train_label_sr = np.ones(np.shape(train_sr_1)[0])

        train_sb_1 = np.concatenate((signal_train_sb_1, bkg_train_sb_1))
        train_sb_2 = np.concatenate((signal_train_sb_2, bkg_train_sb_2))
        train_label_sb = np.zeros(np.shape(train_sb_1)[0])

        # Test labels: 1 for signal events and 0 for background events.
        test_label_signal = np.ones(np.shape(signal_test_1)[0])
        test_label_bkg = np.zeros(np.shape(bkg_test_1)[0])

        x_train_1 = np.concatenate((train_sr_1, train_sb_1))
        x_train_2 = np.concatenate((train_sr_2, train_sb_2))
        y_train = np.concatenate((train_label_sr, train_label_sb))

        x_test_1 = np.concatenate((signal_test_1, bkg_test_1))
        x_test_2 = np.concatenate((signal_test_2, bkg_test_2))
        y_test = np.concatenate((test_label_signal, test_label_bkg))

        x_train_1 = x_train_1.reshape((np.shape(x_train_1)[0], p_1, p_1, 1))
        x_train_2 = x_train_2.reshape((np.shape(x_train_2)[0], p_1, p_1, 1))
        x_test_1 = x_test_1.reshape((np.shape(x_test_1)[0], p_1, p_1, 1))
        x_test_2 = x_test_2.reshape((np.shape(x_test_2)[0], p_1, p_1, 1))

        # Keras takes the `validation_split` fraction from the END of the
        # arrays *before* shuffling. The arrays above are ordered SR then SB,
        # so without this permutation the validation set would consist only of
        # label-0 sideband events and early stopping would monitor one class.
        # The same permutation is applied to both jets and the labels so the
        # samples stay aligned.
        shuffle_idx = np.random.permutation(np.shape(y_train)[0])
        x_train_1 = x_train_1[shuffle_idx]
        x_train_2 = x_train_2[shuffle_idx]
        y_train = y_train[shuffle_idx]

        epsilon_results = []
        for i in range(10):
            # NOTE(review): a new model is built on every iteration; if memory
            # grows over the scan, consider releasing Keras state between runs.
            cwola = CWoLA(p_1)
            loss_object = keras.losses.BinaryCrossentropy()
            optimizer = keras.optimizers.Adam(learning_rate = 1e-4)
            cwola.compile(loss = loss_object, optimizer = optimizer, metrics = ['accuracy'])
            early_stopping = keras.callbacks.EarlyStopping(monitor = 'val_loss', patience=30, restore_best_weights=True)
            cwola.fit({'jet_1': x_train_1, 'jet_2': x_train_2}, y_train, validation_split = 0.2,
              shuffle = True, batch_size = 500, callbacks = [early_stopping], epochs = 1000)

            # Inputs are passed by name, the same way as in fit().
            x_predict = cwola.predict({'jet_1': x_test_1, 'jet_2': x_test_2})
            fpr, tpr, th = roc_curve(y_test, x_predict)
            # Interpolate the ROC curve to read the TPR at fixed FPR working points.
            f = interpolate.interp1d(fpr, tpr)
            epsilon_s = f([0.001, 0.01, 0.1])
            print(epsilon_s)
            epsilon_results.append(epsilon_s)
        epsilon_results = np.array(epsilon_results)
        mean_results.append(np.mean(epsilon_results, axis = 0))
        std_results.append(np.std(epsilon_results, axis = 0))
    

# Convert the per-injection-size summaries to arrays and write them to the
# results directory as .npy files.
mean_results = np.array(mean_results)
std_results = np.array(std_results)
os.chdir(r'/home/manhducnmd/pp_dijet/Results_full')
for out_name, out_array in (('cwola_mean_dd10.npy', mean_results),
                            ('cwola_std_dd10.npy', std_results)):
    with open(out_name, 'wb') as out_file:
        np.save(out_file, out_array)
    