In [1]:
import argparse
import pandas as pd
import numpy as np
import math
import h5py
from sklearn.model_selection import train_test_split
import joblib
import pickle
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import roc_curve, auc
import tensorflow as tf
import sys
import gc
import logging
import keras_tuner as kt
import os

# import setGPU
import tensorflow.keras as keras
import tensorflow_model_optimization as tfmot
tsk = tfmot.sparsity.keras
from tensorflow.keras import layers
from tensorflow.keras import backend as K
tf.keras.mixed_precision.set_global_policy('mixed_float16')
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, TerminateOnNaN
from tensorflow.keras.layers import (
    Lambda,
    Input,
    Dense,
    Conv2D,
    AveragePooling2D,
    MaxPooling2D,
    UpSampling2D,
    ZeroPadding2D,
    Conv2DTranspose,
    BatchNormalization,
    Flatten,
    Reshape,
    Activation,
    ReLU,
    LeakyReLU,
    Dropout,
    Concatenate,
    Cropping1D,
    Layer,
    )

from datetime import datetime
from tensorboard import program
import os
import pathlib
import matplotlib as mpl
import matplotlib.pyplot as plt
try:
    import mplhep as hep
    hep.style.use(hep.style.ROOT)
    print("Using MPL HEP for ROOT style formating")
except:
    print("Instal MPL HEP for style formating")
mpl.rcParams['axes.prop_cycle'] = mpl.cycler(color=["#DB4437", "#4285F4", "#F4B400", "#0F9D58", "purple", "goldenrod", "peru", "coral","turquoise",'gray','navy','m','darkgreen','fuchsia','steelblue']) 

from autoencoder_classes import AE,VAE
from neptunecontrib.monitoring.keras import NeptuneMonitor
from losses import mse_split_loss, radius, kl_loss
from functions import make_mse_loss_numpy
from data_preprocessing import prepare_data
from model import build_AE, build_VAE, Sampling

def return_total_loss(loss, bsm_t, bsm_pred):
    total_loss = loss(bsm_t, bsm_pred.astype(np.float32))
    return total_loss

from qkeras.quantizers import quantized_bits
from keras.utils import tf_utils
quantize=False

import time
ktuner_results = f"{int(time.time())}"

Your GPU may run slowly with dtype policy mixed_float16 because it does not have compute capability of at least 7.0. Your GPU:
  Tesla P100-PCIE-12GB, compute capability 6.0
See https://developer.nvidia.com/cuda-gpus for a list of GPUs and their compute capabilities.
Using MPL HEP for ROOT style formating


In [2]:
def mse_loss(inputs, outputs):
    # remove last dimension
    inputs = tf.reshape(inputs, (tf.shape(inputs)[0],19,3))
    outputs = tf.reshape(outputs, (tf.shape(outputs)[0],19,3))
    
    mask0 = tf.math.not_equal(inputs[:,:,0],0)
    mask1 = tf.math.not_equal(inputs[:,:,1],0)
    mask2 = tf.math.not_equal(inputs[:,:,2],0)
    mask = tf.math.logical_and(mask0, mask1)
    mask = tf.math.logical_and(mask, mask2)
    # tf.print(mask)
    mask = tf.cast(mask, tf.float32)
    mask = tf.reshape(mask, (tf.shape(mask)[0],19,1))

    # remove zero entries
    loss = reco_scale*tf.reduce_mean(tf.square(inputs[:,:,:]-outputs[:,:,:])*mask)
    return loss

def mse_loss_numpy(inputs, outputs):
    # remove last dimension
    inputs = np.reshape(inputs, (inputs.shape[0],19,3))
    outputs = np.reshape(outputs, (outputs.shape[0],19,3))
    
    mask0 = inputs[:,:,0]!=0
    mask1 = inputs[:,:,1]!=0
    mask2 = inputs[:,:,2]!=0
    mask = (mask0 + mask1 + mask2)*1
    mask = np.reshape(mask, (mask.shape[0],19,1))
    inputs = inputs*mask
    outputs = outputs*mask

    # remove zero entries
    loss = np.mean(np.square(inputs.reshape(inputs.shape[0],57)-outputs.reshape(outputs.shape[0],57)),axis=1)
    return loss

def radius(mean, logvar):
    sigma = np.sqrt(np.exp(logvar))
    radius = mean*mean/sigma/sigma
    return np.sum(radius, axis=-1)

def kl_loss(mu, logvar, beta=None):
    kl_loss = 1 + logvar - np.square(mu) - np.exp(logvar)
    kl_loss = np.mean(kl_loss, axis=-1) # mean over latent dimensions
    kl_loss *= -0.5
    if beta!=None: return beta*kl_loss
    else: return kl_loss


class Sampling(layers.Layer):
    """Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""

    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon

mse = tf.keras.losses.MeanSquaredError()

def total_objective(vae, output):
    
    for key in output.keys():
        Y_predict, z_mean , z_logvar = vae.predict(output[key]['target'].reshape(output[key]['target'].shape[0],57),batch_size=1024*4,return_latent=True)
        Y_predict = Y_predict.reshape(Y_predict.shape[0],19,3)
        output[key]['reco_loss'] = mse_loss_numpy(output[key]['target'], Y_predict)
        output[key]['kl_loss'] = kl_loss(z_mean, z_logvar)
        output[key]['total_loss'] = output[key]['reco_loss'] + output[key]['kl_loss']
    
    qcd = output['ZeroBias']['target']
    bsm = output['haa4b_ma15_powheg']['target']

    total_qcd = output['ZeroBias']['total_loss']
    total_bsm = output['haa4b_ma15_powheg']['total_loss']

    total_true_val = np.concatenate((np.ones(total_bsm.shape[0]), np.zeros(total_qcd.shape[0])))
    total_pred_val = np.nan_to_num(np.concatenate((total_bsm, total_qcd)))

    total_fpr_loss, total_tpr_loss, total_threshold_loss = roc_curve(total_true_val, total_pred_val)
    total_objective = np.interp(10**(-5), total_fpr_loss, total_tpr_loss)

    return total_objective

In [17]:
def make_model(hp):
    input_shape=57
    inputs = keras.Input(shape=(input_shape,))
    x = layers.Dense(hp.Int("inputs",16,256,32),kernel_initializer='lecun_uniform', activation='relu')(inputs)
    
    for i in range(hp.Int("n_encoder_layers",1, 5)):
        x = layers.Dense(hp.Int("encoder_layer_width",16,256,32),kernel_initializer='lecun_uniform', activation='relu')(x)
    
    z_width = hp.Int("latent_dim_width",2,12,1)
    
    z_mean = layers.Dense(z_width,kernel_initializer='zeros')(x)
    z_mean = tf.cast(z_mean, tf.float16)
    z_logvar = layers.Dense(z_width,kernel_initializer='zeros')(x)
    z_logvar = tf.cast(z_logvar, tf.float16)
    z = Sampling()([z_mean, z_logvar])
    encoder = keras.Model(inputs, [z_mean, z_logvar, z], name="encoder")
    encoder.summary()

    latent_inputs = keras.Input(hp.Int("latent_dim_width",2,12,1),)
    y = layers.Dense(hp.Int("layer_width",16,256,32),kernel_initializer='lecun_uniform', activation='relu')(latent_inputs)
    
    for i in range(hp.Int("n_decoder_layers",1, 5)):
        y = layers.Dense(hp.Int("decoder_layer_width",16,256,32),kernel_initializer='lecun_uniform', activation='relu')(y)
    
    decoded = layers.Dense(input_shape)(y)
    decoder = keras.Model(latent_inputs, decoded, name="decoder")
    decoder.summary()
    
    vae = VAE(encoder, decoder)
    obj = total_objective(vae, output)
    
    vae.compile(optimizer=keras.optimizers.Adam(),
                metrics=[mse_loss, kl_loss(z_mean, z_logvar), obj])
    
    return vae

In [14]:
def optimization(input_qcd, input_bsm, beta):
    
    # magic trick to make sure that Lambda function works
    tf.compat.v1.disable_eager_execution()
    
    global output
    output={}
    
    with h5py.File(input_qcd, 'r') as h5f:
        output['ZeroBias'] = {}
    
        data = np.array(h5f['full_data_cyl'][:events], dtype=np.float16)
        ET = np.array(h5f['ET'][:events], dtype=np.float16)
        L1bit = np.array(h5f['L1bit'][:events], dtype=np.int8)

        #mask saturated ET
        mask_ET = ET<2047.5
        ET = ET[mask_ET]
        data = data[mask_ET]
        L1bit = L1bit[mask_ET]
    
        #mask saturated PT
        mask_0  = data[:,0,0]<2047.5
        mask_1_9  = data[:,1:9,0]<255.5
        mask_9_20  = data[:,9:20,0]<1023.5
        mask = np.concatenate((mask_0[:,np.newaxis],mask_1_9,mask_9_20),axis=1)*1
        data = data*mask[:,:,np.newaxis]

        pt = np.copy(data[:,:,0])
        eta = np.copy(data[:,:,1])
        phi = np.copy(data[:,:,2])
    
        data[:,:,0] = pt*np.cos(phi)
        data[:,:,1] = pt*np.sin(phi)
        data[:,:,2] = pt*np.sinh(eta)
        data_target = np.copy(data)

        del pt, eta, phi, mask_ET, mask_0, mask_1_9, mask_9_20, mask
    
        if(norm=='ET'):
            data_target[:,:,:] = data[:,:,:]/ET[:,None,None]
            std_xy = (np.std(data_target[:,:,0])+np.std(data_target[:,:,1]))/2
            std_z = np.std(data_target[:,:,2])
            data_target[:,:,2] = data_target[:,:,2]*(std_xy/std_z)
        elif(norm=='std'):
            mean_qcd = np.mean(data_target, axis=0)
            std_qcd = np.std(data_target, axis=0)
            data_target = (data_target[:,:,:] - mean_qcd[None,:,:])/std_qcd[None,:,:]

            # mean_qcd = np.array([np.mean(data_target[:,:,0]),np.mean(data_target[:,:,1]),np.mean(data_target[:,1:20,2])])
            # std_qcd = np.array([np.std(data_target[:,:,0]),np.std(data_target[:,:,1]),np.std(data_target[:,1:20,2])])
            # data_target[:,:,0] = (data_target[:,:,0]-mean_qcd[0])/std_qcd[0]
            # data_target[:,:,1] = (data_target[:,:,1]-mean_qcd[1])/std_qcd[1]
            # data_target[:,:,2] = (data_target[:,:,2]-mean_qcd[2])/std_qcd[2] 
            data_target[:,0,2] = 0
        else:
            data_target[:,0,:] = data[:,0,:]/2048
            data_target[:,1:9,:] = data[:,1:9,:]/256
            data_target[:,9:20,:] = data[:,9:20,:]/1024
        

        X_train, output['ZeroBias']['data'], Y_train, output['ZeroBias']['target'], _ , output['ZeroBias']['ET'], _ ,output['ZeroBias']['L1bit'] =  train_test_split( data, data_target, ET,L1bit, test_size=0.5)

        X_train = X_train.reshape(X_train.shape[0], X_train.shape[1]*X_train.shape[2])
        Y_train = Y_train.reshape(Y_train.shape[0], Y_train.shape[1]*Y_train.shape[2])

        del data, data_target, ET, L1bit
        
    with h5py.File(input_bsm,'r') as h5f2:
        for key in h5f2.keys():
            if('TT' not in key[:2]) and ('haa4b_ma15_powheg' not in key) and ('GluGluToHHTo4B_cHHH1' not in key): continue
            if len(h5f2[key].shape) < 3: continue
            
            output[str(key)] = {}
            output[str(key)]['data'] = np.array(h5f2[str(key)][:events,:,:],dtype=np.float16)
            output[str(key)]['ET'] = np.array(h5f2[str(key)+'_ET'][:events],dtype=np.float16)
            output[str(key)]['L1bit'] = np.array(h5f2[str(key)+'_l1bit'][:events],dtype=np.int8)

            #mask saturated ET
            mask_ET = output[str(key)]['ET']<2047.5
            output[str(key)]['ET'] = output[str(key)]['ET'][mask_ET]
            output[str(key)]['data'] = output[str(key)]['data'][mask_ET]
            output[str(key)]['L1bit'] = output[str(key)]['L1bit'][mask_ET]
        
            #mask saturated PT
            mask_0  = output[str(key)]['data'][:,0,0]<2047.5
            mask_1_9  = output[str(key)]['data'][:,1:9,0]<255.5
            mask_9_20  = output[str(key)]['data'][:,9:20,0]<1023.5
            mask = np.concatenate((mask_0[:,np.newaxis],mask_1_9,mask_9_20),axis=1)*1
            output[str(key)]['data'] = output[str(key)]['data']*mask[:,:,np.newaxis]

            pt = np.copy(output[str(key)]['data'][:,:,0])
            eta = np.copy(output[str(key)]['data'][:,:,1])
            phi = np.copy(output[str(key)]['data'][:,:,2])
        
            output[str(key)]['data'][:,:,0] = pt*np.cos(phi)
            output[str(key)]['data'][:,:,1] = pt*np.sin(phi)
            output[str(key)]['data'][:,:,2] = pt*np.sinh(eta)

            del pt, eta, phi, mask_ET, mask_0, mask_1_9, mask_9_20, mask


            output[str(key)]['target'] = np.copy(output[str(key)]['data'])
            if(norm=='ET'):
                output[str(key)]['target'] = output[str(key)]['data']/output[str(key)]['ET'][:,None,None]
                output[str(key)]['target'][:,:,2] = output[str(key)]['target'][:,:,2]*(std_xy/std_z)
            elif(norm=='std'):
                output[str(key)]['target'] = (output[str(key)]['target'] - mean_qcd[None,:,:])/std_qcd[None,:,:]
                # output[str(key)]['target'][:,:,0]= (output[str(key)]['data'][:,:,0]-mean_qcd[0])/std_qcd[0]
                # output[str(key)]['target'][:,:,1]= (output[str(key)]['data'][:,:,1]-mean_qcd[1])/std_qcd[1]
                # output[str(key)]['target'][:,:,2]= (output[str(key)]['data'][:,:,2]-mean_qcd[2])/std_qcd[2]
                output[str(key)]['target'][:,0,2] = 0
            elif(norm=='max_PT'):
                output[str(key)]['target'][:,0,:] = output[str(key)]['data'][:,0,:]/2048
                output[str(key)]['target'][:,1:9,:] = output[str(key)]['data'][:,1:9,:]/256
                output[str(key)]['target'][:,9:20,:] = output[str(key)]['data'][:,9:20,:]/1024
        
    ktuner = kt.BayesianOptimization(
            hypermodel=make_model,
            objective = kt.Objective('val_total_objective', direction='min'),
            max_trials = 1,
            executions_per_trial = 3,
            directory = ktuner_results)
    
    ktuner.search(x=X_train,
                 y=Y_train,
                 epochs=5,
                 batch_size=1024,
                 validation_split=0.2,
                 callbacks=[tf.keras.callbacks.EarlyStopping('val_loss',patience=5)])
    
    with open(f"ktuner_{int(time.time())}.pkl", "wb") as f:
        pickle.dump(ktuner, f)
    
    ktuner.results_summary()
    
    logging.info('Get the optimal hyperparameters')
    best_hps = ktuner.get_best_hyperparameters(num_trials=5)[0]
    logging.info('Getting and printing best hyperparameters!')
    print(best_hps)
        

In [15]:
input_hardqcd="/eos/uscms/store/group/lpctrig/jngadiub/L1TNtupleRun3-h5-extended-v2/QCD_preprocessed.h5"
input_qcd="/eos/uscms/store/group/lpctrig/jngadiub/L1TNtupleRun3-ZB-h5-extended-v2/ZB_preprocessed.h5"
input_bsm = "/eos/uscms/store/group/lpctrig/jngadiub/L1TNtupleRun3-h5-extended-v2-120X/BSM_preprocessed.h5"
events=500000
norm = 'std'
beta = 0.8
reco_scale = 1000

In [18]:
optimization(input_qcd, input_bsm, beta)

TypeError: in user code:

    File "/tmp/ipykernel_195759/1538566277.py", line 57, in call  *
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon

    TypeError: Input 'y' of 'Mul' Op has type float32 that does not match type float16 of argument 'x'.
