In [None]:
import sys
sys.path.append("..")
import numpy as np
import h5py
import setGPU

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Lambda, BatchNormalization, Activation, Concatenate, Dropout, Layer
from tensorflow.keras.layers import ReLU, LeakyReLU
from tensorflow.keras import backend as K
import math

from datetime import datetime
from tensorboard import program
import os
import pathlib
import matplotlib.pyplot as plt
%matplotlib inline

from functions import load_model, save_model, make_mse_loss
import pickle
from autoencoder_classes import AE

In [None]:
from tensorflow.compat.v1 import ConfigProto, InteractiveSession

# Open a TF1-compat interactive session whose GPU options have allow_growth enabled,
# so GPU memory is requested incrementally instead of being reserved up front.
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

## Load data

In [None]:
# Data = (N,19,3,1).flatten()
# The training arrays must be unpacked here: later cells reshape and fit on
# X_train_flatten / X_train_scaled, which were previously discarded as `_`.
# NOTE(review): assumes the first two pickled items are the training arrays in
# (flatten, scaled) order, mirroring the test arrays — confirm against the writer.
# NOTE(security): pickle.load can execute arbitrary code; only load trusted files.
with open('/afs/cern.ch/work/e/egovorko/public/data_20000000.pickle', 'rb') as f:
    (X_train_flatten, X_train_scaled,
     X_test_flatten, X_test_scaled,
     all_bsm_data, pt_scaler) = pickle.load(f)

In [None]:
# Collapse every axis after the event axis into a single feature axis.
# Using -1 instead of shape[1]*shape[2] keeps this cell safe to re-run on an
# array that has already been flattened to 2-D.
X_train_flatten = X_train_flatten.reshape(X_train_flatten.shape[0], -1)

In [None]:
# Collapse every axis after the event axis into a single feature axis.
# Using -1 instead of shape[1]*shape[2] keeps this cell safe to re-run on an
# array that has already been flattened to 2-D.
X_train_scaled = X_train_scaled.reshape(X_train_scaled.shape[0], -1)

In [None]:
# Collapse every axis after the event axis into a single feature axis.
# Using -1 instead of shape[1]*shape[2] keeps this cell safe to re-run on an
# array that has already been flattened to 2-D.
X_test_flatten = X_test_flatten.reshape(X_test_flatten.shape[0], -1)

In [None]:
# Collapse every axis after the event axis into a single feature axis.
# Using -1 instead of shape[1]*shape[2] keeps this cell safe to re-run on an
# array that has already been flattened to 2-D.
X_test_scaled = X_test_scaled.reshape(X_test_scaled.shape[0], -1)

## Define autoencoder model

In [None]:
# Flattened per-event feature count fed to (and reconstructed by) the network: 19 x 3.
input_shape = 19 * 3
# Width of the bottleneck layer between encoder and decoder.
latent_dim = 3

In [None]:
def _dense_bn_leaky(tensor, units):
    """Apply one hidden stage: Dense(units, He-uniform init) -> BatchNormalization -> LeakyReLU(0.3)."""
    tensor = Dense(units, kernel_initializer=tf.keras.initializers.HeUniform())(tensor)
    tensor = BatchNormalization()(tensor)
    return LeakyReLU(alpha=0.3)(tensor)


# encoder: input_shape -> 32 -> 16 -> latent_dim
# `shape` must be a tuple; `(input_shape)` is just the bare int.
inputArray = Input(shape=(input_shape,))
x = BatchNormalization()(inputArray)
x = _dense_bn_leaky(x, 32)
x = _dense_bn_leaky(x, 16)
# The bottleneck is a plain linear Dense layer (no normalisation or activation).
encoder = Dense(latent_dim, kernel_initializer=tf.keras.initializers.HeUniform())(x)

# decoder: latent_dim -> 16 -> 32 -> input_shape
x = _dense_bn_leaky(encoder, 16)
x = _dense_bn_leaky(x, 32)
# Linear output layer with one unit per input feature.
decoder = Dense(input_shape, kernel_initializer=tf.keras.initializers.HeUniform())(x)

# create autoencoder
autoencoder = Model(inputs=inputArray, outputs=decoder)
autoencoder.summary()

In [None]:
# Wrap the Keras graph in the project's AE class and compile it with Adam.
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
ae = AE(autoencoder)
ae.compile(optimizer=keras.optimizers.Adam(learning_rate=0.00001))

## Train model

In [None]:
# Training hyper-parameters passed to ae.fit: mini-batch size and number of epochs.
BATCH_SIZE = 1024
EPOCHS = 150

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, TerminateOnNaN
from neptunecontrib.monitoring.keras import NeptuneMonitor

# Training callbacks, in the order they are handed to fit():
#   1. reduce the learning rate by 10x when val_loss plateaus,
#   2. abort on NaN loss,
#   3. forward metrics to Neptune,
#   4. stop early on stalled val_loss and restore the best weights.
callbacks = [
    ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.1,
        patience=2,
        verbose=1,
        mode='auto',
        min_delta=0.0001,
        cooldown=2,
        min_lr=1E-6,
    ),
    TerminateOnNaN(),
    NeptuneMonitor(),
    EarlyStopping(
        monitor='val_loss',
        verbose=1,
        patience=10,
        restore_best_weights=True,
    ),
]

In [None]:
# Fit on the unscaled flattened inputs against the scaled targets, holding out
# the last 20% of the training arrays for validation.
print("TRAINING")
history = ae.fit(
    X_train_flatten,
    X_train_scaled,
    validation_split=0.2,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    callbacks=callbacks,
)

In [None]:
# NOTE(review): the save call is commented out, so `autoencoder` is rebound to
# whatever model is stored under 'AE_notpruned_CMS' rather than the network
# trained above. All predictions below come from the loaded model — confirm
# that this is intended, or re-enable the save before loading.
#save_model('AE_notpruned_CMS', ae.autoencoder)
autoencoder = load_model('AE_notpruned_CMS')

In [None]:
# Training vs. validation loss per epoch, taken from the Keras History object
# returned by ae.fit. pyplot and the inline backend are already set up in the
# imports cell, so they are not re-imported here.
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='Training loss')
ax.plot(history.history['val_loss'], label='Validation loss')
ax.set_title('Training and validation loss - MSE')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend(loc='best')
plt.show()

## Prediction - background

In [None]:
# Run the autoencoder on the flattened background (QCD) test inputs.
qcd_prediction = autoencoder.predict(X_test_flatten)

## Prediction - Beyond Standard Model samples

In [None]:
# Inspect the array shape of the first BSM sample.
all_bsm_data[0].shape

In [None]:
# Names of the BSM signal samples. Later cells pair bsm_labels[i] with
# all_bsm_data[i], so the order here must match the order of that list.
bsm_labels = [
    'VectorZPrimeToQQ__M50',
    'VectorZPrimeToQQ__M100',
    'VectorZPrimeToQQ__M200',
    'VBF_HToInvisible_M125',
    'VBF_HToInvisible_M125_private',
    'ZprimeToZH_MZprime1000',
    'ZprimeToZH_MZprime800',
    'ZprimeToZH_MZprime600',
    'GluGluToHHTo4B',
    'HTo2LongLivedTo4mu_1000',
    'HTo2LongLivedTo4mu_125_12',
    'HTo2LongLivedTo4mu_125_25',
    'HTo2LongLivedTo4mu_125_50',
    'VBFHToTauTau',
    'VBF_HH',
]

In [None]:
# Build the scaled reconstruction target for every BSM sample.
bsm_scaled_data = []
for bsm in all_bsm_data:
    # View the sample as (events, 19, 3) so column 0 of the last axis can be scaled on its own.
    bsm = bsm.reshape(bsm.shape[0], 19, 3)
    bsm_data_target = np.copy(bsm)
    # Scale feature 0 with the fitted scaler loaded alongside the data...
    bsm_data_target[:, :, 0] = pt_scaler.transform(bsm_data_target[:, :, 0])
    # ...then force entries that were exactly 0 in the raw input back to 0.
    bsm_data_target[:, :, 0] = np.multiply(bsm_data_target[:, :, 0], np.not_equal(bsm[:, :, 0], 0))
    # ndarray.reshape returns a new array; the result must be assigned, otherwise
    # the target stays (events, 19, 3) instead of matching the flat model output.
    bsm_data_target = bsm_data_target.reshape(bsm_data_target.shape[0], -1)
    bsm_scaled_data.append(bsm_data_target)

In [None]:
# Number of scaled BSM samples.
# NOTE(review): later cells index bsm_scaled_data by position in bsm_labels,
# so this should be at least len(bsm_labels).
len(bsm_scaled_data)

In [None]:
# Collect [label, scaled target, reconstruction] for every labelled BSM sample.
bsm_results = []

for i, label in enumerate(bsm_labels):
    bsm_sample = all_bsm_data[i]
    # Flatten to (events, features), the layout the model was built for.
    bsm_input = bsm_sample.reshape(bsm_sample.shape[0], bsm_sample.shape[1] * bsm_sample.shape[2])
    # Use predict(), as for the QCD sample: it runs inference in batches rather than
    # pushing the whole sample through in one call, and returns a NumPy array.
    bsm_prediction = autoencoder.predict(bsm_input)

    bsm_results.append([label, bsm_scaled_data[i], bsm_prediction])

In [None]:
# Destination HDF5 file for the inputs, targets and predictions.
# NOTE(review): absolute, user-specific path — adjust before running elsewhere.
output_result = '/eos/user/e/epuljak/AE_result_CMSdata_notpruned.h5'

In [None]:
# Write background and BSM inputs, targets and reconstructions to one HDF5 file.
# The context manager closes the file even if a create_dataset call raises.
with h5py.File(output_result, 'w') as h5f:
    h5f.create_dataset('QCD_input', data=X_test_flatten)
    h5f.create_dataset('QCD_target', data=X_test_scaled)
    h5f.create_dataset('predicted_QCD', data=qcd_prediction)
    for i, (label, scaled_target, prediction) in enumerate(bsm_results):
        bsm_sample = all_bsm_data[i]
        # Store the raw input flattened to (events, features), as it was fed to the model.
        bsm_input = bsm_sample.reshape(bsm_sample.shape[0], bsm_sample.shape[1] * bsm_sample.shape[2])
        h5f.create_dataset('%s_scaled' % label, data=scaled_target)
        h5f.create_dataset('%s_input' % label, data=bsm_input)
        h5f.create_dataset('predicted_%s' % label, data=prediction)

In [None]:
# NOTE(review): redundant — the previous cell already closes the file. Kept as a
# manual cleanup in case that cell was interrupted; this cell can likely be removed.
h5f.close()