import library

In [None]:
from functools import partial

import numpy as np
import pandas as pd
import os
import random
import time
import tensorflow as tf, re, math
from tensorflow.keras import applications
from tensorflow.keras import layers
from tensorflow.keras import losses
from tensorflow.keras import optimizers
from tensorflow.keras import metrics
from tensorflow.keras import Model, Sequential
from tensorflow.keras import backend as K 
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
import gc
import uproot

In [None]:
# Choose the tf.distribute strategy used to build the model further down:
# a TPU strategy when a TPU cluster can be resolved, MirroredStrategy otherwise.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
    print("Running on TPU ", tpu.cluster_spec().as_dict()["worker"])
    # Connect to the resolved cluster and initialize the TPU system before
    # the strategy object is created.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    # NOTE(review): newer TensorFlow releases expose this as
    # tf.distribute.TPUStrategy and deprecate the experimental alias --
    # confirm against the TF version this notebook targets.
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
except ValueError:
    # Only ValueError is handled here; any other exception from the TPU
    # setup above propagates.
    print("Not connected to a TPU runtime. Using CPU/GPU strategy")
    strategy = tf.distribute.MirroredStrategy()
    
!nvidia-smi

In [None]:
# Number of samples per batch; read by build_dset when batching the datasets.
batch_size = 64

loss function

In [None]:
def nll(y_true, y_pred):
    """Bernoulli negative log likelihood, summed over the last axis.

    Args:
        y_true: Target tensor.
        y_pred: Predicted tensor, passed to ``K.binary_crossentropy``
            together with ``y_true``.

    Returns:
        The element-wise binary cross-entropy reduced with a sum over
        axis ``-1``.
    """
    # keras.losses.binary_crossentropy would average over the last axis;
    # the likelihood needs the sum, so the reduction is done explicitly.
    elementwise_bce = K.binary_crossentropy(y_true, y_pred)
    return K.sum(elementwise_bce, axis=-1)

class KLDivergenceLayer(layers.Layer):
    """Identity transform layer that adds KL divergence to the model loss.

    The layer is called on ``[mu, log_var]``. It registers the batch mean of
    ``-0.5 * sum(1 + log_var - mu**2 - exp(log_var))`` (summed over the last
    axis) through ``add_loss`` and returns its inputs unchanged.
    """

    def __init__(self, *args, **kwargs):
        # Initialise the Keras base layer first so that attribute assignment
        # happens on a fully constructed layer.
        super().__init__(*args, **kwargs)
        # Flag kept from the original implementation; nothing in this file
        # reads it.
        self.is_placeholder = True

    def call(self, inputs):
        """Register the KL term as a layer loss and pass ``inputs`` through.

        Args:
            inputs: A pair ``(mu, log_var)`` of tensors.

        Returns:
            ``inputs``, unchanged.
        """
        mu, log_var = inputs

        # One KL value per sample (sum over the last axis).
        kl_batch = -0.5 * K.sum(
            1 + log_var - K.square(mu) - K.exp(log_var),
            axis=-1,
        )

        # The ``inputs=`` keyword of ``add_loss`` is deprecated (the
        # dependency is inferred automatically), so it is no longer passed.
        self.add_loss(K.mean(kl_batch))

        return inputs

build model

In [None]:
class _GaussianSampling(layers.Layer):
    """Reparametrization trick: z = mu + exp(log_var / 2) * eps, eps ~ N(0, I)."""

    def call(self, inputs):
        z_mu, z_log_var = inputs
        # Noise is drawn inside the graph with the same shape as ``z_mu`` so
        # the model needs no separate noise input.
        eps = K.random_normal(shape=K.shape(z_mu))
        return z_mu + K.exp(0.5 * z_log_var) * eps


def get_model(original_dim, intermediate_dim, latent_dim):
    """Build and compile the variational autoencoder.

    Args:
        original_dim: Size of the input (and reconstructed) feature vector.
        intermediate_dim: Width of the hidden Dense layer used in both the
            encoder and the decoder.
        latent_dim: Size of the latent variable ``z``.

    Returns:
        A compiled Keras ``Model`` named ``'vae'`` that maps the input ``x``
        to its reconstruction. It is compiled with the ``rmsprop`` optimizer
        and the ``nll`` loss; the KL term is added by ``KLDivergenceLayer``.
    """
    # Encoder
    x = layers.Input(shape=(original_dim,))
    h = layers.Dense(intermediate_dim, activation='relu')(x)

    z_mu = layers.Dense(latent_dim)(h)
    z_log_var = layers.Dense(latent_dim)(h)

    # Identity layer whose only effect is to register the KL loss.
    z_mu, z_log_var = KLDivergenceLayer()([z_mu, z_log_var])

    # Reparametrization trick. The noise used to be supplied through a second
    # ``Input(tensor=...)`` of the model; sampling inside a layer keeps the
    # model single-input, matching the data passed to ``fit``.
    z = _GaussianSampling()([z_mu, z_log_var])

    # Decoder is MLP specified as single Keras Sequential Layer
    decoder = Sequential([
        layers.Dense(intermediate_dim, input_dim=latent_dim, activation='relu'),
        layers.Dense(original_dim, activation='sigmoid'),
    ])

    x_pred = decoder(z)

    vae = Model(inputs=x, outputs=x_pred, name='vae')
    vae.compile(optimizer='rmsprop', loss=nll)
    return vae

load data

In [None]:
def get_df(root_file_name, filter_name):
    """Read the ``"tree"`` object of a ROOT file into a pandas DataFrame.

    Args:
        root_file_name: Path of the ROOT file to open with ``uproot``.
        filter_name: Branch-name filter forwarded to ``arrays``
            (``'*'`` selects every branch).

    Returns:
        The selected branches of ``"tree"`` as returned by
        ``arrays(..., library="pd")``.
    """
    # ``filter_name`` is an argument of ``arrays``, not an option of
    # ``uproot.open``. The context manager closes the file once the arrays
    # have been read into memory.
    with uproot.open(root_file_name) as root_file:
        return root_file["tree"].arrays(filter_name=filter_name, library="pd")

# Read every branch of the input file and discard rows containing NaN.
inputfile = 'test.root'
df = get_df(inputfile, '*').dropna()

# Feature columns, kept in sorted order.
features = sorted(['Jet_pt'])

# 80/20 split of the selected columns into train and test sets.
X = df[features]
x_train, x_test = train_test_split(X, test_size=0.20)

In [None]:
def build_dset(df):
    """Turn a DataFrame into a batched, prefetched ``tf.data.Dataset``.

    Each element is a pair ``(inputs, targets)`` in which both members are
    built from the same columns of ``df``. Batches use the module-level
    ``batch_size`` and the last partial batch is kept.

    NOTE(review): both members are dicts keyed by column name -- confirm
    that this is the structure the model passed to ``fit`` expects.
    """
    frame = df.copy()
    inputs, targets = dict(frame), dict(frame)
    return (
        tf.data.Dataset.from_tensor_slices((inputs, targets))
        .batch(batch_size, drop_remainder=False)
        .prefetch(tf.data.experimental.AUTOTUNE)
    )
    
# Batched datasets for the train and test splits.
x_train_dataset = build_dset(x_train)
x_test_dataset = build_dset(x_test)

get model

In [None]:
# Layer sizes passed to get_model; the input width follows the number of
# selected feature columns.
original_dim = len(features)
intermediate_dim = 16
latent_dim = 8

# Create the model inside the distribution strategy scope chosen earlier,
# then print its layer summary.
with strategy.scope():
    model = get_model(original_dim, intermediate_dim, latent_dim)
    model.summary()

train model

In [None]:
def get_lr_callback(lr_start=0.000001, lr_max=0.01, lr_min=0.000001,
                    lr_ramp_ep=5, lr_sus_ep=10, lr_decay=0.8):
    """Create a ramp / sustain / exponential-decay learning-rate scheduler.

    The defaults reproduce the schedule that used to be hard-coded here.

    Args:
        lr_start: Learning rate at epoch 0.
        lr_max: Learning rate reached after the ramp and held while
            sustaining.
        lr_min: Value the learning rate decays towards.
        lr_ramp_ep: Number of epochs of linear ramp from ``lr_start`` to
            ``lr_max``.
        lr_sus_ep: Number of epochs for which ``lr_max`` is held after the
            ramp.
        lr_decay: Per-epoch multiplicative decay factor applied to
            ``lr_max - lr_min`` once the sustain phase is over.

    Returns:
        A ``tf.keras.callbacks.LearningRateScheduler`` wrapping the schedule,
        with verbose output enabled.
    """

    def lrfn(epoch):
        if epoch < lr_ramp_ep:
            # Linear warm-up.
            return (lr_max - lr_start) / lr_ramp_ep * epoch + lr_start
        if epoch < lr_ramp_ep + lr_sus_ep:
            # Plateau at the maximum rate.
            return lr_max
        # Exponential decay towards lr_min.
        decay_epochs = epoch - lr_ramp_ep - lr_sus_ep
        return (lr_max - lr_min) * lr_decay ** decay_epochs + lr_min

    return tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=1)

# File name template for the checkpoints; the ``{epoch:05d}`` field is
# presumably filled in by ModelCheckpoint with the epoch number.
checkpoint_path = "weights.{epoch:05d}.hdf5"
# Create a callback that saves the model's weights (weights only, not the
# full model).
# NOTE(review): save_best_only is False, so ``monitor``/``mode`` presumably
# do not influence which checkpoints are written -- confirm.
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 monitor = 'val_loss',
                                                 save_weights_only=True,
                                                 save_best_only=False,
                                                 mode = 'min',
                                                 verbose=1)

# Number of passes over the training dataset.
num_epochs = 20

# The datasets are already batched by build_dset, so ``batch_size`` is not
# passed to ``fit`` (Keras documents that it must not be specified for
# tf.data inputs). ``shuffle`` is likewise omitted because it is ignored for
# tf.data inputs; any shuffling has to happen in the dataset pipeline itself.
history = model.fit(
    x_train_dataset,
    epochs=num_epochs,
    validation_data=x_test_dataset,
    callbacks=[cp_callback, get_lr_callback()],
)