# Demo Notebook: MLC $C$ Parametrizations

## Import and Set-Up

First, we must import all the relevant libraries.

In [3]:
## General imports ##
# Used for selecting GPU.
import os
import tensorflow as tf

# Used for the probability distributions.
from scipy import stats

## Utility imports ##
from utils.losses import *
from utils.plotting import *
from utils.training import *

# Import numpy explicitly: `np` was previously only in scope if one of the
# `utils` star imports above happened to re-export it.
import numpy as np

# Seed numpy's global RNG. This alone isn't enough to ensure results are
# completely replicable: TensorFlow's RNG is not seeded in this cell, and
# there is randomness in GPU execution.
SEED = 666
np.random.seed(SEED)

In [4]:
# This cell is to select the GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "0" # pick a number < 4 on ML4HEP; < 3 on Voltan
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    # Allocate GPU memory on demand instead of reserving it all up front.
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
else:
    # Indexing an empty device list would raise IndexError; report instead so
    # the notebook can still run on a machine without a visible GPU.
    print('No GPU visible to TensorFlow; continuing without GPU memory growth.')

2022-07-07 08:40:44.584554: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2022-07-07 08:40:44.610706: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: Quadro RTX 6000 computeCapability: 7.5
coreClock: 1.77GHz coreCount: 72 deviceMemorySize: 23.65GiB deviceMemoryBandwidth: 625.94GiB/s
2022-07-07 08:40:44.610924: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
2022-07-07 08:40:44.612238: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2022-07-07 08:40:44.613789: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
2022-07-07 08:40:44.614011: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10
2

## Data Generation

We first set up the data distributions as well as various other aspects of the training.

**TODO:** Add more about $C$ parametrizations here. This notebook compares three of them for the MLC loss: linear, square, and exponential.

In [None]:
## Tunable inputs: change these to experiment.

# Index of this run; trained models are saved in a folder "set_{num}".
num = 0

# How many models are trained per parametrization.
reps = 100

# Background and signal distributions: unit-width Gaussians centred at
# -0.1 and +0.1 respectively.
bkgd = stats.norm(loc=-0.1, scale=1)
sgnl = stats.norm(loc=+0.1, scale=1)

# Sample size (N samples are drawn from each distribution for training).
N = 10 ** 6

In [None]:
# Keyword arguments for each parametrization: the loss function paired with
# the activation used on the output layer.
linear = {'loss': mlc, 'output': 'relu'}
square = {'loss': square_mlc, 'output': 'linear'}
exponl = {'loss': exp_mlc, 'output': 'linear'}

# Weight-file templates; the remaining '{}' is filled with the model index.
_set_dir = 'models/demo/set_' + str(num)
filestr_linear = _set_dir + '/linear/model_{}.h5'
filestr_square = _set_dir + '/square/model_{}.h5'
filestr_exponl = _set_dir + '/exponl/model_{}.h5'

## Data Generation
# Whatever list make_data returns, with the sample size N appended as its
# final element.
data = make_data(bkgd, sgnl, N) + [N]

## Training

We train `reps` models of each parametrization of the MLC loss.

In [None]:
# Make sure the output folders exist before any weights are written.
for filestr in (filestr_linear, filestr_square, filestr_exponl):
    os.makedirs(os.path.dirname(filestr), exist_ok=True)

# Train `reps` models per parametrization and save each model's weights to
# the file given by the matching template.
for i in range(reps):
    print(i, end = ' ')
    linear_model = train(data, **linear)
    linear_model.save_weights(filestr_linear.format(i))
    square_model = train(data, **square)
    square_model.save_weights(filestr_square.format(i))
    # The exponential model must be trained with the `exponl` settings
    # (it was previously trained with `linear`).
    exponl_model = train(data, **exponl)
    exponl_model.save_weights(filestr_exponl.format(i))

Once all the models are trained, we load them back in and create a likelihood ratio function from each one.

In [None]:
# Rebuild every trained model from its saved weights and wrap it in the
# likelihood-ratio helper matching its parametrization.
lrs_linear, lrs_square, lrs_exponl = [], [], []
for i in range(reps):
    # Create a fresh model per parametrization and load the i-th weights.
    model_linear = create_model(**linear)
    model_linear.load_weights(filestr_linear.format(i))
    model_square = create_model(**square)
    model_square.load_weights(filestr_square.format(i))
    model_exponl = create_model(**exponl)
    model_exponl.load_weights(filestr_exponl.format(i))

    # Collect the likelihood-ratio objects in training order.
    lrs_linear.append(pure_lr(model_linear))
    lrs_square.append(square_lr(model_square))
    lrs_exponl.append(exp_lr(model_exponl))

## Visualization

Once we have the model likelihood ratios, we can average together all `reps` of our predictions and compare them against the true likelihood ratio.

In [None]:
# Get average predictions and errors. Add on the labels for plotting.
ds = np.linspace(0, 20, 1000)
# NOTE(review): `D` is not defined anywhere in the visible notebook, so this
# line raises NameError on a fresh kernel. It presumably refers to the raw
# sample used to standardize the inputs -- confirm and define it before use.
xs = (ds - np.mean(D)) / np.std(D)

# Use the likelihood-ratio lists built in the loading cell (lrs_linear,
# lrs_square, lrs_exponl); the former names lrs_1/lrs_2/lrs_3 were never defined.
lr_1 = avg_lr(get_preds(lrs_linear, xs)) + ('MLC (linear)',)
lr_2 = avg_lr(get_preds(lrs_square, xs)) + ('MLC (square)',)
lr_3 = avg_lr(get_preds(lrs_exponl, xs)) + ('MLC (exponential)',)

lrr_1 = avg_lrr(get_preds(lrs_linear, xs), ds) + ('MLC (linear)',)
lrr_2 = avg_lrr(get_preds(lrs_square, xs), ds) + ('MLC (square)',)
lrr_3 = avg_lrr(get_preds(lrs_exponl, xs), ds) + ('MLC (exponential)',)

In [None]:
# Plot the three averaged likelihood-ratio results together; the output path
# for this run is passed through `filename`.
lr_plot(
    [lr_1, lr_2, lr_3],
    r'MLC $C$ Parametrizations',
    filename='plots/demo/set_{}/lrs.png'.format(num),
)

In [None]:
# Plot the three `lrr_*` results together; the output path for this run is
# passed through `filename`.
lrr_plot(
    [lrr_1, lrr_2, lrr_3],
    r'MLC $C$ Parametrizations',
    filename='plots/demo/set_{}/lrrs.png'.format(num),
)