In [5]:
import os,pickle
import time

import numpy as np
import tensorflow as tf
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

from process_data import preprocess_conditional_flow_data_cww
from Model.ConditionalRealNVP import ConditionalRealNVP
from Model.Discriminator import Discriminator
from Utils.ObjDict import ObjDict
from Utils.mkdir_p import mkdir_p

In [2]:
# __________________________________________________________________ ||
# Basic configurables: input data, trained weight files, sample sizes
# __________________________________________________________________ ||

# Array file that is read with np.load further down.
input_csv_path = "data/train_cww.npy"

# Weight files for the signal flow, background flow and discriminator.
saved_sig_model_path = "output/train_mixnlp_cww_210120_v1/saved_model_sig_1700.h5"
saved_bkg_model_path = "output/train_mixnlp_cww_210120_v1/saved_model_bkg_1700.h5"
saved_disc_model_path = "output/train_mixnlp_cww_210120_v1/saved_model_disc_1700.h5"

# Figures are written next to the signal weight file.
output_dir = os.path.split(saved_sig_model_path)[0]

# Events per generated batch, flow dimensionality, number of conditioning inputs.
event_size, ndim, ncond = 4000, 3, 1

In [9]:
# __________________________________________________________________ ||
# Load models: build the three networks, then restore their weights
# __________________________________________________________________ ||

nf_model = ObjDict(
    sig = ConditionalRealNVP(num_coupling_layers=5,dim=ndim,ncond=ncond),
    bkg = ConditionalRealNVP(num_coupling_layers=5,dim=ndim,ncond=ncond),
    disc = Discriminator([32,32,32,]),
)

# Dummy inputs: draws from the signal flow's base distribution plus a
# constant condition column of ones.
samples = nf_model.sig.distribution.sample(event_size)
condition = np.full((event_size,1), 1.0)

# Each model is called once before load_weights -- presumably so its
# variables exist when the weights are restored (TODO confirm).
for flow, weights_path in (
    (nf_model.sig, saved_sig_model_path),
    (nf_model.bkg, saved_bkg_model_path),
):
    _,_ = flow.predict([samples,condition,])
    flow.load_weights(weights_path)

# The discriminator takes the samples alone (no condition input).
_ = nf_model.disc.predict(samples)
nf_model.disc.load_weights(saved_disc_model_path)


In [10]:
# Read the stored array, then split it into the signal collection
# (indexed per parameter point further down) and the background sample.
arr = np.load(file=input_csv_path)
(sigs, bkg) = preprocess_conditional_flow_data_cww(arr)

In [15]:
# __________________________________________________________________ ||
# Make plots for different conditions
# __________________________________________________________________ ||
#
# For n_dim*n_dim randomly chosen signal parameter points (drawn with
# replacement, then sorted by condition value) overlay, per feature column,
# the true and flow-generated distributions of signal and background.
# One figure is produced per feature column; each panel is one parameter point.

n_dim = 5
param_grid = [sigs[idx_param] for idx_param in np.random.randint(0,len(sigs),n_dim*n_dim)]
param_grid.sort(key=lambda x: x.condition[0])
figsize = (50,50)

samples = nf_model.sig.distribution.sample(event_size)
fig_m4l,ax_m4l = plt.subplots(n_dim,n_dim,figsize=figsize)
fig_mz1,ax_mz1 = plt.subplots(n_dim,n_dim,figsize=figsize)
fig_mz2,ax_mz2 = plt.subplots(n_dim,n_dim,figsize=figsize)

# Feature column index -> axes grid it is drawn on.
feature_axes = [(0,ax_m4l),(1,ax_mz1),(2,ax_mz2)]
# Shared histogram settings so every curve in every panel is comparable.
hist_style = dict(bins=100,density=True,histtype='step',range=[-10.,10.])

for i,m in enumerate(param_grid):

    # Row-major placement of panel i in the n_dim x n_dim grid.
    ix,iy = divmod(i,n_dim)

    condition_str = str(m.condition[0])
    # The same condition column is fed to both the signal and background flows.
    condition = np.ones((event_size,1)) * m.condition[0]

    # Signal: random batch of true events vs. events generated by the flow.
    idx_batch = np.random.randint(0,m.x.shape[0],event_size)
    x_sig_true = m.x[idx_batch]
    x_sig_gen,_ = nf_model.sig.predict([samples,condition,])

    # Background: same comparison, with indices redrawn over the background sample.
    idx_batch = np.random.randint(0,bkg.x.shape[0],event_size)
    x_bkg_true = bkg.x[idx_batch]
    x_bkg_gen,_ = nf_model.bkg.predict([samples,condition,])

    # Label prefixes all end with a space so legends read "True bkg <value>"
    # consistently (two of the original labels were missing it).
    curves = [
        ('True sig ',x_sig_true),
        ('Flow sig ',x_sig_gen),
        ('True bkg ',x_bkg_true),
        ('Flow bkg ',x_bkg_gen),
    ]

    for column,axes_grid in feature_axes:
        panel = axes_grid[ix,iy]
        for label_prefix,values in curves:
            panel.hist(values[:,column],label=label_prefix+condition_str,**hist_style)
        panel.legend(loc='best')
        panel.set_title(condition_str)

for figure,file_name in ((fig_m4l,'m4l.png'),(fig_mz1,'mZ1.png'),(fig_mz2,'mZ2.png')):
    figure.savefig(os.path.join(output_dir,file_name))
    # Release the (large) figure from pyplot's registry once it is on disk.
    plt.close(figure)

In [None]:
# __________________________________________________________________ ||
# Make plots for likelihood
# __________________________________________________________________ ||
#
# For n_dim*n_dim randomly chosen signal parameter points, scan the condition
# value over x_grid, evaluate the mean batch log-loss of a batch of that
# point's signal events at every scanned value, and plot the resulting curve.
# An arrow marks the parameter point's own condition value in each panel.

n_dim = 3
sig_bkg_ratio = 0.1
# Defined here so this cell does not depend on a value left over from another cell.
figsize = (50,50)
param_grid = [sigs[idx_param] for idx_param in np.random.randint(0,len(sigs),n_dim*n_dim)]
param_grid.sort(key=lambda x: x.condition[0])

# Scan points: n_grid+1 evenly spaced condition values, both ends included.
plot_low = 0.0
plot_high = 0.2
n_grid = 10
x_grid = np.linspace(plot_low,plot_high,n_grid+1)
n_points = len(x_grid)

bkg_event_size = 5000
sig_event_size = int(sig_bkg_ratio * bkg_event_size)

fig, ax = plt.subplots(n_dim,n_dim,figsize=figsize)

for i,p in enumerate(param_grid):

    print("-"*100)
    print("Drawing plot ",i," with param ",p.condition[0], "signal background ratio ",sig_bkg_ratio)

    # Row-major placement of panel i in the n_dim x n_dim grid.
    ix,iy = divmod(i,n_dim)

    start_time = time.time()

    # NOTE(review): direction is set on both flows and never restored here;
    # its meaning is defined in the model class, which is not visible in this
    # notebook -- confirm other cells do not rely on the previous value.
    nf_model.sig.direction = -1
    nf_model.bkg.direction = -1

    idx_sig_batch = np.random.randint(0,p.x.shape[0],sig_event_size)
    # Background indices are drawn from bkg.x (bkg itself is the container);
    # they are not consumed by the scan below yet.
    idx_bkg_batch = np.random.randint(0,bkg.x.shape[0],bkg_event_size)

    # Evaluate the batch drawn for THIS parameter point, not an index array
    # left over from an earlier cell.
    x_sig_batch = p.x[idx_sig_batch]
    n_events = x_sig_batch.shape[0]

    # Stack one copy of the batch per scan point, paired with a condition
    # column holding that scan value, so a single model call covers the scan.
    condition_concat = np.repeat(x_grid,n_events).reshape(-1,1)
    x_data_concat = np.concatenate([x_sig_batch]*n_points)

    # NOTE(review): nf_model is the ObjDict holding sig/bkg/disc; whether it
    # exposes batch_log_loss is not visible here -- presumably this should be
    # called on one of the flows (e.g. nf_model.sig). Confirm before running.
    z_concat = nf_model.batch_log_loss([x_data_concat,condition_concat])

    # Fresh buffer per panel so no two panels can share the same array.
    z = np.zeros(n_points)
    for ig in range(n_points):
        z[ig] = tf.reduce_mean(z_concat[ig*n_events:(ig+1)*n_events])

    ax[ix,iy].plot(x_grid,z,)
    # Vertical arrow from the top to the bottom of the panel at the true condition.
    ylims = ax[ix,iy].get_ylim()
    ax[ix,iy].arrow(p.condition[0], ylims[1], 0., ylims[0]-ylims[1],)

    elapsed_time = time.time() - start_time
    print("Time used: "+str(elapsed_time)+"s")

fig.savefig(os.path.join(output_dir,'log_loss.png'))
# Release the figure from pyplot's registry once it is on disk.
plt.close(fig)