In [None]:
import numpy as np
import h5py as h5
from matplotlib import pyplot as plt
import tensorflow as tf
import os

In [None]:
from omnifold import DataLoader, MultiFold, MLP, SetStyle, HistRoutine

In [None]:
# Directory holding the HDF5 samples read by the cells below. The default is
# the original shared location; set the OMNIFOLD_DATA_PATH environment variable
# to run the notebook against a copy stored elsewhere.
data_path = os.environ.get('OMNIFOLD_DATA_PATH', '/global/cfs/cdirs/ntrain1/unfolding')

In [None]:
# Maximum number of events read from each training file.
nevts = 100_000


def _read_gen_reco(file_name, max_events=None):
    """Read the 'gen_subs' and 'reco_subs' datasets from one HDF5 file.

    The file is opened once, read-only, and closed as soon as both slices are
    taken. Slicing an h5py dataset yields an in-memory NumPy array, so the
    returned arrays stay valid after the file is closed.

    Args:
        file_name: name of the file inside ``data_path``.
        max_events: keep only the first ``max_events`` rows (None keeps all).

    Returns:
        Tuple ``(gen, reco)`` with the two sliced arrays.
    """
    with h5.File(os.path.join(data_path, file_name), 'r') as h5_file:
        gen = h5_file['gen_subs'][:max_events]
        reco = h5_file['reco_subs'][:max_events]
    return gen, reco


# The Herwig file provides the sample treated as "data", the Pythia file the
# simulation ("mc").
gen_data, reco_data = _read_gen_reco('train_herwig.h5', nevts)
gen_mc, reco_mc = _read_gen_reco('train_pythia.h5', nevts)

# The data loader only receives the reco arrays; the simulation loader
# receives both reco and gen.
data = DataLoader(reco = reco_data,normalize=True)
mc = DataLoader(reco = reco_mc,gen = gen_mc,normalize=True)

## Let's take a quick look at the data

In [None]:
# Apply the plotting style provided by the omnifold package before any figure is drawn.
# NOTE(review): presumably this changes global matplotlib settings — confirm in omnifold.
SetStyle()

In [None]:
# Axis label of each input feature; entry i labels column i of the event arrays.
var_names = [
    "Jet Mass [GeV]",
    "Jet Width",
    "$n_{constituents}$",
    r"$ln\rho$",
    "$z_g$",
    r"$\tau_{21}$",
]

# Default number of bin edges passed to np.linspace.
nbins = 50

# (lower edge, upper edge, number of edges) for each feature, in var_names order.
# The third feature keeps its own fixed count of 80 edges instead of nbins.
edge_specs = (
    (0, 75, nbins),
    (0, 0.6, nbins),
    (0, 80, 80),
    (-14, -2, nbins),
    (0.0, 0.5, nbins),
    (0.0, 1.2, nbins),
)
binning = [np.linspace(low, high, count) for low, high, count in edge_specs]

In [None]:
# Event arrays to compare; the keys become the histogram entries.
input_samples = {
    'gen_data': gen_data,
    'reco_data': reco_data,
    'gen_mc': gen_mc,
    'reco_mc': reco_mc,
}

# One figure per feature: take the matching column from every sample and
# draw it with that feature's binning, without a ratio panel.
for column, label in enumerate(var_names):
    per_feature = {name: events[:,column] for name, events in input_samples.items()}
    HistRoutine(
        per_feature,
        label,
        reference_name = 'gen_data',
        plot_ratio=False,
        binning=binning[column],
    )

## Let's now create the neural network models we are going to use to model the weight distributions

In [None]:
# Number of input features seen by each network.
ndim = len(var_names)

# Two separately constructed MLP instances with the same input size
# (model1 is built first, then model2).
model1, model2 = (MLP(ndim) for _ in range(2))

In [None]:
# Call summary() directly instead of wrapping it in print(): a summary method
# that prints its own table and returns None would otherwise be followed by a
# stray "None" line. Any non-None return value is still shown by the notebook.
model1.summary()

## Let's Unfold!

In [None]:
# Training settings forwarded to MultiFold as keyword arguments.
unfold_settings = dict(
    batch_size = 1024,
    niter = 1,  # number of OmniFold iterations
    epochs = 10,
    weights_folder = 'weights',
    verbose = True,
)

# "ZJets" is the first positional argument, followed by the two networks and
# the data / simulation loaders.
# NOTE(review): presumably the first argument is a run label — confirm in omnifold.
omnifold = MultiFold("ZJets", model1, model2, data, mc, **unfold_settings)

In [None]:
# Run the preparation step first, then the unfolding, on the MultiFold object
# configured above.
# NOTE(review): presumably Unfold() trains both networks for the configured
# number of iterations and epochs — confirm in omnifold.
omnifold.Preprocessing()
omnifold.Unfold()

## Now, let's evaluate the model!

In [None]:
# Read all generator-level test events and close the file right away; the
# slice is an in-memory NumPy array, so it stays usable after the file closes.
# NOTE(review): the weights are evaluated on the Herwig test file although the
# Pythia sample was passed to MultiFold as `mc` — confirm this is the intended
# evaluation sample.
with h5.File(os.path.join(data_path,'test_herwig.h5'), 'r') as validation_file:
    validation_data = validation_file['gen_subs'][:]

# Weights obtained by evaluating the second network on validation_data.
unfolded_weights = omnifold.reweight(validation_data,omnifold.model2,batch_size=1000)

## More plots!

In [None]:
# Per-event weights for every histogram entry. They do not depend on the
# feature being plotted, so the dict is built once outside the loop.
weight_dict = {
    'gen_data': data.weight,
    'reco_data': data.weight,
    'gen_mc': mc.weight,
    'reco_mc': mc.weight,
    'unfolded': unfolded_weights,
}

for iv, var in enumerate(var_names):
    data_dict = {
        'gen_data': gen_data[:,iv],
        'reco_data': reco_data[:,iv],
        'gen_mc': gen_mc[:,iv],
        'reco_mc': reco_mc[:,iv],
        # unfolded_weights was computed on validation_data, so the 'unfolded'
        # histogram must use those same events for values and weights to
        # match in length and order.
        'unfolded': validation_data[:,iv],
    }
    # Pass the weights explicitly; without them every entry is drawn
    # unweighted and the unfolding result never reaches the plot.
    HistRoutine(
        data_dict,
        var,
        reference_name = 'gen_data',
        weights = weight_dict,
        binning = binning[iv],
    )

# Exercises

## The unfolded distribution looks good, but not quite there yet. Try changing: 
* The number of OmniFold iterations
* The number of training epochs

## We use $\ln\rho$ as one of the inputs. What does the plot for $\rho$ alone look like?

## Let's create a new observable: Jet mass / Jet width. What does that distribution look like for the unfolded events?