# Gaussian Example

In this example, we consider a one-dimensional Gaussian distribution at the particle level and two Gaussian distributions at the detector level. The data are generated as follows:
$$
\begin{aligned}
    Y_{1} &= X + Z_{1}, \\
    Y_{2} &= X + Z_{2},
\end{aligned}
$$
where $X\sim\mathcal{N}(\mu,\sigma^2), Z_{1}\sim\mathcal{N}(0,1), Z_{2}\sim\mathcal{N}(0,\theta^2)$. Here, $\theta$ is the nuisance parameter, which only affects the second coordinate of the detector-level data. 

In [None]:
# Standard libraries
import numpy as np
import pandas as pd
from scipy.stats import norm
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as mcolors
import seaborn as sns
import glob
from natsort import natsorted

# Pytorch
import torch
from torch.utils.data import Dataset
from torch.utils.data import random_split, DataLoader
from torch import nn, optim

# POF functions
import utils
import profile_omnifold as pof

# Select the compute device: use the GPU when PyTorch reports CUDA support,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    dvc = "cuda"
else:
    dvc = "cpu"
print(f"Using {dvc} device")

## Generate Gaussian Data

In [2]:
# specify the nuisance parameter
# (true value used to generate the experimental data: it is assigned to
# sigma2_kernel_data, the smearing width of the second detector coordinate)
theta = 1.5

In [3]:
# Smearing kernel for the MC data: Gaussian noise with a common mean and one
# standard deviation per detector coordinate.
mu_kernel_mc, sigma1_kernel_mc, sigma2_kernel_mc = 0, 1, 1

# Smearing kernel for the experimental data: same mean and first-coordinate
# width as written here, while the second-coordinate width is the nuisance
# parameter theta.
mu_kernel_data, sigma1_kernel_data = 0, 1
sigma2_kernel_data = theta

def k_mc(y,x):
    """Evaluate the MC response kernel for each (y, x) pair.

    The result is the product of two normal densities, one per detector
    coordinate, evaluated at the residuals ``y[:, 0] - x[:, 0]`` and
    ``y[:, 1] - x[:, 0]`` with the MC kernel mean and widths.
    """
    resid1 = y[:, 0] - x[:, 0]
    resid2 = y[:, 1] - x[:, 0]
    dens1 = norm.pdf(resid1, loc=mu_kernel_mc, scale=sigma1_kernel_mc)
    dens2 = norm.pdf(resid2, loc=mu_kernel_mc, scale=sigma2_kernel_mc)
    return dens1 * dens2

def k_data(y,x):
    """Evaluate the experimental-data response kernel for each (y, x) pair.

    Same construction as the MC kernel, but using the data kernel mean and
    widths: a product of two normal densities at the residuals
    ``y[:, 0] - x[:, 0]`` and ``y[:, 1] - x[:, 0]``.
    """
    resid1 = y[:, 0] - x[:, 0]
    resid2 = y[:, 1] - x[:, 0]
    dens1 = norm.pdf(resid1, loc=mu_kernel_data, scale=sigma1_kernel_data)
    dens2 = norm.pdf(resid2, loc=mu_kernel_data, scale=sigma2_kernel_data)
    return dens1 * dens2

# reweighting function w parametrized by theta
# w(x,y,theta) = p(y|x,theta)/p(y|x,mc)
def w_func(x,y,theta):
    """Return the kernel ratio p(y|x,theta) / p(y|x,mc) for each event.

    The numerator is the data kernel with the second-coordinate width set to
    ``theta``; the denominator is ``k_mc(y, x)``.
    """
    first_coord = norm.pdf(y[:, 0] - x[:, 0], loc=mu_kernel_data, scale=sigma1_kernel_data)
    second_coord = norm.pdf(y[:, 1] - x[:, 0], loc=mu_kernel_data, scale=theta)
    numerator = first_coord * second_coord
    return numerator / k_mc(y, x)

# derivative of w with respect to theta
def w_func_derivative(x,y,theta):
    """Return d w(x, y, theta) / d theta for each event.

    Only the second-coordinate Gaussian in ``w_func`` depends on ``theta``
    (it is that Gaussian's standard deviation), so with
    ``z = y[:, 1] - x[:, 0] - mu_kernel_data``

        dw/dtheta = (z**2 / theta**3 - 1 / theta) * w(x, y, theta).

    The residual is centred on ``mu_kernel_data`` so the derivative matches
    the density used in ``w_func`` even when that mean is non-zero; with the
    current value of 0 the result is unchanged.
    """
    z = y[:, 1] - x[:, 0] - mu_kernel_data
    return (z**2 / (theta**3) - 1 / theta) * w_func(x, y, theta)

# true reweighting function on the MC response kernel (i.e. k_data/k_mc)
def w_true(x,y):
    """Return the ratio of the data kernel to the MC kernel for each event."""
    numerator = k_data(y, x)
    denominator = k_mc(y, x)
    return numerator / denominator

### Experimental Data (Nature)

In [4]:
# Mean and standard deviation of the particle-level Gaussian used for the
# experimental data.
mu_data, sigma_data = 0.8, 1

def px(x):
    """Particle-level density of the experimental data evaluated at ``x``."""
    particle_dist = norm(loc=mu_data, scale=sigma_data)
    return particle_dist.pdf(x)
def py1(y):
    """Density of the first detector coordinate of the experimental data.

    Normal with mean ``mu_data + mu_kernel_data`` and variance equal to the
    sum of the particle-level and first-coordinate kernel variances.
    """
    center = mu_data + mu_kernel_data
    width = np.sqrt(sigma_data**2 + sigma1_kernel_data**2)
    return norm.pdf(y, loc=center, scale=width)
def py2(y):
    """Density of the second detector coordinate of the experimental data.

    Normal with mean ``mu_data + mu_kernel_data`` and variance equal to the
    sum of the particle-level and second-coordinate kernel variances.
    """
    center = mu_data + mu_kernel_data
    width = np.sqrt(sigma_data**2 + sigma2_kernel_data**2)
    return norm.pdf(y, loc=center, scale=width)


# Draw the experimental sample: particle-level values first, then one smeared
# detector-level value per coordinate centred on each particle-level value.
Ndata = 100000
x_data = np.random.normal(mu_data, sigma_data, Ndata)[:, np.newaxis]
y_data1 = np.random.normal(x_data[:, 0] + mu_kernel_data, sigma1_kernel_data, Ndata)[:, np.newaxis]
y_data2 = np.random.normal(x_data[:, 0] + mu_kernel_data, sigma2_kernel_data, Ndata)[:, np.newaxis]
# column 0 holds Y1, column 1 holds Y2
y_data = np.concatenate((y_data1, y_data2), axis=1)

### Monte Carlo Data (Simulation)

In [5]:
# Mean and standard deviation of the particle-level Gaussian used for the
# Monte Carlo simulation.
mu_mc, sigma_mc = 0, 1


def qx(x):
    """Particle-level density of the MC simulation evaluated at ``x``."""
    particle_dist = norm(loc=mu_mc, scale=sigma_mc)
    return particle_dist.pdf(x)
def qy1(y):
    """Density of the first detector coordinate of the MC simulation.

    Normal with mean ``mu_mc + mu_kernel_mc`` and variance equal to the sum
    of the particle-level and first-coordinate kernel variances.
    """
    center = mu_mc + mu_kernel_mc
    width = np.sqrt(sigma_mc**2 + sigma1_kernel_mc**2)
    return norm.pdf(y, loc=center, scale=width)
def qy2(y):
    """Density of the second detector coordinate of the MC simulation.

    Normal with mean ``mu_mc + mu_kernel_mc`` and variance equal to the sum
    of the particle-level and second-coordinate kernel variances.
    """
    center = mu_mc + mu_kernel_mc
    width = np.sqrt(sigma_mc**2 + sigma2_kernel_mc**2)
    return norm.pdf(y, loc=center, scale=width)


# Draw the MC sample: particle-level values first, then one smeared
# detector-level value per coordinate centred on each particle-level value.
Nsim = 100000
x_mc = np.random.normal(mu_mc, sigma_mc, Nsim)[:, np.newaxis]
y_mc1 = np.random.normal(x_mc[:, 0] + mu_kernel_mc, sigma1_kernel_mc, Nsim)[:, np.newaxis]
y_mc2 = np.random.normal(x_mc[:, 0] + mu_kernel_mc, sigma2_kernel_mc, Nsim)[:, np.newaxis]
# column 0 holds Y1, column 1 holds Y2
y_mc = np.concatenate((y_mc1, y_mc2), axis=1)

### Plot both experimental and MC distributions

In [None]:
# One panel per variable (X, Y1, Y2); each panel overlays the kernel density
# estimate of the experimental sample and of the MC sample.
fig, ax = plt.subplots(1, 3, sharey=True, sharex=True, figsize=(15, 6))

# Curve styling shared by all panels
exp_style = dict(color="black", linestyle="-", linewidth=2, bw_adjust=2, label="Experiment")
mc_style = dict(color="tab:blue", linestyle=":", linewidth=2, bw_adjust=2, label="MC Simulation")

# (experimental sample, MC sample) for each panel, left to right
panel_samples = (
    (x_data[:, 0], x_mc[:, 0]),
    (y_data[:, 0], y_mc[:, 0]),
    (y_data[:, 1], y_mc[:, 1]),
)
for panel, (exp_sample, mc_sample) in zip(ax, panel_samples):
    sns.kdeplot(x=exp_sample, ax=panel, **exp_style)
    sns.kdeplot(x=mc_sample, ax=panel, **mc_style)


ax[2].legend(loc="best", fontsize=15)
ax[0].set_xlim(-6, 6)
for panel, axis_label in zip(ax, (r"$X$", r"$Y_1$", r"$Y_2$")):
    panel.set_xlabel(axis_label, fontsize=20)
ax[0].set_ylabel("Probability Density", fontsize=20)
ax[0].tick_params(axis="both", labelsize=14)
fig.tight_layout()

## Train W model

We train a neural network (NN) model to learn the W function, which represents the ratio of the response kernel parametrized by $\theta$ to the Monte Carlo (MC) kernel, i.e. $w(y,x,\theta)=p(y|x,\theta)/q(y|x)$.

In this example, we know that the response kernel follows a Gaussian distribution, meaning the analytic form of the W function is also known. As a result, training the W model is not necessary to run the ProfileOmniFold algorithm. Instead, we can use the true W function `w_func` as a direct alternative in the next section if preferred.

In [11]:
# Hyperparameters for training the neural networks. In this notebook 'lr',
# 'patience' and 'activation' are read by the training cells, and the whole
# dict is stored inside the saved checkpoints.
config = dict(
    batch_size=10000,
    lr=0.001,
    patience=10,
    activation=nn.ReLU(),
)

### Systematic Data (varying theta, used for training W function)

In [7]:
Nsys = 100000
# simulation with theta variation
theta_min = 0.5
theta_max = 2.0

# Two independent sets of theta values drawn uniformly from [theta_min, theta_max).
# theta1_sim supplies the per-event smearing width of the systematic sample
# below; theta0_sim is only handed to pof.w_dataset together with the nominal
# MC sample.
theta0_sim = np.random.uniform(theta_min, theta_max, Nsys).reshape(-1, 1)
theta1_sim = np.random.uniform(theta_min, theta_max, Nsys).reshape(-1, 1)

thetas = theta1_sim[:,0]

# particle-level values of the systematic sample (same distribution as the MC)
x_sys = np.random.normal(loc=mu_mc,scale=sigma_mc,size=Nsys).reshape(-1,1)

# detector-level values smeared with the nominal MC kernel
# NOTE(review): y_nominal and thetas are not read anywhere in the visible
# notebook; kept in case other code relies on them.
y_nominal1 = np.random.normal(loc=x_sys[:,0],scale=sigma1_kernel_mc,size=Nsys).reshape(-1,1)
y_nominal2 = np.random.normal(loc=x_sys[:,0],scale=sigma2_kernel_mc,size=Nsys).reshape(-1,1)
y_nominal = np.hstack([y_nominal1,y_nominal2])

# detector-level values of the systematic sample: the first coordinate uses
# the nominal width, the second uses the event's own theta as its width.
y_sys1 = np.random.normal(loc=x_sys[:,0],scale=sigma1_kernel_mc,size=Nsys).reshape(-1,1)
# np.random.normal broadcasts array-valued loc/scale, so a single call draws
# one value per event instead of looping over Nsys scalar draws in Python.
y_sys2 = np.random.normal(loc=x_sys[:,0], scale=theta1_sim[:,0]).reshape(-1,1)
y_sys = np.hstack([y_sys1,y_sys2])

### Train a single W function

In [9]:
# Build the W-function dataset from the nominal MC sample (with theta0_sim)
# and the systematic sample (with theta1_sim).
w_ds = pof.w_dataset(x_mc, y_mc, theta0_sim, x_sys, y_sys, theta1_sim)

# 50/50 train/test split; when the length is odd the extra sample goes to
# the test set.
n_train = len(w_ds) // 2
n_test = len(w_ds) - n_train
w_ds_train, w_ds_test = random_split(w_ds, [n_train, n_test])

# NOTE(review): the batch size is hard-coded here and config['batch_size'] is
# not used -- confirm which value is intended.
w_dataloader_train = DataLoader(
    w_ds_train,
    batch_size=100000,
    shuffle=True,
)
w_dataloader_test = DataLoader(
    w_ds_test,
    batch_size=100000,
    shuffle=False,
)

In [None]:
# Train the two networks that make up the W model. Both use a sigmoid output
# with binary cross-entropy loss, the Adam optimiser and the shared
# train/test loaders; they differ in input size (4 vs 2).

# --- wRT network ---
wRT_model_network = pof.wRT_network(
    sigmoid=True,
    n_inputs=4,
    activation=config['activation'],
).double().to(dvc)
optimizerRT = optim.Adam(wRT_model_network.parameters(), lr=config['lr'])
loss_fn_RT = nn.BCELoss()
wRT_tr = pof.w_trainer(
    w_dataloader_train,
    w_dataloader_test,
    wRT_model_network,
    loss_fn_RT,
    optimizerRT,
    patience=config['patience'],
)

# --- wT network ---
wT_model_network = pof.wT_network(
    sigmoid=True,
    n_inputs=2,
    activation=config['activation'],
).double().to(dvc)
optimizerT = optim.Adam(wT_model_network.parameters(), lr=config['lr'])
loss_fn_T = nn.BCELoss()
# NOTE(review): unlike wRT_tr, no `patience` is passed here, so the trainer's
# own default applies -- confirm this asymmetry is intended.
wT_tr = pof.w_trainer(
    w_dataloader_train,
    w_dataloader_test,
    wT_model_network,
    loss_fn_T,
    optimizerT,
)

wRT_tr.fit()
wT_tr.fit()

In [None]:
# optionally, save the models for later access
import os

# Training settings stored next to the weights in both checkpoints.
checkpoint_meta = {
    "config": config,
    "num_sys": Nsys,
    "num_mc": Nsim,
    "theta_min": theta_min,
    "theta_max": theta_max,
}
wRT_checkpoint = {"model_state_dict": wRT_model_network.state_dict(), **checkpoint_meta}
wT_checkpoint = {"model_state_dict": wT_model_network.state_dict(), **checkpoint_meta}

# torch.save opens the target path for writing and does not create missing
# parent folders, so make sure the output directory exists first.
model_dir = "models/2DGaussian"
os.makedirs(model_dir, exist_ok=True)

torch.save(wRT_checkpoint, f"{model_dir}/wRT_network_2dgaussian.pth")
torch.save(wT_checkpoint, f"{model_dir}/wT_network_2dgaussian.pth")

### Train an ensemble of W functions

Training an ensemble of W functions can help to reduce the model uncertainty in the ProfileOmniFold algorithm. In this section, we train multiple W models and save them for later use.

In [None]:
import os

num_ensemble = 10
# Theta range for systematic data
theta_min = 0.5
theta_max = 2.0
Nsys = 100000
Nsim = 100000

# torch.save does not create missing parent folders, so make sure the output
# directory exists before the first checkpoint is written.
ensemble_dir = "models/2DGaussian/Ensemble"
os.makedirs(ensemble_dir, exist_ok=True)

# Each ensemble member is trained on freshly drawn MC and systematic samples.
# NOTE(review): x_mc / y_mc are overwritten on every pass, so the cells that
# follow see the sample drawn for the last member -- confirm this is intended.
for i in range(1,num_ensemble+1):
    print("model ", i, "training...")
    # nominal MC sample
    x_mc = np.random.normal(loc=mu_mc,scale=sigma_mc,size=Nsim).reshape(-1,1)
    y_mc1 = np.random.normal(loc=x_mc[:,0]+mu_kernel_mc,scale=sigma1_kernel_mc,size=Nsim).reshape(-1,1)
    y_mc2 = np.random.normal(loc=x_mc[:,0]+mu_kernel_mc,scale=sigma2_kernel_mc,size=Nsim).reshape(-1,1)
    y_mc = np.hstack([y_mc1,y_mc2])

    # theta values: theta1_sim sets the per-event smearing width of the
    # systematic sample, theta0_sim is passed along with the nominal sample
    theta0_sim = np.random.uniform(theta_min, theta_max, Nsys).reshape(-1, 1)
    theta1_sim = np.random.uniform(theta_min, theta_max, Nsys).reshape(-1, 1)

    thetas = theta1_sim[:,0]

    x_sys = np.random.normal(loc=mu_mc,scale=sigma_mc,size=Nsys).reshape(-1,1)

    # NOTE(review): y_nominal and thetas are not read anywhere in the visible
    # notebook; kept in case other code relies on them.
    y_nominal1 = np.random.normal(loc=x_sys[:,0],scale=sigma1_kernel_mc,size=Nsys).reshape(-1,1)
    y_nominal2 = np.random.normal(loc=x_sys[:,0],scale=sigma2_kernel_mc,size=Nsys).reshape(-1,1)
    y_nominal = np.hstack([y_nominal1,y_nominal2])

    y_sys1 = np.random.normal(loc=x_sys[:,0],scale=sigma1_kernel_mc,size=Nsys).reshape(-1,1)
    # np.random.normal broadcasts array-valued loc/scale, so one call draws a
    # value per event instead of looping over Nsys scalar draws in Python.
    y_sys2 = np.random.normal(loc=x_sys[:,0], scale=theta1_sim[:,0]).reshape(-1,1)
    y_sys = np.hstack([y_sys1,y_sys2])

    # Convert data into W dataset, data loaders and create loss function
    w_ds = pof.w_dataset(x_mc, y_mc, theta0_sim, x_sys, y_sys, theta1_sim)

    # split samples to 50% train and 50% test sets
    w_ds_train, w_ds_test = random_split(w_ds, [len(w_ds)//2, len(w_ds)-len(w_ds)//2])
    w_dataloader_train = DataLoader(w_ds_train, batch_size=100000, shuffle=True)
    w_dataloader_test = DataLoader(w_ds_test, batch_size=100000, shuffle=False)

    # Train W model

    wRT_model_network = pof.wRT_network(sigmoid=True, n_inputs=4, activation=config['activation']).double().to(dvc)
    optimizerRT = optim.Adam(wRT_model_network.parameters(), lr=config['lr'])
    loss_fn_RT = nn.BCELoss()
    wRT_tr = pof.w_trainer(w_dataloader_train, w_dataloader_test, wRT_model_network, loss_fn_RT, optimizerRT, patience=config['patience'])

    wT_model_network = pof.wT_network(sigmoid=True, n_inputs=2, activation=config['activation']).double().to(dvc)
    optimizerT = optim.Adam(wT_model_network.parameters(), lr=config['lr'])
    loss_fn_T = nn.BCELoss()
    # NOTE(review): no `patience` is passed for the wT trainer, unlike wRT_tr
    # above -- confirm this asymmetry is intended.
    wT_tr = pof.w_trainer(w_dataloader_train, w_dataloader_test, wT_model_network, loss_fn_T, optimizerT)

    wRT_tr.fit()
    wT_tr.fit()

    # training settings stored next to the weights in both checkpoints
    checkpoint_meta = {
        "config": config,
        "num_sys": x_sys.shape[0],
        "num_mc": x_mc.shape[0],
        "theta_min": theta_min,
        "theta_max": theta_max,
    }
    wRT_checkpoint = {"model_state_dict": wRT_model_network.state_dict(), **checkpoint_meta}
    wT_checkpoint = {"model_state_dict": wT_model_network.state_dict(), **checkpoint_meta}

    torch.save(wRT_checkpoint, f"{ensemble_dir}/wRT_network_2dgaussian({i}).pth")
    torch.save(wT_checkpoint, f"{ensemble_dir}/wT_network_2dgaussian({i}).pth")

## Load W model

In [None]:
# load the models if saved previously
wRT_model_network = pof.wRT_network(sigmoid=True, n_inputs=4).double().to(dvc)
wT_model_network = pof.wT_network(sigmoid=True, n_inputs=2).double().to(dvc)

# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine also loads on a CPU-only one.
# NOTE(review): the checkpoints written by this notebook also contain `config`,
# which holds an nn.ReLU() instance; PyTorch versions where torch.load defaults
# to weights_only=True may refuse to unpickle it -- confirm against the
# installed version.
wRT_model_network.load_state_dict(
    torch.load("models/2DGaussian/Ensemble/wRT_network_2dgaussian(1).pth", map_location=dvc)["model_state_dict"]
)
wT_model_network.load_state_dict(
    torch.load("models/2DGaussian/Ensemble/wT_network_2dgaussian(1).pth", map_location=dvc)["model_state_dict"]
)

<All keys matched successfully>

In [None]:
# load the ensemble
# The two lists are paired by position below, so both are sorted the same way
# (natural order: 1, 2, ..., 10) to keep each wRT file next to its wT partner.
wRT_list = natsorted(glob.glob("models/2DGaussian/Ensemble/wRT_network_2dgaussian(*).pth"))
wT_list = natsorted(glob.glob("models/2DGaussian/Ensemble/wT_network_2dgaussian(*).pth"))

# every wRT checkpoint needs a wT checkpoint to be paired with
if len(wRT_list) != len(wT_list):
    raise ValueError(
        f"found {len(wRT_list)} wRT checkpoints but {len(wT_list)} wT checkpoints"
    )

wRT_ensemble = []
wT_ensemble = []
for wRT_path, wT_path in zip(wRT_list, wT_list):
    wRT_member = pof.wRT_network(sigmoid=True, n_inputs=4).double().to(dvc)
    wT_member = pof.wT_network(sigmoid=True, n_inputs=2).double().to(dvc)
    # map_location remaps the stored tensors onto the current device, so
    # checkpoints written on a GPU machine also load on a CPU-only one.
    wRT_member.load_state_dict(torch.load(wRT_path, map_location=dvc)["model_state_dict"])
    wT_member.load_state_dict(torch.load(wT_path, map_location=dvc)["model_state_dict"])
    wRT_ensemble.append(wRT_member)
    wT_ensemble.append(wT_member)


In [None]:
# Wrap the W function on the MC dataset so that it becomes a function of
# theta only.
ds = pof.test_dataset(x_mc, y_mc)
ds_dataloader = DataLoader(
    dataset=ds,
    batch_size=100000,
    shuffle=False,
)

# Option 1 -- w_theta from a single model:
#w_theta_nn = pof.make_w_theta(ds_dataloader, wRT_model_network, wT_model_network)
# Option 2 -- w_theta from the ensemble, combined with func='median':
w_theta_nn_ensemble = pof.make_w_theta_ensemble(
    ds_dataloader,
    wRT_ensemble,
    wT_ensemble,
    func='median',
)

## Profile OmniFold Algorithm

Now, let's run the algorithm! There are two options:  
 
- Use the fitted w function, `w_theta_nn` (`w_theta_nn_ensemble`), obtained above.  
- Use the true w function, `w_func`, since we know the form of the smearing kernel.  

In [None]:
# use the NN solution
# (alternative to the analytic `w_theta` cell; whichever of the two cells ran
# last defines `w_theta`)
#w_theta = w_theta_nn
w_theta = w_theta_nn_ensemble

In [None]:
# use the true function
def w_theta(theta):
    """Analytic W function evaluated on the MC sample for a given ``theta``.

    Forwards to ``w_func`` with the global ``x_mc`` and ``y_mc``, so the
    result depends on ``theta`` only.
    """
    return w_func(x=x_mc, y=y_mc, theta=theta)

In [None]:
# Run Profile OmniFold once, starting from a single initial theta value.
theta0 = 1.0
pof_out = pof.profile_omnifold(
    y_data,
    x_mc,
    y_mc,
    iterations=10,
    w_theta=w_theta,
    theta_bar=1.0,
    theta0=theta0,
    theta_range=[1.0,2.0],
    num_grid_points=30,
    no_penalty=True,
    epochs=20,
    patience=3,
    verbose=0,
)
# weights returned by the run; later cells index them as [iteration, k, event]
nu_pof = pof_out['weights']

In [None]:
# Generate an ensemble of POF solutions, one per initial value theta0.
theta0_list = list(np.arange(1.0, 2.0, 0.1))
print('theta0_list:', np.round(theta0_list,1))

# Settings shared by every run of the ensemble.
# NOTE(review): the theta0 values go up to 1.9 while theta_range is
# [0.5, 1.5], and the single-run cell uses [1.0, 2.0] -- confirm the range.
pof_settings = dict(
    iterations=10,
    w_theta=w_theta,
    theta_bar=1.0,
    theta_range=[0.5, 1.5],
    num_grid_points=50,
    no_penalty=True,
    epochs=20,
    return_Q=True,
    return_acc=True,
    return_loss=True,
    verbose=0,
)
pof_list = []
for theta0 in theta0_list:
    res = pof.profile_omnifold(y_data, x_mc, y_mc, theta0=theta0, **pof_settings)
    pof_list.append(res)

In [17]:
# Both tables share the same layout: an 'iteration' column (1..10) followed
# by one column per POF run, named after that run's initial theta0.
iteration_index = np.arange(1,11,dtype=int)
columns = ['iteration'] + ['theta0: ' + str(np.round(run['theta0'], 1)) for run in pof_list]

# updated theta in each iteration (entry [:, 3, 0] of each run's weights)
theta_rows = [run['weights'][:,3,0] for run in pof_list]
df_theta = pd.DataFrame(np.vstack([iteration_index, *theta_rows]).T, columns=columns)


# validation accuracy of step 1 neural network: forward-fill along the
# columns and keep the last one, i.e. the last available value in each row
acc_rows = [run['step1_val_acc'].ffill(axis=1).iloc[:,-1] for run in pof_list]
temp = np.vstack([iteration_index, *acc_rows])
df_acc = pd.DataFrame(temp.T, columns=columns)

In [None]:
# theta and accuracy evolution

# Normalize values to colormap range.
# The normaliser is deliberately not called `norm`: that name is bound to
# scipy.stats.norm by the imports, and the kernel/density helpers (k_mc,
# k_data, w_func, px, ...) look it up at call time. Rebinding it here would
# make every later call to those helpers fail.
color_norm = mcolors.Normalize(vmin=min(theta0_list), vmax=max(theta0_list))
cmap = cm.viridis_r

# Plot each theta curve, with the run's starting value theta0 prepended as
# iteration 0
fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=(10, 8), sharex=True)
iteration_axis = [0] + df_theta['iteration'].tolist()
for i in range(1,len(df_theta.columns)):
    theta_path = [pof_list[i-1]['theta0']] + df_theta.iloc[:,i].tolist()
    ax1.plot(iteration_axis, theta_path, color=cmap(color_norm(theta0_list[i-1])))


ax1.axhline(y=theta, color='red', linestyle='--', linewidth=1.2, label=rf'Truth: $\theta={theta}$')
ax1.legend(loc='best', fontsize=14)
ax1.set_ylabel(r"$\hat{\theta}$", fontsize=16)
ax1.set_title(r"$\hat{\theta}$ update in EM iteration", fontsize=16)
ax1.tick_params(axis='both', labelsize=14)
ax1.grid(True)

# Plot each goodness_of_fit curve: 1 - 2*|accuracy - 0.5|, which equals 1
# when the validation accuracy is exactly 0.5 and 0 when it is 0 or 1
for i in range(1,len(df_acc.columns)):
    goodness = 1 - np.abs(df_acc.iloc[:,i]-0.5) * 2
    ax2.plot(df_theta['iteration'], goodness, color=cmap(color_norm(theta0_list[i-1])))


ax2.set_xlabel("Iteration", fontsize=16)
ax2.set_ylabel("Goodness of fit", fontsize=16)
ax2.set_title("Goodness of fit in EM iteration", fontsize=16)
ax2.tick_params(axis='both', labelsize=14)
ax2.grid(True)


# colour bar mapping line colour back to the initial theta0
sm = cm.ScalarMappable(cmap=cmap, norm=color_norm)
sm.set_array([])  # dummy for colorbar
cbar = fig.colorbar(sm, ax=[ax1, ax2], orientation='vertical', fraction=0.025, pad=0.02)
cbar.set_label(r'$\theta_0$', size=16)

In [None]:
# indicate which iteration to select the best weights from
# (-1 is used as an index into the iteration axis, i.e. the last iteration)
itr = -1

# find the best weights (fit) by the classifier accuracy (closer to 0.5 indicates a better fit)
# best_run is then used as the index of the chosen run in pof_list
nu_pof, best_run = pof.best_weights(pof_list, itr)
pof_out = pof_list[best_run]

In [None]:
# Summarise the selected run: its index in pof_list, its initial theta0, the
# theta stored at entry [itr, 3, 0] of the selected weights, and the true
# value used to generate the data.
print("chosen run:", best_run)
print("theta0:", pof_out['theta0'])
print("fitted theta:", nu_pof[itr,3,0])
print("true theta:", theta)

In [None]:
# As a comparison, we can also run the vanilla omnifold algorithm
# (same data, iteration count and epochs as the POF runs above, but without
# the w_theta / theta arguments)
of_out = pof.omnifold(y_data, x_mc, y_mc, iterations=10, verbose=0, epochs=20)
nu_of = of_out['weights']

## Plot the results

In [None]:
# indicate which iteration to plot
itr = -1

fig, ax = plt.subplots(1, 1, sharey=True, sharex=True, figsize=(10, 7))

# One entry per curve, drawn in this order. The reweighted curves reuse the
# MC particle-level sample with entry [itr, 1, :] of the respective weights.
curve_specs = [
    dict(x=x_data[:, 0], color="black", linestyle="-", label="Experiment"),
    dict(x=x_mc[:, 0], color="tab:blue", linestyle=":", label="MC Simulation"),
    dict(
        x=x_mc[:, 0], weights=nu_of[itr, 1, :],
        color="tab:green", linestyle="--",
        label=rf"OmniFold ($\theta=1$)",
    ),
    dict(
        x=x_mc[:, 0], weights=nu_pof[itr, 1, :],
        color="tab:orange", linestyle="-.",
        label=rf"Profile OmniFold ($\hat{{\theta}}={nu_pof[itr,3,0]:.2f}$)",
    ),
]
for spec in curve_specs:
    sns.kdeplot(ax=ax, linewidth=2, bw_adjust=2, **spec)


ax.legend(loc="best", fontsize=15, frameon=False)
ax.set_xlim(-5, 5)
ax.set_xlabel(r"$X$", fontsize=20)
ax.set_ylabel("Probability Density", fontsize=20)
ax.tick_params(axis="both", labelsize=14)
fig.tight_layout()

In [None]:
# Histogram styling for the four series drawn in every comparison plot
exp_style = dict(target=True, histtype="step", color='black', ls="-", lw=2)
mc_style = dict(histtype="step", color='tab:blue', ls=":", lw=2)
of_style = dict(histtype="step", color='tab:green', ls="--", lw=2)
pof_style = dict(histtype="step", color='tab:orange', ls="-.", lw=2)

of_label = rf"OmniFold ($\theta=1$)"
pof_label = rf'Profile OmniFold ($\hat{{\theta}}={nu_pof[itr,3,0]:.2f}$)'

# Y1 and Y2: detector-level coordinates. OmniFold uses entry 0 of its
# weights; Profile OmniFold uses the product of entries 0 and 2.
for coord, axis_label in enumerate((r"$Y_1$", r"$Y_2$")):
    cpwr = utils.comparison_plots_with_ratio(-5, 5, 50, xlabel=axis_label, density=True, header="")
    cpwr.add_data(y_data[:,coord], label=rf"Experiment ($\theta={theta}$)", **exp_style)
    cpwr.add_data(y_mc[:,coord], label=rf"MC Simulation ($\theta=1$)", **mc_style)
    cpwr.add_data(y_mc[:,coord], weights=nu_of[itr,0,:], label=of_label, **of_style)
    cpwr.add_data(y_mc[:,coord], weights=nu_pof[itr,0,:]*nu_pof[itr,2,:], label=pof_label, **pof_style)
    cpwr.show()

# X: particle level, reweighted with entry 1 of the weights
cpwr = utils.comparison_plots_with_ratio(-5, 5, 50, xlabel=r"$X$", density=True, legend_corner="best", header="")
cpwr.add_data(x_data, label="Experiment", **exp_style)
cpwr.add_data(x_mc, label="MC Simulation", **mc_style)
cpwr.add_data(x_mc, weights=nu_of[itr,1,:], label=of_label, **of_style)
cpwr.add_data(x_mc, weights=nu_pof[itr,1,:], label=pof_label, **pof_style)
cpwr.show()

print('fitted theta:', nu_pof[itr,3,0])