# Simulation for Profile Omnifold

In [1]:
# Mount Google Drive at /content/drive so files stored on Drive are reachable
# from this runtime. Requires the `google.colab` package (i.e. a Colab runtime).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import sys

# Directory on the mounted Drive that is added to the import search path.
# NOTE(review): presumably this is where `utils` and `profile_omnifold` live -- confirm.
# The path is absolute (Drive is mounted at /content/drive) so the cell does not
# depend on the current working directory, and the membership check keeps
# re-running the cell from appending duplicate entries.
PROJECT_DIR = '/content/drive/MyDrive/Colab Notebooks/ProfileOmnifold'
if PROJECT_DIR not in sys.path:
    sys.path.append(PROJECT_DIR)

In [3]:
import pickle
import numpy as np
from scipy.stats import norm
from scipy.integrate import quad
import matplotlib.pyplot as plt
from matplotlib import gridspec
import seaborn as sns
import utils
from random import choices
import profile_omnifold as pof
import tensorflow as tf
import tensorflow.keras.backend as K
# Enable eager execution: make `tf.function`-decorated code run eagerly
# instead of as a traced graph.
# NOTE(review): presumably required by the training code in profile_omnifold -- confirm.
tf.config.run_functions_eagerly(True)
from sklearn.model_selection import train_test_split, ShuffleSplit
from keras.layers import Dense, Input, Concatenate
from keras.models import Model
import torch
from torch.utils.data import Dataset
from torch.utils.data import random_split, DataLoader
from torch import nn, optim
from sklearn.metrics import accuracy_score, roc_curve, auc, confusion_matrix, roc_auc_score
# Select the PyTorch device name: the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    dvc = "cuda"
else:
    dvc = "cpu"
print(f"Using {dvc} device")

Using cpu device


## 1D Gaussian Data

### Response Kernel

In [None]:
# Smearing kernel for the MC data: the residual y - x is Gaussian with
# mean mu_kernel_mc and standard deviation sigma_kernel_mc.
mu_kernel_mc = 0
sigma_kernel_mc = 1

# Smearing kernel for the experimental data.
mu_kernel_data = 0
sigma_kernel_data = 1.2


def k_mc(y, x):
    """MC response kernel evaluated on paired samples.

    Parameters
    ----------
    y, x : 2-D arrays; only column 0 of each is read.

    Returns
    -------
    1-D array with the Gaussian density of ``y[:, 0] - x[:, 0]`` under the
    MC kernel parameters.
    """
    return norm.pdf(y[:, 0] - x[:, 0], loc=mu_kernel_mc, scale=sigma_kernel_mc)


def k_data(y, x):
    """Experimental-data response kernel evaluated on paired samples.

    Same contract as ``k_mc`` but with ``mu_kernel_data`` / ``sigma_kernel_data``.
    """
    return norm.pdf(y[:, 0] - x[:, 0], loc=mu_kernel_data, scale=sigma_kernel_data)


def w_theta(y, x, theta):
    """Reweighting function w parametrized by theta.

    ``theta`` is the standard deviation of the candidate response kernel; the
    result is the ratio of that kernel's density to the MC kernel density.
    """
    return norm.pdf(y[:, 0] - x[:, 0], loc=mu_kernel_data, scale=theta) / k_mc(y, x)


def w_theta_derivative(y, x, theta):
    """Derivative of ``w_theta`` with respect to ``theta``.

    For a Gaussian density N(r; mu, theta) the derivative in theta is
    ((r - mu)^2 / theta^3 - 1 / theta) * N(r; mu, theta); the MC kernel in the
    denominator of ``w_theta`` does not depend on theta.
    """
    # Centre the residual on the same mean that w_theta uses, so the formula
    # stays correct when mu_kernel_data is non-zero.
    centred = y[:, 0] - x[:, 0] - mu_kernel_data
    return (centred**2 / (theta**3) - 1 / theta) * w_theta(y, x, theta)


def w_true(y, x):
    """True reweighting function on the MC response kernel: k_data / k_mc."""
    return k_data(y, x) / k_mc(y, x)

### Experimental Densities

In [None]:
# Mean and standard deviation of the experimental particle-level Gaussian.
mu_data = 0.2
sigma_data = 1


def px(x):
    """Experimental particle-level density evaluated at ``x``."""
    return norm.pdf(x, loc=mu_data, scale=sigma_data)


def py(y):
    """Experimental detector-level density evaluated at ``y``.

    Gaussian whose mean is the sum of the particle-level and kernel means and
    whose variance is the sum of the two variances.
    """
    smeared_mean = mu_data + mu_kernel_data
    smeared_std = np.sqrt(sigma_data**2 + sigma_kernel_data**2)
    return norm.pdf(y, loc=smeared_mean, scale=smeared_std)


### Monte Carlo Densities

In [None]:
# Mean and standard deviation of the Monte Carlo particle-level Gaussian.
mu_mc = 0
sigma_mc = 1


def qx(x):
    """Monte Carlo particle-level density evaluated at ``x``."""
    return norm.pdf(x, loc=mu_mc, scale=sigma_mc)


def qy(y):
    """Monte Carlo detector-level density evaluated at ``y``.

    Gaussian whose mean is the sum of the particle-level and kernel means and
    whose variance is the sum of the two variances.
    """
    smeared_mean = mu_mc + mu_kernel_mc
    smeared_std = np.sqrt(sigma_mc**2 + sigma_kernel_mc**2)
    return norm.pdf(y, loc=smeared_mean, scale=smeared_std)



### Simulation

In [None]:
# Number of independent repetitions and the sample sizes drawn in each one.
nsim = 5
Ndata = 100000
Nsim = 100000

# Per-repetition results, appended in loop order.
theta_list = []
x_mc_list = []
nu_list = []

for sim_idx in range(nsim):
    # Experimental sample: particle-level values first, then the smeared
    # detector-level values (each stored as a single-column array).
    x_data = np.random.normal(loc=mu_data, scale=sigma_data, size=Ndata)[:, np.newaxis]
    y_data = np.random.normal(
        loc=x_data[:, 0] + mu_kernel_data, scale=sigma_kernel_data, size=Ndata
    )[:, np.newaxis]

    # Monte Carlo sample, generated the same way with the MC parameters.
    x_mc = np.random.normal(loc=mu_mc, scale=sigma_mc, size=Nsim)[:, np.newaxis]
    y_mc = np.random.normal(
        loc=x_mc[:, 0] + mu_kernel_mc, scale=sigma_kernel_mc, size=Nsim
    )[:, np.newaxis]

    # The true w function parametrized by theta.
    # delta_epsilon is the penalization strength applied to theta; a larger
    # delta_epsilon means less penalty.
    delta_epsilon = 1
    theta_test = (sigma_kernel_data - sigma_kernel_mc) / delta_epsilon
    print('true theta:', theta_test)

    # theta is expressed as a scaled offset from the MC kernel width; these
    # closures evaluate w and dw/dtheta on the current MC sample.
    def w_func(theta):
        return w_theta(y_mc, x_mc, theta*delta_epsilon+sigma_kernel_mc)

    def w_func_derivative(theta):
        return w_theta_derivative(y_mc, x_mc, theta*delta_epsilon+sigma_kernel_mc)

    # NOTE(review): the positional 10 is presumably the number of iterations -- confirm in profile_omnifold.
    nu_profile_penalized_nn = pof.penalized_profile_omnifold_test(
        y_data, x_mc, y_mc, 10, w_func, w_func_derivative, 0., 0.,
        no_penalty=True, verbose=0,
    )

    # Slice [:, 1, :] is later indexed by iteration and used as KDE weights;
    # slice [:, 3, 0] is what this cell reports as theta.
    theta_trace = nu_profile_penalized_nn[:, 3, 0]
    x_mc_list.append(x_mc)
    nu_list.append(nu_profile_penalized_nn[:, 1, :])
    print("theta:", theta_trace)
    theta_list.append(theta_trace)



## 2D Gaussian Data

### Response Kernel

In [4]:
# Smearing kernel for the MC data: each detector-level component is the
# particle-level value plus an independent Gaussian residual.
mu_kernel_mc = 0
sigma1_kernel_mc = 1
sigma2_kernel_mc = 1

# Smearing kernel for the experimental data.
mu_kernel_data = 0
sigma1_kernel_data = 1
sigma2_kernel_data = 1.5


def k_mc(y, x):
    """MC response kernel evaluated on paired samples.

    Parameters
    ----------
    y : 2-D array; columns 0 and 1 are read.
    x : 2-D array; only column 0 is read (both y columns are compared to it).

    Returns
    -------
    1-D array: product of the two Gaussian residual densities under the MC
    kernel parameters.
    """
    first = norm.pdf(y[:, 0] - x[:, 0], loc=mu_kernel_mc, scale=sigma1_kernel_mc)
    second = norm.pdf(y[:, 1] - x[:, 0], loc=mu_kernel_mc, scale=sigma2_kernel_mc)
    return first * second


def k_data(y, x):
    """Experimental-data response kernel; same contract as ``k_mc``."""
    first = norm.pdf(y[:, 0] - x[:, 0], loc=mu_kernel_data, scale=sigma1_kernel_data)
    second = norm.pdf(y[:, 1] - x[:, 0], loc=mu_kernel_data, scale=sigma2_kernel_data)
    return first * second


def w_theta(y, x, theta):
    """Reweighting function w parametrized by theta.

    ``theta`` is the standard deviation of the second component of the
    candidate response kernel; the first component keeps
    ``sigma1_kernel_data``. The result is the ratio to the MC kernel density.
    """
    first = norm.pdf(y[:, 0] - x[:, 0], loc=mu_kernel_data, scale=sigma1_kernel_data)
    second = norm.pdf(y[:, 1] - x[:, 0], loc=mu_kernel_data, scale=theta)
    return first * second / k_mc(y, x)


def w_theta_derivative(y, x, theta):
    """Derivative of ``w_theta`` with respect to ``theta``.

    Only the second Gaussian factor depends on theta, and for N(r; mu, theta)
    the derivative in theta is ((r - mu)^2 / theta^3 - 1 / theta) * N(r; mu, theta).
    """
    # Centre the residual on the same mean that w_theta uses, so the formula
    # stays correct when mu_kernel_data is non-zero.
    centred = y[:, 1] - x[:, 0] - mu_kernel_data
    return (centred**2 / (theta**3) - 1 / theta) * w_theta(y, x, theta)


def w_true(y, x):
    """True reweighting function on the MC response kernel: k_data / k_mc."""
    return k_data(y, x) / k_mc(y, x)

### Experimental Densities

In [5]:
# Mean and standard deviation of the experimental particle-level Gaussian.
mu_data = 0.8
sigma_data = 1


def px(x):
    """Experimental particle-level density evaluated at ``x``."""
    return norm.pdf(x, loc=mu_data, scale=sigma_data)


def py1(y):
    """Experimental detector-level density of the first component at ``y``."""
    smeared_mean = mu_data + mu_kernel_data
    smeared_std = np.sqrt(sigma_data**2 + sigma1_kernel_data**2)
    return norm.pdf(y, loc=smeared_mean, scale=smeared_std)


def py2(y):
    """Experimental detector-level density of the second component at ``y``."""
    smeared_mean = mu_data + mu_kernel_data
    smeared_std = np.sqrt(sigma_data**2 + sigma2_kernel_data**2)
    return norm.pdf(y, loc=smeared_mean, scale=smeared_std)


### Monte Carlo Densities

In [6]:
# Mean and standard deviation of the Monte Carlo particle-level Gaussian.
mu_mc = 0
sigma_mc = 1


def qx(x):
    """Monte Carlo particle-level density evaluated at ``x``."""
    return norm.pdf(x, loc=mu_mc, scale=sigma_mc)


def qy1(y):
    """Monte Carlo detector-level density of the first component at ``y``."""
    smeared_mean = mu_mc + mu_kernel_mc
    smeared_std = np.sqrt(sigma_mc**2 + sigma1_kernel_mc**2)
    return norm.pdf(y, loc=smeared_mean, scale=smeared_std)


def qy2(y):
    """Monte Carlo detector-level density of the second component at ``y``."""
    smeared_mean = mu_mc + mu_kernel_mc
    smeared_std = np.sqrt(sigma_mc**2 + sigma2_kernel_mc**2)
    return norm.pdf(y, loc=smeared_mean, scale=smeared_std)


### Simulation

In [None]:
# Number of independent repetitions and the sample sizes drawn in each one.
nsim = 5
Ndata = 100000
Nsim = 100000

# Per-repetition samples and results, appended in loop order.
theta_list = []
x_mc_list = []
x_data_list = []
y_mc_list = []
y_data_list = []
nu_list = []

for i in range(nsim):
    # sample from the experimental distribution: one particle-level value per
    # event, smeared independently into two detector-level components

    x_data = np.random.normal(loc=mu_data,scale=sigma_data,size=Ndata).reshape(-1,1)
    y_data1 = np.random.normal(loc=x_data[:,0]+mu_kernel_data,scale=sigma1_kernel_data,size=Ndata).reshape(-1,1)
    y_data2 = np.random.normal(loc=x_data[:,0]+mu_kernel_data,scale=sigma2_kernel_data,size=Ndata).reshape(-1,1)
    y_data = np.hstack([y_data1, y_data2])

    # sample from the MC distribution in the same way with the MC parameters

    x_mc = np.random.normal(loc=mu_mc,scale=sigma_mc,size=Nsim).reshape(-1,1)
    y_mc1 = np.random.normal(loc=x_mc[:,0]+mu_kernel_mc,scale=sigma1_kernel_mc,size=Nsim).reshape(-1,1)
    y_mc2 = np.random.normal(loc=x_mc[:,0]+mu_kernel_mc,scale=sigma2_kernel_mc,size=Nsim).reshape(-1,1)
    y_mc = np.hstack([y_mc1,y_mc2])

    # the true w function parametrized by theta
    # delta_epsilon is the penalization strength applied to theta; a larger
    # delta_epsilon means less penalty
    delta_epsilon = 1
    # theta parametrizes the width of the second kernel component, so the
    # generic names below alias the sigma2 values (this overwrites any
    # earlier global definitions of these two names).
    sigma_kernel_data = sigma2_kernel_data
    sigma_kernel_mc = sigma2_kernel_mc
    theta_test = (sigma_kernel_data-sigma_kernel_mc)/delta_epsilon
    print('true theta:', theta_test)

    # theta is expressed as a scaled offset from the MC kernel width; these
    # closures evaluate w and dw/dtheta on the current MC sample.
    def w_func(theta):
        return w_theta(y_mc, x_mc, theta*delta_epsilon+sigma_kernel_mc)

    def w_func_derivative(theta):
        return w_theta_derivative(y_mc, x_mc, theta*delta_epsilon+sigma_kernel_mc)

    # NOTE(review): the positional 10 is presumably the number of iterations -- confirm in profile_omnifold.
    nu_profile_penalized_nn = pof.penalized_profile_omnifold_test1(y_data, x_mc, y_mc, 10, w_func, w_func_derivative, 0., 0., no_penalty=True, verbose=0)

    x_mc_list.append(x_mc)
    x_data_list.append(x_data)
    y_mc_list.append(y_mc)
    # Store the detector-level experimental sample (previously x_data was
    # appended here, so y_data was never recorded).
    y_data_list.append(y_data)
    # Slice [:, 1, :] is later indexed by iteration and used as KDE weights;
    # slice [:, 3, 0] is what this cell reports as theta.
    nu_list.append(nu_profile_penalized_nn[:,1,:])
    print("theta:", nu_profile_penalized_nn[:,3,0])
    theta_list.append(nu_profile_penalized_nn[:,3,0])



true theta: 0.5




[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0

In [None]:
# Compare the analytic particle-level densities with the reweighted MC samples
# from every simulation repetition.
fig, ax = plt.subplots(1, 1, figsize=(15, 8))
x = np.linspace(-7, 5, 1000)
ax.plot(x, px(x),
       'black', lw=3, alpha=0.6, label='experimental particle-level distribution')
ax.plot(x, qx(x),
       'lightblue', lw=3, alpha=0.6, label='Monte Carlo particle-level distribution')

# Row of the stored weights to display. Named `plot_iteration` rather than
# `iter` so the builtin `iter` is not shadowed for the rest of the session.
plot_iteration = 7

# One weighted KDE per repetition; only the first carries a legend label so the
# legend shows a single "Profile Omnifold" entry.
for sim_idx, (x_mc_sim, nu_sim) in enumerate(zip(x_mc_list, nu_list)):
    label_kwargs = {"label": "Profile Omnifold"} if sim_idx == 0 else {}
    sns.kdeplot(x=x_mc_sim[:, 0], ax=ax, color="orange",
                weights=nu_sim[plot_iteration, :], **label_kwargs)

ax.set_xlabel('x')
ax.set_ylabel('density')
ax.legend(loc='best');