In [None]:
# basic setup for Colab
import os
# Directory handed to TensorBoard through --logdir in the hosted branch below.
LOG_DIR = 'log_dir/.'

# The presence of the DATALAB_ENV environment variable selects the hosted
# branch: the repositories are cloned, dependencies are installed and
# TensorBoard is started and exposed through localtunnel. Otherwise the
# repository root is taken to be the parent directory of the notebook.
if 'DATALAB_ENV' in os.environ:
  import getpass
  # The GitLab token is prompted for interactively instead of being written
  # into the notebook source.
  # NOTE(review): the token is interpolated into the clone URLs, so it is
  # presumably kept in the remotes of the cloned repositories - confirm this
  # is acceptable.
  gitlab_token = getpass.getpass()
  !git clone https://oauth2:{gitlab_token}@gitlab.cern.ch/pdecastr/paper-learning_inference.git
  !git clone https://oauth2:{gitlab_token}@gitlab.cern.ch/pdecastr/neyman.git
  # NOTE(review): both installs are unpinned (the first tracks `master`), so
  # results may not be reproducible across runs.
  !pip install git+https://github.com/wookayin/tensorflow-plot.git@master
  !pip install neyman/.
  # Root of the cloned paper repository; later cells append "<repo_path>/code/"
  # to sys.path.
  repo_path = "paper-learning_inference"
  !npm install -g localtunnel
  # Start TensorBoard in the background on port 6006, reading LOG_DIR.
  get_ipython().system_raw(
    'tensorboard --logdir {} --host 0.0.0.0 --port 6006 &'
    .format(LOG_DIR)
  )
  # Start localtunnel for port 6006 in the background; its output (stdout and
  # stderr) is appended to url.txt.
  get_ipython().system_raw('lt --port 6006 >> url.txt 2>&1 &')
  # NOTE(review): url.txt is printed immediately after the tunnel is launched
  # in the background, so it may still be empty at this point.
  !cat url.txt
else:
  # Local run: the repository root is one directory above the notebook.
  repo_path = ".."

In [None]:
%matplotlib inline
from glob import glob
import numpy as np
import time
import json
import matplotlib.pyplot as plt
import matplotlib
# add code to path
import sys
sys.path.append(f"{repo_path}/code/")
from scipy.interpolate import InterpolatedUnivariateSpline
import inference_estimator as ie
from template_likelihood import TemplateLikelihood
import neyman.models as nm
import tensorflow as tf
import neyman.inferences as ni
import neyman.models as nm


# Short alias for the Keras API shipped with TensorFlow; used later to load
# the trained models.
k = tf.keras

# Global matplotlib font settings applied to every figure in the notebook.
font = dict(size=14)
matplotlib.rc('font', **font)

In [None]:
# The sample archives are located relative to the repository root chosen in
# the setup cell (`repo_path`), the same way the code directory is added to
# sys.path. For a local run (repo_path == "..") these are exactly the previous
# "../data/..." patterns; in the hosted branch they now point inside the clone.
valid_path = f"{repo_path}/data/2d_gaussian/valid/*"
test_path = f"{repo_path}/data/2d_gaussian/test/*"


def _load_samples(pattern):
  """Load every archive matching `pattern` into a dict of dicts.

  Args:
    pattern: glob pattern selecting the archive files to read.

  Returns:
    A dict keyed by file name (directory and extension stripped); each value
    is a plain dict holding the arrays stored in that archive.
  """
  samples = {}
  for path in glob(pattern):
    name = os.path.splitext(os.path.basename(path))[0]
    # Copy the arrays into a plain dict inside the `with` block so the
    # underlying file handle is closed once the contents are in memory.
    with np.load(path) as archive:
      samples[name] = dict(archive)
  return samples


valid_samples = _load_samples(valid_path)
test_samples = _load_samples(test_path)

In [None]:
# Build the graph pieces that the evaluation helpers below run in a session.
tl = TemplateLikelihood()

# Tensors provided by the template likelihood object: the expected output
# (evaluated by get_asimov) and the negative log-likelihood.
expected = tl.expected()
nll = tl.nll()
# batch_hessian returns a pair, unpacked here as (hessian, gradient) of the
# nll with respect to tl.pars.
# NOTE(review): presumably both carry a leading batch axis - confirm against
# neyman.inferences.batch_hessian.
h_nll, g_nll = ni.batch_hessian(nll, pars=tl.pars)

# covariance: matrix inverse of the full hessian
c_nll = tf.matrix_inverse(h_nll, name="c_nll")
# profile grads and hess (only nuisance par): the first parameter is dropped
# from the gradient and from both hessian axes
g_nll_prof = g_nll[:,1:]
h_nll_prof = h_nll[:,1:,1:]
c_nll_prof = tf.matrix_inverse(h_nll_prof, name="c_nll_prof")

# newton step: inverse profile hessian times the profile gradient (as a column
# vector); only element [0, 0] of each product is kept
newton_step =  tf.matmul(c_nll_prof, g_nll_prof[:,:, tf.newaxis])[:,0,0]

In [None]:
def get_asimov(feed_dict):
  """Evaluate the `expected` tensor and return its first entry.

  Args:
    feed_dict: mapping handed to `Session.run`; a shallow copy is used, so the
      caller's dict is left untouched.

  Returns:
    Element 0 of the evaluated `expected` output.
  """
  feeds = feed_dict.copy()
  with tf.Session() as session:
    expected_batch = session.run(expected, feed_dict=feeds)
  return expected_batch[0]

def get_hessian(feed_dict):
  """Evaluate the `h_nll` tensor for the given feeds.

  Args:
    feed_dict: mapping handed to `Session.run`; a shallow copy is used, so the
      caller's dict is left untouched.

  Returns:
    The evaluated `h_nll` output, exactly as returned by the session.
  """
  feeds = feed_dict.copy()
  with tf.Session() as session:
    hessian = session.run(h_nll, feed_dict=feeds)
  return hessian

def get_unc_approx(feed_dict):
  """Return the square root of the first row of the first covariance matrix.

  The `c_nll` tensor (inverse of the hessian) is evaluated, the first matrix
  of the result is selected, an element-wise square root is taken and its
  first row is returned. Entry 0 of that row comes from the first diagonal
  element; the remaining entries are square roots of off-diagonal terms.

  Args:
    feed_dict: mapping handed to `Session.run`; a shallow copy is used, so the
      caller's dict is left untouched.

  Returns:
    A 1-D array: row 0 of the element-wise square root of the first
    covariance matrix.
  """
  feeds = feed_dict.copy()
  with tf.Session() as session:
    cov_batch = session.run(c_nll, feed_dict=feeds)
  first_cov = cov_batch[0]
  sqrt_cov = np.sqrt(first_cov)
  return sqrt_cov[0]

def get_unc_profile(feed_dict, n_steps=10):
  """Evaluate the nll before and after Newton updates of the `r_dist` feed.

  The nll is first evaluated with the feeds as given. The value fed for the
  `r_dist` placeholder is then updated `n_steps` times by subtracting the
  evaluated `newton_step`, and the nll is evaluated once more with the
  updated value.

  Args:
    feed_dict: mapping handed to `Session.run`; it must contain an entry for
      `tl.phs["r_dist"]`. A shallow copy is modified, so the caller's dict is
      left untouched.
    n_steps: number of Newton updates applied to the `r_dist` feed
      (defaults to 10, the previously hard-coded value).

  Returns:
    A tuple `(no_nuis, profiled)`: the nll evaluated with the initial feeds
    and the nll evaluated after the Newton updates.
  """
  feed_dict = feed_dict.copy()
  r_dist_ph = tl.phs["r_dist"]
  with tf.Session() as sess:
    no_nuis = sess.run(nll, feed_dict=feed_dict)
    for _ in range(n_steps):
      newton_step_arr = sess.run(newton_step, feed_dict=feed_dict)
      # Rebind rather than update in place: the copy above is shallow, so an
      # in-place subtraction would modify the caller's array.
      feed_dict[r_dist_ph] = feed_dict[r_dist_ph] - newton_step_arr
    profiled = sess.run(nll, feed_dict=feed_dict)
  return no_nuis, profiled


def get_nll_surface(feed_dict, mu_scan):
  """Evaluate the nll on a grid of `mu` values times the fed `r_dist` values.

  For every value in `mu_scan` the `mu` placeholder is fed a one-element list
  holding that value and the nll is evaluated; each result fills one row of
  the returned surface. The number of columns is taken from the value fed for
  `tl.phs["r_dist"]`, so the function does not depend on any notebook-level
  scan variable being defined.

  Args:
    feed_dict: mapping handed to `Session.run`; it must contain an entry for
      `tl.phs["r_dist"]`. A shallow copy is modified, so the caller's dict is
      left untouched.
    mu_scan: 1-D sequence of `mu` values, one per row of the surface.

  Returns:
    A float32 array of shape `(len(mu_scan), len(r_dist values))`.
  """
  feed_dict = feed_dict.copy()
  mu_ph = tl.phs["mu"]
  r_dist_ph = tl.phs["r_dist"]
  n_mu = np.shape(mu_scan)[0]
  n_r_dist = np.shape(feed_dict[r_dist_ph])[0]
  nll_surface = np.empty([n_mu, n_r_dist], dtype=np.float32)
  with tf.Session() as sess:
    for i, mu_val in enumerate(mu_scan):
      feed_dict[mu_ph] = [mu_val]
      nll_surface[i] = sess.run(nll, feed_dict=feed_dict)
  return nll_surface


In [None]:
# Result containers filled by the evaluation loops, each keyed by model name.
approx_uncs = dict()   # output of get_unc_approx
profile_uncs = dict()  # output of get_unc_profile
hists_all = dict()     # normalised template histograms
surfaces = dict()      # output of get_nll_surface

In [None]:
n_bins = 2
# Offset added to / subtracted from the background features to build the
# shifted ("up" / "dw") templates.
shift = np.array([[0.2,0.]])
# Histogram edges for the classifier output: 11 edges between 0 and 1.
bins = np.linspace(0., 1., 11, endpoint=True)

re_ce = "fix/cross_entropy_b_2_bs_128_lr_0.01_seed_*0.h5"

# The scan grids do not depend on the model, so they are built once before the
# loop. This also keeps them defined for the plotting cells when the glob
# pattern matches no file.
mu_scan = np.linspace(0.05,1.95, 101, endpoint=True, dtype=np.float32)
r_dist_scan = np.linspace(1.2,2.8, 101, endpoint=True, dtype=np.float32)

for model_path in glob(re_ce):

  # Background / signal normalisations fed to the template likelihood.
  norm_phs = {tl.phs["n_bkg"] : 960.,
              tl.phs["n_sig"] : 40.}

  # Parameter values used when generating the Asimov dataset.
  par_phs = {tl.phs["mu"] : [1.],
             tl.nuis_rv: [2.]}

  model = k.models.load_model(model_path)
  # Results are keyed by the model path without its extension.
  model_name = os.path.splitext(model_path)[0]

  # Column 1 of the classifier output for signal, nominal background and the
  # two shifted backgrounds.
  preds_test = {}
  preds_test["bkg_nom"] = model.predict_proba(test_samples["p0_sample"]["X"])[:,1]
  preds_test["sig"] = model.predict_proba(test_samples["p1_sample"]["X"])[:,1]
  preds_test["bkg_up"] = model.predict_proba(test_samples["p0_sample"]["X"]+shift)[:,1]
  preds_test["bkg_dw"] = model.predict_proba(test_samples["p0_sample"]["X"]-shift)[:,1]

  # Histogram the outputs and normalise each template to unit sum.
  hists_test = { name : np.histogram(pred, bins=bins)[0] for name, pred in preds_test.items()}
  hists_test = { name : hist/hist.sum() for name, hist in hists_test.items()}
  hists_all[model_name] = hists_test

  # Keep only the bins in which at least one template is non-zero.
  not_zero = np.any([h != 0.0 for h in hists_test.values()],axis=0)
  shape_phs = {tl.phs["c_bkg_nom"] : hists_test["bkg_nom"][not_zero],
               tl.phs["c_bkg_dw"]  : hists_test["bkg_dw"][not_zero],
               tl.phs["c_bkg_up"]  : hists_test["bkg_up"][not_zero],
               tl.phs["c_sig"] : hists_test["sig"][not_zero]}

  # Generate the Asimov dataset and feed it back as the observed data.
  feed_dict = {**shape_phs, **norm_phs, **par_phs}
  asimov_data = get_asimov(feed_dict)
  asimov_phs = {tl.phs["observed"] : asimov_data}
  feed_dict = {**shape_phs, **norm_phs, **par_phs, **asimov_phs}

  approx_uncs[model_name] = get_unc_approx(feed_dict)

  # Profile scan: every mu value starts from r_dist = 2.
  par_phs = {tl.phs["mu"] : mu_scan,
             tl.phs["r_dist"] : np.ones_like(mu_scan)*2.}
  feed_dict = {**shape_phs, **norm_phs, **par_phs, **asimov_phs}
  profile_uncs[model_name] = get_unc_profile(feed_dict)

  # Two-dimensional nll surface over (mu_scan, r_dist_scan).
  par_phs = {tl.phs["mu"] : [1],
             tl.phs["r_dist"] : r_dist_scan}
  feed_dict = {**shape_phs, **norm_phs, **par_phs, **asimov_phs}
  surfaces[model_name] = get_nll_surface(feed_dict, mu_scan)


In [None]:
# Number of histogram bins, passed as `minlength` to np.bincount below.
n_bins = 10
# Offset added to / subtracted from the background features to build the
# shifted ("up" / "dw") templates.
shift = np.array([[0.2,0.]])

re_inferno = "fix/inferno_b_10_bs_512_lr_0.0001_seed_*.h5"

# The scan grids do not depend on the model, so they are built once before the
# loop. This also keeps them defined for the plotting cells when the glob
# pattern matches no file.
mu_scan = np.linspace(0.05,1.95, 101, endpoint=True, dtype=np.float32)
r_dist_scan = np.linspace(1.2,2.8, 101, endpoint=True, dtype=np.float32)

for model_path in glob(re_inferno):

  # Background / signal normalisations fed to the template likelihood.
  norm_phs = {tl.phs["n_bkg"] : 960.,
              tl.phs["n_sig"] : 40.}

  # Parameter values used when generating the Asimov dataset.
  par_phs = {tl.phs["mu"] : [1.],
             tl.nuis_rv: [2.]}

  model = k.models.load_model(model_path)
  # Results are keyed by the model path without its extension.
  model_name = os.path.splitext(model_path)[0]

  # Full model output for signal, nominal background and the two shifted
  # backgrounds.
  preds_test = {}
  preds_test["bkg_nom"] = model.predict_proba(test_samples["p0_sample"]["X"])
  preds_test["sig"] = model.predict_proba(test_samples["p1_sample"]["X"])
  preds_test["bkg_up"] = model.predict_proba(test_samples["p0_sample"]["X"]+shift)
  preds_test["bkg_dw"] = model.predict_proba(test_samples["p0_sample"]["X"]-shift)

  # Each event is assigned to the bin given by the argmax over the last axis;
  # the counts are then normalised to unit sum.
  hists_test_raw = { name : np.bincount(np.argmax(pred, axis=-1),minlength=n_bins) for name, pred in preds_test.items()}
  hists_test = { name : hist/hist.sum() for name, hist in hists_test_raw.items()}

  hists_all[model_name] = hists_test

  # Keep only the bins in which at least one template is non-zero.
  not_zero = np.any([h != 0.0 for h in hists_test.values()],axis=0)

  shape_phs = {tl.phs["c_bkg_nom"] : hists_test["bkg_nom"][not_zero],
               tl.phs["c_bkg_dw"]  : hists_test["bkg_dw"][not_zero],
               tl.phs["c_bkg_up"]  : hists_test["bkg_up"][not_zero],
               tl.phs["c_sig"] : hists_test["sig"][not_zero]}

  # Generate the Asimov dataset and feed it back as the observed data.
  feed_dict = {**shape_phs, **norm_phs, **par_phs}
  asimov_data = get_asimov(feed_dict)
  asimov_phs = {tl.phs["observed"] : asimov_data}
  feed_dict = {**shape_phs, **norm_phs, **par_phs, **asimov_phs}

  approx_uncs[model_name] = get_unc_approx(feed_dict)

  # Profile scan: every mu value starts from r_dist = 2.
  par_phs = {tl.phs["mu"] : mu_scan,
             tl.phs["r_dist"] : np.ones_like(mu_scan)*2.}
  feed_dict = {**shape_phs, **norm_phs, **par_phs, **asimov_phs}
  profile_uncs[model_name] = get_unc_profile(feed_dict)

  # Two-dimensional nll surface over (mu_scan, r_dist_scan).
  par_phs = {tl.phs["mu"] : [1],
             tl.phs["r_dist"] : r_dist_scan}
  feed_dict = {**shape_phs, **norm_phs, **par_phs, **asimov_phs}
  surfaces[model_name] = get_nll_surface(feed_dict, mu_scan)

In [None]:
fig, ax = plt.subplots()

# Contour boundaries for the nll difference with respect to its minimum.
levels = [0.,0.5,1.0,2.0,4]

# NOTE(review): this key uses a "progress/" prefix, whereas the evaluation
# loops in this notebook glob "fix/..." paths - confirm where this surface is
# produced.
key = 'progress/cross_entropy_b_2_bs_128_lr_0.01_seed_167_19'

# Rows of the surface follow mu_scan (y axis), columns follow r_dist_scan
# (x axis).
delta_nll = surfaces[key]-surfaces[key].min()
ax.contourf(r_dist_scan, mu_scan, delta_nll, levels=levels)

In [None]:
fig, ax = plt.subplots(figsize=(8,6))

# Marker of the INFERNO configuration that is drawn in the figure.
inferno_tag = "fix/inferno_b_10_bs_512_lr_0.0001_"

unc_roots = {}
for name, prof in profile_uncs.items():
  # prof is (nll without profiling, profiled nll); only the profiled curve is
  # used. NaN points are dropped from the curve AND from the matching mu
  # values, so both arrays always have the same length.
  is_not_nan = ~np.isnan(prof[1])
  mu_valid = mu_scan[is_not_nan]
  profile = prof[1][is_not_nan]-(prof[1][is_not_nan]).min()

  is_ce = "fix/cross_entropy" in name
  is_inferno = (inferno_tag in name) and ("150" not in name)

  if is_ce:
    l_ce = ax.plot(mu_valid, profile,"r-", alpha=0.2)
  if is_inferno:
    l_inferno = ax.plot(mu_valid, profile,"b-", alpha=0.2)

  # Crossings of the profile with 0.5. As before, they are computed for every
  # cross-entropy model and for every other model without "150" in its name.
  if is_ce or ("150" not in name):
    unc_roots[name] = InterpolatedUnivariateSpline(mu_valid, profile-0.5).roots()


ax.set_ylim([0.,1.0])
ax.set_xlim([0.25,1.75])


ax.set_xlabel(r"signal strength parameter $\nu$")
ax.set_ylabel(r"likelihood profile $\Delta\mathcal{L}$")

# Half the distance between the first two crossings.
unc_width = { k : (v[1]-v[0])/2.  for k,v in unc_roots.items()}

ax.legend((l_ce[0], l_inferno[0]), ("cross-entropy","inference-aware"),frameon=False)
fig.savefig("profile_likelihood.pdf", bbox_inches='tight')

In [None]:

# Evaluation histories read from the TensorBoard event files of the INFERNO
# runs: model name -> {"steps": [...], "asimov_loss": [...]}.
eval_losses = {}

# Event directories are named after the model glob pattern without its
# extension.
re_no_ext = os.path.splitext(re_inferno)[0]
print(re_no_ext)
for event_path in glob(f"log_dir_{re_no_ext}/eval/events.out.tfevents.*"):
  print(event_path)
  # The second path component is used as the model name.
  model_name = event_path.split("/")[1]
  # Models whose name contains "77" or "150" are left out.
  if "77" in model_name or "150" in model_name:
    continue
  history = {"steps" : [],
             "asimov_loss" : []}
  eval_losses[model_name] = history

  # Collect every "asimov_loss" scalar together with the step it was logged at.
  for event in tf.train.summary_iterator(event_path):
    for value in event.summary.value:
      if value.tag != "asimov_loss":
        continue
      history["steps"].append(event.step)
      history["asimov_loss"].append(value.simple_value)

In [None]:
# Display the model names for which an evaluation history is currently loaded.
eval_losses.keys()

In [None]:

# NOTE: eval_losses is re-created here, so whatever it held before this cell
# ran is discarded.
eval_losses = {}

# The pattern stem is only printed; the glob below targets one fixed run
# directory.
re_no_ext = os.path.splitext(re_ce)[0]
print(re_no_ext)
for event_path in glob(f"log_dir_fix/cross_entropy_b_2_bs_128_lr_0.01_seed_167/eval/events.out.tfevents.*"):
  print(event_path)
  # The second path component is used as the model name.
  model_name = event_path.split("/")[1]
  history = {"steps" : [],
             "asimov_loss" : []}
  eval_losses[model_name] = history

  # Collect every "asimov_loss" scalar together with the step it was logged at.
  for event in tf.train.summary_iterator(event_path):
    for value in event.summary.value:
      if value.tag != "asimov_loss":
        continue
      history["steps"].append(event.step)
      history["asimov_loss"].append(value.simple_value)

In [None]:
base_name='cross_entropy_b_2_bs_128_lr_0.01_seed_167'
# Logged asimov loss of the run, skipping the first entry.
eval_ce = eval_losses[base_name]["asimov_loss"][1:]

fig, ax = plt.subplots(figsize=(10,10))

# Square root of the last logged asimov loss for every loaded model.
l_soft_std = { key : np.sqrt(v["asimov_loss"][-1]) for key, v in eval_losses.items()}
pl_vals = []
ap_vals = []
# NOTE(review): the "progress/<base_name>_<i>" keys are not produced by the
# "fix/..." evaluation loops in this notebook - confirm where they come from.
for i in range(20):
  key = f"progress/{base_name}_{i}"
  pl_vals.append(unc_width[key])
  # Entry 0 of get_unc_approx's output is the square root of the first
  # diagonal covariance element, the same entry the INFERNO diagnosis plot
  # uses. Entry 1 is the square root of an off-diagonal term, which is not an
  # uncertainty.
  ap_vals.append(approx_uncs[key][0])

lo_vals = np.sqrt(eval_ce)

ax.plot(pl_vals, lo_vals, "-", label="from loss function")
ax.plot(pl_vals, ap_vals, "-", label= "from template likelihood")

ax.set_xlabel("profile likelihood width")
ax.set_ylabel("hessian approximation")

ax.legend()

#fig.savefig("pl_vs_hessian_diagnosis_ce_dynamics.pdf")

In [None]:
fig, ax = plt.subplots(figsize=(20,20))

# One curve per uncertainty estimate, all drawn against the same 20 indices.
for series, label in ((pl_vals, "template pf"),
                      (ap_vals, "template unc hess"),
                      (lo_vals, "loss unc hess")):
  ax.plot(range(20), series, label=label)

ax.legend()


In [None]:
fig, ax = plt.subplots(figsize=(8,6))

# One line per loaded model: logged asimov loss against training step.
for history in eval_losses.values():
  steps = history["steps"]
  losses = history["asimov_loss"]
  ax.plot(steps, losses)

ax.set_xlabel("training step")
ax.set_ylabel("validation-set inference-aware loss")

fig.savefig("training_dynamics.pdf", bbox_inches='tight')

In [None]:
fig, ax = plt.subplots(figsize=(10,10))

# Square root of the last logged asimov loss for every loaded model.
l_soft_std = { name : np.sqrt(hist["asimov_loss"][-1]) for name, hist in eval_losses.items()}
pl_vals = []
ap_vals = []
lo_vals = []
# The loop variable is deliberately not called `k`: a module-level `for k in
# ...` would overwrite the `k = tf.keras` alias and break any later re-run of
# the model-loading cells.
for unc_key in unc_width:
  if "inferno" not in unc_key:
    continue
  # eval_losses is keyed by the second path component of the model name.
  model_name = unc_key.split("/")[1]
  # Models without a loaded evaluation history (for example those skipped
  # while reading the event files) cannot be compared, so they are left out
  # instead of raising a KeyError.
  if model_name not in l_soft_std:
    continue
  lo_vals.append(l_soft_std[model_name])
  pl_vals.append(unc_width[unc_key])
  ap_vals.append(approx_uncs[unc_key][0])

ax.plot(pl_vals, lo_vals, "o", label="from loss function")
ax.plot(pl_vals, ap_vals, "o", label= "from template likelihood")

ax.set_xlabel("profile likelihood width")
ax.set_ylabel("hessian approximation")

ax.legend()

fig.savefig("pl_vs_hessian_diagnosis.pdf")

In [None]:
# Mean and sample standard deviation (ddof=1) of the profile widths, computed
# separately for the models whose name contains each tag.
for subset in ["cross", "inferno"]:
  widths = [width for model, width in unc_width.items() if subset in model]
  mean = np.mean(widths)
  std = np.std(widths, ddof=1)
  print(subset,mean,std)