In [None]:
%matplotlib inline
# third-party numerical / plotting stack
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split
# project package: probabilistic models (nm) and inference helpers
import neyman.models as nm
import neyman.inferences

import importlib

# re-execute neyman.inferences so the kernel uses the module as it is on disk
# (presumably to pick up edits made while the kernel is running)
importlib.reload(neyman.inferences)

# short alias used by the later cells
ni = neyman.inferences

# add code to path
# NOTE: the append must happen before the import below, which resolves
# inference_estimator through the extended sys.path
import sys
sys.path.append("../code/")
import inference_estimator as ie

# log at INFO level through the TF1 logging API
tf.logging.set_verbosity(tf.logging.INFO)

# alias for the TF1 contrib distributions namespace
ds = tf.contrib.distributions

In [None]:
# nuisance parameter: rotation angle of the background component (default 0)
theta = tf.placeholder_with_default(0., shape=(), name="theta")
# distance between the background mean and the origin (default 2)
r_dist = tf.placeholder_with_default(2., shape=(), name="r_dist")

# un-rotated background: 2D Gaussian centered at (r_dist, 0)
# with variance 1 along the first axis and 9 along the second
p0_loc = [r_dist, 0.]
p0_cov = [[1., 0.],
          [0., 9.]]
p0 = ds.MultivariateNormalFullCovariance(
    loc=p0_loc,
    covariance_matrix=p0_cov,
    name="p0",
)

# counter-clockwise rotation by theta, wrapped as a linear operator
scale = tf.linalg.LinearOperatorFullMatrix([
    [tf.cos(theta), -tf.sin(theta)],
    [tf.sin(theta), tf.cos(theta)],
])

# bijector applying the rotation to background samples
affine = ds.bijectors.AffineLinearOperator(scale=scale)

# rotated background distribution
p0_transform = ds.TransformedDistribution(
    distribution=p0,
    bijector=affine,
    name="p0_transform",
)

# signal is a (0,0) centered, unit variance 2D Gaussian
p1_loc = [0., 0.]
p1_diag = [1., 1.]
p1 = ds.MultivariateNormalDiag(
    loc=p1_loc,
    scale_diag=p1_diag,
    name="p1",
)

In [None]:
# Scatter plot of 2000 points drawn from each component of the toy model.
fig, ax = plt.subplots(figsize=(6,6))

with tf.Session() as sess:
    # background is drawn from the rotated distribution at theta = 0
    p0_sample = sess.run(p0_transform.sample(2000), feed_dict={theta : 0.})
    p1_sample = sess.run(p1.sample(2000))

# label both clouds so the figure can be read on its own
ax.plot(p0_sample[:,0], p0_sample[:,1], ".", label="background (p0, theta=0)")
ax.plot(p1_sample[:,0], p1_sample[:,1], ".", label="signal (p1)")

ax.set_xlim([-10,10])
ax.set_ylim([-10,10])
ax.set_xlabel("x0")
ax.set_ylabel("x1")
ax.legend()

fig;

In [None]:
n_samples = 125000

# component labels: used as dictionary keys and as sample op names
p0_sample = "p0_sample"
p1_sample = "p1_sample"

# each component gets half of the events, drawn with a fixed op-level seed
X_sample_tensors = {
    p0_sample: p0.sample(n_samples // 2, seed=7, name="p0_sample"),
    p1_sample: p1.sample(n_samples // 2, seed=17, name="p1_sample"),
}

# class label attached to every event of a component
y_values = {p0_sample: 0., p1_sample: 1.}

In [None]:
# Evaluate every sample tensor once and attach a constant label vector.
# Each entry is a dict with keys "X" (events) and "y" (labels), in that order.
samples = {}

with tf.Session() as sess:
    for name, sample_tensor in X_sample_tensors.items():
        X_values = sess.run(sample_tensor)
        # size the labels from the evaluated array instead of the static
        # TensorFlow Dimension, so this does not depend on NumPy being able
        # to coerce a Dimension (or on the static shape being known)
        n_events = X_values.shape[0]
        samples[name] = {}
        samples[name]["X"] = X_values
        samples[name]["y"] = y_values[name]*np.ones(n_events,
                                                    dtype=np.float32)

In [None]:
train_samples = {}
valid_samples = {}

# hold out 40% of every component for validation, with a fixed split seed
for name, sample in samples.items():
    X_train, X_valid, y_train, y_valid = train_test_split(
        sample["X"], sample["y"],
        test_size=0.4,
        random_state=17,
    )
    train_samples[name] = {"X": X_train, "y": y_train}
    valid_samples[name] = {"X": X_valid, "y": y_valid}

In [None]:
def _component_batches(samples_by_component, batch_size=16,
                       shuffle_buffer_size=10000):
    """Build one-shot batched inputs with one entry per mixture component.

    Args:
        samples_by_component: dict mapping a component name to a dict that
            holds the events of that component under the key "X".
        batch_size: number of events per component in each batch.
        shuffle_buffer_size: buffer size passed to ``Dataset.shuffle``.

    Returns:
        tuple: ``(features, None)`` where ``features`` is the ``get_next()``
        structure ``{"components": {name: batch_tensor}}``. The second item
        is ``None`` because no label tensor is provided.
    """
    components = {}
    for key, value in samples_by_component.items():
        components[key] = tf.data.Dataset.from_tensor_slices(value["X"])\
                                         .shuffle(buffer_size=shuffle_buffer_size)\
                                         .batch(batch_size)

    # zip keeps the per-component datasets in lock-step
    dataset = tf.data.Dataset.zip({"components" : components})

    next_batch = dataset.make_one_shot_iterator().get_next()
    return next_batch, None


def input_fn_train():
    """Input function over ``train_samples`` (read when the function is called)."""
    return _component_batches(train_samples)


def input_fn_valid():
    """Input function over ``valid_samples`` (read when the function is called)."""
    return _component_batches(valid_samples)

In [None]:
# re-execute the local inference_estimator module so the kernel uses the
# version currently on disk
importlib.reload(ie)

def c_norm_dist_fn():
    """Return the component normalisations and the nuisance parameter list.

    Returns:
        tuple: ``(norm_dict, nuis_pars)``. ``norm_dict`` maps the component
        names "p0_sample" and "p1_sample" to 200. each; ``nuis_pars`` is an
        empty list. A new dict and list are created on every call.
    """
    norm_dict = {component: 200. for component in ("p0_sample", "p1_sample")}
    return norm_dict, []

# Estimator defined in the local inference_estimator module. It receives the
# normalisation callback, two bins, "p1_sample" as the component of interest,
# and writes to ./dnn_clf.
# NOTE(review): the exact meaning of n_bins and use_cross_entropy is defined
# inside InferenceEstimator, which is not visible here - confirm there.
clf = ie.InferenceEstimator(c_norm_dist_fn,
                            n_bins =2,
                            c_interest="p1_sample",
                            use_cross_entropy=True,
                            model_dir="./dnn_clf")



In [None]:
# fit on the training inputs; the input function uses a one-shot iterator
# without repeat(), so the data is consumed once
clf.train(input_fn=input_fn_train)
# evaluate on the held-out validation inputs
clf.evaluate(input_fn=input_fn_valid)

In [None]:

# placeholder for a batch of 2D events
X = tf.placeholder(dtype=tf.float32, shape=(None,2), name="X_ph")
# apply the theta-dependent rotation to the fed events, so the classifier
# response can be evaluated for any theta without resampling
X_prime = affine.forward(X)
input_ph = {"X" : X_prime }

# rebuild the estimator graph in PREDICT mode on top of the rotated inputs
out = clf.model_fn(input_ph, None, tf.estimator.ModeKeys.PREDICT, config=None)
probs = out.predictions["probabilities"]
# override the variable initialisers with the values of the latest checkpoint;
# this has to be registered before the initializer op below is created
tf.train.init_from_checkpoint(clf.latest_checkpoint(), {'/' : '/'})
init_op=  tf.global_variables_initializer()

In [None]:

theta_scan = np.linspace(-2., 2., 21, endpoint=True, dtype=np.float32)

probs_arrs = {}
X_arrs = {}

with tf.Session() as sess:
    sess.run(init_op)

    # signal validation events: one evaluation, theta keeps its default value
    signal_feed = {X: valid_samples[p1_sample]["X"]}
    X_arrs[p1_sample], probs_arrs[p1_sample] = sess.run(
        (X_prime, probs[:, 1]), feed_dict=signal_feed)

    # background validation events: one evaluation per scanned theta,
    # stored in nested dicts keyed by the theta value
    probs_arrs[p0_sample] = {}
    X_arrs[p0_sample] = {}
    for theta_val in theta_scan:
        background_feed = {X: valid_samples[p0_sample]["X"],
                           theta: theta_val}
        rotated_events, prob_column = sess.run(
            (X_prime, probs[:, 1]), feed_dict=background_feed)
        X_arrs[p0_sample][theta_val] = rotated_events
        probs_arrs[p0_sample][theta_val] = prob_column

In [None]:
# Classifier output for the signal, the nominal background and the two
# extreme background rotations of the scan.
fig, ax = plt.subplots(figsize=(12,12))

bins = np.linspace(0.,1.,21, endpoint=True)

# take the extreme thetas from the stored scan itself, so the legend always
# states the value that is actually plotted (the previous hard-coded
# "-1.0"/"+1.0" labels did not match the first/last scan points)
scanned_thetas = sorted(probs_arrs[p0_sample])
theta_lo, theta_hi = scanned_thetas[0], scanned_thetas[-1]

ax.hist(probs_arrs[p0_sample][0.], bins=bins, histtype="step",
        label="background")
ax.hist(probs_arrs[p0_sample][theta_lo], bins=bins, histtype="step",
        label="background - theta {:+.1f}".format(float(theta_lo)))
ax.hist(probs_arrs[p0_sample][theta_hi], bins=bins, histtype="step",
        label="background - theta {:+.1f}".format(float(theta_hi)))
ax.hist(probs_arrs[p1_sample], bins=bins, histtype="step", label="signal")
ax.set_xlabel("classifier output")
ax.set_ylabel("events")
ax.legend()
fig;

In [None]:
# fine-binned (100 bins on [0, 1]) histograms of the classifier output,
# each normalised to unit sum; the float32 unit weights make the histogram
# come out as float32 rather than integer counts
bins = np.linspace(0., 1., 101, endpoint=True)

signal_probs = probs_arrs["p1_sample"]
signal_hist = np.histogram(
    signal_probs, bins=bins,
    weights=np.ones(signal_probs.shape[0], dtype=np.float32))[0]

hist_arrs = {}
hist_arrs["p1_sample"] = signal_hist / signal_hist.sum()

# background: one normalised histogram per scanned theta
hist_arrs["p0_sample"] = {}
for k, v in probs_arrs["p0_sample"].items():
    background_hist = np.histogram(
        v, bins=bins, weights=np.ones(v.shape[0], dtype=np.float32))[0]
    hist_arrs["p0_sample"][k] = background_hist / background_hist.sum()

In [None]:
# collapse every fine background histogram into two coarse bins
# (fine bins 0-49 and 50-end) and keep the matching theta values
background_hists = hist_arrs["p0_sample"]

t_arr = np.array(list(background_hists.keys()))
x_arr = np.array([np.add.reduceat(fine_hist, [0,50])
                  for fine_hist in background_hists.values()])

In [None]:
# start indices of the two coarse bins in the fine histograms
reduce_ind = [0, 50]

# signal template
c_sig = np.add.reduceat(hist_arrs[p1_sample], reduce_ind)

# background templates at theta = 0 (nominal), +1 (up) and -1 (down)
c_nom, c_up, c_dw = (
    np.add.reduceat(hist_arrs[p0_sample][theta_key], reduce_ind)
    for theta_key in (0.0, 1.0, -1.0)
)

In [None]:
def int_quad_lin(alpha, c_nom, c_up, c_dw):
    """Three-point interpolation, quadratic inside and linear outside.

    For ``|alpha| <= 1`` the variation is ``a*alpha**2 + b*alpha`` with
    ``a = 0.5*(c_up + c_dw) - c_nom`` and ``b = 0.5*(c_up - c_dw)``, which
    passes through ``c_dw``, ``c_nom`` and ``c_up`` at alpha = -1, 0, +1.
    For ``|alpha| > 1`` the result continues along the tangent of that
    parabola at alpha = +/-1, so both the value and the slope are continuous.

    Args:
        alpha: rank-1 tensor (or array) of interpolation parameter values,
            one per batch entry.
        c_nom: rank-1 nominal template, one value per bin.
        c_up: template at alpha = +1, same shape as ``c_nom``.
        c_dw: template at alpha = -1, same shape as ``c_nom``.

    Returns:
        Tensor of shape ``[len(alpha), len(c_nom)]`` with the interpolated
        template for every alpha.
    """
    # replicate alpha along a new last axis: one column per bin
    alpha_t = tf.tile(tf.expand_dims(alpha,axis=-1),[1, tf.shape(c_nom)[0]])
    a = 0.5*(c_up+c_dw)-c_nom
    b = 0.5*(c_up-c_dw)
    ones = tf.ones_like(alpha_t)
    sign = tf.sign(alpha_t)
    # value of the variation at the boundary the extrapolation starts from
    switch = tf.where(alpha_t < 0.,
                      ones*tf.expand_dims(c_dw-c_nom, axis=0),
                      ones*tf.expand_dims(c_up-c_nom, axis=0))
    # the slope of a*x**2 + b*x at x = +/-1 is b +/- 2*a; the previous
    # expression (2*b + sign*a) had the coefficients swapped, which put a
    # kink in the template at |alpha| = 1
    abs_var = tf.where(tf.abs(alpha_t) > 1.,
                      (b+2.*sign*a)*(alpha_t-sign)+switch,
                      a*tf.pow(alpha_t,2)+b*alpha_t)
    return c_nom+abs_var

In [None]:

# build the interpolation op for the scanned thetas, then evaluate it in a
# short-lived session
x_arr_int_op = int_quad_lin(theta_scan, c_nom, c_up, c_dw)
with tf.Session() as sess:
    x_arr_int = sess.run(x_arr_int_op)
        

In [None]:
# one panel per coarse bin: scanned template content versus its interpolation
fig, axs = plt.subplots(1, 2, figsize=(12, 6))

for bin_index, panel in enumerate(axs):
    panel.plot(t_arr, x_arr[:, bin_index], label="true")
    panel.plot(t_arr, x_arr_int[:, bin_index], label="interp")
    panel.legend()

fig;

In [None]:
def poisson(x, rate):
    """Poisson log-probability of ``x`` for mean ``rate``, computed in float64.

    Evaluates ``x*log(rate) - lgamma(1 + x) - rate`` after casting both
    inputs to float64 (to avoid numerical inaccuracies) and casts the result
    back to float32. Note that the returned value is the logarithm of the
    Poisson probability, not the probability itself.

    Args:
        x: observed counts (tensor or array-like).
        rate: expected counts, broadcastable against ``x``.

    Returns:
        float32 tensor with the element-wise log-probability.
    """
    counts = tf.cast(x, tf.float64)
    mean = tf.cast(rate, tf.float64)
    one = tf.convert_to_tensor(1., dtype=tf.float64)
    log_prob = counts*tf.log(mean) - tf.lgamma(one+counts) - mean
    return tf.cast(log_prob, tf.float32)

In [None]:
# Binned likelihood model: Poisson counts per bin, with the background shape
# depending on a nuisance parameter theta that has a Normal constraint term.

# expected shapes for background (nom/up/down variation for interpolation)
c_bkg_nom_ph = tf.placeholder(dtype=tf.float32, shape=(None,), name="c_bkg_nom_ph") 
c_bkg_dw_ph = tf.placeholder(dtype=tf.float32, shape=(None,), name="c_bkg_dw_ph") 
c_bkg_up_ph = tf.placeholder(dtype=tf.float32, shape=(None,), name="c_bkg_up_ph") 
# expected shapes for signal (no parameters)
c_sig_ph = tf.placeholder(dtype=tf.float32, shape=(None,), name="c_sig_ph")

# expected number of signal and background events
n_bkg_ph = tf.placeholder_with_default(10000., shape=(), name="n_bkg_ph")
n_sig_ph = tf.placeholder_with_default(100., shape=(), name="n_sig_ph")

# model parameters (input specified by placeholders), one value per batch entry
mu_ph = tf.placeholder(dtype=tf.float32, shape=(None,), name="mu_ph")
theta_ph = tf.placeholder(dtype=tf.float32, shape=(None,), name="theta_ph")

# auxiliary measurement parameters (width of the theta constraint)
theta_scale_ph = tf.placeholder_with_default(0.2, shape=(), name="theta_scale_ph")

# distribution of nuisance parameters: Normal centered at 0 with the scale
# above, whose value is fed through theta_ph
theta_rv = nm.Normal(loc=tf.zeros_like(theta_ph),
                     scale=tf.ones_like(theta_ph)*theta_scale_ph,
                     value=theta_ph,
                     name="theta_dist")

# background shape as a function of theta
c_bkg = int_quad_lin(theta_rv, c_bkg_nom_ph,
                     c_bkg_up_ph, c_bkg_dw_ph)

# expected events ([batch, bin]); mu gets a trailing axis so that it
# broadcasts against the per-bin shapes
mu = tf.expand_dims(mu_ph,-1, name="mu_expanded")
expected = mu*n_sig_ph*c_sig_ph+n_bkg_ph*c_bkg

# placeholder for data/asimov
observed_ph = tf.placeholder(dtype=tf.float32, shape=(None,), name="observed_ph")

# likelihood
# NOTE: despite its name, poisson_pdf holds per-bin log-probabilities
# (poisson() returns x*log(rate) - lgamma(1+x) - rate)
poisson_pdf = poisson(observed_ph, expected)
# negative log-likelihood: sum over the bin axis
nll = -tf.reduce_sum(poisson_pdf ,-1)
# constraint term: negative log-probability of theta under its Normal
theta_ext = -theta_rv.log_prob(theta_rv)
nll_ext = nll+theta_ext

# hessians and gradients with respect to (mu, theta)
# NOTE(review): assumes ni.batch_hessian returns (hessian, gradient) per
# batch entry, as the variable names suggest - confirm in neyman.inferences
h_nll, g_nll =  ni.batch_hessian(nll, pars=[mu_ph, theta_rv])
h_nll_ext, g_nll_ext = ni.batch_hessian(nll_ext, pars=[mu_ph, theta_rv])

# covariance without constraints (only POI): inverse of the mu-mu block
c_nll_poi = tf.matrix_inverse(h_nll[:,:1,:1], name="c_nll_poi")
# inverse of the full hessian, without and with the constraint term
c_nll = tf.matrix_inverse(h_nll, name="c_nll")
c_nll_ext = tf.matrix_inverse(h_nll_ext, name="c_nll_ext")
# profile grads and hess (only nuisance par): drop the mu row/column
g_nll_prof = g_nll_ext[:,1:]
h_nll_prof = h_nll_ext[:,1:,1:]
c_nll_prof = tf.matrix_inverse(h_nll_prof, name="c_nll_prof")

# newton step for theta: inverse hessian times gradient, one scalar per
# batch entry
newton_step =  tf.matmul(c_nll_prof, g_nll_prof[:,:, tf.newaxis])[:,0,0]

In [None]:
# two-bin templates fed to the shape placeholders
shape_phs = {
    c_bkg_nom_ph: c_nom,
    c_bkg_dw_ph: c_dw,
    c_bkg_up_ph: c_up,
    c_sig_ph: c_sig,
}

# expected background and signal yields
norm_phs = {
    n_bkg_ph: 10000.,
    n_sig_ph: 100.,
}

# nominal parameter point: mu = 1, theta = 0 (batch of one)
par_phs = {
    mu_ph: [1.],
    theta_ph: [0.],
}

In [None]:

# nominal parameter point used to generate the Asimov dataset
par_phs = {mu_ph: [1.], theta_ph: [0.]}

feed_dict = dict(shape_phs)
feed_dict.update(norm_phs)
feed_dict.update(par_phs)

# tensors evaluated at the nominal point; sess.run returns a dict that is
# keyed by the same tensors
covariance_fetches = {c_nll_ext: c_nll_ext,
                      c_nll_poi: c_nll_poi,
                      g_nll_ext: g_nll_ext}

with tf.Session() as sess:
    # Asimov data: the expected counts at the nominal point
    asimov_data = sess.run(expected, feed_dict=feed_dict)
    print("asimov data: ", asimov_data[0])
    asimov_phs = {observed_ph: asimov_data[0]}
    feed_dict.update(asimov_phs)
    minima = sess.run(covariance_fetches, feed_dict=feed_dict)

# uncertainties: square root of the covariance diagonal (first batch entry)
cov_poi_only = minima[c_nll_poi][0]
cov_with_nuis = minima[c_nll_ext][0]
print("error mu no nuis:", np.sqrt(np.diag(cov_poi_only)))
print("error mu and nuis:", np.sqrt(np.diag(cov_with_nuis)))


In [None]:

# Scan of the negative log-likelihood in theta at fixed mu = 1 on Asimov data.
# The scan gets its own name so that it does not overwrite the global
# `theta_scan` that the classifier-scan and interpolation cells rely on.
theta_nll_scan = np.linspace(-0.2,0.2, 101, endpoint=True, dtype=np.float32)
par_phs = {mu_ph : [1.0],
           theta_ph: theta_nll_scan }

feed_dict = {**shape_phs, **norm_phs, **par_phs, **asimov_phs}

arrs = {}
with tf.Session() as sess:
    # evaluate both curves in a single run
    arrs[theta_ext], arrs[nll_ext] = sess.run([theta_ext, nll_ext],
                                              feed_dict=feed_dict)

fig, ax = plt.subplots(figsize=(10,8))

# both curves are shifted so that their minimum sits at zero
ax.plot(theta_nll_scan, arrs[theta_ext]-arrs[theta_ext].min(),
        label="constraint term only")
ax.plot(theta_nll_scan, arrs[nll_ext]-arrs[nll_ext].min(),
        label="NLL with constraint term")
ax.set_xlabel("theta")
ax.set_ylabel("NLL - min(NLL)")
ax.legend()

fig;

In [None]:
# Scan of the negative log-likelihood in mu on Asimov data, once with theta
# fixed at 0 and once with theta profiled (minimised) for every mu.
mu_scan = np.linspace(0.2,1.5, 101, endpoint=True, dtype=np.float32)
par_phs = {mu_ph : mu_scan,
           theta_ph: np.zeros_like(mu_scan)}

feed_dict = {**shape_phs, **norm_phs, **par_phs, **asimov_phs}

# settings of the Newton minimisation in theta
theta_start_offset = 0.1   # start away from the nominal theta = 0
n_newton_steps = 10

no_nuis = {}
profiled = {}
with tf.Session() as sess:
    # theta fixed at its nominal value
    no_nuis[nll_ext] = sess.run(nll_ext, feed_dict=feed_dict)
    # profile theta: repeated Newton updates, one theta per scanned mu
    feed_dict[theta_ph] = feed_dict[theta_ph]+theta_start_offset
    for i in range(n_newton_steps):
        newton_step_arr = sess.run(newton_step , feed_dict=feed_dict)
        feed_dict[theta_ph] =  feed_dict[theta_ph]-newton_step_arr
    profiled[nll_ext] = sess.run(nll_ext, feed_dict=feed_dict)

fig, ax = plt.subplots(figsize=(10,8))

# both curves are shifted so that their minimum sits at zero
ax.plot(mu_scan, profiled[nll_ext]-profiled[nll_ext].min(),
        label="theta profiled")
ax.plot(mu_scan, no_nuis[nll_ext]-no_nuis[nll_ext].min(),
        label="theta fixed at 0")
ax.set_xlabel("mu")
ax.set_ylabel("NLL - min(NLL)")
ax.legend()

fig;

In [None]:

# Negative log-likelihood surface over a (mu, theta) grid on Asimov data.
# The theta axis gets its own name so that it does not overwrite the global
# `theta_scan` that the classifier-scan and interpolation cells rely on.
mu_scan = np.linspace(0.2,1.5, 51, endpoint=True, dtype=np.float32)
theta_grid = np.linspace(-0.2,0.2, 101, endpoint=True, dtype=np.float32)
# mu is overwritten for every row of the surface in the loop below
par_phs = {mu_ph : [1.],
           theta_ph: theta_grid}

feed_dict = {**shape_phs, **norm_phs, **par_phs, **asimov_phs}

# rows follow mu_scan, columns follow theta_grid
nll_surface = np.empty([mu_scan.shape[0],theta_grid.shape[0]], dtype=np.float32)

with tf.Session() as sess:
    for i, mu_val in enumerate(mu_scan):
        # a single mu broadcasts against the whole theta grid
        feed_dict[mu_ph] = [mu_val]
        nll_surface[i] = sess.run(nll_ext, feed_dict=feed_dict)

fig, ax = plt.subplots(figsize=(10,8))

pcm = ax.pcolor(theta_grid,mu_scan,nll_surface-nll_surface.min())
ax.set_xlabel("theta")
ax.set_ylabel("mu")
fig.colorbar(pcm, ax=ax, label="NLL - min(NLL)")

fig;