In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

#PDFs in BDT and sindec?
import os

# Limit the BLAS/OpenMP backends used by scipy/numpy to a single thread each,
# so that the number of CPUs in use is controlled solely by csky.
# NOTE(review): these are set before numpy/scipy/csky are imported below;
# presumably the backends only read them at import time - keep this order.
os.environ['MKL_NUM_THREADS'] = "1"
os.environ['NUMEXPR_NUM_THREADS'] = "1"
os.environ['OMP_NUM_THREADS'] = "1"
os.environ['OPENBLAS_NUM_THREADS'] = "1"
os.environ['VECLIB_MAXIMUM_THREADS'] = "1"

import matplotlib as mpl
mpl.rcParams['figure.facecolor'] = 'w'
mpl.rcParams['savefig.facecolor'] = 'w'
import matplotlib.pyplot as plt
from matplotlib import colors, cm
import csky as cy
from csky import cext
import numpy as np
import pandas as pd
import astropy
#from icecube import astro
import histlite as hl
import healpy
import pickle
import socket
healpy.disable_warnings()
plt.rc('figure', facecolor = 'w')
plt.rc('figure', dpi=100)

## Define Settings

In [None]:
selection_version = 'version-001-p01'

# Output locations are host specific, so they are resolved from the host name.
host_name = socket.gethostname()

if 'cobalt' in host_name:
    print('Working on Cobalts')
    plot_dir = '/home/mhuennefeld/public_html/analyses/DNNCascade/plots/pdf_construction/asymmetric_uncertainty_contours'
else:
    # Build the message explicitly: passing several positional arguments to
    # an exception (print-style) renders the message as a tuple.
    raise ValueError('Unknown host: {}'.format(host_name))

In [None]:
# Make sure every output directory exists before anything is written to it.
for dir_path in [plot_dir]:
    if not os.path.exists(dir_path):
        print('Creating directory:', dir_path)
        # exist_ok=True closes the check-then-create race: another process
        # may create the directory between the check above and this call.
        os.makedirs(dir_path, exist_ok=True)

## Define Toy MC

We will create a toy MC simulation on a discrete 2D surface with coordinates x and y. 
Background will be evenly distributed on this surface.
The events will have tunable uncertainties in x- and y-direction. 
For simplicity, these will be modelled by simple independent Gaussians.
Essentially this allows us to tweak the uncertainty contours from symmetric circles to axis-aligned
elongated ellipses.
We will use this simulation to understand the importance of proper coverage as well as the impact
of circularized vs proper elliptical uncertainty contours.
In order to assess the performance in a quantitative way, we will compute the sensitivity 
(the signal strength at which 90% of trials have a test-statistic value above the median background test-statistic value)
for the various settings. 
The search itself will be performed in the binned 2D space by doing the following:
 1. Convolve template with defined (estimated, i.e. not necessarily the true) uncertainty contours via a 2D convolution operation. The kernel will be the discretized Gaussian in x- and y-direction.
 2. Maximize the PS likelihood: $L(n_s) = \prod_i (\frac{n_s}{N} \cdot S_i(x_i, y_i) +  (1-\frac{n_s}{N}) \cdot B_i)$ with the signal PDF $S_i(x_i, y_i)$, defined by the convolved template and the background PDF $B_i = \frac{N_\mathrm{bins}}{N}$.
 3. Compute test-statistic $\tau = - 2 \cdot \ln \frac{L(n_s=0)}{L(\hat{n}_s)}$

In the simulation we will be able to adjust:
 - Template that we are searching for
 - True uncertainty contours: $\sigma_x$ and $\sigma_y$
 - Estimated uncertainty contours: $\hat{\sigma}_x$ and $\hat{\sigma}_y$
 - Number of bins in x-y-plane
 - Background level


#### Define simulation settings

In [None]:
# Simulation settings that are passed to the helper functions below.
cfg = dict(
    # number of bins along x and along y
    bin_dims=(50, 40),
    # expected (Poisson mean) total number of background events per trial
    n_background=0,
    # true uncertainty contours: diagonal covariance (var_x, var_y)
    cov_true=np.diag([0.005, 0.09]),
    # assumed uncertainty contours: diagonal covariance (var_x, var_y)
    cov_rec=np.diag([0.0583, 0.0583]),
)
cfg

#### Define helper functions for simulation

In [None]:
from scipy import signal


def multivariate_gaussian(pos, mu, cov):
    """
    Multivariate Gaussian distribution

    Computes the multivariate Gaussian in n-dimensional space.
    Used variables:
        N: number of samples/events
        n: number of dimensions

    All leading axes of the inputs are broadcast against each other.

    Parameters
    ----------
    pos: array_like
        The positions in the n-dimensional space.
        Shape: [N, n]
    mu: array_like
        The center of the n-d Gaussian.
        Shape: [N, n]
    cov: array_like
        The covariance matrix.
        Shape: [N, n, n]

    Returns
    -------
    array_like
        The n-dimensional Gaussian evaluated at pos.
        Shape: [N] with the number of events N
    """
    pos = np.asarray(pos)
    mu = np.asarray(mu)
    cov = np.asarray(cov)

    # The dimensionality is the size of the LAST axis of mu. The first axis
    # enumerates the N Gaussians, so using it here would yield a wrong
    # normalization constant whenever N != n.
    n = mu.shape[-1]
    cov_det = np.linalg.det(cov)
    cov_inv = np.linalg.inv(cov)
    norm = np.sqrt((2*np.pi)**n * cov_det)

    # This einsum call calculates (x-mu)T.cov-1.(x-mu) in a vectorized
    # way across all the input variables.
    diff = pos - mu
    fac = np.einsum('...k,...kl,...l->...', diff, cov_inv, diff)

    return np.exp(-fac / 2) / norm


def get_coordinates(cfg):
    """Build the grid of bin coordinates in the x-y-plane.

    Both axes are sampled with `np.linspace(-1, 1, n)` where n is the
    corresponding entry of `cfg['bin_dims']`.

    Parameters
    ----------
    cfg: dict
        Configuration; only `cfg['bin_dims']` is read.

    Returns
    -------
    tuple of array_like
        (X, Y, X_flat, Y_flat): the 2D coordinate grids of shape
        [bin_dims_x, bin_dims_y] and their flattened versions.
    """
    n_x, n_y = cfg['bin_dims'][0], cfg['bin_dims'][1]
    x_axis = np.linspace(-1, 1, n_x)
    y_axis = np.linspace(-1, 1, n_y)

    # meshgrid indexes as [y, x]; transpose so that the first array index
    # runs along x and the second along y
    grid_x, grid_y = (grid.T for grid in np.meshgrid(x_axis, y_axis))

    return grid_x, grid_y, grid_x.reshape(-1), grid_y.reshape(-1)


def idx_to_coordinates(x_idx, y_idx, cfg):
    """Convert per-axis bin indices to bin coordinates.

    Parameters
    ----------
    x_idx, y_idx: int or array_like of int
        Bin indices along x and y.
    cfg: dict
        Configuration; only `cfg['bin_dims']` is read.

    Returns
    -------
    array_like
        The (x, y) coordinates of the requested bins.
        Shape: [N, 2] for N index pairs.
    """
    coords = []
    for axis, idx in enumerate((x_idx, y_idx)):
        mids = np.linspace(-1, 1, cfg['bin_dims'][axis])
        coords.append(mids[np.atleast_1d(idx)])
    return np.transpose(np.array(coords))
    
def flat_idx_to_coordinates(flat_idx, cfg):
    """Convert flattened bin indices to (x, y) bin coordinates.

    The flat index is unravelled with respect to `cfg['bin_dims']` and the
    resulting per-axis indices are passed on to `idx_to_coordinates`.
    """
    per_axis_idx = np.unravel_index(flat_idx, cfg['bin_dims'])
    return idx_to_coordinates(
        x_idx=per_axis_idx[0],
        y_idx=per_axis_idx[1],
        cfg=cfg,
    )

def coordinates_to_idx(x, y, cfg):
    """Find the bin indices that belong to the given coordinates.

    Parameters
    ----------
    x, y: array_like
        Coordinates of the events.
    cfg: dict
        Configuration; only `cfg['bin_dims']` is read.

    Returns
    -------
    x_idx, y_idx: array_like
        Per-axis bin indices. Values outside of [0, n_bins) mark
        coordinates that lie outside of the binned area.
    idx_flat: array_like
        Flattened bin index, set to -1 for out-of-bounds coordinates.
    """
    def _axis_bin_index(values, n_bins):
        # bins are centered on the linspace points, edges lie half a bin
        # width to either side of them
        mids = np.linspace(-1, 1, n_bins)
        half_width = np.diff(mids)[0] * 0.5
        edges = np.concatenate((
            [-np.inf],
            [mids[0] - half_width],
            mids + half_width,
            [np.inf],
        ))
        # two edges (-inf and the lower edge of the first bin) precede the
        # upper edge of bin 0, hence the offset of 2
        return np.searchsorted(edges, values) - 2

    n_x, n_y = cfg['bin_dims'][0], cfg['bin_dims'][1]
    x_idx = _axis_bin_index(x, n_x)
    y_idx = _axis_bin_index(y, n_y)

    in_bounds = (x_idx >= 0) & (x_idx < n_x) & (y_idx >= 0) & (y_idx < n_y)

    # compute flattened index, -1 flags everything that is out of bounds
    idx_flat = np.atleast_1d(np.full_like(x, -1, dtype=int))
    idx_flat[in_bounds] = np.ravel_multi_index(
        (x_idx[in_bounds], y_idx[in_bounds]),
        dims=cfg['bin_dims'],
    )

    return x_idx, y_idx, idx_flat

def get_template_from_gaussians(mu, cov, cfg):
    """Get template from multivariate Gaussian distributions

    Assumes x-y-plane is centered around 0 with extent -1 to 1
    in x and y.

    Used variables:
        N: number of samples/events
        n: number of dimensions

    Parameters
    ----------
    mu: array_like
        The center of the n-d Gaussian.
        Shape: [N, n]
    cov: array_like
        The covariance matrix.
        Shape: [N, n, n]
    cfg: dict
        Configuration; `cfg['bin_dims']` defines the binning.

    Returns
    -------
    array_like
        The template PDF.
        Shape: [bin_dims_x, bin_dims_y]
    """
    _, _, X_flat, Y_flat = get_coordinates(cfg)

    # np.prod instead of np.product: the latter is a deprecated alias that
    # was removed in NumPy 2.0
    n_bins = int(np.prod(cfg['bin_dims']))

    # Shape: [n_bins, 1, 2]
    # The singleton axis broadcasts against the N Gaussians.
    pos = np.empty((n_bins, 1, 2))
    pos[:, 0, 0] = X_flat
    pos[:, 0, 1] = Y_flat

    Z_flat = multivariate_gaussian(pos, mu=mu, cov=cov)

    # shape: [n_bins_x, n_bins_y, n_models]
    # (-1 picks up the broadcast number of Gaussians)
    Z = np.reshape(Z_flat, (*cfg['bin_dims'], -1))

    # shape: [n_bins_x, n_bins_y]
    Z = np.sum(Z, axis=-1)

    # now let's normalize the PDF over the bins
    Z /= np.sum(Z)

    return Z


def get_smeared_template(template, cov, cfg, normalize=True):
    """Smear a template with a Gaussian uncertainty via a 2D convolution.

    Parameters
    ----------
    template: array_like
        The template to smear.
        Shape: [bin_dims_x, bin_dims_y]
    cov: array_like
        Covariance matrix of the Gaussian smearing kernel.
        Shape: [2, 2] or [1, 2, 2]
    cfg: dict
        Configuration; `cfg['bin_dims']` defines the binning
        (bins span -1 to 1 in x and y).
    normalize: bool, optional
        If True, the smeared template is normalized to sum to 1.

    Returns
    -------
    array_like
        The smeared template.
        Shape: same as `template`

    Raises
    ------
    ValueError
        If `cov` is not a single 2x2 covariance matrix.
    """
    # make sure cov has correct shape: [2, 2]
    cov = np.asarray(cov, dtype=float)
    if cov.ndim == 3 and cov.shape[0] == 1:
        cov = cov[0]
    if cov.shape != (2, 2):
        raise ValueError(
            'Expected a single 2x2 covariance matrix, got shape {}'.format(
                cov.shape))

    def _kernel_offsets(n_bins):
        # Offsets in units of the bin spacing, symmetric around 0.
        # The kernel must have an ODD number of bins with one bin exactly at
        # offset 0: a kernel sampled on an even grid has no central bin and
        # mode='same' would then shift the result by half a bin.
        # For odd n_bins these are the same points as linspace(-1, 1, n_bins).
        spacing = 2. / (n_bins - 1) if n_bins > 1 else 0.
        half = n_bins // 2
        return np.arange(-half, half + 1) * spacing

    # compute discretized gaussian kernel
    # (only the exponent is needed, since the kernel is normalized over bins)
    n_x, n_y = cfg['bin_dims']
    dx, dy = np.meshgrid(
        _kernel_offsets(n_x), _kernel_offsets(n_y), indexing='ij')
    delta = np.stack((dx, dy), axis=-1)
    exponent = np.einsum(
        '...k,kl,...l->...', delta, np.linalg.inv(cov), delta)
    unc_kernel = np.exp(-0.5 * exponent)
    unc_kernel /= np.sum(unc_kernel)

    smeared_template = signal.convolve(template, unc_kernel, mode='same')
    if normalize:
        smeared_template /= np.sum(smeared_template)
    return smeared_template


def plot_template(
            template, cfg, cb_label='Bin probability',
            only_contours=False, plot_cbar=True, fig=None,
            ax=None,
            **kwargs
        ):
    """Plot a binned template as (filled) contours in the x-y-plane.

    Parameters
    ----------
    template: array_like
        The values to plot.
        Shape: [bin_dims_x, bin_dims_y]
    cfg: dict
        Configuration; only `cfg['bin_dims']` is read.
    cb_label: str, optional
        Label of the colorbar.
    only_contours: bool, optional
        If True, draw contour lines (`ax.contour`) instead of filled
        contours (`ax.contourf`).
    plot_cbar: bool, optional
        If True, add a colorbar to the figure.
    fig: figure, optional
        Figure that owns `ax`. If None while `ax` is given, `ax.figure`
        is used. Ignored (replaced) if `ax` is None.
    ax: axes, optional
        Axes to draw on. If None, a new figure and axes are created.
    **kwargs
        Passed on to `ax.contour` / `ax.contourf`.

    Returns
    -------
    fig, ax
        The figure and axes that were drawn on.
    """
    if ax is None:
        fig, ax = plt.subplots()
    elif fig is None:
        # An axes without its figure was passed: look the figure up instead
        # of failing on `None.colorbar` further below.
        fig = ax.figure

    x = np.linspace(-1, 1, cfg['bin_dims'][0])
    y = np.linspace(-1, 1, cfg['bin_dims'][1])

    # contour functions expect the values indexed as [y, x]
    values = np.transpose(template)
    if only_contours:
        cf = ax.contour(x, y, values, **kwargs)
    else:
        cf = ax.contourf(x, y, values, **kwargs)
    ax.set_xlabel('x')
    ax.set_ylabel('y')

    if plot_cbar:
        # attach the colorbar explicitly to the axes that was drawn on
        cbar = fig.colorbar(cf, ax=ax)
        cbar.ax.set_ylabel(cb_label)

    return fig, ax


def plot_trial(template, cfg, **kwargs):
    """Plot a trial via `plot_template` with an event-count colorbar label.

    All additional keyword arguments are forwarded to `plot_template`.
    """
    trial_settings = dict(
        template=template,
        cfg=cfg,
        cb_label='Number of Events',
    )
    return plot_template(**trial_settings, **kwargs)


def inject_events(cfg, n_sig, template, smearing='template', seed=None):
    """Generate one binned trial of background plus injected signal events.

    Parameters
    ----------
    cfg: dict
        Configuration. Read keys: 'bin_dims', 'n_background' (Poisson mean
        of the total number of background events) and 'cov_true' (true
        uncertainty used for the smearing).
    n_sig: float
        Poisson mean of the number of signal events to inject.
    template: array_like
        The (unsmeared) signal template, normalized over the bins.
        Shape: [bin_dims_x, bin_dims_y]
    smearing: str, optional
        'template': sample events from the template convolved with the
            true uncertainty.
        'event': sample events from the unsmeared template and smear each
            event position individually; events that end up outside of the
            binned area are dropped.
    seed: int, optional
        Seed of the random number generator.

    Returns
    -------
    trial, bkg, sig_inj: array_like
        Event counts per bin for the full trial, the background only and
        the injected signal only.
        Shape: [bin_dims_x, bin_dims_y]

    Raises
    ------
    ValueError
        If `smearing` is not one of 'template' or 'event'.
    """
    if smearing not in ['template', 'event']:
        raise ValueError(smearing)

    rng = np.random.RandomState(seed)

    assert template.shape == cfg['bin_dims'], (cfg['bin_dims'], template.shape)

    # get background, evenly distributed over all bins
    # (np.prod: np.product is a deprecated alias, removed in NumPy 2.0)
    n_bins = np.prod(cfg['bin_dims'])
    n_bkg_per_bin = cfg['n_background'] / n_bins
    bkg = rng.poisson(lam=n_bkg_per_bin, size=cfg['bin_dims'])

    # draw number of signal events
    n_sig = rng.poisson(lam=n_sig)

    # sample locations according to template
    if smearing == 'template':
        # convolve template with true uncertainty
        template = get_smeared_template(template, cov=cfg['cov_true'], cfg=cfg)

    template_flat = np.reshape(template, (-1))
    sig_idx = rng.choice(np.arange(len(template_flat)), size=n_sig, p=template_flat, replace=True)

    # smear location of individual events according to true uncertainty
    if smearing == 'event' and len(sig_idx) > 0:

        # True event locations
        # Shape: [N_events, 2]
        event_pos_true = flat_idx_to_coordinates(sig_idx, cfg)

        # smear locations: draw all offsets in a single vectorized call, so
        # the covariance is decomposed once instead of once per event
        offsets = rng.multivariate_normal(
            mean=np.zeros(event_pos_true.shape[1]),
            cov=cfg['cov_true'],
            size=len(event_pos_true),
        )
        event_pos_reco = event_pos_true + offsets

        # figure out which bin idx this belongs to
        _, _, sig_idx_smeared = coordinates_to_idx(x=event_pos_reco[:, 0], y=event_pos_reco[:, 1], cfg=cfg)

        # only add those that are in bounds, and overwrite where to
        # inject events
        sig_idx = sig_idx_smeared[sig_idx_smeared >= 0]

    sig_idx, sig_counts = np.unique(sig_idx, return_counts=True)

    # fill the per-bin signal counts through a flat view of the array
    sig_inj = np.zeros_like(bkg)
    sig_inj_flat = np.reshape(sig_inj, (-1))
    sig_inj_flat[sig_idx] += sig_counts
    sig_inj = np.reshape(sig_inj_flat, cfg['bin_dims'])

    trial = bkg + sig_inj

    return trial, bkg, sig_inj

# ---------------
# Define Template
# ---------------
# The template is a sum of 100 identical, symmetric Gaussians whose centers
# follow the curve 0.5 / (2*x + 1.5) - 0.5 for x in [-0.5, 0.5].
# calculate points of Gauss centers along function
x = np.linspace(-0.5, 0.5, 100)
y = 0.5*(x*2+1.5)**(-1) - .5
# Shape: [100, 2]
mu = np.array([y, x]).T # flip x and y to be more elongated in y
# Disabled debug plot of the Gaussian centers.
# NOTE(review): `fix` is presumably a typo for `fig`; harmless while the
# branch is disabled.
if False:
    fix, ax = plt.subplots()
    ax.plot(y, x)
    ax.set_xlim(-1, 1)
    ax.set_ylim(-1, 1)
# One covariance shared by all Gaussians; the leading axis of size 1
# broadcasts against the centers in mu.
cov = np.diag([0.005, 0.005])[np.newaxis]
print(cov.shape)
template = get_template_from_gaussians(mu, cov, cfg)

plot_template(template, cfg)
# ---------------


In [None]:
%%timeit
# Time the generation of one trial with the default smearing='template'.
inject_events(cfg, n_sig=1000, template=template)

#### Check if convolving template is equal to smearing individual events

In [None]:
# Inject many signal events and smear each event position individually ...
# NOTE(review): no seed is passed, so this check is not reproducible
# run-to-run.
evts, bkg, sig = inject_events(cfg, n_sig=100000, smearing='event', template=template)

# ... and compare to the template convolved with the same true uncertainty.
smeared_template = get_smeared_template(template, cov=cfg['cov_true'], cfg=cfg)

# Same contour levels for both, so that filled contours (normalized event
# counts) and contour lines (convolved template) can be compared directly.
levels = np.linspace(0, 0.005, 10)
fig, ax = plot_template(sig/np.sum(sig), cfg, levels=levels)
plot_template(smeared_template, cfg, only_contours=True, plot_cbar=False, fig=fig, ax=ax, levels=levels)


#### Helper Functions 

In [None]:
from itertools import cycle

def get_ls_cycler(ls_list=['-', '--', ':', '-.']):
    """Return an endless iterator that cycles through the given line styles."""
    ls_cycler = cycle(ls_list)
    return ls_cycler

def get_color_cycler(colors=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']):
    """Return an endless iterator that cycles through the given colors."""
    color_cycler = cycle(colors)
    return color_cycler
