In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

#PDFs in BDT and sindec?
import os

# Optional: pin the BLAS/OpenMP backends used by numpy/scipy to a single
# thread, so that the number of CPUs in use is controlled entirely by csky.
# The variables are set here, before numpy is imported further down.
# NOTE: the block is currently disabled by the `if False` guard.
if False:
    os.environ['MKL_NUM_THREADS'] = "1"
    os.environ['NUMEXPR_NUM_THREADS'] = "1"
    os.environ['OMP_NUM_THREADS'] = "1"
    os.environ['OPENBLAS_NUM_THREADS'] = "1"
    os.environ['VECLIB_MAXIMUM_THREADS'] = "1"

import matplotlib as mpl
mpl.rcParams['figure.facecolor'] = 'w'
mpl.rcParams['savefig.facecolor'] = 'w'
import matplotlib.pyplot as plt
from matplotlib import colors, cm
import csky as cy
from csky import cext
import numpy as np
from tqdm.notebook import tqdm_notebook as tqdm
import astropy
#from icecube import astro
import histlite as hl
import healpy
import healpy as hp
import socket
import pickle
import copy
# Silence healpy's warnings for the rest of the session.
healpy.disable_warnings()
# Figure defaults: white background (mpl.rcParams above sets the same value)
# and 100 dpi.
plt.rc('figure', facecolor = 'w')
plt.rc('figure', dpi=100)

## Define Settings

In [None]:
# Dataset selection version handed to cy.get_analysis further down.
selection_version = 'version-001-p01'

# Paths are host-dependent; only hosts whose name contains 'cobalt' are known.
host_name = socket.gethostname()

if 'cobalt' in host_name:
    print('Working on Cobalts')
    data_prefix = '/data/user/ssclafani/data/cscd/final'
    ana_dir = '/data/user/ssclafani/data/analyses/'
    plot_dir = '/data/user/mhuennefeld/data/analyses/DNNCascadeCodeReview/unblinding_checks/plots/unblinding/pi0_cutoff_sensitivity'

else:
    # Build a single message string: passing the host name as a second
    # positional argument would make the error render as a tuple.
    raise ValueError('Unknown host: {}'.format(host_name))

In [None]:
# Make sure every output directory exists before any figure is saved.
for dir_path in [plot_dir]:
    if not os.path.exists(dir_path):
        print('Creating directory:', dir_path)
        # exist_ok=True: do not crash if another process created the
        # directory between the existence check and this call.
        os.makedirs(dir_path, exist_ok=True)

## Load Data

In [None]:
# Data repository handle and dataset specification; both are passed to
# cy.get_analysis in the next cell.
repo = cy.selections.Repository()
specs = cy.selections.DNNCascadeDataSpecs.DNNC_10yr

In [None]:
%%time

# Build the csky analysis object for the chosen selection version and specs.
# `ana` is used by every trial runner created later in the notebook.
ana = cy.get_analysis(
    repo, selection_version, specs, 
    # optional explicit gamma grid, currently left at the csky default:
    #gammas=np.r_[0.1:6.01:0.125],
)

In [None]:
# Take the first sub-analysis and display its `sig` attribute
# (presumably the signal Monte Carlo events -- confirm against csky).
a = ana.anas[0]
a.sig

In [None]:
# Display the `bg_data` attribute of the same sub-analysis
# (presumably the events used as background -- confirm against csky).
a.bg_data

## Helpers

In [None]:
from cycler import cycle
from copy import deepcopy

# Colour palettes for the plotting helpers.
soft_colors = cy.plotting.soft_colors
# NOTE(review): this rebinds `colors`, which the first cell imported from
# matplotlib (`from matplotlib import colors, cm`). From here on `colors` is
# the list of colour strings of the active matplotlib property cycle, not the
# matplotlib.colors module.
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']


def get_bias_allt(tr, ntrials=200, n_sigs=np.r_[:101:10], quiet=False):
    """Run fits for a range of injected signal strengths and merge them.

    Parameters
    ----------
    tr : trial runner
        Object providing ``get_many_fits``.
    ntrials : int
        Number of trials fitted per injected signal strength.
    n_sigs : iterable of int
        Injected signal strengths; each value is also used as the seed of
        its batch of trials.
    quiet : bool
        If False, print a one-line progress indicator.

    Returns
    -------
    The concatenation (``cy.utils.Arrays.concatenate``) of all batches, with
    an extra ``ntrue`` column holding the injected signal strength.
    """
    batches = []
    for n_sig in n_sigs:
        if not quiet:
            # '\r' rewrites the same output line for each signal strength
            print(f'\r{n_sig:4d} ...', end='', flush=True)
        batches.append(
            tr.get_many_fits(ntrials, n_sig=n_sig, logging=False, seed=n_sig))
    if not quiet:
        # terminate the progress line
        print()

    # tag every fit with the signal strength that was injected for it
    for n_sig, batch in zip(n_sigs, batches):
        batch['ntrue'] = np.repeat(n_sig, len(batch))

    return cy.utils.Arrays.concatenate(batches)

def get_color_cycler():
    """Return a fresh, endlessly repeating iterator over the `colors` list."""
    color_iter = cycle(colors)
    return color_iter

def plot_ns_bias(ax, tr, allt, label=''):
    """Plot the fitted ``ns`` against the injected ``ntrue`` on ``ax``.

    Parameters
    ----------
    ax : matplotlib axes to draw on.
    tr : unused by this function.
    allt : trials with ``ntrue`` and ``ns`` columns (as produced by
        ``get_bias_allt``).
    label : legend label for the plotted curve.
    """
    injected = np.unique(allt.ntrue)
    half_step = 0.5 * np.mean(np.diff(injected))
    # one bin per injected value: edges sit half a step to either side
    edges = np.r_[injected - half_step, injected[-1] + half_step]

    hist = hl.hist((allt.ntrue, allt.ns), bins=(edges, 100))
    hl.plot1d(
        ax, hist.contain_project(1), errorbands=True,
        drawstyle='default', label=label)

    # identical x and y ranges, plus the diagonal of an unbiased fit
    span = edges[[0, -1]]
    y_limits = ax.set_ylim(span)
    ax.set_xlim(y_limits)
    ax.plot(span, span, color='C0', ls='--', lw=1, zorder=-10)
    ax.set_aspect('equal')

    ax.set_xlabel(r'$n_{inj}$')
    ax.set_ylabel(r'$n_s$')
    ax.grid()

def plot_gamma_bias(ax, tr, allt, label=''):
    """Plot the fitted ``gamma`` against the injected ``ntrue`` on ``ax``.

    Parameters
    ----------
    ax : matplotlib axes to draw on.
    tr : trial runner; the gamma of the first flux of its first signal
        injector is drawn as the horizontal reference line.
    allt : trials with ``ntrue`` and ``gamma`` columns.
    label : legend label for the plotted curve.
    """
    injected = np.unique(allt.ntrue)
    half_step = 0.5 * np.mean(np.diff(injected))
    # one bin per injected value: edges sit half a step to either side
    edges = np.r_[injected - half_step, injected[-1] + half_step]
    true_gamma = tr.sig_injs[0].flux[0].gamma

    hist = hl.hist((allt.ntrue, allt.gamma), bins=(edges, 100))
    hl.plot1d(
        ax, hist.contain_project(1), errorbands=True,
        drawstyle='default', label=label)

    ax.set_xlim(edges[[0, -1]])
    ax.set_ylim(1, 4)
    # reference line at the injected spectral index
    ax.axhline(true_gamma, color='C0', ls='--', lw=1, zorder=-10)

    ax.set_xlabel(r'$n_{inj}$')
    ax.set_ylabel(r'$\gamma$')
    ax.grid()

def plot_bkg_trials(
            bg, fig=None, ax=None, 
            label='{} bg trials', 
            label_fit=r'$\chi^2[{:.2f}\mathrm{{dof}},\ \eta={:.3f}]$', 
            color=colors[0],
            density=False,
            bins=50,
        ):
    """Plot a background TS distribution and, if available, its fitted PDF.

    Parameters
    ----------
    bg : TS distribution object
        Must provide ``get_hist(bins=...)`` and ``n_total``. If it also has a
        ``pdf`` attribute, ``bg.pdf``, ``bg.ndof`` and ``bg.eta`` are used to
        overlay the fitted curve.
    fig, ax : matplotlib figure / axes, optional
        If ``ax`` is None a new figure and axes are created. If only ``ax``
        is given, ``fig`` is taken from ``ax.figure``.
    label : str or None
        Legend label for the histogram; formatted with ``bg.n_total``.
    label_fit : str or None
        Legend label for the fitted curve; formatted with ``bg.ndof`` and
        ``bg.eta``.
    color : matplotlib colour for both the histogram and the curve.
    density : bool
        If True plot the normalised histogram, otherwise raw trial counts.
    bins : passed through to ``bg.get_hist``.

    Returns
    -------
    (fig, ax)
    """
    if ax is None:
        fig, ax = plt.subplots(figsize=(6, 4))
    elif fig is None:
        # Caller supplied only the axes: return its parent figure instead of
        # None so that `fig.savefig(...)` works on the returned value.
        fig = ax.figure

    h = bg.get_hist(bins=bins)
    if density:
        h = h.normalize()
    if label is not None:
        label = label.format(bg.n_total)
    hl.plot1d(ax, h, crosses=True, color=color, label=label)

    # compare with the chi2 fit:
    if hasattr(bg, 'pdf'):
        x = h.centers[0]
        if label_fit is not None:
            label_fit = label_fit.format(bg.ndof, bg.eta)
        if density:
            curve = bg.pdf(x)
        else:
            # scale the PDF by the histogram integral so it is comparable
            # to raw trial counts
            curve = h.integrate().values * bg.pdf(x)
        ax.semilogy(x, curve, lw=1, ls='--', label=label_fit, color=color)

    ax.set_xlabel(r'TS')
    ax.set_ylabel(r'Density' if density else r'number of trials')
    ax.legend()

    return fig, ax

## Setup Analysis

In [None]:
import sys
# Put the directory two levels above the working directory at the front of
# the import path so that the `config` module can be imported.
sys.path.insert(0, '../..')

import config as cg

# Override the base directory stored on the config module; get_gp_tr passes
# this value on to cg.get_gp_conf.
cg.base_dir = '/data/user/mhuennefeld/data/analyses/unblinding_v1.0.1/'

In [None]:
def get_gp_tr(template_str, cutoff=np.inf, gamma=None, cpus=20):
    """Create a trial runner from the configuration in ``config``.

    Parameters
    ----------
    template_str : template name forwarded to ``cg.get_gp_conf``.
    cutoff : energy cutoff; multiplied by 1e3 before being passed on as
        ``cutoff_GeV`` (so presumably given in TeV).
    gamma : spectral index forwarded to ``cg.get_gp_conf``.
    cpus : number of CPUs handed to csky as ``mp_cpus``.

    Returns
    -------
    The csky trial runner built on the global ``ana``.
    """
    conf = cg.get_gp_conf(
        template_str=template_str,
        gamma=gamma,
        cutoff_GeV=cutoff * 1e3,
        base_dir=cg.base_dir,
    )
    # 'dir' is removed from the configuration (and shown) before the
    # remaining entries are handed to csky
    conf_dir = conf.pop('dir')
    print(conf_dir)
    return cy.get_trial_runner(conf, ana=ana, mp_cpus=cpus)

def get_pi0_tr(gamma, cutoff, cpus=20):
    """Create a trial runner for the 'Fermi-LAT_pi0_map' template.

    Parameters
    ----------
    gamma : spectral index of the power-law flux; also passed to the fitter
        through ``fitter_args``.
    cutoff : energy cutoff; multiplied by 1e3 before being used as the
        ``energy_cutoff`` of the flux (so presumably given in TeV).
    cpus : number of CPUs handed to csky as ``mp_cpus``.

    Returns
    -------
    The csky trial runner built on the global ``ana``.
    """
    energy_cutoff = cutoff * 1e3
    pi0_conf = {
        'template': cg.template_repo.get_template('Fermi-LAT_pi0_map'),
        'flux': cy.hyp.PowerLawFlux(gamma, energy_cutoff=energy_cutoff),
        'randomize': ['ra'],
        'fitter_args': {'gamma': gamma},
        'sigsub': True,
        'update_bg': True,
        'fast_weight': False,
    }
    return cy.get_trial_runner(pi0_conf, ana=ana, mp_cpus=cpus)


#### Get TrialRunners

In [None]:
# Hypothesis grid: every (gamma, cutoff) pair gets its own trial runner.
gamma_grid = [2.7, 3.0, 3.3]
cutoff_grid = [1, 10, 100, np.inf]

# Keys are (gamma, cutoff) tuples, filled gamma by gamma.
tr_dict = {}
for gamma in tqdm(gamma_grid, total=len(gamma_grid)):
    tr_dict.update({
        (gamma, cutoff): get_pi0_tr(gamma=gamma, cutoff=cutoff)
        for cutoff in cutoff_grid
    })



#### Get trials for each template

In [None]:
n_trials = 50
n_sig = 700
seed = 42

# For every hypothesis: n_trials trials with n_sig injected signal events
# (poisson=True). Trial i uses seed + i, for every hypothesis alike.
trials_dict = {}
for key, tr in tr_dict.items():
    print('Injecting {} signal events for template {}'.format(n_sig, key))
    trials_dict[key] = [
        tr.get_one_trial(n_sig=n_sig, poisson=True, seed=seed + i)
        for i in tqdm(range(n_trials), total=n_trials)
    ]
    

#### Get fits for each template combination

In [None]:
# Fit every set of injected trials with every tested hypothesis.
# Keys of ts_dict are (injected key, tested key).
ts_dict = {}
for key_inj in tr_dict:
    for key, tr in tr_dict.items():
        print('Computing TS values for injection {} and testing with {}'.format(key_inj, key))

        injected_trials = trials_dict[key_inj]
        fit_results = np.array([
            tr.get_one_fit_from_trial(trial)
            for trial in tqdm(injected_trials, total=len(injected_trials))
        ])

        # column 0 of each fit result is stored as 'ts', column 1 as 'ns'
        ts_dict[(key_inj, key)] = cy.utils.Arrays({
            'ts': fit_results[:, 0],
            'ns': fit_results[:, 1],
        })



#### Find best fit based on TS

In [None]:
import pandas as pd

# One entry per (injected hypothesis, trial): the injected parameters and the
# parameters of the tested hypothesis that produced the largest TS.
gamma_inj, cutoff_inj = [], []
gamma_fit, cutoff_fit = [], []

for key_inj in tr_dict:
    inj_gamma, inj_cutoff = key_inj[0], key_inj[1]
    for j in range(n_trials):
        # linear scan over the tested hypotheses; on ties the first one wins
        max_ts, max_key = -np.inf, None
        for key in tr_dict:
            ts_j = ts_dict[(key_inj, key)].ts[j]
            if ts_j > max_ts:
                max_ts, max_key = ts_j, key
        gamma_inj.append(inj_gamma)
        cutoff_inj.append(inj_cutoff)
        gamma_fit.append(max_key[0])
        cutoff_fit.append(max_key[1])

df_best_ts = pd.DataFrame({
    'gamma_inj': gamma_inj,
    'cutoff_inj': cutoff_inj,
    'gamma_fit': gamma_fit,
    'cutoff_fit': cutoff_fit,
})
df_best_ts

In [None]:
# Injected vs. best-fit spectral index (three grid values -> 3x3 bins).
fig, ax = plt.subplots()
ax.hist2d(df_best_ts['gamma_inj'], df_best_ts['gamma_fit'], bins=3)
ax.set_xlabel(r'injected $\gamma$')
ax.set_ylabel(r'best-fit $\gamma$')

# Injected vs. best-fit cutoff on a log10 scale. An infinite cutoff has no
# finite logarithm, so it is drawn at this placeholder value instead.
LOG10_NO_CUTOFF = 4

fig, ax = plt.subplots()
log_c_inj = np.log10(df_best_ts['cutoff_inj'])
log_c_fit = np.log10(df_best_ts['cutoff_fit'])
log_c_inj[~np.isfinite(log_c_inj)] = LOG10_NO_CUTOFF
log_c_fit[~np.isfinite(log_c_fit)] = LOG10_NO_CUTOFF
ax.hist2d(log_c_inj, log_c_fit, bins=4)
ax.set_xlabel(r'$\log_{10}$(injected cutoff), no cutoff $\rightarrow$ 4')
ax.set_ylabel(r'$\log_{10}$(best-fit cutoff), no cutoff $\rightarrow$ 4');

#### Get background fits for each template

In [None]:
n_bkg_trials = 10000
seed = 1337

# Background-only fits for every hypothesis; all of them use the same seed.
bkg_dict = {}
for key in tr_dict:
    print('Running background trials for template {}'.format(key))
    tr = tr_dict[key]
    bkg_fits = tr.get_many_fits(n_trials=n_bkg_trials, seed=seed, mp_cpus=20)
    bkg_dict[key] = bkg_fits
        

#### Plot TS distribution

In [None]:
# `res_dict` is not defined anywhere in this notebook, so referencing it
# directly raises NameError on a fresh kernel. It presumably holds observed
# fit results as (ts, ns, ...) per hypothesis key -- confirm. The observed-TS
# line is therefore drawn only for keys that have such a result.
observed_results = globals().get('res_dict', {})

for key, bg in bkg_dict.items():
    bg_tsd = cy.dists.TSD(bg)
    fig, ax = plot_bkg_trials(bg_tsd)

    if key in observed_results:
        ts = observed_results[key][0]
        ns = observed_results[key][1]
        ax.axvline(
            ts, color='0.8', ls='--', lw=2,
            label='TS: {:3.3f} | ns: {:3.1f}'.format(ts, ns),
        )

    # TS threshold corresponding to 5 sigma for this background distribution
    ts_5sig = bg_tsd.isf_nsigma(5)
    ax.axvline(
        ts_5sig, ls='--', lw=1,
        label='5-sigma TS: {:3.3f}'.format(ts_5sig),
    )
    ax.set_title('Analysis: {}'.format(key))
    ax.set_yscale('log')
    # redraw the legend so that the lines added above are included
    ax.legend()
    fig.savefig('{}/ts_dist_{}.png'.format(plot_dir, key))

#### Compute Significance

In [None]:
# p-value and significance of every fitted TS, keyed like ts_dict by
# (injected key, tested key).
p_val_dict, sigma_dict = {}, {}

for key, ts_values in ts_dict.items():
    # the background distribution belongs to the *tested* hypothesis
    tested_key = key[1]
    bg = cy.dists.TSD(bkg_dict[tested_key])
    max_bg_ts = np.max(bg.values)

    # work on a copy and cap it at the largest background TS
    ts = np.array(ts_values.ts)
    mask_above = ts_values.ts > max_bg_ts
    if np.sum(mask_above) > 0:
        print('Setting {} ts values to max bkg ts value of {}.'.format(
            np.sum(mask_above), max_bg_ts))
        ts[mask_above] = max_bg_ts

    p_val_dict[key] = bg.sf(ts)
    sigma_dict[key] = bg.sf_nsigma(ts)
