In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

#PDFs in BDT and sindec?
import os

# Optionally pin the thread pools of the BLAS/OpenMP backends used by
# numpy/scipy to one thread each, so that the number of CPUs in use is
# controlled by csky alone.
# The block is currently disabled through the `if False` guard.
# NOTE(review): such variables are usually only honoured when they are set
# before numpy/scipy are first imported -- confirm before enabling.
if False:
    os.environ['MKL_NUM_THREADS'] = "1"
    os.environ['NUMEXPR_NUM_THREADS'] = "1"
    os.environ['OMP_NUM_THREADS'] = "1"
    os.environ['OPENBLAS_NUM_THREADS'] = "1"
    os.environ['VECLIB_MAXIMUM_THREADS'] = "1"

import matplotlib as mpl
mpl.rcParams['figure.facecolor'] = 'w'
mpl.rcParams['savefig.facecolor'] = 'w'
import matplotlib.pyplot as plt
from matplotlib import colors, cm
import csky as cy
from csky import cext
import numpy as np
import astropy
from tqdm.notebook import tqdm_notebook as tqdm
#from icecube import astro
import histlite as hl
import healpy
import healpy as hp
import socket
import pickle
import copy
healpy.disable_warnings()
plt.rc('figure', facecolor = 'w')
plt.rc('figure', dpi=100)

## Define Settings

In [None]:
# Resolve the host-dependent input and output locations.
host_name = socket.gethostname()

if 'cobalt' in host_name:
    print('Working on Cobalts')
    data_prefix = '/data/user/ssclafani/data/cscd/final'
    ana_dir = '/data/user/ssclafani/data/analyses/'
    plot_dir = '/data/user/mhuennefeld/data/analyses/DNNCascadeCodeReview/unblinding_checks/plots/unblinding/track_gp_sensitivity'
else:
    # Put the host name into the message itself: passing it as a second
    # positional argument makes the exception render as a tuple.
    raise ValueError('Unknown host: {}'.format(host_name))

In [None]:
# Make sure every output directory exists before anything is written to it.
for dir_path in [plot_dir]:
    if not os.path.exists(dir_path):
        print('Creating directory:', dir_path)
    # exist_ok avoids a crash when the directory appears between the check
    # above and the creation (e.g. a second notebook/job starting up).
    os.makedirs(dir_path, exist_ok=True)

## Load Data

In [None]:
# Version tag of the selection that is requested from the repository.
selection_version = 'version-001-p01'
# Repository object handed to cy.get_analysis in the next cell.
repo = cy.selections.Repository()
# Data specification for the DNNCascade selection (DNNC_10yr).
specs = cy.selections.DNNCascadeDataSpecs.DNNC_10yr

In [None]:
%%time

# Build the analysis for the selection configured above. `ana` is the
# analysis used by most trial-runner helpers defined further down.
ana = cy.get_analysis(repo, selection_version, specs)

In [None]:
# Take the first sub-analysis and display its `sig` attribute.
a = ana.anas[0]
a.sig

In [None]:
# Display the `bg_data` attribute of the same sub-analysis.
a.bg_data

#### Load PSTracksV4

In [None]:
%%time

# Second analysis, built from the `ps_v4` data specs with its own
# Repository instance. It is used by get_gp_tr_tracks and by the event
# count at the end of the notebook.
track_version = 'version-004-p00'
ana_tracks = cy.get_analysis(
    cy.selections.Repository(), track_version, cy.selections.PSDataSpecs.ps_v4, 
)

## Helpers

In [None]:
from cycler import cycle
from copy import deepcopy

# Colour palettes used by the plotting helpers and cells below.
soft_colors = cy.plotting.soft_colors
# NOTE: this rebinds `colors` to the list of colours of the active
# matplotlib property cycle and thereby shadows the `matplotlib.colors`
# module imported at the top of the notebook.
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']


def get_bias_allt(tr, ntrials=200, n_sigs=np.r_[:101:10], quiet=False):
    """Run fits at several injected signal strengths and merge the results.

    Parameters
    ----------
    tr : object
        Trial runner providing ``get_many_fits``.
    ntrials : int
        Number of fits requested for every injected strength.
    n_sigs : iterable of int
        Injected signal strengths. Each value is also used as the seed of
        its own batch of fits.
    quiet : bool
        If True, no progress output is printed.

    Returns
    -------
    The per-strength batches merged with ``cy.utils.Arrays.concatenate``.
    Every batch gets an additional ``ntrue`` column holding the injected
    strength it was produced with.
    """
    batches = []
    for n_inj in n_sigs:
        if not quiet:
            # Carriage return keeps the progress on a single line.
            print(f'\r{n_inj:4d} ...', end='', flush=True)
        batch = tr.get_many_fits(
            ntrials, n_sig=n_inj, logging=False, seed=n_inj)
        batches.append(batch)

    if not quiet:
        # Terminate the progress line.
        print()

    # Tag every fit with the strength that was injected for it.
    for n_inj, batch in zip(n_sigs, batches):
        batch['ntrue'] = np.repeat(n_inj, len(batch))

    return cy.utils.Arrays.concatenate(batches)

def get_color_cycler(colors=colors):
    """Wrap ``colors`` in ``cycle`` so that colours can be drawn with ``next``.

    The default is the module-level ``colors`` list as it was bound when
    this function was defined.
    """
    return cycle(colors)

def plot_ns_bias(ax, tr, allt, label=''):
    """Plot the fitted ``ns`` against the injected ``ntrue`` on ``ax``.

    Parameters
    ----------
    ax : axes object the plot is drawn on.
    tr : not used by this function.
    allt : trials providing the ``ntrue`` and ``ns`` columns.
    label : str
        Legend label forwarded to ``hl.plot1d``.
    """
    injected = np.unique(allt.ntrue)
    # One bin per injected value, centred on it.
    half_step = 0.5 * np.mean(np.diff(injected))
    edges = np.r_[injected - half_step, injected[-1] + half_step]

    hist2d = hl.hist((allt.ntrue, allt.ns), bins=(edges, 100))
    hl.plot1d(
        ax, hist2d.contain_project(1), errorbands=True,
        drawstyle='default', label=label,
    )

    # Same range on both axes, taken from the outermost bin edges.
    span = edges[[0, -1]]
    y_range = ax.set_ylim(span)
    ax.set_xlim(y_range)

    # Dashed diagonal marking fitted == injected.
    ax.plot(span, span, color='C0', ls='--', lw=1, zorder=-10)
    ax.set_aspect('equal')

    ax.set_xlabel(r'$n_{inj}$')
    ax.set_ylabel(r'$n_s$')
    ax.grid()

def plot_gamma_bias(ax, tr, allt, label=''):
    """Plot the fitted ``gamma`` against the injected ``ntrue`` on ``ax``.

    Parameters
    ----------
    ax : axes object the plot is drawn on.
    tr : trial runner; ``tr.sig_injs[0].flux[0].gamma`` is drawn as the
        expected value.
    allt : trials providing the ``ntrue`` and ``gamma`` columns.
    label : str
        Legend label forwarded to ``hl.plot1d``.
    """
    injected = np.unique(allt.ntrue)
    # One bin per injected value, centred on it.
    half_step = 0.5 * np.mean(np.diff(injected))
    edges = np.r_[injected - half_step, injected[-1] + half_step]
    gamma_expected = tr.sig_injs[0].flux[0].gamma

    hist2d = hl.hist((allt.ntrue, allt.gamma), bins=(edges, 100))
    hl.plot1d(
        ax, hist2d.contain_project(1), errorbands=True,
        drawstyle='default', label=label,
    )

    ax.set_xlim(edges[[0, -1]])
    ax.set_ylim(1, 4)
    # Dashed horizontal line at the expected spectral index.
    ax.axhline(gamma_expected, color='C0', ls='--', lw=1, zorder=-10)

    ax.set_xlabel(r'$n_{inj}$')
    ax.set_ylabel(r'$\gamma$')
    ax.grid()

def plot_bkg_trials(
            bg, fig=None, ax=None, 
            label='{} bg trials', 
            label_fit=r'$\chi^2[{:.2f}\mathrm{{dof}},\ \eta={:.3f}]$', 
            color=colors[0],
            density=False,
            bins=50,
        ):
    """Plot a background TS distribution and, if available, its fitted pdf.

    Parameters
    ----------
    bg : object
        TS distribution providing ``get_hist`` and ``n_total``. If it also
        has a ``pdf`` attribute, ``bg.pdf``, ``bg.ndof`` and ``bg.eta`` are
        used to overlay the fit.
    fig, ax : optional
        Figure and axes to draw on. A new figure is created when ``ax`` is
        None. When only ``ax`` is given, its parent figure is returned.
    label : str or None
        Legend label of the histogram, formatted with ``bg.n_total``.
    label_fit : str or None
        Legend label of the fit, formatted with ``bg.ndof`` and ``bg.eta``.
    color : colour used for both histogram and fit.
    density : bool
        If True, the normalized histogram and the bare pdf are drawn;
        otherwise the pdf is scaled by the integral of the histogram.
    bins : forwarded to ``bg.get_hist``.

    Returns
    -------
    (fig, ax)
    """
    if ax is None:
        fig, ax = plt.subplots(figsize=(6, 4))
    elif fig is None:
        # Do not hand back None when the caller only supplied the axes.
        fig = ax.figure

    h = bg.get_hist(bins=bins)
    if density:
        h = h.normalize()
    if label is not None:
        label = label.format(bg.n_total)
    hl.plot1d(ax, h, crosses=True, color=color, label=label)

    # compare with the chi2 fit:
    if hasattr(bg, 'pdf'):
        x = h.centers[0]
        if label_fit is not None:
            label_fit = label_fit.format(bg.ndof, bg.eta)
        curve = bg.pdf(x)
        if not density:
            # Scale the pdf to the number of trials in the histogram.
            curve = h.integrate().values * curve
        ax.semilogy(x, curve, lw=1, ls='--', label=label_fit, color=color)

    ax.set_xlabel(r'TS')
    ax.set_ylabel(r'Density' if density else r'number of trials')
    ax.legend()

    return fig, ax

## Setup Analysis

In [None]:
import sys
# Put the directory two levels up at the front of the module search path,
# presumably so that the local `config` module imported below resolves.
# The relative entry depends on the working directory of the kernel.
sys.path.insert(0, '../..')

import config as cg

# Override the base directory of the config module; it is also used below
# to locate result and trial files.
cg.base_dir = '/data/user/mhuennefeld/data/analyses/unblinding_v1.0.1/'

In [None]:
def get_tr(sindec, gamma, extension=0., cutoff=np.inf, cpus=20, sigsub=True):
    """Build a point-source trial runner on the module-level ``ana``.

    Parameters
    ----------
    sindec : float
        Sine of the source declination; the source is placed at ra=0.
    gamma : spectral index forwarded to ``cg.get_ps_conf``.
    extension : source extension forwarded to ``cy.utils.sources``.
    cutoff : float
        Energy cutoff; multiplied by 1e3 before it is passed on as
        ``cutoff_GeV``.
    cpus : int
        Passed to the trial runner as ``mp_cpus``.
    sigsub : forwarded to ``cg.get_ps_conf``.

    Returns
    -------
    (trial runner, source)
    """
    declination = np.arcsin(sindec)
    src = cy.utils.sources(0, declination, extension=extension, deg=False)
    ps_conf = cg.get_ps_conf(
        src=src,
        gamma=gamma,
        cutoff_GeV=cutoff * 1e3,
        sigsub=sigsub,
    )
    runner = cy.get_trial_runner(ana=ana, conf=ps_conf, mp_cpus=cpus)
    return runner, src

def get_tr_tracks(sindec, gamma, extension=0., cutoff=np.inf, cpus=20, sigsub=False, ana=ana):
    """Build a point-source trial runner from an explicit configuration.

    Parameters
    ----------
    sindec : float
        Sine of the source declination; the source is placed at ra=0.
    gamma : spectral index of the power-law flux.
    extension : source extension forwarded to ``cy.utils.sources``.
    cutoff : float
        Energy cutoff; multiplied by 1e3 before it is used as
        ``energy_cutoff`` of the flux.
    cpus : int
        Passed to the trial runner as ``mp_cpus``.
    sigsub : stored under the ``'sigsub'`` key of the configuration.
    ana : analysis the trial runner is built on.
        NOTE(review): the default is the module-level ``ana`` as bound when
        this function was defined, not ``ana_tracks`` -- confirm that this
        is intended for a helper named ``*_tracks``.

    Returns
    -------
    (trial runner, source)
    """
    src = cy.utils.sources(0, np.arcsin(sindec), extension=extension, deg=False)
    conf = dict(
        src=src,
        flux=cy.hyp.PowerLawFlux(gamma, energy_cutoff=cutoff * 1e3),
        update_bg=True,
        sigsub=sigsub,
        randomize=['ra'],
    )
    runner = cy.get_trial_runner(ana=ana, conf=conf, mp_cpus=cpus)
    return runner, src

def get_gp_tr(template_str, cutoff=np.inf, gamma=None, cpus=20):
    """Build a template trial runner on the module-level ``ana``.

    Parameters
    ----------
    template_str : str
        Template name forwarded to ``cg.get_gp_conf``.
    cutoff : float
        Energy cutoff; multiplied by 1e3 before it is passed on as
        ``cutoff_GeV``.
    gamma : forwarded to ``cg.get_gp_conf``.
    cpus : int
        Passed to the trial runner as ``mp_cpus``.

    Returns
    -------
    The trial runner.
    """
    conf = cg.get_gp_conf(
        template_str=template_str,
        gamma=gamma,
        cutoff_GeV=cutoff * 1e3,
        base_dir=cg.base_dir,
    )
    return cy.get_trial_runner(conf, ana=ana, mp_cpus=cpus)

def get_gp_tr_tracks(template_str, cutoff=np.inf, gamma=None, cpus=20):
    """Build a template trial runner on the module-level ``ana_tracks``.

    The configuration comes from ``cg.get_gp_conf``; its ``'dir'`` entry is
    removed before the trial runner is created.

    Parameters
    ----------
    template_str : str
        Template name forwarded to ``cg.get_gp_conf``.
    cutoff : float
        Energy cutoff; multiplied by 1e3 before it is passed on as
        ``cutoff_GeV``.
    gamma : forwarded to ``cg.get_gp_conf``.
    cpus : int
        Passed to the trial runner as ``mp_cpus``.

    Returns
    -------
    The trial runner.
    """
    conf = cg.get_gp_conf(
        template_str=template_str,
        gamma=gamma,
        cutoff_GeV=cutoff * 1e3,
        base_dir=cg.base_dir,
    )
    # Drop the 'dir' entry of the configuration (raises KeyError if absent).
    conf.pop('dir')
    return cy.get_trial_runner(conf, ana=ana_tracks, mp_cpus=cpus)

def get_template_tr(template, gamma=2.7, cutoff_tev=np.inf, ana=ana, cpus=20):
    """Build a template trial runner from an explicit configuration.

    Parameters
    ----------
    template : stored under the ``'template'`` key of the configuration.
    gamma : float
        Spectral index of the power-law flux; the same value is also passed
        on through ``fitter_args``.
    cutoff_tev : float
        Energy cutoff; multiplied by 1000 before it is used as
        ``energy_cutoff`` of the flux.
    ana : analysis the trial runner is built on. The default is the
        module-level ``ana`` as bound when this function was defined.
    cpus : int
        Passed to the trial runner as ``mp_cpus``.

    Returns
    -------
    The trial runner.
    """
    flux = cy.hyp.PowerLawFlux(gamma, energy_cutoff=cutoff_tev * 1000.)
    conf = dict(
        template=template,
        flux=flux,
        randomize=['ra'],
        fitter_args={'gamma': gamma},
        sigsub=True,
        update_bg=True,
        fast_weight=False,
    )
    return cy.get_trial_runner(conf, ana=ana, mp_cpus=cpus)

#### Get TrialRunners

In [None]:
# Trial runners compared in this notebook, keyed by model name:
# 'pi0' is built with get_gp_tr, 'pi0_ps_v4' with get_gp_tr_tracks.
# The kra5/kra50 entries are currently commented out.
tr_dict = {
    'pi0': get_gp_tr('pi0'),
    #'kra5': get_gp_tr('kra5'),
    #'kra50': get_gp_tr('kra50'),
    'pi0_ps_v4': get_gp_tr_tracks('pi0'),
}

#### Get results if they exist

In [None]:
# Collect the unblinded results that already exist on disk, using the same
# keys as tr_dict. Keys without a result file are left out.
res_dict = {}
for key in tr_dict:
    f_path = os.path.join(
        cg.base_dir,
        'gp/results/{}/{}_unblinded.npy'.format(key, key),
    )
    if not os.path.exists(f_path):
        continue
    res_dict[key] = np.load(f_path)

#### Get bkg fits for each trial runner

In [None]:
# Files with previously computed background trials, keyed by model name.
# Only keys that are also present in tr_dict are looked at below.
bkg_file_dict = {
    'fermibubbles_50TeV': '{}/gp/trials/{}/{}/trials.dict'.format(cg.base_dir, 'DNNC', 'fermibubbles'),
    'pi0': '{}/gp/trials/{}/{}/trials.dict'.format(cg.base_dir, 'DNNC', 'pi0'),
    'kra5': '{}/gp/trials/{}/{}/trials.dict'.format(cg.base_dir, 'DNNC', 'kra5'),
    'kra50': '{}/gp/trials/{}/{}/trials.dict'.format(cg.base_dir, 'DNNC', 'kra50'),
    'snr': '{}/stacking/{}_bg.dict'.format(cg.base_dir, 'snr'),
    'pwn': '{}/stacking/{}_bg.dict'.format(cg.base_dir, 'pwn'),
    'unid': '{}/stacking/{}_bg.dict'.format(cg.base_dir, 'unid'),
    'pi0_ps_v4': os.path.join(plot_dir, 'trials_pi0_ps_v4.pkl'),
}
# Number of background trials and seed used when trials have to be run here.
n_bkg_trials = 30000
seed = 1337

# Background TS values per trial runner.
bkg_dict = {}
for key, tr in tr_dict.items():
    if key in bkg_file_dict and os.path.exists(bkg_file_dict[key]):
        print('Loading background trials for template {}'.format(key))
        # NOTE: allow_pickle=True unpickles the file content; only load
        # files from trusted locations.
        sig = np.load(bkg_file_dict[key], allow_pickle=True)
        if key in ['pi0', 'kra5', 'kra50']:
            # Nested layout: poisson -> nsig -> 0.0 -> ts
            bkg_dict[key] = sig['poisson']['nsig'][0.0]['ts']
        elif key in ['fermibubbles_50TeV']:
            # The cutoff value is parsed from the key and used as an
            # additional nesting level.
            cutoff = float(key.replace('fermibubbles_', '').replace('TeV', ''))
            print(key, 'cutoff', cutoff)
            bkg_dict[key] = sig['poisson']['cutoff'][cutoff]['nsig'][0.0]['ts']
        else:
            # Any other file is expected to hold an object with a `ts` attribute.
            bkg_dict[key] = sig.ts
    
    else:
        print('Running background trials for model {}'.format(key))
        trials = tr.get_many_fits(
            n_trials=n_bkg_trials, seed=seed, mp_cpus=25)
        
        bkg_dict[key] = trials.ts
        
        # Store the freshly computed trials in the plot directory.
        # NOTE(review): this path only coincides with the bkg_file_dict
        # entry for 'pi0_ps_v4'; for other keys the file written here is
        # not the one checked for existence above -- confirm this is intended.
        out_file = os.path.join(plot_dir, 'trials_{}.pkl'.format(key))
        with open(out_file, 'wb') as f:
            pickle.dump(trials, f, protocol=2)
        

#### Plot ts distribution

In [None]:
# One TS-distribution figure per trial runner, saved to plot_dir.
for key, bg in bkg_dict.items():
    # Below 50000 trials the Chi2TSD class is used, otherwise plain TSD.
    bg_tsd = cy.dists.Chi2TSD(bg) if len(bg) < 50000 else cy.dists.TSD(bg)

    fig, ax = plot_bkg_trials(bg_tsd)

    # Mark the TS value returned by isf_nsigma(3).
    ts_3sig = bg_tsd.isf_nsigma(3)
    ax.axvline(
        ts_3sig,
        ls='--',
        lw=1,
        label='3-sigma TS: {:3.3f}'.format(ts_3sig),
    )

    ax.set_title('Trial Runner: {}'.format(key))
    ax.set_yscale('log')
    # Refresh the legend so that it includes the vertical line.
    ax.legend()
    fig.savefig('{}/ts_dist_{}.png'.format(plot_dir, key))

#### Define the number of injected signal events (ns) per model

In [None]:
# Number of signal events per model; the value is passed as `n_sig` to the
# signal trials further down. Only keys that are also present in tr_dict
# are used there.
ns_dict = {
    'pi0': 678, #748.11, # bias corrected to 678
    'kra5': 273.24,
    'kra50': 208.95,
    'snr': 218.45,
    'pwn': 279.61,
    'unid': 237.90,
    'combined_stacking': 735., # 735 if results were independent
    'fermibubbles_50TeV': 95.69,
    # Converted from a flux with the trial runner's to_ns.
    # NOTE(review): the original trailing comment here read "bias corrected
    # to 678", identical to the 'pi0' entry -- confirm it applies to this flux.
    'pi0_ps_v4': tr_dict['pi0_ps_v4'].to_ns(E2dNdE=2.18e-11, E0=100, unit=1e3),
}
ns_dict

#### Run signal trials at the defined injection strength

In [None]:
# Number of signal trials per trial runner.
n_sig_trials = 1000

# Per trial runner: p-values, significances and the raw signal trials.
p_values_dict = {}
n_sigma_dict = {}
sig_trials_dict = {}

for key, tr in tr_dict.items():

    # get background distribution
    # (Chi2TSD below 50000 trials, plain TSD otherwise)
    bg = bkg_dict[key]
    if len(bg) < 50000:
        bg_tsd = cy.dists.Chi2TSD(bg)
    else:
        bg_tsd = cy.dists.TSD(bg)

    # get signal trials
    print('Performing signal trials for {} with ns {}'.format(key, ns_dict[key]))
    # Use `mp_cpus`, the keyword of the other get_many_fits / find_n_sig
    # calls in this notebook; the former `cpus=25` did not match it.
    # NOTE(review): no seed is passed, so these trials differ between
    # runs -- consider passing one for reproducibility.
    sig_trials = tr.get_many_fits(
        n_trials=n_sig_trials, mp_cpus=25, n_sig=ns_dict[key], poisson=True)

    # compute significance
    p_values = bg_tsd.sf(sig_trials.ts)
    n_sigma = bg_tsd.sf_nsigma(sig_trials.ts)

    p_values_dict[key] = p_values
    n_sigma_dict[key] = n_sigma
    sig_trials_dict[key] = sig_trials
    

#### Plot expected significances

In [None]:
# Histogram of the significances reached by the signal trials of every
# trial runner, with the median marked by a vertical line.
bins = np.linspace(-1, 6, 50)
fig, ax = plt.subplots(figsize=(9, 6))
color_cycle = get_color_cycler(colors=soft_colors)

for key, n_sigmas in n_sigma_dict.items():
    color = next(color_cycle)
    ax.hist(n_sigmas, bins=bins, histtype='step', label=key, color=color)
    median_sigma = np.median(n_sigmas)
    # Raw strings: `\c` and `\s` are not valid escape sequences in a
    # normal string literal. The resulting text is unchanged.
    ax.axvline(
        median_sigma, ls='-.', color=color,
        label=r'Median: {:3.2f}$\cdot \sigma$'.format(median_sigma))

ax.legend()
ax.set_xlabel(r'$n \cdot \sigma$')
fig.tight_layout()
fig.savefig(os.path.join(plot_dir, 'comparison_significance.png'))

#### Compute Sensitivity

In [None]:
# One entry per trial runner, pre-filled with an empty dict.
sens_dict = {k: {} for k in tr_dict.keys()}

for key, tr  in tr_dict.items():
    # 'pi0' is skipped, so its entry stays the empty placeholder dict.
    if key == 'pi0': continue
    # get background distribution
    # (from here on `bg` refers to the TSD object, not the raw TS values)
    bg = bkg_dict[key]
    if len(bg) < 50000:
        bg = cy.dists.Chi2TSD(bg)
    else:
        bg = cy.dists.TSD(bg)
    
    # Hard-wired switch between two threshold definitions: the active
    # branch uses the background median with beta=0.9, the inactive one
    # the 3-sigma TS with beta=0.5.
    if True:
        ts = bg.median()
        beta = 0.9
    else:
        ts = bg.isf_nsigma(3)
        beta = 0.5
    
    print('Computing Sensitivity for {}'.format(key))
    sens = tr.find_n_sig(
            # ts, threshold
            ts=ts,
            # beta, fraction of trials which should exceed the threshold
            beta=beta,
            # n_inj step size for initial scan
            n_sig_step=50,
            # this many trials at a time
            batch_size=100,
            # tolerance, as estimated relative error
            tol=.05,
            first_batch_size = 100,
            mp_cpus=30,
            seed=seed
        )
    # Convert the number of signal events into a flux and store it with
    # the result.
    sens['flux'] = tr.to_E2dNdE(sens['n_sig'], E0=100, unit=1e3)
    print('flux:', sens['flux'])
    sens_dict[key] = sens

In [None]:
# Print the sensitivity flux of every trial runner.
for key, sens in sens_dict.items():
    # Entries that were skipped when the sensitivities were computed are
    # still empty placeholder dicts; indexing 'flux' on them would raise
    # a KeyError.
    if 'flux' not in sens:
        print('  {}: not computed'.format(key))
        continue
    print('  {}: {:3.3e}'.format(
        key, sens['flux'],
    ))

In [None]:
# Total number of entries in `bg_data`, summed over all sub-analyses of
# the track analysis.
n_events = sum(len(a_i.bg_data) for a_i in ana_tracks.anas)
n_events