In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

#PDFs in BDT and sindec?
import os

# set env flags to catch BLAS used for scipy/numpy 
# to only use 1 cpu, n_cpus will be totally controlled by csky
if False:
    os.environ['MKL_NUM_THREADS'] = "1"
    os.environ['NUMEXPR_NUM_THREADS'] = "1"
    os.environ['OMP_NUM_THREADS'] = "1"
    os.environ['OPENBLAS_NUM_THREADS'] = "1"
    os.environ['VECLIB_MAXIMUM_THREADS'] = "1"

import matplotlib as mpl
mpl.rcParams['figure.facecolor'] = 'w'
mpl.rcParams['savefig.facecolor'] = 'w'
import matplotlib.pyplot as plt
from matplotlib import colors, cm
import csky as cy
from csky import cext
import numpy as np
import astropy
from icecube import astro
import histlite as hl
import healpy
import healpy as hp
import socket
import pickle
from scipy import stats
import copy
healpy.disable_warnings()
plt.rc('figure', facecolor = 'w')
plt.rc('figure', dpi=100)

## Define Settings

In [None]:
# Version tag of the event selection handed to cy.get_analysis further down.
selection_version = 'version-001-p00'

host_name = socket.gethostname()

# Paths are only configured for hosts whose name contains 'cobalt';
# any other host aborts the notebook immediately.
if 'cobalt' in host_name:
    print('Working on Cobalts')
    #data_prefix = '/data/user/ssclafani/data/cscd/final'
    ana_dir = '/data/user/ssclafani/data/analyses/'
    # Build the plot directory from ana_dir so the root path is written once.
    plot_dir = cy.utils.ensure_dir(os.path.join(
        ana_dir,
        'DNNCascadeCodeReview/unblinding_checks/plots/unblinding/galactic_plane_checks'))
else:
    # Format the host into a single message; passing it as a second argument
    # made the exception text render as a tuple.
    raise ValueError('Unknown host: {}'.format(host_name))

In [None]:
# Create every output directory that does not exist yet.
for dir_path in (plot_dir,):
    if os.path.exists(dir_path):
        continue
    print('Creating directory:', dir_path)
    os.makedirs(dir_path)

## Load Data

In [None]:
# Data repository rooted at the local analyses directory, and the dataset
# specification; both are passed to cy.get_analysis in the next cell.
repo = cy.selections.Repository(local_root='/data/user/ssclafani/data/analyses')
specs = cy.selections.DNNCascadeDataSpecs.DNNC_10yr

In [None]:
%%time

# Build the analysis object from the repository, selection version and
# dataset specs defined above. `ana` is used as a global by the
# trial-runner helpers further down.
ana = cy.get_analysis(
    repo, selection_version, specs, 
    #gammas=np.r_[0.1:6.01:0.125],
)

In [None]:
# First entry of the analysis; its `sig` attribute is displayed for inspection.
a = ana.anas[0]
a.sig

In [None]:
a.bg_data

## Helpers

In [None]:
from cycler import cycle
from copy import deepcopy

soft_colors = cy.plotting.soft_colors
# NOTE: this rebinds `colors`, which was imported as the `matplotlib.colors`
# module at the top of the notebook. From here on `colors` is the list of
# colour strings from the active matplotlib property cycle.
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']


def get_bias_allt(tr, ntrials=200, n_sigs=np.r_[:101:10], quiet=False):
    """Run signal-injection fits at several strengths and stack the results.

    For every value in `n_sigs`, `tr.get_many_fits` is called with `ntrials`
    trials, `logging=False`, and the injected count itself as the seed. Each
    result then receives an `ntrue` column holding the injected count, and
    all results are merged with `cy.utils.Arrays.concatenate`.

    Parameters
    ----------
    tr : object
        Trial runner exposing `get_many_fits`.
    ntrials : int
        Number of trials per injected signal strength.
    n_sigs : sequence of int
        Injected signal counts; also used as the per-strength seed.
    quiet : bool
        If False, print a carriage-return progress line per strength.

    Returns
    -------
    The concatenated fit results, including the added `ntrue` column.
    """
    per_strength = []
    for n_inj in n_sigs:
        if not quiet:
            print(f'\r{n_inj:4d} ...', end='', flush=True)
        fits = tr.get_many_fits(ntrials, n_sig=n_inj, logging=False, seed=n_inj)
        per_strength.append(fits)
    if not quiet:
        # terminate the progress line
        print()

    # tag every fit with the signal strength it was generated with
    for n_inj, fits in zip(n_sigs, per_strength):
        fits['ntrue'] = np.repeat(n_inj, len(fits))

    return cy.utils.Arrays.concatenate(per_strength)

def get_color_cycler():
    """Return `cycle(colors)` for the module-level `colors` list.

    `cycle` is the name imported from `cycler` in this cell; `colors` is the
    list taken from matplotlib's active property cycle.
    """
    return cycle(colors)

def plot_ns_bias(ax, tr, allt, label=''):
    """Draw fitted `ns` against injected signal count on `ax`.

    The x-bins are centred on the unique values of `allt.ntrue`, with a width
    equal to their mean spacing; the `ns` axis uses 100 bins. The plotted
    curve is `h.contain_project(1)` with error bands (presumably the central
    value and containment band per injection bin; see the histlite docs). A
    dashed diagonal marks the unbiased expectation.

    Parameters
    ----------
    ax : matplotlib axes to draw on.
    tr : unused; kept so the signature matches `plot_gamma_bias`.
    allt : trials object with `ntrue` and `ns` columns.
    label : legend label for the curve.
    """
    injected = np.unique(allt.ntrue)
    half_step = 0.5 * np.mean(np.diff(injected))
    edges = np.r_[injected - half_step, injected[-1] + half_step]

    hist2d = hl.hist((allt.ntrue, allt.ns), bins=(edges, 100))
    hl.plot1d(
        ax, hist2d.contain_project(1),
        errorbands=True, drawstyle='default', label=label,
    )

    # same range on both axes: apply it to y, then feed the result to x
    lim = edges[[0, -1]]
    applied_ylim = ax.set_ylim(lim)
    ax.set_xlim(applied_ylim)

    # dashed diagonal: fitted ns equal to injected ns
    ax.plot(lim, lim, color='C0', ls='--', lw=1, zorder=-10)
    ax.set_aspect('equal')

    ax.set_xlabel(r'$n_{inj}$')
    ax.set_ylabel(r'$n_s$')
    ax.grid()

def plot_gamma_bias(ax, tr, allt, label=''):
    """Draw fitted `gamma` against injected signal count on `ax`.

    The x-bins are centred on the unique values of `allt.ntrue`, with a width
    equal to their mean spacing; the `gamma` axis uses 100 bins. The plotted
    curve is `h.contain_project(1)` with error bands. A dashed horizontal
    line marks the gamma read from `tr.sig_injs[0].flux[0].gamma`.

    Parameters
    ----------
    ax : matplotlib axes to draw on.
    tr : trial runner; only its first signal injector's flux is read.
    allt : trials object with `ntrue` and `gamma` columns.
    label : legend label for the curve.
    """
    injected = np.unique(allt.ntrue)
    half_step = 0.5 * np.mean(np.diff(injected))
    edges = np.r_[injected - half_step, injected[-1] + half_step]
    true_gamma = tr.sig_injs[0].flux[0].gamma

    hist2d = hl.hist((allt.ntrue, allt.gamma), bins=(edges, 100))
    hl.plot1d(
        ax, hist2d.contain_project(1),
        errorbands=True, drawstyle='default', label=label,
    )

    ax.set_xlim(edges[[0, -1]])
    ax.set_ylim(1, 4)
    # dashed reference line at the gamma taken from the signal injector
    ax.axhline(true_gamma, color='C0', ls='--', lw=1, zorder=-10)

    ax.set_xlabel(r'$n_{inj}$')
    ax.set_ylabel(r'$\gamma$')
    ax.grid()

def plot_bkg_trials(
            bg, fig=None, ax=None,
            label='{} bg trials',
            label_fit=r'$\chi^2[{:.2f}\mathrm{{dof}},\ \eta={:.3f}]$',
            color=colors[0],
            density=False,
            bins=50,
        ):
    """Plot a background TS histogram and, if available, its fitted PDF.

    Parameters
    ----------
    bg : object
        Background TS distribution exposing `get_hist(bins=...)` and
        `n_total`. If it also has a `pdf` attribute, `pdf`, `ndof` and `eta`
        are used to overlay the fitted curve.
    fig, ax : matplotlib figure / axes, optional
        If `ax` is None a new 6x4 figure is created. If only `ax` is given,
        the returned figure is taken from `ax.figure`.
    label : str or None
        Histogram legend label; formatted with `bg.n_total`.
    label_fit : str or None
        Fit legend label; formatted with `bg.ndof` and `bg.eta`.
    color : matplotlib colour used for both histogram and fit curve.
    density : bool
        If True the histogram is normalized and the bare PDF is drawn;
        otherwise the PDF is scaled by the histogram integral.
    bins : passed through to `bg.get_hist`.

    Returns
    -------
    (fig, ax)
    """
    if ax is None:
        fig, ax = plt.subplots(figsize=(6, 4))
    elif fig is None:
        # caller supplied only the axes: return its parent figure, not None
        fig = ax.figure

    h = bg.get_hist(bins=bins)
    if density:
        h = h.normalize()
    if label is not None:
        label = label.format(bg.n_total)
    hl.plot1d(ax, h, crosses=True, color=color, label=label)

    # compare with the chi2 fit:
    if hasattr(bg, 'pdf'):
        x = h.centers[0]
        if label_fit is not None:
            label_fit = label_fit.format(bg.ndof, bg.eta)
        # a normalized histogram compares to the PDF directly; raw counts
        # need the PDF scaled by the histogram integral
        scale = 1 if density else h.integrate().values
        ax.semilogy(x, scale * bg.pdf(x), lw=1, ls='--', label=label_fit, color=color)

    ax.set_xlabel(r'TS')
    ax.set_ylabel(r'Density' if density else r'number of trials')
    ax.legend()

    return fig, ax

## Setup Analysis

In [None]:
import sys
sys.path.insert(0, '../..')

import config as cg

# Override the base directory on the imported config module; the trial and
# result file paths below are built from `cg.base_dir`.
cg.base_dir = '/data/user/mhuennefeld/data/analyses/unblinding_v1.0.0/'

In [None]:
def get_gp_tr(template_str, cutoff=np.inf, gamma=None, cpus=20):
    """Build a trial runner for a named galactic-plane template.

    The configuration comes from `cg.get_gp_conf`, and the runner is created
    on the notebook-global `ana`.

    Parameters
    ----------
    template_str : str
        Template name forwarded to `cg.get_gp_conf`.
    cutoff : float
        Energy cutoff; multiplied by 1e3 before being passed as `cutoff_GeV`
        (so presumably given in TeV).
    gamma : forwarded unchanged to `cg.get_gp_conf`.
    cpus : int
        Forwarded as `mp_cpus` to `cy.get_trial_runner`.
    """
    conf = cg.get_gp_conf(
        template_str=template_str,
        gamma=gamma,
        cutoff_GeV=cutoff * 1e3,
        base_dir=cg.base_dir,
    )
    return cy.get_trial_runner(conf, ana=ana, mp_cpus=cpus)

def get_template_tr(template, gamma=2.7, cutoff_tev=np.inf, cpus=20):
    """Build a trial runner for an explicit template with a power-law flux.

    Parameters
    ----------
    template : template object stored under the 'template' config key.
    gamma : float
        Spectral index, used for the injected `PowerLawFlux` and also passed
        to the fitter via `fitter_args`.
    cutoff_tev : float
        Energy cutoff in TeV; converted to GeV for `energy_cutoff`.
    cpus : int
        Forwarded as `mp_cpus` to `cy.get_trial_runner`.
    """
    flux = cy.hyp.PowerLawFlux(gamma, energy_cutoff=cutoff_tev * 1000.)
    conf = dict([
        ('template', template),
        ('flux', flux),
        ('randomize', ['ra']),
        ('fitter_args', dict(gamma=gamma)),
        ('sigsub', True),
        ('update_bg', True),
        ('fast_weight', False),
    ])
    return cy.get_trial_runner(conf, ana=ana, mp_cpus=cpus)


#### Get TrialRunners

In [None]:
# One trial runner per galactic-plane template, keyed by template name.
tr_dict = {
    template_name: get_gp_tr(template_name)
    for template_name in ('pi0', 'kra5', 'kra50')
}

#### Get bkg fits for each template

In [None]:
# Pre-computed background trial files, one per template.
bkg_file_dict = {
    template_name: '{}/gp/trials/{}/{}/trials.dict'.format(
        cg.base_dir, 'DNNC', template_name)
    for template_name in ('pi0', 'kra5', 'kra50')
}
# Only used for templates without a pre-computed file.
n_bkg_trials = 20000
seed = 1337

# bkg_dict maps template name -> background TS values.
bkg_dict = {}
for key, tr in tr_dict.items():
    if key in bkg_file_dict:
        print('Loading background trials for template {}'.format(key))
        # NOTE(security): allow_pickle=True unpickles the file, which can
        # execute arbitrary code; only load trial files from a trusted source.
        bg_trials = np.load(bkg_file_dict[key], allow_pickle=True)
        bkg_dict[key] = bg_trials['poisson']['nsig'][0.0]['ts']

    else:
        print('Running background trials for template {}'.format(key))
        # Keep only the TS column so both branches store the same kind of
        # object; later cells compare bkg_dict[...] directly against a TS.
        bkg_dict[key] = tr.get_many_fits(
            n_trials=n_bkg_trials, seed=seed, mp_cpus=20).ts


#### Get Results for each template

In [None]:
# Load the stored unblinded result file for every template.
res_dict = {}
for key in tr_dict:
    rel_path = 'gp/results/{}/{}_unblinded.npy'.format(key, key)
    f_path = os.path.join(cg.base_dir, rel_path)
    res_dict[key] = np.load(f_path)

In [None]:
tr_dict

In [None]:
# Build the TRUTH=True trial for the pi0 template, wrap it in a likelihood
# evaluator and fit it with the runner's own fitter arguments.
pi0_runner = tr_dict['pi0']
trial = pi0_runner.get_one_trial(TRUTH=True)
L = pi0_runner.get_one_llh_from_trial(trial)
L.fit(**pi0_runner.fitter_args)

In [None]:
tr = tr_dict['pi0']

In [None]:
tr.get_one_fit(TRUTH=True)

In [None]:
# Print the LLH, PDF-ratio, space and energy models that cy.inspect reports
# for the trial runner (second argument -1 in every call).
print('From the TrialRunner:')
for model_getter in (
        cy.inspect.get_llh_model,
        cy.inspect.get_pdf_ratio_model,
        cy.inspect.get_space_model,
        cy.inspect.get_energy_model,
):
    print(model_getter(tr, -1))

In [None]:
# Space and energy evaluators taken from the likelihood `L` via cy.inspect;
# both use the same (-1, 0) selectors.
space_eval = cy.inspect.get_space_eval(L, -1, 0) # 0: background events (1 would be for signal events)
energy_eval = cy.inspect.get_energy_eval(L, -1, 0)
print(space_eval)
print(energy_eval)

In [None]:
# Evaluate both terms at gamma=2.7: element [1] of the space evaluation and
# element [0] of the energy evaluation are kept.
# NOTE(review): element [1] is presumably the signal-subtraction term - confirm.
StoB_space = space_eval(gamma=2.7)[1] 
SoB_energy = energy_eval(gamma=2.7)[0]

In [None]:
# Element [0] of the same space evaluation at gamma=2.7.
SoB_space_nosigsub = space_eval(gamma=2.7)[0] 

In [None]:
# NOTE(review): `*` binds tighter than `-`, so this evaluates
# SoB_space_nosigsub - (StoB_space * SoB_energy). Confirm that is intended;
# `w_noss` is not used by any later cell in this notebook.
w_noss = SoB_space_nosigsub - StoB_space * SoB_energy

In [None]:
# Per-event weight: (space term [0] minus space term [1]) times the energy
# term. The subtracted array is `StoB_space`; `SoB_space` was never defined
# and raised a NameError on a fresh kernel.
w = (SoB_space_nosigsub - StoB_space) * SoB_energy


In [None]:
# Distribution of the per-event weights on a logarithmic count axis.
plt.hist(w, bins=50)
plt.xlabel('S/B space * S/B energy')
plt.yscale('log')
# Largest weight as the cell's final expression so it is actually displayed;
# a bare `max(w)` in the middle of the cell was computed and discarded.
np.max(w)

In [None]:
# Energy term against space term [0] for every event.
scatter_kw = dict(marker='x')
plt.scatter(SoB_energy, SoB_space_nosigsub, **scatter_kw)
plt.xlabel('SoB Energy')
plt.ylabel('SoB Space no sigsub')

In [None]:
# Indices that sort `w` in ascending order, so the tail of `ws` points at the
# largest weights.
ws = np.argsort(w)

In [None]:
ws[-10:][::-1]

In [None]:
# Collect and plot the ten events with the largest weight `w`.
ras = []
decs = []
sigmas = []
es = []
events = trial.evss[0][0]
# `ws` sorts `w` ascending, so the reversed tail runs from the largest weight
# downwards. The loop variable is deliberately not called `a`, which would
# overwrite the `a = ana.anas[0]` defined earlier in the notebook.
for ev_id in ws[-10:][::-1]:
    # select the event once instead of re-masking for every field
    selected = events[events.idx == ev_id]
    ra = selected.ra[0]
    dec = selected.dec[0]
    e = selected.energy[0]
    sigma = selected.sigma[0]
    ras.append(ra)
    decs.append(dec)
    es.append(e)
    sigmas.append(sigma)
    gal_l, gal_b = astro.equa_to_gal(float(ra), float(dec))
    # SIGMA now uses '{:.2f}' like the other angles; '{:2f}' set a field
    # width of 2 and printed six decimals.
    print('ID: {} RA: {:.2f} DEC: {:.2f} E:{:.1f} SIGMA: {:.2f}'.format(
        ev_id, np.degrees(ra), np.degrees(dec), e, np.degrees(sigma)))
    # marker area scales with the squared angular uncertainty in degrees
    plt.scatter(np.degrees(gal_l), np.degrees(gal_b), s=np.pi*np.degrees(sigma)**2)
plt.xlabel('galactic (l)')
plt.axhline(0)
plt.ylabel('galactic (b)')

In [None]:
# Sky positions of the selected events in equatorial coordinates (degrees),
# with the RA axis running from 360 down to 0.
ra_deg = np.degrees(ras)
dec_deg = np.degrees(decs)
plt.scatter(ra_deg, dec_deg)
plt.xlim(360, 0)
plt.ylim(-90,0)
plt.xlabel('RA')
plt.ylabel('DEC')

In [None]:
# NOTE: the bare `trial.evss` below is not the last expression of the cell,
# so nothing is displayed for it.
trial.evss
# Independent copy of the trial, so events can be removed without touching
# the original `trial`.
trial2 = deepcopy(trial)

In [None]:
trial2

In [None]:
# Last entry of the ascending argsort, i.e. the position of the largest
# weight, kept as a length-1 array.
top = ws[-1:]

In [None]:
# Select the event(s) whose `idx` equals the entry in `top` and print the
# selection's `as_dataframe` attribute.
ev = trial.evss[0][0][trial.evss[0][0].idx == top]
print(ev.as_dataframe)

In [None]:
# True for every event whose idx appears in `top`.
mask = (np.isin(trial.evss[0][0].idx, top))
# `~mask` keeps the events that are NOT in `top`, so despite its name
# `missing_ev` holds the remaining events, not the removed ones.
missing_ev = trial2.evss[0][0][~mask]
# NOTE: this overwrites trial2's events in place; re-running the cell applies
# `mask` to the already-reduced event list.
trial2.evss[0][0] = missing_ev

In [None]:
trial2

In [None]:
# Refit the pi0 likelihood on the reduced trial. NOTE: this rebinds `L`,
# replacing the evaluator built from the full trial.
L = tr_dict['pi0'].get_one_llh_from_trial(trial2)
L.fit(**tr_dict['pi0'].fitter_args)

In [None]:
# Drop the masked events from the analysis data. `ana` is mutated in place
# (the trial-runner helpers read the global `ana`), so keep the untouched data
# the first time this cell runs and always mask from that copy. Re-running the
# cell is then idempotent rather than applying the mask to reduced data.
try:
    data_full
except NameError:
    data_full = ana[0].data
ana[0].data = data_full[~mask]

In [None]:
tr = get_gp_tr('pi0'),


In [None]:
new_ts, new_ns = tr[0].get_one_fit(TRUTH=True)

In [None]:
print(new_ts)

In [None]:
# Fraction of pi0 background trials with TS above the new TS, printed as the
# corresponding one-sided Gaussian significance.
pi0_bkg_ts = bkg_dict['pi0']
pval = np.mean(pi0_bkg_ts > new_ts)
n_sigma = stats.norm.isf(pval)
print(n_sigma)

In [None]:
# Refit the pi0 hypothesis after removing the i highest-weight events,
# for i = 0 (nothing removed) up to 10.
pi0_runner = tr_dict['pi0']
pi0_bkg_ts = bkg_dict['pi0']
for i in range(11):
    trial2 = deepcopy(trial)
    # `ws` sorts `w` ascending, so the last i entries are the i largest
    # weights. Slicing from len(ws) - i gives an empty selection for i == 0
    # (unlike ws[-0:], which is the whole array), so no sentinel id is needed.
    top = ws[max(len(ws) - i, 0):]
    mask = np.isin(trial.evss[0][0].idx, top)

    # keep everything that is not among the selected top events
    missing_ev = trial2.evss[0][0][~mask]
    trial2.evss[0][0] = missing_ev
    L = pi0_runner.get_one_llh_from_trial(trial2)
    # the third returned value is bound to `gamma` and not used further
    new_ts, new_ns, gamma = L.fit(**pi0_runner.fitter_args)
    print('Removing Top {} Events: TS= {:.2f} ns={:.2f}'.format(i, new_ts, new_ns['ns']))

    # fraction of background trials with a larger TS, and its Gaussian
    # significance; the old label called the significance a p-value
    pval = np.mean(pi0_bkg_ts > new_ts)
    print('New significance: {:.2f} sigma (p-value: {:.3g})'.format(
        stats.norm.isf(pval), pval))
    print('-------------')