In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

#PDFs in BDT and sindec?
import os

# set env flags to catch BLAS used for scipy/numpy 
# to only use 1 cpu, n_cpus will be totally controlled by csky
if False:
    os.environ['MKL_NUM_THREADS'] = "1"
    os.environ['NUMEXPR_NUM_THREADS'] = "1"
    os.environ['OMP_NUM_THREADS'] = "1"
    os.environ['OPENBLAS_NUM_THREADS'] = "1"
    os.environ['VECLIB_MAXIMUM_THREADS'] = "1"

import matplotlib as mpl
mpl.rcParams['figure.facecolor'] = 'w'
mpl.rcParams['savefig.facecolor'] = 'w'
import matplotlib.pyplot as plt
from matplotlib import colors, cm
import csky as cy
from csky import cext
import numpy as np
import pandas as pd
import astropy
#from icecube import astro
import histlite as hl
import healpy
import healpy as hp
import socket
import pickle
import copy
healpy.disable_warnings()
plt.rc('figure', facecolor = 'w')
plt.rc('figure', dpi=100)

## Define Settings

In [None]:
# Version tag of the event selection; passed to `cy.get_analysis` below.
selection_version = 'version-001-p01'

# Paths are only configured for known hosts, so look up where we are running.
host_name = socket.gethostname()

if 'cobalt' in host_name:
    print('Working on Cobalts')
    data_prefix = '/data/user/ssclafani/data/cscd/final'
    ana_dir = '/data/user/ssclafani/data/analyses/'
    # all CSV files and figures produced by this notebook are written here
    plot_dir = '/data/user/mhuennefeld/data/analyses/DNNCascadeCodeReview/unblinding_checks/plots/data_release/create_data/DataFluxMeasurement'
    
else:
    # fail early: no paths are defined for any other machine
    raise ValueError('Unknown host:', host_name)

In [None]:
# Make sure every output directory exists before anything is written to it.
for dir_path in [plot_dir]:
    if not os.path.exists(dir_path):
        print('Creating directory:', dir_path)
        # exist_ok avoids a crash if the directory is created by someone
        # else between the check above and this call
        os.makedirs(dir_path, exist_ok=True)

## Load Data

In [None]:
# Data repository and dataset specification (`DNNC_10yr`); both are handed
# to `cy.get_analysis` in the next cell.
repo = cy.selections.Repository()
specs = cy.selections.DNNCascadeDataSpecs.DNNC_10yr

In [None]:
%%time

# Build the csky analysis object for the chosen selection version and specs.
ana = cy.get_analysis(
    repo, selection_version, specs, 
    #gammas=np.r_[0.1:6.01:0.125],
)

In [None]:
# First sub-analysis; used as the module-level `a` by the helper functions
# and cells further down (e.g. `a.sig`, `a.livetime`, `a.bg_data`).
a = ana.anas[0]
# display the signal sample (provides true_dec, true_energy, oneweight, ...)
a.sig

In [None]:
# display the sample stored as `bg_data`
a.bg_data

## Helpers

In [None]:
from cycler import cycle
from copy import deepcopy

soft_colors = cy.plotting.soft_colors
# NOTE: this rebinds `colors` and thereby shadows the `matplotlib.colors`
# module imported in the first cell. From here on `colors` is the list of
# colors of matplotlib's default property cycle.
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']


def get_bias_allt(tr, ntrials=200, n_sigs=np.r_[:101:10], quiet=False):
    """Run signal-injection fits for a range of injected signal strengths

    Parameters
    ----------
    tr : object
        Trial runner providing `get_many_fits`.
    ntrials : int, optional
        Number of trials that are run for each entry of `n_sigs`.
    n_sigs : array_like, optional
        The numbers of injected signal events to scan. Each value is
        also used as the random seed for its batch of trials.
    quiet : bool, optional
        If True, no progress output is printed.

    Returns
    -------
    object
        The result of `cy.utils.Arrays.concatenate` over all batches.
        Every batch carries an additional `ntrue` field holding the
        injected number of signal events.
    """
    batches = []
    for n_inj in n_sigs:
        if not quiet:
            # progress indicator, overwritten in place via '\r'
            print(f'\r{n_inj:4d} ...', end='', flush=True)
        batches.append(
            tr.get_many_fits(ntrials, n_sig=n_inj, logging=False, seed=n_inj))

    if not quiet:
        # terminate the progress line
        print()

    # remember the injected signal strength for each trial
    for n_inj, batch in zip(n_sigs, batches):
        batch['ntrue'] = np.repeat(n_inj, len(batch))

    return cy.utils.Arrays.concatenate(batches)

def get_color_cycler():
    """Return a cycling iterator over the module-level `colors` list"""
    color_iter = cycle(colors)
    return color_iter

def plot_ns_bias(ax, tr, allt, label=''):
    """Plot the fitted versus the injected number of signal events

    Parameters
    ----------
    ax : matplotlib axes
        The axes to draw on.
    tr : object
        Not used in this function.
    allt : object
        Trials providing the fields `ntrue` (injected) and `ns` (fitted),
        e.g. the output of `get_bias_allt`.
    label : str, optional
        Legend label for the plotted band.
    """
    injected = np.unique(allt.ntrue)
    step = np.mean(np.diff(injected))

    # one bin centered on each injected value
    edges = np.r_[injected - 0.5*step, injected[-1] + 0.5*step]

    hist_2d = hl.hist((allt.ntrue, allt.ns), bins=(edges, 100))
    hl.plot1d(
        ax, hist_2d.contain_project(1), errorbands=True,
        drawstyle='default', label=label,
    )

    # same range on both axes; the diagonal marks the unbiased expectation
    axis_range = edges[[0, -1]]
    ax.set_xlim(ax.set_ylim(axis_range))
    diagonal_style = dict(color='C0', ls='--', lw=1, zorder=-10)
    ax.plot(axis_range, axis_range, **diagonal_style)
    ax.set_aspect('equal')

    ax.set_xlabel(r'$n_{inj}$')
    ax.set_ylabel(r'$n_s$')
    ax.grid()

def plot_gamma_bias(ax, tr, allt, label=''):
    """Plot the fitted spectral index versus the injected number of events

    Parameters
    ----------
    ax : matplotlib axes
        The axes to draw on.
    tr : object
        Trial runner; the injected spectral index is read from
        `tr.sig_injs[0].flux[0].gamma` and drawn as a horizontal line.
    allt : object
        Trials providing the fields `ntrue` (injected) and `gamma` (fitted),
        e.g. the output of `get_bias_allt`.
    label : str, optional
        Legend label for the plotted band.
    """
    injected = np.unique(allt.ntrue)
    step = np.mean(np.diff(injected))

    # one bin centered on each injected value
    edges = np.r_[injected - 0.5*step, injected[-1] + 0.5*step]

    hist_2d = hl.hist((allt.ntrue, allt.gamma), bins=(edges, 100))
    hl.plot1d(
        ax, hist_2d.contain_project(1), errorbands=True,
        drawstyle='default', label=label,
    )

    ax.set_xlim(edges[[0, -1]])
    ax.set_ylim(1, 4)

    # horizontal line at the injected spectral index
    injected_gamma = tr.sig_injs[0].flux[0].gamma
    ax.axhline(injected_gamma, color='C0', ls='--', lw=1, zorder=-10)

    ax.set_xlabel(r'$n_{inj}$')
    ax.set_ylabel(r'$\gamma$')
    ax.grid()

def plot_bkg_trials(
        bg, fig=None, ax=None,
        label='{} bg trials',
        label_fit=r'$\chi^2[{:.2f}\mathrm{{dof}},\ \eta={:.3f}]$',
        color=colors[0],
        density=False,
        bins=50):
    """Plot a test-statistic distribution of background trials

    Parameters
    ----------
    bg : object
        TS distribution providing `get_hist` and `n_total`. If it also
        has a `pdf` attribute (plus `ndof` and `eta`), the fitted pdf
        is drawn on top of the histogram.
    fig, ax : matplotlib figure and axes, optional
        Axes to draw on. A new figure is created if `ax` is None.
        Note that `fig` is returned as passed in.
    label : str or None, optional
        Histogram label; formatted with `bg.n_total`.
    label_fit : str or None, optional
        Label of the fitted pdf; formatted with `bg.ndof` and `bg.eta`.
    color : color, optional
        Color used for both the histogram and the fitted pdf.
    density : bool, optional
        If True, the normalized histogram is shown instead of counts.
    bins : int or array_like, optional
        Binning passed on to `bg.get_hist`.

    Returns
    -------
    fig, ax, h
        The figure, the axes and the plotted histogram.
    """
    if ax is None:
        fig, ax = plt.subplots(figsize=(6, 4))

    h = bg.get_hist(bins=bins)
    if density:
        h = h.normalize()

    if label is not None:
        label = label.format(bg.n_total)
    hl.plot1d(ax, h, crosses=True, color=color, label=label)

    # compare with the chi2 fit, if the distribution provides one
    if hasattr(bg, 'pdf'):
        x = h.centers[0]
        if label_fit is not None:
            label_fit = label_fit.format(bg.ndof, bg.eta)
        if density:
            curve = bg.pdf(x)
        else:
            # scale the pdf to the histogram's integral
            curve = h.integrate().values * bg.pdf(x)
        ax.semilogy(x, curve, lw=1, ls='--', label=label_fit, color=color)

    ax.set_xlabel(r'TS')
    ax.set_ylabel(r'Density' if density else r'number of trials')
    ax.legend()

    return fig, ax, h

## Setup Analysis

In [None]:
import sys
# make the `config` module located three directories up importable
sys.path.insert(0, '../../..')

import config as cg

# base directory below which the unblinded results (`gp/results/...`) and
# pre-computed trials (`gp/trials/...`) are looked up in later cells
cg.base_dir = '/data/user/mhuennefeld/data/analyses/unblinding_v1.0.1/'

In [None]:
def get_gp_tr(template_str, cutoff=np.inf, gamma=None, cpus=20, ana=ana):
    """Create a trial runner for one of the Galactic plane templates

    Parameters
    ----------
    template_str : str
        Name of the template, passed on to `cg.get_gp_conf`.
    cutoff : float, optional
        Energy cutoff. It is multiplied by 1e3 and passed on as
        `cutoff_GeV`, i.e. presumably expected in TeV.
    gamma : float or None, optional
        Spectral index passed on to `cg.get_gp_conf`.
    cpus : int, optional
        Number of CPUs the trial runner may use (`mp_cpus`).
    ana : object, optional
        The csky analysis; defaults to the module-level `ana`.

    Returns
    -------
    object
        The trial runner created by `cy.get_trial_runner`.
    """
    conf = cg.get_gp_conf(
        template_str=template_str,
        gamma=gamma,
        cutoff_GeV=cutoff * 1e3,
        base_dir=cg.base_dir,
    )
    return cy.get_trial_runner(conf, ana=ana, mp_cpus=cpus)

def get_template_tr(template, gamma=2.7, cutoff_tev=np.inf, cpus=20):
    """Create a trial runner for a custom template with a power-law flux

    Uses the module-level analysis `ana`.

    Parameters
    ----------
    template : array_like
        The spatial template.
    gamma : float, optional
        Spectral index of the power law; also passed to the fitter.
    cutoff_tev : float, optional
        Energy cutoff in TeV (converted to GeV for the flux object).
    cpus : int, optional
        Number of CPUs the trial runner may use (`mp_cpus`).

    Returns
    -------
    object
        The trial runner created by `cy.get_trial_runner`.
    """
    flux = cy.hyp.PowerLawFlux(gamma, energy_cutoff=cutoff_tev * 1000.)
    conf = dict(
        template=template,
        flux=flux,
        randomize=['ra'],
        fitter_args=dict(gamma=gamma),
        sigsub=True,
        update_bg=True,
        fast_weight=False,
    )
    return cy.get_trial_runner(conf, ana=ana, mp_cpus=cpus)


#### Get TrialRunners

In [None]:
# One trial runner per Galactic plane template, each with the default
# settings of `get_gp_tr`.
tr_dict = {
    template_name: get_gp_tr(template_name)
    for template_name in ('pi0', 'kra5', 'kra50')
}

#### Get Results for each template

In [None]:
# Load the stored unblinded result of every template; templates without
# a result file are reported and left out of `res_dict`.
res_dict = {}
for key in tr_dict:
    f_path = os.path.join(
        cg.base_dir, 'gp/results/{0}/{0}_unblinded.npy'.format(key))
    if not os.path.exists(f_path):
        print('File does not exist: {}'.format(f_path))
        continue
    res_dict[key] = np.load(f_path)

#### Print best fit fluxes

In [None]:
# display the loaded results (entry [0] is the TS, entry [1] is ns)
res_dict

In [None]:

# Convert the best-fit number of signal events (entry [1] of each result)
# into flux quantities using the trial runners' own conversions.
model_norm = tr_dict['kra5'].to_model_norm(ns=res_dict['kra5'][1])
model_norm50 = tr_dict['kra50'].to_model_norm(ns=res_dict['kra50'][1])
dNdE = tr_dict['pi0'].to_dNdE(ns=res_dict['pi0'][1], E0=1e5)
# NOTE(review): E0=100 with unit=1e3 presumably refers to 100 TeV -- confirm
E2dNdE = tr_dict['pi0'].to_E2dNdE(ns=res_dict['pi0'][1], E0=100, unit=1e3)
print(dNdE, E2dNdE, model_norm, model_norm50, model_norm*3/2, model_norm50*3/2)


In [None]:
# model normalization for ns=273; the same ns is used for the
# acceptance-correction cross-check further down
tr_dict['kra5'].to_model_norm(ns=273)


#### Get bkg fits for each template

In [None]:
# Location of the pre-computed trial files for each template.
bkg_file_dict = {
    template_name: '{}/gp/trials/{}/{}/trials.dict'.format(
        cg.base_dir, 'DNNC', template_name)
    for template_name in ('pi0', 'kra5', 'kra50')
}
n_bkg_trials = 20000
seed = 1337

# Collect background trials per template: load them from file if a file is
# configured, otherwise run `n_bkg_trials` new trials.
bkg_dict = {}
for key, tr in tr_dict.items():
    if 'fermibubbles' in key:
        continue

    if key not in bkg_file_dict:
        print('Running background trials for template {}'.format(key))
        bkg_dict[key] = tr.get_many_fits(
            n_trials=n_bkg_trials, seed=seed, mp_cpus=20)
        continue

    print('Loading background trials for template {}'.format(key))
    # NOTE: allow_pickle=True unpickles arbitrary objects; only load
    # trial files from a trusted location.
    sig = np.load(bkg_file_dict[key], allow_pickle=True)
    # keep only the TS values of the trials without injected signal
    bkg_dict[key] = sig['poisson']['nsig'][0.0]['ts']

In [None]:
# number of background trials available per template
for k, values in bkg_dict.items():
    print(k, len(values))

#### Get ts distribution

In [None]:
# For every template: build the TS distribution of the background trials,
# plot it together with the unblinded TS and the 5-sigma threshold, and
# keep the histogram / distribution for the following cells.
bkg_hist_dict = {}
bg_tsd_dict = {}
for key, bg in bkg_dict.items():
    bg_tsd = cy.dists.TSD(bg)
    fig, ax, h = plot_bkg_trials(bg_tsd, bins=np.linspace(0, 35, 1001))
    
    bkg_hist_dict[key] = h
    bg_tsd_dict[key] = bg_tsd
    
    # unblinded result: entry [0] is the TS, entry [1] the fitted ns
    ts = res_dict[key][0]
    ns = res_dict[key][1]
    ax.axvline(
        ts, color='0.8', ls='--', lw=2,
        label='TS: {:3.3f} | ns: {:3.1f}'.format(ts, ns), 
    )
    # TS value corresponding to 5 sigma according to the trials
    ts_5sig = bg_tsd.isf_nsigma(5)
    ax.axvline(
        ts_5sig, ls='--', lw=1,
        label='5-sigma TS: {:3.3f}'.format(ts_5sig), 
    )
    ax.set_title('Template: {}'.format(key))
    ax.set_yscale('log')
    # redraw the legend so that the two vertical lines are included
    ax.legend()
    fig.savefig('{}/ts_dist_{}.png'.format(plot_dir, key))

#### Save bkg ts distribution to file

We will save everything to human-readable CSV files. 
This will allow the end-user to easily load this data in any
application/language of their choice. 

In [None]:
# One column per template. `df_bins` holds the bin edges and `df_values`
# the bin contents, so `df_bins` has one more row than `df_values`.
df_bins = pd.DataFrame()
df_values = pd.DataFrame()
for key, h in bkg_hist_dict.items():
    df_bins[key] = h.bins[0]
    df_values[key] = h.values

# turn the row index into an explicit `index` column of the CSV files
df_bins = df_bins.reset_index()
df_values = df_values.reset_index()

df_bins.to_csv('{}/bkg_trials_binning.csv'.format(plot_dir), index=False)
df_values.to_csv('{}/bkg_trials_values.csv'.format(plot_dir), index=False)


#### Compute Significance

In [None]:
from scipy import stats

def get_significance_from_hist(ts, bin_edges, bin_values):
    """Compute significance for a given test-statistic

    The p-value is the fraction of background trials in the bin that
    contains `ts` and in all bins above it. Counting the complete bin
    that contains `ts` makes the result conservative: the p-value can
    only be over-estimated by the finite bin width, never under-estimated.

    Parameters
    ----------
    ts : float
        The test-statistic value for which the significance will be calculated.
    bin_edges : array_like
        The bin edges for the background trial histogram.
        Shape: [n_bins + 1]
    bin_values : array_like
        The bin heights for the background trial histogram.
        Shape: [n_bins]

    Returns
    -------
    pval : float
        The p-value of the given test-statistic value based on the
        background trials provided via `bin_edges` and `bin_values`.
        It is 0 if `ts` lies above the last bin edge.
    nsigma : float
        The significance of rejecting the null hypothesis in units of
        (one-sided) Gaussian standard deviations.
    """
    bin_edges = np.asarray(bin_edges)
    bin_values = np.asarray(bin_values)

    # Index of the bin that contains ts, following the numpy histogram
    # convention: edge[i] <= ts < edge[i+1] belongs to bin i.
    # (searchsorted alone returns the index of the upper *edge*, which is
    # one past the index of the bin.)
    index = np.searchsorted(bin_edges, ts, side='right') - 1

    # the last bin is closed on both sides
    if ts == bin_edges[-1]:
        index = len(bin_values) - 1

    # a ts below the first edge is exceeded by all trials
    index = max(index, 0)

    # number of trials in the bin of ts and above
    n_larger = np.sum(bin_values[index:])
    pval = 1.*n_larger/np.sum(bin_values)
    nsigma = stats.norm.isf(pval)

    return pval, nsigma

# Compare three significance estimates per template: entry [3] of the
# stored result, the value from the TS distribution of the trials, and the
# value recomputed from the binned histogram that is written to file.
for key, h in bkg_hist_dict.items():
    pval, nsgima = get_significance_from_hist(
        ts=res_dict[key][0],
        bin_edges=df_bins[key].values,
        bin_values=df_values[key].values,
    )
    print('{:3.4f} | {:3.4f} (calc.) | {:3.4f} (hist) | {}'.format(
        res_dict[key][3], 
        bg_tsd_dict[key].sf_nsigma(res_dict[key][0]),
        nsgima,
        key, 
    ))


## Effective area and Acceptance Correction

#### Load Templates

In [None]:
# Each KRA-gamma file unpacks into four objects: the total map, the nu map,
# the nubar map and the energy values of the template bins. The pi0 file
# holds a single map.
# NOTE: allow_pickle=True unpickles arbitrary objects; only load files
# from a trusted location.
kra5, kra5_nu, kra5_nubar, ebins5 = np.load('/data/ana/analyses/NuSources/2021_DNNCascade_analyses/templates/KRA-gamma_5PeV_maps_energies.tuple.npy', allow_pickle = True, encoding='latin1')
kra50, kra50_nu, kra50_nubar, ebins50 = np.load('/data/ana/analyses/NuSources/2021_DNNCascade_analyses/templates/KRA-gamma_maps_energies.tuple.npy', allow_pickle=True, encoding='latin1')
pi0 = np.load('/data/ana/analyses/NuSources/2021_DNNCascade_analyses/templates/Fermi-LAT_pi0_map.npy', allow_pickle=True, encoding='latin1')


##### Some sanity checks

In [None]:
# check that the total map is the sum of the nu and nubar maps, and
# whether the nu and nubar maps are identical
kra5_sum = kra5_nu + kra5_nubar
np.allclose(kra5, kra5_sum), np.allclose(kra5_nubar, kra5_nu)

In [None]:
# Compare the template provided by the template repository of `config`
# with the map loaded directly from file: once assuming a factor of 2 and
# once a factor of 3 between them. The energy bins should match as well.
csky_kra5_template, csky_kra5_energy_bins = cg.template_repo.get_template(
    'KRA-gamma_5PeV_maps_energies', per_pixel_flux=True)
print(np.allclose(csky_kra5_template, kra5 / 2.))
print(np.allclose(csky_kra5_template, kra5 / 3.))
print(np.allclose(csky_kra5_energy_bins, ebins5))
    

In [None]:
# verify if these match the ones uploaded on zenodo
# Load a second copy of the KRA-gamma templates and compare every element
# with the templates loaded above.
# NOTE: allow_pickle=True unpickles arbitrary objects; only load files
# from a trusted location.
kra5_, kra5_nu_, kra5_nubar_, ebins5_ = np.load(
    '/data/user/mhuennefeld/to_delete/templates/KRA-gamma_5PeV_maps_energies.tuple.npy',
    allow_pickle=True, encoding='latin1')
kra50_, kra50_nu_, kra50_nubar_, ebins50_ = np.load(
    '/data/user/mhuennefeld/to_delete/templates/KRA-gamma_50PeV_maps_energies.tuple.npy',
    allow_pickle=True, encoding='latin1')

# 5 PeV cutoff model
print(
    np.allclose(kra5_, kra5),
    np.allclose(kra5_nu_, kra5_nu),
    np.allclose(kra5_nubar_, kra5_nubar),
    np.allclose(ebins5_, ebins5),
)
# 50 PeV cutoff model
print(
    np.allclose(kra50_, kra50),
    np.allclose(kra50_nu_, kra50_nu),
    np.allclose(kra50_nubar_, kra50_nubar),
    np.allclose(ebins50_, ebins50),
)
    

#### Compute Effective Area

In [None]:

def get_eff_area(sindec_min=-1, sindec_max=1, dlogE=1*np.log10(1.1), unit='cm'):
    """Get effective area for given declination band

    The effective area is computed from the signal sample of the
    module-level analysis `a` (`a.sig`).

    Parameters
    ----------
    sindec_min, sindec_max : float, optional
        Lower and upper edge of the band in sin(declination). Events
        exactly on an edge are not included.
    dlogE : float, optional
        Width of the energy bins in log10(E).
    unit : {'cm', 'm'}, optional
        Length unit of the returned area ('cm' -> cm^2, 'm' -> m^2).

    Returns
    -------
    effa : array_like
        The effective area per energy bin. Shape: [n_bins]
    bins : array_like
        The energy bin edges. Shape: [n_bins + 1]
    area_event : array_like
        The contribution of each selected event to the effective area.
    energy : array_like
        The true energy of each selected event.

    Raises
    ------
    ValueError
        If `unit` is neither 'cm' nor 'm'.
    """
    # Match the binning of the KRA-gamma templates (dlogE=np.log10(1.1),
    # starting at 10 GeV) to avoid binning artifacts later on. The edges
    # are shifted down by a small epsilon and then restricted to a
    # smaller energy range.
    log_edges = np.arange(1 - 0.0001*dlogE, 8.01, dlogE)
    bins = 10**log_edges
    bins = bins[np.logical_and(bins >= 1e2, bins <= 1.1e7)]

    # select the events of this declination band
    sig = a.sig
    sindec_true = np.sin(sig.true_dec)
    mask = (sindec_true < sindec_max) & (sindec_true > sindec_min)
    true_energy = sig.true_energy[mask]

    # solid angle covered by the declination band
    solid_angle = 2 *np.pi * (sindec_max - sindec_min)

    # conversion factor from cm^2 to the requested unit
    if unit == 'm':
        cm2_to_unit = 1e4
    elif unit == 'cm':
        cm2_to_unit = 1
    else:
        raise ValueError(unit)

    # per-event contribution, normalized by solid angle ...
    area_event = sig.oneweight[mask] / cm2_to_unit
    area_event /= solid_angle

    # ... and by the energy interval at the event's energy, written in
    # differential form: dE = E * ln(10) * dlogE
    area_event /= true_energy * dlogE * np.log(10)

    effa, _ = np.histogram(true_energy, bins=bins, weights=area_event)

    return effa, bins, area_event, true_energy

# Five equal-width bands in sin(dec)
sindec_bins = np.linspace(-1., 1., 6)

# Compute the effective area once per declination band and keep the
# per-event quantities, so that the cross-check plot below does not have
# to recompute everything.
eff_area = []
band_events = []
for i, sindec_max in enumerate(sindec_bins[1:]):
    effa, energy_bins, area_event, energy = get_eff_area(
        sindec_min=sindec_bins[i], sindec_max=sindec_max,
    )
    eff_area.append(effa)
    band_events.append((area_event, energy))

# shape: [n_bins_sindec, n_bins_energy]
eff_area = np.array(eff_area)
print('eff_area', eff_area.shape)

# Cross-check: the binned effective area (solid) must agree with a direct
# weighted histogram of the events (dashed) for every declination band.
fig, ax = plt.subplots()
for i, (area_event, energy) in enumerate(band_events):
    ax.step(energy_bins[1:], eff_area[i])
    ax.hist(energy, bins=energy_bins, weights=area_event, histtype='step', ls='--')
ax.set_xscale('log')
ax.set_yscale('log')
ax.set_xlabel(r'$E_\mathrm{true}$ / GeV')
ax.set_ylabel(r'$A_\mathrm{eff}$ / cm$^2$')



#### Save effective area to file

In [None]:
# Write the effective area and its binning in sin(dec) and energy to
# separate CSV files.
for file_name, table in (
        ('effa_values.csv', eff_area),
        ('effa_bins_sindec.csv', sindec_bins),
        ('effa_bins_energy.csv', energy_bins)):
    np.savetxt('{}/{}'.format(plot_dir, file_name), table, delimiter=',')

#### Compute acceptance correction

In [None]:


def acceptance_correct_pi0(
        ns, template, eff_area, sindec_bins, energy_bins,
        livetime=304047105.0735066):
    """Convert a number of signal events to the corresponding pi0 flux

    The expected number of events is computed for a reference flux
    (E^-2.7 power law with a fixed normalization at 100 TeV) from the
    binned effective area. The ratio of `ns` to this expectation scales
    the reference flux.

    Parameters
    ----------
    ns : array_like
        The number of signal events that will be converted to the
        corresponding flux.
    template : array_like
        The spatial template (healpix map) in units of sr^-1.
        Shape: [npix]
    eff_area : array_like
        The effective area in cm^2, binned in sin(dec) along the first
        and in energy (in GeV) along the second axis.
        Shape: [n_bins_sindec, n_bins_energy]
    sindec_bins : array_like
        The bin edges along the first dimension corresponding to sin(dec).
        Shape: [n_bins_sindec + 1]
    energy_bins : array_like
        The bin edges along the second dimension corresponding to
        the energy in GeV.
        Shape: [n_bins_energy + 1]
    livetime : float
        The livetime of the dataset in seconds.
        For the 10-year cascade dataset, the livetime corresponds to
        304047105.0735066 seconds, which is the default value.

    Returns
    -------
    array_like
        The flux in terms of E^2 dN/dE at 100 TeV in units
        of TeV cm^-2 s^-1.
    """
    n_pixels = len(template)
    nside = hp.npix2nside(n_pixels)

    # The flux Phi(sindec, energy) factorizes for the pi0 model into a
    # spatial term (sr^-1) and an energy term (GeV^-1 cm^-2 s^-1), because
    # the spectrum is assumed not to depend on the sky position.
    theta, _ = hp.pix2ang(nside, np.r_[:n_pixels])
    pix_sindec = np.sin(np.pi/2. - theta)

    # Spatial term: sum of the template pixels in each declination band.
    # shape: [n_sindec], units of sr^-1
    phi_sindec = np.array([
        np.sum(template[np.logical_and(pix_sindec >= lower, pix_sindec < upper)])
        for lower, upper in zip(sindec_bins[:-1], sindec_bins[1:])
    ], dtype=float)

    # Reference flux: sky-integrated, per-flavor (nu + nubar).
    # The normalization is arbitrary, it only serves as a reference scale.
    norm = 2.4368317158544137e-18  # in units of GeV^-1 cm^-2 s^-1
    e0 = 1e5  # 100 TeV in units of GeV
    gamma = 2.7

    # flux-weighted average energy in each energy bin
    e_low = energy_bins[:-1]
    e_high = energy_bins[1:]
    energy_avg = (
        (-gamma + 1) / (-gamma + 2) *
        (e_high**(-gamma+2) - e_low**(-gamma+2)) /
        (e_high**(-gamma+1) - e_low**(-gamma+1))
    )

    # Energy term, shape: [n_energy], GeV^-1 cm^-2 s^-1
    phi_e = norm * (energy_avg / e0) ** (-gamma)

    # Phi(sindec, energy), shape: [n_sindec, n_energy]
    phi = phi_e[np.newaxis] * phi_sindec[:, np.newaxis]

    # "integrate" over the energy bin width and over the solid angle
    # (pixel area), shape stays [n_sindec, n_energy], units: cm^-2 s^-1
    phi *= np.diff(energy_bins)[np.newaxis]
    phi *= hp.nside2pixarea(nside)

    # Expected number of events for the reference flux
    # (eff_area in cm^2, livetime in s).
    total_acceptance = livetime * np.sum(eff_area * phi)

    # Ratio of observed to expected events: scale factor for the
    # reference normalization.
    model_norm = ns / total_acceptance
    print('model_norm', model_norm)

    # E^2 dN/dE at 100 TeV in units of TeV cm^-2 s^-1
    norm_tev = norm * 1e3  # in units of TeV^-1 cm^-2 s^-1
    norm_E2_tev = norm_tev * (100)**2
    return model_norm*norm_E2_tev

# acceptance-corrected pi0 flux for ns=748, using the livetime of the
# loaded analysis instead of the function's default value
acceptance_correct_pi0(
    ns=748,
    template=pi0,
    eff_area=eff_area,
    sindec_bins=sindec_bins,
    energy_bins=energy_bins,
    livetime=a.livetime,
)
    

In [None]:
def acceptance_correct_kra(ns, template, template_emids, eff_area, sindec_bins, energy_bins, livetime):
    """Convert a number of signal events to a KRA-gamma model normalization

    Parameters
    ----------
    ns : array_like
        The number of signal events.
    template : array_like
        The KRA-gamma template: flux per pixel and template energy bin in
        units of GeV^-1 sr^-1 s^-1 cm^-2, summed over all flavors.
        Shape: [npix, n_template_bins]
    template_emids : array_like
        Energy values of the template bins in GeV. They are used as the
        lower edges of the template bins here; the upper edge of the last
        bin is set to 1.1 times its lower edge.
        Shape: [n_template_bins]
    eff_area : array_like
        The effective area, binned in sin(dec) and energy.
        Shape: [n_bins_sindec, n_bins_energy]
    sindec_bins : array_like
        The sin(dec) bin edges. Shape: [n_bins_sindec + 1]
    energy_bins : array_like
        The energy bin edges in GeV. Shape: [n_bins_energy + 1]
    livetime : float
        The livetime of the dataset in seconds.

    Returns
    -------
    array_like
        The model normalization, i.e. `ns` divided by the number of events
        expected for the (per-flavor) template flux.
    """
    n_pixels = len(template)
    nside = hp.npix2nside(n_pixels)
    pixarea = hp.nside2pixarea(nside)

    # widths of the template energy bins
    t_energy_bins = np.r_[template_emids, template_emids[-1]*1.1]
    t_delta_e = np.diff(t_energy_bins)

    # sin(dec) of every pixel
    theta, _ = hp.pix2ang(nside, np.r_[:n_pixels])
    pix_sindec = np.sin(np.pi/2. - theta)

    n_dec = len(sindec_bins) - 1
    n_energy = len(energy_bins) - 1

    # Assignment of template bins to effective-area energy bins. This does
    # not depend on the declination band, so it is built only once.
    energy_masks = [
        np.logical_and(
            template_emids >= energy_bins[j],
            template_emids < energy_bins[j + 1],
        )
        for j in range(n_energy)
    ]

    # Phi(sindec, energy), shape: [n_sindec, n_energy]
    phi = np.zeros((n_dec, n_energy))

    for i in range(n_dec):
        # all pixels belonging to this dec band
        in_band = np.logical_and(
            pix_sindec >= sindec_bins[i],
            pix_sindec < sindec_bins[i + 1],
        )
        template_decband = template[in_band]

        for j, mask_bins in enumerate(energy_masks):
            # shape: [n_pix, n_bins], GeV^-1 * sr^-1 * s^-1 * cm^-2
            # (indexing with a mask creates a copy, so the in-place
            # operations below do not modify the template)
            flux_bins = template_decband[:, mask_bins]

            # "integrate" over energy -> sr^-1 * s^-1 * cm^-2
            flux_bins *= t_delta_e[mask_bins]

            # "integrate" over solid angle -> s^-1 * cm^-2
            flux_bins *= pixarea

            phi[i, j] = np.sum(flux_bins)

            # the template is the sum of all flavors (nu and nubar);
            # convert to the per-flavor flux (nu + nubar)
            phi[i, j] /= 3.

    print('eff_area.shape', eff_area.shape)
    total_acceptance = livetime * np.sum(eff_area * phi)
    print('total_acceptance', total_acceptance)
    model_norm = ns / total_acceptance
    print('model_norm', model_norm)

    return model_norm

# Cross-check for the kra5 model: compare the model normalization from the
# binned effective area with the conversions of the trial runners.
ns = 273
model_norm = acceptance_correct_kra(
    ns=ns,
    template=kra5,
    template_emids=ebins5,
    eff_area=eff_area,
    sindec_bins=sindec_bins,
    energy_bins=energy_bins,
    livetime=a.livetime,
)
model_norm_tr5 = tr_dict['kra5'].to_model_norm(ns)
model_norm_tr50 = tr_dict['kra50'].to_model_norm(ns)
print(model_norm, model_norm_tr5, model_norm_tr50)


In [None]:
def get_flux_from_template(ra, dec, energy, template, template_emids, csky_indexing=False):
    """Look up the template flux for given directions and energies

    Besides returning the flux, this prints two diagnostic checks that
    test whether the selected energy index brackets the event energy when
    `template_emids` is interpreted as bin mids or as lower bin edges.

    Parameters
    ----------
    ra, dec : array_like
        Right ascension and declination in radians.
    energy : array_like
        Energies, in the same unit as `template_emids`.
    template : array_like
        The template. Shape: [npix, n_template_bins]
    template_emids : array_like
        Energy values of the template bins. Shape: [n_template_bins]
    csky_indexing : bool, optional
        If True, the energy bin is chosen as `searchsorted(...) + 1`
        (limited to the last bin); according to the original inline note
        `- 1` would be the correct offset. If False, the values in
        `template_emids` are used as lower bin edges.

    Returns
    -------
    array_like
        The template value for each direction/energy pair.
    """
    nside = hp.npix2nside(len(template))
    last_bin = template_emids.size - 1

    # pixel for each direction
    ipix = hp.ang2pix(nside=nside, theta=np.pi/2. - dec, phi=ra)

    # energy bin for each energy
    if csky_indexing:
        # correct would be -1 instead of +1
        energy_idx = np.minimum(
            np.searchsorted(template_emids, energy) + 1, last_bin)
    else:
        # bin edges: template values plus one additional upper edge
        t_energy_bins = np.r_[template_emids, template_emids[-1]*1.1]
        energy_idx = np.searchsorted(t_energy_bins, energy) - 1

    # diagnostics: neighbouring template energies around the chosen index
    e_below = template_emids[np.clip(energy_idx-1, 0, last_bin)]
    e_at = template_emids[np.clip(energy_idx, 0, last_bin)]
    e_above = template_emids[np.clip(energy_idx+1, 0, last_bin)]
    print('Order matches up [mid interpretation]:', np.logical_and(
        energy > e_below,
        energy < e_above,
    ).all())
    print('Order matches up [edge interpretation]:', np.logical_and(
        energy >= e_at,
        energy < e_above,
    ).all())

    # select corresponding flux
    return template[ipix, energy_idx]

# Compute the expected total number of signal events for the kra5 model
# directly from the simulation weights, once with each of the two energy
# indexing variants of `get_flux_from_template`, and compare with the
# total acceptance stored in the csky signal injectors.
n_tota_dict = {}
for csky_indexing in [True, False]:
    print('Using csky indexing:', csky_indexing)
    
    # number of random right-ascension realizations to average over
    n_samples = 1
    flux = 0
    for i in range(n_samples):
        rng = np.random.RandomState(42 + i)
        flux += get_flux_from_template(
            # right ascension is drawn uniformly at random
            ra=rng.uniform(0., 2*np.pi, size=len(a.sig.true_ra)),
            dec=a.sig.true_dec,
            energy=a.sig.true_energy,
            #template=kra5,
            template=kra5/3.,
            #template=csky_kra5_template * 2/3,
            #template=csky_kra5_template,
            template_emids=ebins5,
            csky_indexing=csky_indexing,
        )
    flux /= n_samples
    n_total = np.sum(a.sig.oneweight * a.livetime * flux)
    print(csky_indexing, n_total)
    n_tota_dict[csky_indexing] = n_total
    print()

for key, tr in tr_dict.items():
    print('csky n_total : {} [{}]'.format(tr._sig_injs[0].acc_total, key))
# ratio between the two indexing variants next to a reference value
print(n_tota_dict[False] / n_tota_dict[True], 1/(1.1**2)**(-2.5))
    

## Contribution Map

#### Helper Methods

In [None]:
from multiprocessing import Pool
from tqdm.notebook import tqdm_notebook as tqdm
from dnn_cascade_selection.utils.notebook import ps_pdf

def get_energy_and_space_contribution(trial, tr, gamma=2.7):
    """Get the per-event S/B terms of the likelihood for a given trial

    The likelihood of the trial is fit once (the fit result is printed)
    and the space and energy terms are then evaluated for all events at
    the given spectral index.

    Parameters
    ----------
    trial : object
        The trial, e.g. from `tr.get_one_trial`.
    tr : object
        The trial runner that belongs to the trial.
    gamma : float, optional
        The spectral index at which the terms are evaluated.

    Returns
    -------
    SoB_space : array_like
        First output of the space evaluator: (S/B)_space per event.
    SoB_space_ss : array_like
        Second output of the space evaluator: the signal-subtraction
        term per event.
    SoB_energy : array_like
        (S/B)_energy per event.
    ns : float
        The fitted number of signal events.
    N : float
        The total number of events in the trial.
    """
    L = tr.get_one_llh_from_trial(trial)

    res = L.fit(**tr.fitter_args)
    ns = res[1]['ns']
    N = float(len(trial.evss[0][0]))
    print(res, ns, N)

    # 0: background events (1 would be for signal events)
    space_eval = cy.inspect.get_space_eval(L, -1, 0)
    energy_eval = cy.inspect.get_energy_eval(L, -1, 0)

    # evaluate the space term only once: it provides both outputs
    space_terms = space_eval(gamma=gamma)
    SoB_space = space_terms[0]
    SoB_space_ss = space_terms[1]
    SoB_energy = energy_eval(gamma=gamma)[0]

    return SoB_space, SoB_space_ss, SoB_energy, ns, N

def get_lr_from_trial(trial, tr, gamma=2.7):
    """Get event likelihood-ratio value of signal-subtracted likelihood

    Info here:
        https://wiki.icecube.wisc.edu/index.php/
        Cascade_7yr_PS_GP/Galactic_Source_Search_Methods#Signal-Subtracted_Likelihood

    Parameters
    ----------
    trial : object
        The trial, e.g. from `tr.get_one_trial`.
    tr : object
        The trial runner that belongs to the trial.
    gamma : float, optional
        The spectral index at which the S/B terms are evaluated.

    Returns
    -------
    array_like
        The likelihood-ratio value of each event:
        ((S/B)_space - (S_ss/B)_space) * (S/B)_energy * ns/N + 1
    """
    # fit the trial and evaluate the individual S/B terms
    # (this also prints the fit result)
    SoB_space, SoB_space_ss, SoB_energy, ns, N = get_energy_and_space_contribution(
        trial=trial, tr=tr, gamma=gamma)

    w = (SoB_space - SoB_space_ss) * SoB_energy
    lr = w * ns/N + 1.
    return lr
    
def get_lr(template_str, gamma=2.7, seed=None, TRUTH=True):
    """Get event likelihood-ratio value of signal-subtracted likelihood

    A single trial is drawn from the trial runner of the given template
    (`tr_dict[template_str]`) and passed on to `get_lr_from_trial`.

    Info here:
        https://wiki.icecube.wisc.edu/index.php/
        Cascade_7yr_PS_GP/Galactic_Source_Search_Methods#Signal-Subtracted_Likelihood

    Parameters
    ----------
    template_str : str
        Key of the trial runner in `tr_dict`.
    gamma : float, optional
        The spectral index at which the S/B terms are evaluated.
    seed : int or None, optional
        Random seed for the trial.
    TRUTH : bool, optional
        Passed on to `get_one_trial`.

    Returns
    -------
    array_like
        The likelihood-ratio value of each event.
    """
    runner = tr_dict[template_str]
    one_trial = runner.get_one_trial(seed=seed, TRUTH=TRUTH)
    return get_lr_from_trial(trial=one_trial, tr=runner, gamma=gamma)

def get_contribution_list(template_str, N, w, ws, seed=None, TRUTH=True):
    """Collect weight and reconstruction of the N highest-ranked events

    The events are looked up via their `idx` field with a single sorted
    search instead of one full scan over all events per requested event.

    Parameters
    ----------
    template_str : str
        Key of the trial runner in `tr_dict`.
    N : int
        Number of events to collect.
    w : array_like
        Weight of each event, indexed by the event index.
    ws : array_like
        Event indices in ascending order of importance; the last `N`
        entries are used, starting from the end.
    seed : int or None, optional
        Random seed for the trial.
    TRUTH : bool, optional
        Passed on to `get_one_trial`.

    Returns
    -------
    ci_list, ra_list, dec_list, sigma_list : array_like
        Weight, right ascension, declination and angular uncertainty of
        the selected events, in the order of selection.

    Raises
    ------
    IndexError
        If a requested event index is not present in the trial.
    """
    trial = tr_dict[template_str].get_one_trial(seed=seed, TRUTH=TRUTH)
    events = trial.evss[0][0]

    # event indices to look up, most important first
    selected_ids = np.asarray(ws)[::-1][:N]
    if len(selected_ids) == 0:
        return np.array([]), np.array([]), np.array([]), np.array([])

    # Position of every requested index within the events. The stable sort
    # ensures that the first occurrence is chosen should an index appear
    # more than once.
    event_ids = np.asarray(events.idx)
    order = np.argsort(event_ids, kind='stable')
    slots = np.searchsorted(event_ids, selected_ids, sorter=order)
    slots = np.minimum(slots, len(event_ids) - 1)
    positions = order[slots]
    if not np.array_equal(event_ids[positions], selected_ids):
        raise IndexError(
            'Not all requested event indices are present in the trial.')

    ci_list = np.asarray(w)[selected_ids]
    ra_list = np.asarray(events.ra)[positions]
    dec_list = np.asarray(events.dec)[positions]
    sigma_list = np.asarray(events.sigma)[positions]
    return ci_list, ra_list, dec_list, sigma_list

def compute_pixel(args):
    """Compute the value of a single pixel of the contribution map

    Parameters
    ----------
    args : tuple
        (ipix, nside, ci_list, ra_list, dec_list, sigma_list): the pixel
        index, the healpix nside, and weight, right ascension, declination
        and angular uncertainty of the events. Everything is packed into
        one tuple so that the function can be used with `Pool.imap`.

    Returns
    -------
    float
        The sum over all events of the event weight times the von Mises
        pdf evaluated at the angular distance between event and pixel.
    """
    ipix, nside, ci_list, ra_list, dec_list, sigma_list = args

    # direction of the pixel center
    theta, phi = hp.pix2ang(nside=nside, ipix=ipix)
    pixel_dec = np.pi/2.  - theta
    pixel_ra = phi

    # angular distance between the pixel and every event
    separation = cy.coord.delta_angle(
        zenith1=pixel_dec,
        azimuth1=pixel_ra,
        zenith2=dec_list,
        azimuth2=ra_list,
        latlon=True,
    )
    event_pdf = ps_pdf.von_mises_pdf(separation, sigma=sigma_list)

    return np.sum(event_pdf * ci_list)
    
def get_contribution_map(template_str, N, w, ws, nside=64, cpus=25, normalize=False, seed=None, TRUTH=True):
    """Create a healpix map of the event contributions

    Parameters
    ----------
    template_str : str
        Key of the trial runner in `tr_dict`.
    N : int
        Number of events to include.
    w : array_like
        Weight of each event, indexed by the event index.
    ws : array_like
        Event indices in ascending order of importance.
    nside : int, optional
        The healpix nside of the map.
    cpus : int, optional
        Number of worker processes. With `cpus <= 1` the map is computed
        in the current process.
    normalize : bool, optional
        If True, the map is normalized such that it integrates to one
        over the sky.
    seed : int or None, optional
        Random seed for the trial.
    TRUTH : bool, optional
        Passed on to `get_one_trial`.

    Returns
    -------
    array_like
        The contribution map. Shape: [npix]
    """
    ci_list, ra_list, dec_list, sigma_list = get_contribution_list(
        template_str=template_str, N=N, w=w, ws=ws, seed=seed, TRUTH=TRUTH)

    npix = hp.nside2npix(nside)
    arg_list = [(i, nside, ci_list, ra_list, dec_list, sigma_list) for i in range(npix)]

    if cpus > 1:
        print('Running pool with {} cpus'.format(cpus))

        # Send the pixels to the workers in chunks rather than one by one:
        # every task references the same event arrays, and the tasks of a
        # chunk are serialized together.
        chunksize = max(1, npix // (cpus * 16))

        # the pool is shut down when the `with` block is left
        with Pool(cpus) as p:
            skymap = list(tqdm(
                p.imap(compute_pixel, arg_list, chunksize=chunksize),
                total=npix,
            ))
        skymap = np.array(skymap)
    else:
        skymap = np.zeros(npix)
        for i in tqdm(range(npix), total=npix):
            skymap[i] = compute_pixel(arg_list[i])

    # normalize skymap
    if normalize:
        skymap = np.array(skymap) / np.sum(skymap) / hp.nside2pixarea(nside=nside)
    return skymap



#### Space vs Energy Contribution

In [None]:
# per-event S/B terms of the pi0 analysis for a trial drawn with TRUTH=True
SoB_space, SoB_space_ss, SoB_energy, ns, N = get_energy_and_space_contribution(
    trial=tr_dict['pi0'].get_one_trial(TRUTH=True),
    tr=tr_dict['pi0'],
)
def get_ts(SoB_space, SoB_space_ss, SoB_energy, ns, N):
    """Compute the test-statistic for one or several values of ns

    Parameters
    ----------
    SoB_space : array_like
        (S/B)_space of each event. Shape: [n_events]
    SoB_space_ss : array_like
        Signal-subtraction term of each event. Shape: [n_events]
    SoB_energy : array_like
        (S/B)_energy of each event. Shape: [n_events]
    ns : float or array_like
        The number(s) of signal events. Shape: [] or [n_ns_vals]
    N : float
        The total number of events.

    Returns
    -------
    array_like
        The test-statistic for each value of ns:
        2 * sum_i log(((S/B)_space - (S_ss/B)_space) * (S/B)_energy * ns/N + 1)
        Shape: [n_ns_vals]
    """
    # signal fraction as a column, shape: [n_ns_vals, 1]
    ns_fraction = np.atleast_1d(ns)[:, np.newaxis]/N

    # per-event likelihood ratio; the event terms are rows of shape
    # [1, n_events] and broadcast against the column above to
    # [n_ns_vals, n_events]
    lr_events = (
        (SoB_space[np.newaxis, ] - SoB_space_ss[np.newaxis, ])
        * SoB_energy[np.newaxis, ] * ns_fraction + 1
    )

    # sum over the events, shape: [n_ns_vals]
    return np.sum(2*np.log(lr_events), axis=1)


# Scan the test-statistic as a function of ns (with the S/B terms fixed)
# and mark the fitted ns with a vertical line.
x = np.linspace(0., 2000, 1000)
ts_vals = get_ts(
    SoB_space=SoB_space, 
    SoB_space_ss=SoB_space_ss, 
    SoB_energy=SoB_energy, 
    ns=x, 
    N=N,
)

fig, ax = plt.subplots()
ax.plot(x, ts_vals)
ax.axvline(ns, ls='--', color='0.7')
ax.set_xlabel('$n_s$')
ax.set_ylabel('TS')

# test-statistic recomputed at the fitted ns
ts = get_ts(
    SoB_space=SoB_space, 
    SoB_space_ss=SoB_space_ss, 
    SoB_energy=SoB_energy, 
    ns=ns, 
    N=N,
)
print('ts', ts)
print('ns, N', ns, N)



In [None]:
# Distributions of the individual per-event terms and of their combination
# (values outside of the range [-20, 30] are not shown).
fig, ax = plt.subplots(figsize=(9, 6))
bins = np.linspace(-20, 30, 100)
ax.hist(SoB_space, bins=bins, histtype='step', label='SoB_space')
ax.hist(SoB_space_ss, bins=bins, histtype='step', label='SoB_space_ss')
ax.hist(SoB_energy, bins=bins, histtype='step', label='SoB_energy')
ax.hist((SoB_space - SoB_space_ss)*SoB_energy, bins=bins, histtype='step', label='Combined Contribution')
ax.legend()
ax.set_yscale('log')


### Save event contributions for data release table

This creates a table where each row corresponds to an event.
Columns include contributions from 
- $(S/B)_\mathrm{space}$
- $(\tilde{S}/B)_\mathrm{space}$
- $(S/B)_\mathrm{energy}$


for each of the three Galactic Plane models

In [None]:
# One row per event, three columns (space, signal-subtracted space, energy)
# per template.
df_events = pd.DataFrame()
for key, tr in tr_dict.items():
    SoB_space, SoB_space_ss, SoB_energy, ns, N = get_energy_and_space_contribution(
        trial=tr.get_one_trial(TRUTH=True),
        tr=tr,
    )
    df_events['{}_SoB_space'.format(key)] = SoB_space
    df_events['{}_SoB_space_ss'.format(key)] = SoB_space_ss
    df_events['{}_SoB_energy'.format(key)] = SoB_energy

# add reconstructed energy
# NOTE(review): this assumes that `a.bg_data` has the same event order as
# the trials drawn with TRUTH=True -- confirm
df_events['energy'] = a.bg_data.energy

# turn the row index into an explicit `index` column of the CSV file
df_events = df_events.reset_index()

# save to file
df_events.to_csv(
    '{}/event_contributions_gp.csv'.format(plot_dir),
    index=False,
)

df_events 

#### Compute event contribution

In [None]:
# Per-event likelihood ratios for each template. For the KRA models,
# gamma=None is passed (second positional argument of `get_lr`).
lr_dict = {
    'pi0': get_lr('pi0'),
    'kra5': get_lr('kra5', None),
    'kra50': get_lr('kra50', None),
}

# per-event TS contributions; their sum is printed for each template
ts_dict = {}
for key, lr in lr_dict.items():
    ts = 2 * np.log(lr)
    print(key, np.sum(ts))
    ts_dict[key] = ts



In [None]:
# Same as the previous cell, but for a trial drawn with TRUTH=False and a
# fixed seed.
bkg_scrample_seed = 4
lr_bkg_dict = {
    'pi0': get_lr('pi0', seed=bkg_scrample_seed, TRUTH=False),
    'kra5': get_lr('kra5', None, seed=bkg_scrample_seed, TRUTH=False),
    'kra50': get_lr('kra50', None, seed=bkg_scrample_seed, TRUTH=False),
}

ts_bkg_dict = {}
for key, lr in lr_bkg_dict.items():
    ts = 2 * np.log(lr)
    print(key, np.sum(ts))
    ts_bkg_dict[key] = ts
    
    # Best effort: report how significant this trial is compared to the
    # background trials. Any failure is only printed, so that the
    # remaining templates are still processed.
    try:
        bg_tsd = cy.dists.TSD(bkg_dict[key])
        total_ts = np.sum(ts)
        print('  {} | ts: {:3.3f} | p-value: {:3.3f} | n-sigma: {:3.3f}'.format(
            key, total_ts, bg_tsd.sf(total_ts), bg_tsd.sf_nsigma(total_ts)))
    except Exception as e:
        print(e)
        pass

print('bkg_scrample_seed', bkg_scrample_seed)

#### Compute contribution Maps

In [None]:
# healpix resolution of the contribution maps
nside = 64

# Contribution map per template, including all events (N=len(ts_values)).
skymaps_all = {}
for key, ts_values in ts_dict.items():
    print('Creating contributions for {}'.format(key))
    
    # event indices ordered by ascending TS contribution
    sorted_idx = np.argsort(ts_values)
    skymaps_all[key] = get_contribution_map(
        template_str=key, N=len(ts_values), w=ts_values, ws=sorted_idx, nside=nside)


In [None]:
# Contribution maps for the TRUTH=False trial. Seed and TRUTH must match
# the values used for `lr_bkg_dict`, so that the same trial is drawn again.
skymaps_all_bkg = {}
for key, ts_values in ts_bkg_dict.items():
    print('Creating contributions for {}'.format(key))
    
    # event indices ordered by ascending TS contribution
    sorted_idx = np.argsort(ts_values)
    skymaps_all_bkg[key] = get_contribution_map(
        template_str=key, N=len(ts_values), w=ts_values, ws=sorted_idx, nside=nside, 
        seed=bkg_scrample_seed, TRUTH=False,
    )
    

#### Save contribution maps to file

In [None]:
# One row per healpix pixel: pixel index, direction of the pixel center,
# and one column per contribution map.
df_contribution = pd.DataFrame()

df_contribution['ipix'] = np.arange(hp.nside2npix(nside))
theta, phi = hp.pix2ang(nside=nside, ipix=df_contribution['ipix'])
df_contribution['dec'] = np.pi/2.  - theta
df_contribution['ra'] = phi
    
for key, skymap in skymaps_all.items():
    df_contribution[key] = skymap

# maps of the TRUTH=False trial get the prefix `bkg_trial_`
for key, skymap in skymaps_all_bkg.items():
    df_contribution['bkg_trial_' + key] = skymap

df_contribution.to_csv(
    '{}/contribution_map.csv'.format(plot_dir),
    index=False,
)
df_contribution

In [None]:
# check how these are saved
# Read the CSV file back in and plot the maps to verify that they were
# written correctly.
if True:
    df_contribution_ = pd.read_csv('{}/contribution_map.csv'.format(plot_dir))
    for key, _ in skymaps_all.items():

        # load from csv
        skymap = df_contribution_[key]

        hp.mollview(skymap)

# Scratch Space

#### Check bkg trial distributions after csky bug fix

In [None]:
def get_gp_tr_mod(template, template_str, ana, cutoff=np.inf, gamma=None, cpus=20):
    """Create a Galactic plane trial runner with a replaced template

    The configuration is created as in `get_gp_tr`, then the `dir` entry
    is removed, `azimuth` is requested as an extra field to keep, and the
    template is replaced by the one passed in.

    Parameters
    ----------
    template : array_like
        The template to use instead of the configured one.
    template_str : str
        Name of the template, passed on to `cg.get_gp_conf`.
    ana : object
        The csky analysis.
    cutoff : float, optional
        Energy cutoff. It is multiplied by 1e3 and passed on as
        `cutoff_GeV`, i.e. presumably expected in TeV.
    gamma : float or None, optional
        Spectral index passed on to `cg.get_gp_conf`.
    cpus : int, optional
        Number of CPUs the trial runner may use (`mp_cpus`).

    Returns
    -------
    object
        The trial runner created by `cy.get_trial_runner`.
    """
    conf = cg.get_gp_conf(
        template_str=template_str,
        gamma=gamma,
        cutoff_GeV=cutoff * 1e3,
        base_dir=cg.base_dir,
    )

    # modify the default configuration
    del conf['dir']
    conf['extra_keeps'] = ['azimuth']
    conf['template'] = template

    return cy.get_trial_runner(conf, ana=ana, mp_cpus=cpus)

def get_kra5_tr_mod(ana, template, energy_bins, cpus=30):
    """Create a trial runner for an energy-dependent (KRA-gamma) template

    Parameters
    ----------
    ana : object
        The csky analysis.
    template : array_like
        The template. Shape: [npix, n_energy_bins]
    energy_bins : array_like
        The energy values belonging to the second axis of the template.
    cpus : int, optional
        Number of CPUs the trial runner may use (`mp_cpus`).

    Returns
    -------
    object
        The trial runner created by `cy.get_trial_runner`.
    """
    # sky-integrated flux: sum over all pixels times the pixel area
    nside = hp.npix2nside(len(template))
    kra_flux = cy.hyp.BinnedFlux(
        bins_energy=energy_bins,
        flux=template.sum(axis=0)*hp.nside2pixarea(nside))

    # binning of the energy pdf ratio in sin(dec) and log10(energy)
    sindec_edges = np.linspace(-1, 1, 20)
    log10e_edges = np.linspace(np.log10(500), 8.001, 20)
    energy_pdf_args = dict(
        hkw=dict(bins=(sindec_edges, log10e_edges)),
        flux=kra_flux,
        features=['sindec', 'log10energy'],
        normalize_axes=([1]))

    conf = {
        'template': template,
        'bins_energy': energy_bins,
        'randomize': ['ra'],
        'update_bg': True,
        'fast_weight': False,
        'sigsub': True,
        cy.pdf.CustomFluxEnergyPDFRatioModel: energy_pdf_args,
        'energy': False,
    }

    return cy.get_trial_runner(conf, ana=ana, mp_cpus=cpus)

# the template is divided by 3, as for the per-flavor flux in
# `acceptance_correct_kra`
tr_kra5_mod = get_kra5_tr_mod(ana=ana, template=kra5/3., energy_bins=ebins5)


In [None]:
# Background trials with the modified kra5 trial runner. The number of
# CPUs is passed as `mp_cpus`, consistent with the other calls to
# `get_many_fits` and `get_trial_runner` in this notebook.
bkg_trials_kra5 = tr_kra5_mod.get_many_fits(n_trials=1000000, mp_cpus=30)

In [None]:
# Compare the TS distribution of the new kra5 background trials with the
# one of the trials loaded from file.
plot_bkg_trials_tsd = cy.dists.Chi2TSD(bkg_trials_kra5)
plot_bkg_trials_tsd_pre = cy.dists.Chi2TSD(bkg_dict['kra5'][:10000000])
bins = np.linspace(0, 30, 50)
fig, ax, h = plot_bkg_trials(plot_bkg_trials_tsd, density=True, bins=bins)
plot_bkg_trials(plot_bkg_trials_tsd_pre, density=True, bins=bins, ax=ax, fig=fig, color='orange')
# raw string: '\g' is not a valid escape sequence in a normal string
ax.set_xlabel(r'TS [KRA-$\gamma$ 5]')
fig.savefig('{}/ts_comparison_kra5.png'.format(plot_dir))