In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import os

# Optional: pin the thread pools of the BLAS/OpenMP backends used by
# numpy/scipy to a single thread, so that the number of CPUs in use is
# controlled by csky alone.
# The block is currently disabled (`if False`). It is placed before the numpy
# import further down; NOTE(review): these variables are presumably only
# honoured if they are set before the numerical libraries are loaded - confirm.
if False:
    os.environ['MKL_NUM_THREADS'] = "1"
    os.environ['NUMEXPR_NUM_THREADS'] = "1"
    os.environ['OMP_NUM_THREADS'] = "1"
    os.environ['OPENBLAS_NUM_THREADS'] = "1"
    os.environ['VECLIB_MAXIMUM_THREADS'] = "1"

import matplotlib as mpl
mpl.rcParams['figure.facecolor'] = 'w'
mpl.rcParams['savefig.facecolor'] = 'w'
import matplotlib.pyplot as plt
from matplotlib import colors, cm
import csky as cy
from csky import cext
import numpy as np
import astropy
#from icecube import astro
from tqdm.notebook import tqdm_notebook as tqdm
import histlite as hl
import healpy
import healpy as hp
import socket
import pickle
from scipy import stats
import copy
healpy.disable_warnings()
plt.rc('figure', facecolor = 'w')
plt.rc('figure', dpi=100)

## Define Settings

In [None]:
# Version tag of the selection that is handed to cy.get_analysis below.
selection_version = 'version-001-p00'

# The output location depends on the machine the notebook runs on.
host_name = socket.gethostname()

if 'cobalt' in host_name:
    print('Working on Cobalts')
    #data_prefix = '/data/user/ssclafani/data/cscd/final'
    #ana_dir = '/data/user/ssclafani/data/analyses/'
    # All plots and cached trial files of this notebook are written here.
    plot_dir = cy.utils.ensure_dir('/data/user/mhuennefeld/data/analyses/DNNCascadeCodeReview/unblinding_checks/plots/unblinding/confidence_intervals_stacking_catalogs_2d')
    
else:
    # Build one message string: passing two positional arguments to the
    # exception would render the message as the tuple ('Unknown host:', name).
    raise ValueError('Unknown host: {}'.format(host_name))

In [None]:
# Make sure every output directory exists before anything is written to it.
for dir_path in [plot_dir]:
    if not os.path.exists(dir_path):
        print('Creating directory:', dir_path)
        # exist_ok avoids a crash if another process creates the directory
        # between the existence check above and this call
        os.makedirs(dir_path, exist_ok=True)

## Load Data

In [None]:
# Handle on the csky data repository and the dataset specification
# (DNNC_10yr) from which the baseline analysis is built.
repo = cy.selections.Repository()
specs = cy.selections.DNNCascadeDataSpecs.DNNC_10yr

In [None]:
%%time

# Build the baseline analysis object for the chosen selection version and
# dataset specification. It is reused by all trial runners created below.
ana = cy.get_analysis(
    repo, selection_version, specs, 
    #gammas=np.r_[0.1:6.01:0.125],
)

In [None]:
# Short handle on the first sub-analysis; display its signal (MC) sample.
# `a` is used again further down when comparing weighted event sums.
a = ana.anas[0]
a.sig

In [None]:
# Display the background data sample of the same sub-analysis.
a.bg_data

## Helpers

In [None]:
from cycler import cycle
from copy import deepcopy

# Colour palettes used by the plotting helpers below.
soft_colors = cy.plotting.soft_colors
# NOTE: this rebinds the name `colors`, which the first cell imported from
# matplotlib (`from matplotlib import colors, cm`). From here on `colors` is
# the list of colours of matplotlib's default property cycle.
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']


def get_bias_allt(tr, n_trials=200, n_sigs=np.r_[:101:10], quiet=False):
    """Run injection fits for several signal strengths and merge the results.

    Parameters
    ----------
    tr : trial runner
        Object providing `get_many_fits(n_trials, n_sig=..., logging=..., seed=...)`.
    n_trials : int
        Number of fits requested per injected signal strength.
    n_sigs : iterable of int
        Injected signal strengths. Each value is also used as the seed of the
        corresponding batch of fits.
    quiet : bool
        If False, a single-line progress counter is printed.

    Returns
    -------
    The concatenation (via `cy.utils.Arrays.concatenate`) of all batches, each
    extended by a column 'ntrue' that holds the injected signal strength.
    """
    batches = []
    for n_inj in n_sigs:
        if not quiet:
            # the carriage return keeps the progress counter on one line
            print(f'\r{n_inj:4d} ...', end='', flush=True)
        batches.append(
            tr.get_many_fits(n_trials, n_sig=n_inj, logging=False, seed=n_inj))
    if not quiet:
        # terminate the progress line
        print()

    # tag every fit with the signal strength it was generated with
    for n_inj, batch in zip(n_sigs, batches):
        batch['ntrue'] = np.repeat(n_inj, len(batch))
    return cy.utils.Arrays.concatenate(batches)

def get_color_cycler():
    """Return a new `cycle` iterator over the notebook-level `colors` list."""
    color_iterator = cycle(colors)
    return color_iterator

def plot_bias(ax, x_fit, y_true, label=''):
    """Draw fitted versus true values together with the 1:1 expectation.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw on.
    x_fit : array_like
        Fitted values, one per trial.
    y_true : array_like
        True (injected) values, one per trial. One histogram bin is created
        around each unique value.
    label : str
        Legend label of the drawn band.

    Returns
    -------
    The 2D histogram of (y_true, x_fit) created with `hl.hist`.
    """
    true_values = np.unique(y_true)
    # bin edges half a (mean) step to either side of each unique true value
    half_step = 0.5*np.mean(np.diff(true_values))
    edges = np.r_[true_values - half_step, true_values[-1] + half_step]

    hist2d = hl.hist((y_true, x_fit), bins=(edges, 100))
    hl.plot1d(
        ax, hist2d.contain_project(1),
        errorbands=True, drawstyle='default', label=label,
    )

    # same range on both axes, taken from the outermost bin edges
    axis_range = edges[[0, -1]]
    applied_ylim = ax.set_ylim(axis_range)
    ax.set_xlim(applied_ylim)

    # dashed diagonal: fitted value equals true value
    ax.plot(axis_range, axis_range, color='C0', ls='--', lw=1, zorder=-10)
    ax.set_aspect('equal')
    ax.grid()
    return hist2d
    
def plot_ns_bias(ax, tr, allt, label=''):
    """Plot the fitted number of signal events against the injected number.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw on.
    tr : trial runner
        Not used; kept so that the signature parallels `plot_gamma_bias`.
    allt : trials
        Object with the attributes `ntrue` (injected) and `ns` (fitted).
    label : str
        Legend label of the drawn band.
    """
    # The drawing logic (binning around the unique injected values, band,
    # 1:1 line, equal aspect, grid) is exactly what plot_bias implements,
    # so delegate instead of keeping a second copy of it.
    plot_bias(ax, x_fit=allt.ns, y_true=allt.ntrue, label=label)

    ax.set_xlabel(r'$n_{inj}$')
    ax.set_ylabel(r'$n_s$')

def plot_gamma_bias(ax, tr, allt, label=''):
    """Plot the fitted spectral index against the injected number of events.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw on.
    tr : trial runner
        The injected spectral index is read from `tr.sig_injs[0].flux[0].gamma`
        and drawn as a dashed horizontal line.
    allt : trials
        Object with the attributes `ntrue` (injected) and `gamma` (fitted).
    label : str
        Legend label of the drawn band.
    """
    injected = np.unique(allt.ntrue)
    # bin edges half a (mean) step to either side of each injected value
    half_step = 0.5*np.mean(np.diff(injected))
    edges = np.r_[injected - half_step, injected[-1] + half_step]
    reference_style = dict(color='C0', ls='--', lw=1, zorder=-10)
    injected_gamma = tr.sig_injs[0].flux[0].gamma

    hist2d = hl.hist((allt.ntrue, allt.gamma), bins=(edges, 100))
    hl.plot1d(
        ax, hist2d.contain_project(1),
        errorbands=True, drawstyle='default', label=label,
    )

    ax.set_xlim(edges[[0, -1]])
    ax.set_ylim(1, 4)
    ax.axhline(injected_gamma, **reference_style)

    ax.set_xlabel(r'$n_{inj}$')
    ax.set_ylabel(r'$\gamma$')
    ax.grid()

def plot_bkg_trials(
            bg, fig=None, ax=None, 
            label='{} bg trials', 
            label_fit=r'$\chi^2[{:.2f}\mathrm{{dof}},\ \eta={:.3f}]$', 
            color=colors[0],
            density=False,
            bins=50,
        ):
    """Plot a background TS distribution and, if available, its fitted pdf.

    Parameters
    ----------
    bg : background trials object
        Must provide `get_hist(bins=...)` and `n_total`. If it also has a
        `pdf` attribute, `bg.pdf`, `bg.ndof` and `bg.eta` are used to overlay
        the fitted distribution.
    fig, ax : matplotlib figure / axes, optional
        If `ax` is None a new figure and axes are created.
    label : str or None
        Format string for the histogram label; receives `bg.n_total`.
    label_fit : str or None
        Format string for the pdf label; receives `bg.ndof` and `bg.eta`.
    color : colour
        Colour of the histogram and of the pdf curve.
    density : bool
        If True the histogram is normalised and the pdf is drawn unscaled,
        otherwise the pdf is scaled by the integral of the histogram.
    bins : int or array_like
        Passed on to `bg.get_hist`.

    Returns
    -------
    (fig, ax) : the figure and axes that were drawn on.
    """
    if ax is None:
        fig, ax = plt.subplots(figsize=(6, 4))
    elif fig is None:
        # only the axes were passed: return the figure that owns them instead
        # of None so that callers can always save or tweak the figure
        fig = ax.figure

    h = bg.get_hist(bins=bins)
    if density:
        h = h.normalize()
    if label is not None:
        label = label.format(bg.n_total)
    hl.plot1d(ax, h, crosses=True, color=color, label=label)

    # compare with the chi2 fit:
    if hasattr(bg, 'pdf'):
        x = h.centers[0]
        if label_fit is not None:
            label_fit = label_fit.format(bg.ndof, bg.eta)
        if density:
            curve = bg.pdf(x)
        else:
            # scale the pdf to the histogram normalisation
            curve = h.integrate().values * bg.pdf(x)
        ax.semilogy(x, curve, lw=1, ls='--', label=label_fit, color=color)

    ax.set_xlabel(r'TS')
    ax.set_ylabel(r'Density' if density else r'number of trials')
    ax.legend()

    return fig, ax

## Setup Analysis

In [None]:
import sys
# Make the directory two levels above the notebook importable so that the
# `config` module below can be found.
sys.path.insert(0, '../..')

import config as cg

# Base directory used further down to locate the unblinded stacking results.
cg.base_dir = '/data/user/mhuennefeld/data/analyses/unblinding_v1.0.0/'

In [None]:

def get_catalog_tr(ana, catalog, gamma, cutoff=np.inf, cpus=20):
    """Create a trial runner for a stacking catalog.

    Parameters
    ----------
    ana : csky analysis
        Analysis object the trial runner is built on.
    catalog : str
        Catalog name; the file `<catalog>_ESTES_12.pickle` is read from
        `cg.catalog_dir`.
    gamma : float
        Spectral index handed to `cg.get_ps_conf`.
    cutoff : float
        Energy cutoff; it is multiplied by 1e3 before being passed on as
        `cutoff_GeV` (so it is presumably given in TeV - TODO confirm).
    cpus : int
        Passed to `cy.get_trial_runner` as `mp_cpus`.

    Returns
    -------
    The trial runner returned by `cy.get_trial_runner`.
    """
    file_name = '{}_ESTES_12.pickle'.format(catalog)
    catalog_table = np.load(
        os.path.join(cg.catalog_dir, file_name), allow_pickle=True)

    # source positions are stored in degrees
    sources = cy.utils.Sources(
        dec=catalog_table['dec_deg'], ra=catalog_table['ra_deg'], deg=True)

    conf = cg.get_ps_conf(src=sources, gamma=gamma, cutoff_GeV=cutoff * 1e3)
    return cy.get_trial_runner(ana=ana, conf=conf, mp_cpus=cpus)


#### SnowStorm Systematics

In [None]:
import pandas as pd

# Load the SnowStorm MC table that belongs to the chosen selection version.
# `df` is read as a notebook-level global by get_snowstorm_ana further down.
df_dir = '/data/ana/PointSource/DNNCascade/analysis/{}/'.format(selection_version)
df = pd.read_hdf(
    df_dir + '/systematics/SnowStorm_Spice321/MC_NuGen_snowstorm_214xx.hdf', key='df',
)
# Keep only the six SnowStorm parameter columns plus the three columns that are
# later compared against the csky signal sample (run, energy, ow).
df = df[['SnowstormParameters_{:05d}'.format(i) for i in range(6)] + ['run', 'energy', 'ow']]

In [None]:
import pandas as pd
from copy import deepcopy
from IPython.utils import io

# [low, high] range of every SnowStorm parameter, used as the reference range
# when computing reduction factors (drawn as 'Simulation Range' in the
# sampling check below).
sim_ranges = {
    'Scattering': [0.9, 1.1],
    'Absorption': [0.9, 1.1],
    'AnisotropyScale': [0., 2.],
    'DOMEfficiency': [0.9, 1.1],
    'HoleIceForward_Unified_00': [-1.0, 1.0],
    'HoleIceForward_Unified_01': [-0.2, 0.2],
}

# [low, high] range within which the sampled parameter intervals must lie
# (drawn as 'Allowed Range' in the sampling check below). Only the two
# hole-ice parameters are narrower than the reference range.
allowed_ranges = {
    'Scattering': [0.9, 1.1],
    'Absorption': [0.9, 1.1],
    'AnisotropyScale': [0., 2.],
    'DOMEfficiency': [0.9, 1.1],
    
    # slightly increase range from recommendation to not have too little stats
    'HoleIceForward_Unified_00': [-0.75, 0.45], #[-0.5, 0.3],
    'HoleIceForward_Unified_01': [-0.15, 0.075], #[-0.1, 0.05],
}


def get_snowstorm_ana(sys_ranges, sim_ranges=sim_ranges):
    """Build a csky analysis whose signal MC is restricted in SnowStorm parameters.

    Parameters
    ----------
    sys_ranges : dict
        Maps a SnowStorm parameter name to [low, high]. For every parameter
        whose range differs from the entry in `sim_ranges`, only events with
        low <= value < high are kept.
    sim_ranges : dict
        Maps a SnowStorm parameter name to its reference [low, high] range.

    Returns
    -------
    The object returned by `cy.get_analysis` for the reduced dataset.

    Notes
    -----
    Reads the notebook-level globals `df` (SnowStorm parameter table) and
    `selection_version`.
    """
    
    # define SnowStorm dataset with reduced range
    class DNNCascade_10yr_sys_reduced(cy.selections.DNNCascadeDataSpecs.DNNCascade_10yr_snowstorm_fullrange):
        def dataset_modifications(self, ds):
            """Attach the SnowStorm parameters to `ds.sig` and cut on them in place."""
            print('Adding SnowStorm Parameters to MC')
            # NOTE(review): path_sig_df is computed but not used below, because
            # the table is taken from the global `df` instead.
            path_sig_df = (
                '/data/ana/PointSource/DNNCascade/analysis/' + 
                self._path_sig.format(version=self._version).replace('dnn_cascades/', '').replace('.npy', '.hdf')
            )
            # (use global df to avoid loading multiple times)
            #if df is None:
            #    df = pd.read_hdf(path_sig_df, key='df')
            # make sure the table and the signal sample list the same events
            # in the same order before copying columns by position
            assert np.allclose(df['run'], ds.sig.run)
            assert np.allclose(df['energy'], ds.sig.energy)
            assert np.allclose(df['ow'], ds.sig.oneweight)

            # load and rename SnowStorm parameters
            parameter_names=[
                'Scattering', 'Absorption', 'AnisotropyScale', 
                'DOMEfficiency', 'HoleIceForward_Unified_00', 
                'HoleIceForward_Unified_01',
            ]
            for i, param in enumerate(parameter_names):
                ds.sig[param] = np.array(df['SnowstormParameters_{:05d}'.format(i)])
            
            print('Reducing Dataset')
            mask = np.ones(len(ds.sig), dtype=bool)
            # product of (selected width / reference width) over all cut parameters
            factor = 1.
            for param, sys_range in sys_ranges.items():
                if sys_range != sim_ranges[param]:
                    assert sys_range[0] < sys_range[1], sys_range
                    factor *= (sys_range[1] - sys_range[0]) / (sim_ranges[param][1] - sim_ranges[param][0])
                    mask_i = np.logical_and(
                        ds.sig[param] >= sys_range[0],
                        ds.sig[param] < sys_range[1],
                    )
                    mask = np.logical_and(mask, mask_i)
            
            print('Reduction factor: {:3.3f}'.format(factor))
            ds.sig = ds.sig._subsample(mask)
            # scale the weights of the surviving events up by 1/factor
            # (presumably to compensate for the removed events - TODO confirm)
            ds.sig.oneweight[:] = ds.sig.oneweight/factor
            
    ana_sys = cy.get_analysis(
        cy.selections.Repository(), selection_version, [DNNCascade_10yr_sys_reduced], 
        #_quiet=True,
    )
    return ana_sys

def sample_snowstorm_ranges(
            seed=None, 
            sim_ranges=sim_ranges, 
            allowed_ranges=allowed_ranges, 
            min_red_factor=0.05,
            max_k=3,
        ):
    """Sample a random set of reduced SnowStorm parameter intervals.

    Between 1 and `max_k` parameters are chosen at random. Each chosen
    parameter is restricted to a randomly placed sub-interval of its allowed
    range. The sub-interval widths are chosen such that the product of
    (interval width / reference width) over all parameters equals
    `min_red_factor`.

    Parameters
    ----------
    seed : int or None
        Seed of the `np.random.RandomState` used for all draws.
    sim_ranges : dict
        Maps parameter name to its reference [low, high] range.
    allowed_ranges : dict
        Maps parameter name to the [low, high] range the sampled intervals
        must stay within.
    min_red_factor : float
        Target value of the overall reduction factor.
    max_k : int
        Maximum number of parameters that are perturbed.

    Returns
    -------
    sys_range : dict
        Copy of `allowed_ranges` in which the entries of the chosen
        parameters are replaced by the sampled intervals.
    params : numpy array of str
        Names of the chosen parameters.
    """
    rng = np.random.RandomState(seed)

    # sample number of parameters to perturb
    k = rng.randint(1, 1 + max_k)

    # sample which parameters to perturb
    parameter_names = [
        'Scattering', 'Absorption', 'AnisotropyScale',
        'DOMEfficiency', 'HoleIceForward_Unified_00',
        'HoleIceForward_Unified_01',
    ]
    params = rng.choice(parameter_names, size=k, replace=False)

    # reduction that the allowed ranges already imply relative to the
    # reference ranges (identical ranges contribute a factor of one)
    allowed_fraction = 1.
    for param, allowed_range in allowed_ranges.items():
        if allowed_range != sim_ranges[param]:
            allowed_fraction *= (
                (allowed_range[1] - allowed_range[0])
                / (sim_ranges[param][1] - sim_ranges[param][0])
            )

    # every perturbed parameter keeps the same relative fraction of its
    # allowed range; k such factors make up the remaining reduction
    rel_fr = np.power(min_red_factor / allowed_fraction, 1./k)

    # sample intervals
    sys_range = deepcopy(allowed_ranges)
    for param in params:
        allowed_range = allowed_ranges[param]

        # half of the width of the sampled interval
        half_width = (allowed_range[1] - allowed_range[0]) * rel_fr / 2.

        # the mid point may only move where the interval stays inside the
        # allowed range
        sample_range = [allowed_range[0] + half_width, allowed_range[1] - half_width]
        assert sample_range[1] > sample_range[0], sample_range

        mid_point = rng.uniform(*sample_range)
        sys_range[str(param)] = [mid_point - half_width, mid_point + half_width]

    return sys_range, params

def get_snowstorm_tr(
            catalog,
            gamma,
            seed=None, 
            sim_ranges=sim_ranges, 
            allowed_ranges=allowed_ranges, 
            min_red_factor=0.05,
            max_k=3,
        ):
    """Create a catalog trial runner on a randomly reduced SnowStorm dataset.

    Parameters
    ----------
    catalog : str
        Catalog name, passed to `get_catalog_tr`.
    gamma : float
        Spectral index, passed to `get_catalog_tr`.
    seed : int or None
        Seed for the sampling of the SnowStorm parameter intervals.
    sim_ranges, allowed_ranges : dict
        Reference and allowed parameter ranges; forwarded to
        `sample_snowstorm_ranges` (and `sim_ranges` to `get_snowstorm_ana`).
    min_red_factor : float
        Target reduction factor of the sampled parameter space.
    max_k : int
        Maximum number of perturbed parameters.

    Returns
    -------
    The trial runner built on the reduced analysis.
    """
    # sample SnowStorm parameters; the custom ranges must be forwarded,
    # otherwise the notebook-level defaults would silently be used instead
    sys_ranges, _ = sample_snowstorm_ranges(
        seed=seed,
        sim_ranges=sim_ranges,
        allowed_ranges=allowed_ranges,
        min_red_factor=min_red_factor,
        max_k=max_k,
    )

    # silence the output that is printed while the analysis is being built
    with io.capture_output():
        # get snowstorm ana object
        ana_sys = get_snowstorm_ana(sys_ranges=sys_ranges, sim_ranges=sim_ranges)

        # get trial runner
        tr_sys = get_catalog_tr(ana=ana_sys, catalog=catalog, gamma=gamma)

    return tr_sys

##### Test Sampling

In [None]:
# Settings of the sampling check.
n_samples = 10000
min_red_factor = 0.02
max_k = 3

# mids: interval mid points of the parameters that were perturbed in a draw
# mids_all: interval mid points of every parameter, perturbed or not
mids = {k: [] for k in sim_ranges.keys()}
mids_all = {k: [] for k in sim_ranges.keys()}
for i in tqdm(range(n_samples), total=n_samples):
    # Seed each draw with its index: this makes the check reproducible and
    # mirrors how the trial production seeds the sampling.
    sys_ranges, params = sample_snowstorm_ranges(
        seed=i, min_red_factor=min_red_factor, max_k=max_k)
    for k, sys_range in sys_ranges.items():
        if k in params:
            mids[k].append(np.mean(sys_range))
        mids_all[k].append(np.mean(sys_range))


# One panel per parameter: distribution of the sampled mid points together
# with the simulation range (dashed) and the allowed range (solid).
sorted_keys = sorted(sim_ranges.keys())
fig, axes = plt.subplots(2, 3, figsize=(9, 6))
for ax, key in zip(axes.flatten(), sorted_keys):
    ax.set_xlabel(key)
    ax.set_ylabel('Number of samples')
    ax.hist(mids[key], bins=30)
    ax.axvline(sim_ranges[key][0], color='0.3', ls='--', label='Simulation Range')
    ax.axvline(sim_ranges[key][1], color='0.3', ls='--')
    ax.axvline(allowed_ranges[key][0], color='0.7', ls='-', label='Allowed Range')
    ax.axvline(allowed_ranges[key][1], color='0.7', ls='-')
axes[0, 0].legend()
fig.suptitle('Min Reduction: {:1.3f} | Max k: {}'.format(min_red_factor, max_k))
fig.tight_layout()
fig.savefig('{}/snowstorm_sampling_check.png'.format(plot_dir))


In [None]:
# Draw a single set of parameter intervals and display it. No seed is passed,
# so the result differs between runs. The following cell builds a test
# analysis from `sys_ranges`.
sys_ranges, params = sample_snowstorm_ranges(min_red_factor=0.02)
sys_ranges, params

In [None]:
%%time

# Build a reduced SnowStorm analysis from the intervals sampled in the
# previous cell and a trial runner for the 'snr' catalog on top of it.
ana_sys = get_snowstorm_ana(
    #sys_ranges={
    #    'Scattering': [1.0, 1.1],
    #    'Absorption': [0.9, 1.0],
    #    'AnisotropyScale': [0., 1.],
    #},
    sys_ranges=sys_ranges,
    #sys_ranges=sample_snowstorm_ranges(),
)
tr_sys = get_catalog_tr(
    ana=ana_sys, catalog='snr', gamma=2.0,
)
# fraction and number of SnowStorm MC events that survive the reduction
print(len(ana_sys.anas[0].sig)/len(df), len(ana_sys.anas[0].sig))
# compare the sum of oneweight * E^-2.5 between the baseline sample (`a`)
# and the reduced SnowStorm sample
print('ana', np.sum(a.sig.oneweight * a.sig.true_energy**-2.5))
print('sys', np.sum(ana_sys.anas[0].sig.oneweight * ana_sys.anas[0].sig.true_energy**-2.5))


In [None]:
# Size of the reduced SnowStorm signal sample relative to the baseline signal sample.
len(ana_sys.anas[0].sig) / len(ana.anas[0].sig)

#### Get Results for each template

In [None]:
# Load the stored unblinded result array of each stacking catalog from
# <cg.base_dir>/stacking/results/<catalog>/<catalog>_unblinded.npy
res_dict = {}
for key in ['snr', 'pwn', 'unid']:
    f_path = os.path.join(
        cg.base_dir, 
        'stacking/results/{}/{}_unblinded.npy'.format(key, key), 
    )
    res_dict[key] = np.load(f_path)
res_dict

In [None]:
# Print the best-fit ns and gamma of every catalog together with the flux
# that the trial runner converts ns into (`to_E2dNdE` with E0=100, unit=1e3).
# NOTE: the unpacking rebinds the notebook-level names ts, ns, gamma, pval
# and nsigma on every iteration.
for key, res in res_dict.items():
    ts, ns, gamma, pval, nsigma = res
    tr = get_catalog_tr(ana=ana, catalog=key, gamma=gamma)
    print('{} | ns: {:3.1f} | gamma: {:3.2f} | flux: {}'.format(
        key, ns, gamma, tr.to_E2dNdE(ns, E0=100, unit=1e3)
    ))

#### Spot-check ns/gamma bias

In [None]:
# Cache of the spot-check trials, keyed by (catalog, gamma).
# Set recalculate = True to ignore an existing cache file.
recalculate = False
allt_dict_file = '{}/allt_dict_file.pkl'.format(plot_dir)

# NOTE(review): pickle.load executes arbitrary code from the file; only load
# cache files that were written by this notebook.
if os.path.exists(allt_dict_file) and not recalculate:
    with open(allt_dict_file, 'rb') as handle:
        allt_dict = pickle.load(handle)
else:
    allt_dict = {}
    
for catalog in ['snr']:
#for catalog in ['snr', 'pwn', 'unid']:
    for gamma in [2.0, 2.5, 3.0]:
        # only compute combinations that are not in the cache yet
        if (catalog, gamma) not in allt_dict:
            print('Computing for {} and gamma: {}'.format(catalog, gamma))

            # get trial runner (for baseline MC)
            tr = get_catalog_tr(ana=ana, catalog=catalog, gamma=gamma)

            # compute bias
            allt_dict[(catalog, gamma)] = get_bias_allt(tr, n_trials=200, n_sigs=np.r_[:301:10])       

            # save file
            # (rewritten after every new entry so finished work survives an interruption)
            with open(allt_dict_file, 'wb') as f:
                pickle.dump(allt_dict, f, protocol=-1)

In [None]:
# One figure per cached (catalog, gamma) combination: ns bias on the left,
# gamma bias on the right.
for (catalog, gamma), allt in allt_dict.items():
    # plot bias
    # (the trial runner is rebuilt because plot_gamma_bias reads the injected
    # spectral index from it)
    tr = get_catalog_tr(ana=ana, catalog=catalog, gamma=gamma)
    fig, axes = plt.subplots(1, 2, figsize=(6, 4))
    plot_ns_bias(ax=axes[0], tr=tr, allt=allt)
    plot_gamma_bias(ax=axes[1], tr=tr, allt=allt)
    axes[0].set_title('{} | $\gamma$= {:3.2f}'.format(catalog, gamma))
    fig.tight_layout()
    fig.savefig('{}/bias_check_{}_{:3.2f}.png'.format(plot_dir, catalog, gamma))


#### Get ns and gamma-bias correction

In [None]:
# use_poisson: passed as `poisson=` to get_many_fits when producing bias trials
# add_sys: produce the bias trials on SnowStorm-reduced datasets instead of
#          the baseline analysis
use_poisson = False # GP paper uses: False
add_sys = False  # GP paper uses: False

# Both settings are encoded in the name of the cache file so that different
# configurations do not overwrite each other.
if add_sys:
    sys_suffix = '_sys'
else:
    sys_suffix = ''
    
if use_poisson:
    bias_file = os.path.join(plot_dir, 'bias_poisson{}.pkl'.format(sys_suffix))
else:
    bias_file = os.path.join(plot_dir, 'bias{}.pkl'.format(sys_suffix))

def get_bias_dict_from_sys(bias_dict_sys):
    """Merge per-trial systematic bias results into the layout of `bias_dict`.

    Parameters
    ----------
    bias_dict_sys : dict
        Nested as {catalog: {gamma: {trial_i: {ns: trials}}}}.

    Returns
    -------
    dict
        Nested as {catalog: {gamma: {ns: trials}}}, where the trials of all
        `trial_i` (in sorted order) are concatenated for each ns. A gamma for
        which no trial exists yet maps to an empty dict.

    Raises
    ------
    AssertionError
        If the trials of one gamma do not all cover the same ns values.
    """
    bias_dict = {}
    for key, gamma_ns_trials_dict in bias_dict_sys.items():
        bias_dict[key] = {}

        for gamma, ns_trials_dict in gamma_ns_trials_dict.items():

            # A gamma without any trial occurs in a partially filled cache
            # file (all gammas are created up front, trials are added one by
            # one). Keep it as an empty entry instead of failing on it.
            if not ns_trials_dict:
                bias_dict[key][gamma] = {}
                continue

            trial_ids = sorted(ns_trials_dict.keys())

            # get ns_values and make sure every trial covers the same ones
            ns_values = sorted(ns_trials_dict[trial_ids[0]].keys())
            for trial_i in trial_ids:
                assert np.allclose(sorted(ns_trials_dict[trial_i].keys()), ns_values)

            bias_dict[key][gamma] = {
                ns: cy.utils.Arrays.concatenate(
                    [ns_trials_dict[trial_i][ns] for trial_i in trial_ids])
                for ns in ns_values
            }
    return bias_dict

# Resume from the cache file if one exists for the current settings.
# With add_sys the file stores the per-trial layout (bias_dict_sys), which is
# converted to the merged layout (bias_dict); otherwise the file stores
# bias_dict directly.
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# cache files that were written by this notebook.
if os.path.exists(bias_file):
    print('Loading from file')
    if add_sys:
        with open(bias_file, 'rb') as handle:
            bias_dict_sys = pickle.load(handle)
        
        # restructure dict
        bias_dict = get_bias_dict_from_sys(bias_dict_sys)
    else:
        with open(bias_file, 'rb') as handle:
            bias_dict = pickle.load(handle)
        
        bias_dict_sys = {}
else:
    print('Creating new dict')
    bias_dict = {}
    bias_dict_sys = {}
    

In [None]:
from multiprocessing import Pool

# Injected signal strengths per catalog, given as the arguments of range():
# [start, stop, step].
ns_bias_range = {
    'snr': [0, 2000, 30],  # GP paper uses: [0, 2000, 30]
    'unid': [0, 2000, 30],  # GP paper uses: [0, 2000, 30]
    'pwn': [0, 2000, 30],  # GP paper uses: [0, 2000, 30]
}
# Injected spectral indices.
gammas = np.r_[1:4.01:0.25]  # GP paper uses: np.r_[1:4.01:0.25]

# Without systematics: number of fits per (gamma, ns) point.
# With systematics: number of SnowStorm realisations per gamma (one fit each
# per ns value).
n_trials = 100 # GP paper uses: 100
recalculate = False
cpus = 25

for key, ns_range in ns_bias_range.items():
    
    # create the per-gamma containers for catalogs that are not cached yet
    for dictionary in [bias_dict, bias_dict_sys]:
        if key not in dictionary:
            dictionary[key] = {gamma:{} for gamma in gammas}
        
    print('Submitting {} with {} gammas for ns: {}:{}:{}'.format(key, len(gammas), *ns_range))
    
    for gamma in gammas:
        print('  ... at gamma: {:3.2f}'.format(gamma))
        
        if add_sys:
            for trial_i in tqdm(range(n_trials), total=n_trials):

                if trial_i not in bias_dict_sys[key][gamma] or recalculate:
                    bias_dict_sys[key][gamma][trial_i] = {}

                    # every trial index gets its own SnowStorm-reduced
                    # dataset; the trial index is the sampling seed
                    print('Getting trial runner for {}'.format(key))
                    tr = get_snowstorm_tr(
                        catalog=key, gamma=gamma, seed=trial_i, 
                    )

                    print('Starting pool with {} cpus'.format(cpus))
                    # The worker function reads the notebook-level `tr`.
                    # NOTE(review): this presumably relies on the worker
                    # processes being forked so that they inherit `tr` and
                    # this function - confirm. A later cell defines another
                    # function with the same name but a different signature.
                    def compute_trial_i(ns):
                        trials = tr.get_many_fits(1, n_sig=ns, logging=False, seed=ns, poisson=use_poisson, cpus=1)
                        trials['ntrue'] = np.repeat(ns, len(trials))
                        return trials

                    arg_list = list(range(*ns_range))
                    with Pool(cpus) as p:
                        trials = list(tqdm(p.imap(compute_trial_i, arg_list), total=len(arg_list)))

                    for j, ns in enumerate(range(*ns_range)):
                        bias_dict_sys[key][gamma][trial_i][ns] = trials[j]

                    # save after every finished realisation
                    with open(bias_file, 'wb') as f:
                        pickle.dump(bias_dict_sys, f, protocol=-1)

        else:
            tr = get_catalog_tr(ana=ana, catalog=key, gamma=gamma)

            for ns in tqdm(range(*ns_range), total=len(range(*ns_range))):

                # only compute points that are not in the cache yet
                if ns not in bias_dict[key][gamma] or recalculate:
                    trials = tr.get_many_fits(n_trials, n_sig=ns, logging=False, seed=ns, cpus=cpus, poisson=use_poisson)
                    trials['ntrue'] = np.repeat(ns, len(trials))

                    bias_dict[key][gamma][ns] = trials

                    # save after every finished point
                    with open(bias_file, 'wb') as f:
                        pickle.dump(bias_dict, f, protocol=-1)

# bring the per-trial layout into the merged layout used by the next cells
if add_sys:
    bias_dict = get_bias_dict_from_sys(bias_dict_sys)

#### Compute ns and gamma-bias correction

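The goal is to obtain a bias-correction function of the form
    $f(n_\mathrm{inj}, \gamma_\mathrm{inj}) \rightarrow (\overline{n}_\mathrm{fit}, \overline{\gamma}_\mathrm{fit})$,
where the bar denotes the median of the fitted values over all trials at the given injection point.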

In [None]:
from scipy.interpolate import SmoothBivariateSpline
from copy import deepcopy

# Per-catalog bias-correction functions, filled by the loop below.
bias_corr_funcs_non_pickable = {}

# Keyword arguments of SmoothBivariateSpline per catalog and fitted quantity
# (s: smoothing factor, kx/ky: spline degrees).
smoothing_settings = {
    'unid': {
        'gamma': dict(s=0.87), # GP paper uses: dict(s=0.87)
        'ns': dict(s=174200), # GP paper uses: dict(s=174200)
    },
    'snr': {
        'gamma': dict(s=1.5, kx=3, ky=1), # GP paper uses: dict(s=1.5, kx=3, ky=1)
        'ns': dict(s=174200), # GP paper uses: dict(s=174200)
    },
    'pwn': {
        'gamma': dict(s=1.5, kx=3, ky=1), # GP paper uses: dict(s=1.5, kx=3, ky=1)
        'ns': dict(s=174200), # GP paper uses: dict(s=174200)
    },
}

for key, bias_dict_i in bias_dict.items():
    #if key != 'pwn': continue
    
    # create bias correction function
    # one entry per injection point (n_inj, gamma_inj): the injected values
    # and the median fitted values over all trials at that point
    inj_n = []
    inj_gamma = []
    fit_gamma = []
    fit_ns = []
    
    for gamma, ns_dict_i in bias_dict_i.items():
        for n_inj in sorted(ns_dict_i.keys()):
            inj_n.append(n_inj)
            inj_gamma.append(gamma)
            fit_gamma.append(np.median(ns_dict_i[n_inj].gamma))
            fit_ns.append(np.median(ns_dict_i[n_inj].ns))
    
    # smooth 2D splines: (n_inj, gamma_inj) -> median fitted gamma / ns
    spline_gamma = SmoothBivariateSpline(
        x=inj_n, y=inj_gamma, z=fit_gamma, **smoothing_settings[key]['gamma'])
    spline_ns = SmoothBivariateSpline(
        x=inj_n, y=inj_gamma, z=fit_ns, **smoothing_settings[key]['ns'])

    def bias_corr(n_inj, gamma_inj,
                  _spline_gamma=spline_gamma, _spline_ns=spline_ns):
        """Map injected (n_inj, gamma_inj) to the expected fitted (ns, gamma).

        The two splines are bound as default arguments at definition time.
        Without this binding the function would look up the notebook-level
        names `spline_gamma` / `spline_ns` when it is called, i.e. after the
        loop every catalog would use the splines of the catalog that was
        processed last.

        Parameters
        ----------
        n_inj, gamma_inj : float or array_like
            Injected number of signal events and injected spectral index.
        _spline_gamma, _spline_ns : splines
            Internal; do not pass.

        Returns
        -------
        numpy array
            [corrected ns, corrected gamma] stacked along axis 0. The
            corrected gamma is clipped to [1, 4] and the corrected ns to
            values >= 0.
        """
        corr_gamma = _spline_gamma.ev(n_inj, gamma_inj)
        corr_gamma = np.clip(corr_gamma, 1., 4.)
        corr_ns = _spline_ns.ev(n_inj, gamma_inj)
        corr_ns = np.clip(corr_ns, 0., np.inf)

        return np.stack([corr_ns, corr_gamma], axis=0)

    bias_corr_funcs_non_pickable[key] = bias_corr
    
    # -------------------------------------
    # make 2D plot to check smoothing value
    # -------------------------------------
    # discretised version of viridis for the two colour-coded maps
    cmap = plt.cm.viridis
    cmaplist = [cmap(i) for i in range(cmap.N)]
    cmap = mpl.colors.LinearSegmentedColormap.from_list(
        'Custom cmap', cmaplist, cmap.N)

    # define the bins and normalize
    bounds_gamma = np.linspace(1, 4, 17)
    norm_gamma = mpl.colors.BoundaryNorm(bounds_gamma, cmap.N)
    
    max_ns = max(max(fit_ns), max(inj_n))
    bounds_ns = np.linspace(0, max_ns, 17)
    norm_ns = mpl.colors.BoundaryNorm(bounds_ns, cmap.N)

    # evaluation grid: cell edges (x_bins, y_bins) and cell centres (test_x, test_y)
    x_bins = np.arange(0, max_ns, 1)
    y_bins = np.arange(1, 4, 0.01)
    test_x = x_bins[:-1] + 0.5*np.diff(x_bins)
    test_y = y_bins[:-1] + 0.5*np.diff(y_bins)
    grid_x, grid_y = np.meshgrid(x_bins, y_bins)

    # calling the splines with two 1D arrays evaluates them on the grid
    # spanned by the arrays; the result is indexed [x, y], hence the
    # transpose when it is handed to pcolormesh below
    gamma_result = spline_gamma(test_x, test_y)
    ns_result = spline_ns(test_x, test_y)

    fig, axes = plt.subplots(1, 2, figsize=(9, 6))
    pc_gamma = axes[0].pcolormesh(grid_x, grid_y, gamma_result.T, cmap=cmap, norm=norm_gamma)
    cb_gamma = fig.colorbar(pc_gamma, ax=axes[0])
    cb_gamma.set_label('$\gamma$')
    pc_ns = axes[1].pcolormesh(grid_x, grid_y, ns_result.T, cmap=cmap, norm=norm_ns)
    cb_ns = fig.colorbar(pc_ns, ax=axes[1], orientation='horizontal', ticks=np.arange(0, max_ns, 500))
    cb_ns.set_label('$n_s$')
    
    axes[0].set_title('Model: {} | $\gamma$-Interpolation'.format(key))
    # NOTE: this title has no placeholder, so .format(key) leaves it unchanged
    axes[1].set_title('$n_s$-Interpolation'.format(key))
    
    for ax in axes:
        ax.set_xlabel('$n_\mathrm{inj}$')
        ax.set_ylabel('$\gamma_\mathrm{inj}$')
    fig.tight_layout()
    fig.savefig('{}/biascorrection_2d_{}{}.png'.format(plot_dir, key, sys_suffix))
    
    # ---------------
    # 1D slices in ns
    # ---------------
    # one panel per injected ns: fitted gamma versus injected gamma, with the
    # gamma component of the correction function overlaid
    if True:            
        ns_values = sorted(np.unique(inj_n))
        n_axes = len(ns_values)
        # smallest square grid of panels that holds all slices
        n_1d = int(np.ceil(np.sqrt(n_axes)))
        fig, axes = plt.subplots(n_1d, n_1d, figsize=(3*n_1d, 3*n_1d))
        axes = axes.flatten()
        for ax, ns in zip(axes, ns_values):
            # collect the trials of all injected gammas at this ns
            trials = []
            for gamma, ns_dict_i in bias_dict_i.items():
                # work on a copy so that the cached trials are not modified
                trials_i = cy.utils.Arrays(deepcopy(ns_dict_i[ns]))
                trials_i['gamma_true'] = np.ones(len(trials_i)) * gamma
                trials.append(trials_i)
            allt = cy.utils.Arrays.concatenate(trials)

            plot_bias(ax, x_fit=allt.gamma, y_true=allt.gamma_true, label='')

            x_gamma = np.linspace(1, 4, 100)
            x_ns = np.ones_like(x_gamma) * ns

            # index [1] selects the gamma component of the correction
            ax.plot(x_gamma, bias_corr_funcs_non_pickable[key](x_ns, x_gamma)[1], label='Spline Fit')
            ax.set_title('Model: {} | {} = {:3.2f}'.format(key, r'$n_\mathrm{inj}$', ns))
            ax.set_xlabel(r'$\gamma_\mathrm{inj}$')
            ax.set_ylabel(r'$\gamma_\mathrm{fit}$')
            ax.legend()
            
        fig.tight_layout()
        fig.savefig('{}/biascorrection_1d_ns_slices_{}{}.png'.format(plot_dir, key, sys_suffix))
    
    
    # ------------------
    # 1D slices in gamma
    # ------------------
    # one panel per injected gamma: fitted ns versus injected ns, with the
    # ns component of the correction function overlaid
    if True:
        n_axes = len(bias_dict_i.keys())
        n_1d = int(np.ceil(np.sqrt(n_axes)))
        fig, axes = plt.subplots(n_1d, n_1d, figsize=(3*n_1d, 3*n_1d))
        axes = axes.flatten()
        for ax, (gamma, ns_dict_i) in zip(axes, bias_dict_i.items()):
            allt = cy.utils.Arrays.concatenate([t for t in ns_dict_i.values()])

            tr = get_catalog_tr(ana=ana, catalog=key, gamma=gamma)

            plot_ns_bias(ax=ax, tr=tr, allt=allt)
            
            # NOTE(review): the upper end 2000 is hard-coded and matches the
            # stop value of ns_bias_range in the previous cell - keep in sync
            x_ns = np.linspace(0, 2000, 100)
            x_gamma = np.ones_like(x_ns) * gamma
            
            # index [0] selects the ns component of the correction
            ax.plot(x_ns, bias_corr_funcs_non_pickable[key](x_ns, x_gamma)[0], label='Spline Fit')
            ax.set_title('Model: {} | $\gamma$ = {:3.2f}'.format(key, gamma))
            ax.set_xlabel('$n_\mathrm{inj}$')
            ax.set_ylabel('$n_s$')
            ax.legend()
        
        fig.tight_layout()
        fig.savefig('{}/biascorrection_1d_gamma_slices_{}{}.png'.format(plot_dir, key, sys_suffix))
    

#### Make top-level, picklable functions for multiprocessing

In [None]:
# Catalog name -> module-level wrapper function. The wrappers are handed to
# the multiprocessing pool further down; each one looks up the per-catalog
# correction function in bias_corr_funcs_non_pickable when it is called.
bias_corr_funcs = {}

if 'unid' in bias_corr_funcs_non_pickable:
    
    def bias_corr_func_unid(n_inj, gamma_inj):
        """Bias correction of the 'unid' catalog."""
        return bias_corr_funcs_non_pickable['unid'](n_inj, gamma_inj)
    
    bias_corr_funcs['unid'] = bias_corr_func_unid

if 'snr' in bias_corr_funcs_non_pickable:
    
    def bias_corr_func_snr(n_inj, gamma_inj):
        """Bias correction of the 'snr' catalog."""
        return bias_corr_funcs_non_pickable['snr'](n_inj, gamma_inj)
    
    bias_corr_funcs['snr'] = bias_corr_func_snr

if 'pwn' in bias_corr_funcs_non_pickable:
    
    def bias_corr_func_pwn(n_inj, gamma_inj):
        """Bias correction of the 'pwn' catalog."""
        return bias_corr_funcs_non_pickable['pwn'](n_inj, gamma_inj)
    
    bias_corr_funcs['pwn'] = bias_corr_func_pwn

print('Got the following keys:', bias_corr_funcs.keys())

#### Some Sanity checks

In [None]:
def f(x, gamma=2.84):
    """Return the ns component of the 'unid' bias correction at (x, gamma)."""
    return bias_corr_funcs['unid'](x, gamma)[0]

x = np.linspace(0, 3000, 100)

# Create the axes explicitly. Previously the curves were drawn with plt.plot
# while the legend was requested on `ax`, a leftover variable of an earlier
# cell, so the legend ended up on an unrelated figure.
fig, ax = plt.subplots()
for gamma in np.linspace(1, 4, 5):
    ax.plot(x, f(x, gamma=gamma), label=r'$\gamma=${:3.2f}'.format(gamma))
ax.set_xlabel(r'$n_\mathrm{inj}$')
ax.set_ylabel(r'bias-corrected $n_s$')
ax.legend()

In [None]:
# Spot check of the 'unid' correction at n_inj=200, gamma_inj=2.80; the
# trailing comment records the values obtained in an earlier run.
bias_corr_funcs['unid'](200, 2.80) #2.37906900e+02, 2.84580387e+00

#### Get Critical Values

In [None]:
from multiprocessing import Pool
from tqdm.notebook import tqdm_notebook as tqdm


def get_critical_value_trial(
            E2dNdE, gamma, tr, tr_inj, bias_corr_func=None,
            E0=100, unit=1e3, seed=None, TRUTH=False, 
        ):
    """Compute the test statistic tau of one pseudo-experiment.

    A trial is generated with `tr_inj`, fitted with `tr`, and the fitted test
    statistic is compared with the test statistic at the tested hypothesis
    (E2dNdE, gamma).

    Parameters
    ----------
    E2dNdE : float
        Tested flux normalisation; converted to a number of signal events
        with `tr_inj.to_ns`.
    gamma : float
        Tested spectral index.
    tr : trial runner
        Used to fit the trial and to evaluate the likelihood.
    tr_inj : trial runner
        Used to convert the flux and to generate the trial.
    bias_corr_func : callable or None
        If given, maps (n_sig, gamma) to the values at which the likelihood
        is evaluated.
    E0, unit : float
        Passed on to `tr_inj.to_ns`.
    seed : int or None
        Seed of the trial.
    TRUTH : bool
        If True no signal is injected and TRUTH=True is passed to
        `get_one_trial`.

    Returns
    -------
    tau : float
        ts_fit - ts_test.
    """
    # number of signal events that corresponds to the tested flux
    ns_tested = tr_inj.to_ns(E2dNdE, E0=E0, unit=unit)

    # no injection when running on the true data
    n_inj = 0 if TRUTH else ns_tested

    # generate the trial and get its best-fit ts, ns and gamma
    trial = tr_inj.get_one_trial(n_sig=n_inj, poisson=True, seed=seed, TRUTH=TRUTH)
    ts_fit, _ns_fit, _gamma_fit = tr.get_one_fit_from_trial(trial)

    # optionally move the tested point to its bias-corrected position
    if bias_corr_func is not None:
        ns_tested, gamma = bias_corr_func(ns_tested, gamma)

    # evaluate the test statistic of the same trial at the tested point;
    # gamma is passed explicitly, so it is removed from a copy of the fitter
    # arguments (the trial runner's own dict stays untouched)
    llh = tr.get_one_llh_from_trial(trial)
    extra_fit_args = deepcopy(tr.fitter_args)
    del extra_fit_args['gamma']
    ts_test = llh.get_ts(ns=ns_tested, gamma=gamma, **extra_fit_args)

    # Test statistic tau for the critical value definition:
    #   ts(p) = -2 log { L(ns=0) / L(p) }
    #   tau   = ts(fit) - ts(test)
    #         = -2 log L(ns=0) + 2 log L(fit) + 2 log L(ns=0) - 2 log L(test)
    #         = -2 log { L(test) / L(fit) }
    # i.e. the likelihood ratio of the tested point against the best fit.
    return ts_fit - ts_test

# ------------------------------------------------------------------------------------
# define global functions for multiprocessing (pickle has issues with local functions)
# ------------------------------------------------------------------------------------

def compute_trial_i(args):
    """Compute tau for every tested flux of one pseudo-experiment.

    Module-level worker for the multiprocessing pool; all inputs arrive in a
    single tuple.

    Parameters
    ----------
    args : tuple
        (i, E2dNdEs, gamma, catalog, E0, unit, bias_corr_func, sys_seed,
        min_red_factor, max_k, TRUTH). `i` is the trial seed. If `sys_seed`
        is None the trial is generated from the baseline analysis, otherwise
        from a SnowStorm-reduced dataset sampled with that seed.

    Returns
    -------
    list
        One tau value per entry of `E2dNdEs`.
    """
    (trial_seed, E2dNdEs, gamma, catalog, E0, unit, bias_corr_func,
     sys_seed, min_red_factor, max_k, TRUTH) = args

    # trial runner that generates the trial
    if sys_seed is not None:
        tr_inj = get_snowstorm_tr(
            catalog=catalog, gamma=gamma, seed=sys_seed,
            min_red_factor=min_red_factor, max_k=max_k)
    else:
        tr_inj = get_catalog_tr(ana=ana, catalog=catalog, gamma=gamma)

    # trial runner that fits the trial (always the baseline analysis)
    tr = get_catalog_tr(ana=ana, catalog=catalog, gamma=gamma)

    # the same seed is used for every tested flux
    return [
        get_critical_value_trial(
            E2dNdE=flux, gamma=gamma, tr=tr, tr_inj=tr_inj,
            bias_corr_func=bias_corr_func, E0=E0, unit=unit,
            seed=trial_seed, TRUTH=TRUTH)
        for flux in E2dNdEs
    ]

# ------------------------------------------------------------------------------------

def run_critical_value_trials_for_one_gamma(
            n_trials, E2dNdE_list, gamma, key, add_systematics=False, TRUTH=False,
            bias_corr_funcs=None, min_red_factor=0.02, max_k=3,
            E0=100, unit=1e3, seed=0, cpus=20,
        ):
    """Run all critical-value trials of one spectral index.

    Parameters
    ----------
    n_trials : int
        Number of pseudo-experiments; they use the seeds
        seed, seed + 1, ..., seed + n_trials - 1.
    E2dNdE_list : sequence of float
        Tested flux normalisations.
    gamma : float
        Tested spectral index.
    key : str
        Catalog name.
    add_systematics : bool
        If True every trial is generated from a SnowStorm-reduced dataset
        that is sampled with the trial seed.
    TRUTH : bool
        Passed on to the trial generation.
    bias_corr_funcs : dict or None
        Maps catalog name to a bias-correction function; None disables the
        correction.
    min_red_factor, max_k :
        Settings of the SnowStorm sampling.
    E0, unit : float
        Passed on to the flux conversion.
    seed : int
        First trial seed.
    cpus : int
        Number of worker processes; values <= 1 run in the current process.

    Returns
    -------
    numpy array
        tau values with shape (len(E2dNdE_list), n_trials).
    """
    bias_corr_func = None if bias_corr_funcs is None else bias_corr_funcs[key]

    # one argument tuple per trial; with systematics the trial seed is also
    # the seed of the SnowStorm sampling
    arg_list = [
        (i, E2dNdE_list, gamma, key, E0, unit, bias_corr_func,
         i if add_systematics else None, min_red_factor, max_k, TRUTH)
        for i in range(seed, seed + n_trials)
    ]

    if cpus > 1:
        print('Running pool with {} cpus'.format(cpus))

        # the context manager shuts the pool down on exit, so no further
        # close() call is needed afterwards
        with Pool(cpus) as p:
            tau_values_map = list(tqdm(p.imap(compute_trial_i, arg_list), total=n_trials))
        print('tau_values_map.shape', np.array(tau_values_map).shape)
    else:
        tau_values_map = [
            compute_trial_i(args) for args in tqdm(arg_list, total=n_trials)]

    # regroup from per-trial lists to per-normalisation lists
    tau_values = [[] for _ in E2dNdE_list]
    for tau_values_i in tau_values_map:
        for i, values in enumerate(tau_values_i):
            tau_values[i].append(values)

    return np.array(tau_values)

def run_critical_value_trials(n_trials, E2dNdE_list, gamma_list, key, **kwargs):
    """Run the critical-value trials for every spectral index in `gamma_list`.

    Each spectral index is processed by
    `run_critical_value_trials_for_one_gamma`; all extra keyword arguments
    are passed through to it unchanged.

    Returns
    -------
    np.ndarray
        The per-gamma results stacked along a new leading axis, in the
        order of `gamma_list`.
    """
    per_gamma_results = []
    for spectral_index in gamma_list:
        print('  ... runnning for gamma = {:3.2f}'.format(spectral_index))
        per_gamma_results.append(
            run_critical_value_trials_for_one_gamma(
                n_trials=n_trials,
                E2dNdE_list=E2dNdE_list,
                gamma=spectral_index,
                key=key,
                **kwargs
            )
        )
    return np.array(per_gamma_results)


In [None]:
%%time

seed = 0  # GP paper uses: seeds x-y
n_trials = 100  # GP paper uses: N trials in total
apply_correction = True # GP paper uses: True
add_systematics = True # GP paper uses: True
min_red_factor = 0.02 # GP paper uses: 0.02
max_k = 3 # GP paper uses: 3

cpus = 25
recalculate = False

E2dNdE_dict = {
    'unid': np.linspace(1e-13, 13e-12, 50), # GP paper uses: np.linspace(1e-13, 13e-12, 50)
    'snr': np.linspace(1e-13, 16e-12, 61), # GP paper uses: np.linspace(1e-13, 16e-12, 61)
    'pwn': np.linspace(1e-13, 12e-12, 45), # GP paper uses: np.linspace(1e-13, 12e-12, 45)
}
gamma_dict = {
    'unid': np.linspace(2.2, 3.35, 24), # GP paper uses: np.linspace(2.2, 3.35, 24)
    'snr': np.linspace(2.1, 3.35, 26), # GP paper uses: np.linspace(2.1, 3.35, 26)
    'pwn': np.linspace(2.45, 3.55, 23), # GP paper uses: np.linspace(2.45, 3.55, 23)
}

# Settings that are identical for every catalog are derived once here.
bias_corr_funcs_kw = bias_corr_funcs if apply_correction else None
if add_systematics:
    sys_str = '{}_red_{:0.3f}_k_{}'.format(add_systematics, min_red_factor, max_k)
else:
    sys_str = '{}'.format(add_systematics)

tau_dict = {}
for key in E2dNdE_dict.keys():

    print('Running {} trials for {} with {} normalizations and {} gammas'.format(
        n_trials, key, len(E2dNdE_dict[key]), len(gamma_dict[key])))
    if apply_correction:
        print('Applying correction')
    print('Adding Systematic:', add_systematics)

    # The file name encodes every setting that influences the trials, so
    # that runs with different settings never overwrite each other.
    file_path = os.path.join(plot_dir, 'trials_{}_corr_{}_sys_{}_seeds_{}_{}.pkl'.format(
        key, apply_correction, sys_str, seed, seed+n_trials))

    if not os.path.exists(file_path) or recalculate:
        # min_red_factor and max_k are passed explicitly: they are part of
        # the file name above, so the trials must really be produced with
        # these values instead of the function defaults.
        tau_matrix = run_critical_value_trials(
            n_trials, seed=seed, E2dNdE_list=E2dNdE_dict[key], gamma_list=gamma_dict[key],
            key=key, add_systematics=add_systematics, cpus=cpus, bias_corr_funcs=bias_corr_funcs_kw,
            min_red_factor=min_red_factor, max_k=max_k)

        # save trials together with the grids and seeds they belong to
        seeds = list(range(seed, seed+n_trials))
        with open(file_path, 'wb') as f:
            pickle.dump((E2dNdE_dict, gamma_dict, tau_matrix, seeds), f, protocol=-1)
    else:
        print('Skipping because file already exists...')
        


#### Load trials

In [None]:
import glob

# The systematics part of the file name does not depend on the catalog.
if add_systematics:
    sys_str = '{}_red_{:0.3f}_k_{}'.format(add_systematics, min_red_factor, max_k)
else:
    sys_str = '{}'.format(add_systematics)

tau_dict = {}
for key in ['unid', 'snr', 'pwn']:
#for key in ['unid']:

    print('Loading trials for {} with {} normalizations and {} gammas'.format(
        key, len(E2dNdE_dict[key]), len(gamma_dict[key])))
    key_s = (key, apply_correction)

    if key_s not in tau_dict:
        tau_dict[key_s] = {gamma: {norm: [] for norm in E2dNdE_dict[key]} for gamma in gamma_dict[key]}

    # find a list of files
    file_pattern = os.path.join(plot_dir, 'trials_{}_corr_{}_sys_{}_seeds_*_*.pkl'.format(
        key, apply_correction, sys_str))
    file_list = sorted(glob.glob(file_pattern))
    print('Found {} files...'.format(len(file_list)))

    # Fail early with a clear message; otherwise the concatenation at the
    # end of this loop fails with an uninformative error.
    if not file_list:
        raise ValueError('No trial files found for pattern: {}'.format(file_pattern))

    # load files and check for overlapping seeds
    seed_values = set()
    for file_i in file_list:
        # NOTE: pickle can execute arbitrary code on load; only read files
        # written by the trial-production cell of this notebook.
        with open(file_i, 'rb') as handle:
            E2dNdE_dict_loaded, gamma_dict_loaded, tau_matrix, seeds = pickle.load(handle)

        # make sure model norms match
        assert all(k in E2dNdE_dict for k in E2dNdE_dict_loaded.keys())
        for k, norms in E2dNdE_dict_loaded.items():
            assert np.allclose(norms, E2dNdE_dict[k]), (norms, E2dNdE_dict[k])

        # make sure gammas match
        assert all(k in gamma_dict for k in gamma_dict_loaded.keys())
        for k, gammas in gamma_dict_loaded.items():
            assert np.allclose(gammas, gamma_dict[k]), (gammas, gamma_dict[k])

        # make sure seeds do not overlap
        overlapping_seeds = seed_values.intersection(set(seeds))
        if overlapping_seeds:
            raise ValueError('Found overlapping seeds: {}!'.format(overlapping_seeds))
        seed_values = seed_values.union(set(seeds))

        # append tau values from this file
        for i, gamma in enumerate(gamma_dict[key]):
            for j, E2dNdE in enumerate(E2dNdE_dict[key]):
                tau_dict[key_s][gamma][E2dNdE].append(tau_matrix[i, j])

    # concatenate the trials of all files into a single array per bin
    for i, gamma in enumerate(gamma_dict[key]):
        for j, E2dNdE in enumerate(E2dNdE_dict[key]):
            tau_dict[key_s][gamma][E2dNdE] = np.concatenate(tau_dict[key_s][gamma][E2dNdE])

#### Investigate boundary issues

When the bias correction is applied, a boundary effect appears once $n_s$ exceeds the end of the extrapolation range, beyond which the splines return a constant value. In this range, the trials used for the confidence value calculation are meaningless.

In [None]:
# Energy at which the flux normalization is quoted; it is passed as `E0` to
# `to_E2dNdE` and printed in the axis label of the plot below.
E0 = 100

def get_flux_norm(ns_values, catalog, gamma, E0=100, unit=1e3):
    """Convert signal event numbers into flux normalizations.

    A trial runner is obtained from `get_catalog_tr` for the given catalog
    and spectral index, and its `to_E2dNdE` method is called once for every
    entry of `ns_values` (a scalar is treated as a one-element sequence).

    Returns
    -------
    np.ndarray
        One normalization per entry of `ns_values`.
    """
    ns_sequence = np.atleast_1d(ns_values)
    trial_runner = get_catalog_tr(ana=ana, catalog=catalog, gamma=gamma)

    flux_norms = []
    for n_signal in ns_sequence:
        flux_norms.append(trial_runner.to_E2dNdE(n_signal, E0=E0, unit=unit))
    return np.array(flux_norms)

fig, ax = plt.subplots()
flux_gammas = np.linspace(2., 4, 20)
flux_norms = []
ns_values = [600, 2000, 3000]
for gamma in tqdm(flux_gammas, total=len(flux_gammas)):
    flux_norms.append(get_flux_norm(ns_values, catalog='unid', gamma=gamma, E0=E0))
# stack to shape [ns value, gamma]
flux_norms = np.stack(flux_norms, axis=1)

# The axis label states the flux in units of 10^-11, so the plotted values
# are divided by this factor (the 2D confidence plots in this notebook do
# the same). `flux_norms` itself is left unscaled.
flux_scale = 1e-11
units_label = r' $\cdot 10^{-11}$'

for i, ns in enumerate(ns_values):
    ax.plot(flux_gammas, flux_norms[i] / flux_scale, label='$n_s$ = {}'.format(ns))

ax.set_ylim(0, 1.2e-11 / flux_scale)
# Raw strings keep the LaTeX backslashes without relying on invalid
# (deprecated) escape sequences.
ax.axvline(3.3, ls='--', color='0.7', label=r'$\gamma$ = 3.3')
ax.legend()
ax.set_xlabel(r'Spectral index $\gamma$')
ax.set_ylabel(
    r'$\mathrm{E}^2 \cdot \mathrm{dN/dE}$'+ units_label + ' at {:.0f} TeV'.format(E0)  +
    r' [$\mathrm{TeV} \, \mathrm{s}^{-1} \, \mathrm{cm}^{-2}$]')
ax.grid()
fig.savefig('{}/spline_boundary_effect.png'.format(plot_dir))


#### Make critical value plot

In [None]:
def check_correction(bias_corr_funcs, catalog):
    """Decide whether a bias correction is applied and pick its function.

    Parameters
    ----------
    bias_corr_funcs : dict or None
        Mapping from catalog name to bias-correction function, or `None`
        to disable the correction.
    catalog : str
        Catalog whose correction function is looked up.

    Returns
    -------
    tuple
        `(apply_correction, bias_corr_func_kw)`: a flag telling whether a
        correction is applied, and the function for `catalog` (or `None`).
    """
    apply_correction = bias_corr_funcs is not None
    if apply_correction:
        print('Applying correction')
        bias_corr_func_kw = bias_corr_funcs[catalog]
    else:
        print('Not applying correction')
        bias_corr_func_kw = None
    return apply_correction, bias_corr_func_kw
    
def compute_tau_observed_matrix(
            catalog, gammas, norms,
            bias_corr_funcs=None,
            cpus=20,
            E0=100, unit=1e3,
        ):
    """Evaluate the test statistic of the observed data on a (gamma, norm) grid.

    For every spectral index in `gammas` and every normalization in `norms`
    one value is computed with the truth flag set to True and a fixed seed
    of 42: through `compute_trial_i` in a process pool when `cpus > 1`,
    otherwise through a direct call to `get_critical_value_trial`.

    Note: the argument tuples read `min_red_factor` and `max_k` from the
    notebook namespace; both must be defined before calling this function.

    Returns
    -------
    tuple
        `(tau_observed_matrix, gammas, norms)` where the matrix has shape
        `(len(gammas), len(norms))`.
    """
    _, bias_corr_func_kw = check_correction(bias_corr_funcs, catalog)

    # matrix of observed tau values: [gammas, norms]
    n_gammas = len(gammas)
    n_norms = len(norms)
    tau_observed_matrix = np.zeros((n_gammas, n_norms))

    use_pool = cpus > 1

    for row, gamma in enumerate(tqdm(gammas, total=n_gammas)):

        # trial runner for this spectral index
        tr = get_catalog_tr(ana=ana, catalog=catalog, gamma=gamma)

        # one argument tuple per normalization for `compute_trial_i`
        trial_args = [
            (42, [E2dNdE], gamma, catalog, E0, unit, bias_corr_func_kw, None, min_red_factor, max_k, True)
            for E2dNdE in norms
        ]

        if use_pool:
            with Pool(cpus) as pool:
                row_values = list(tqdm(pool.imap(compute_trial_i, trial_args), total=len(trial_args)))
            tau_observed_matrix[row] = np.squeeze(row_values)
            continue

        for col, norm in enumerate(tqdm(norms, total=n_norms)):
            # tau value for the observed data in this bin
            tau_observed_matrix[row, col] = get_critical_value_trial(
                E2dNdE=norm, gamma=gamma, tr=tr, tr_inj=tr, E0=E0, unit=unit, seed=42, TRUTH=True,
                bias_corr_func=bias_corr_func_kw,
            )

    return tau_observed_matrix, gammas, norms


def make_critical_value_plot(
            tau_dict, catalog,
            bias_corr_funcs=None,
            confidence_levels=[0.68, 0.9, 0.95],
            E0=100, unit=1e3,
            mask_ns_region_above=2000,
        ):
    """Build the trial, observed and p-value matrices for one catalog.

    Despite its name this function does not draw anything; it prepares the
    matrices that the plotting functions consume.

    Parameters
    ----------
    tau_dict : dict
        Nested mapping `(catalog, apply_correction) -> gamma -> norm ->
        array of trial tau values`.
    catalog : str
        Catalog to evaluate.
    bias_corr_funcs : dict or None
        Bias-correction functions per catalog; `None` disables the
        correction and selects the uncorrected entry of `tau_dict`.
    confidence_levels : list
        Not used by this function.
    E0, unit
        Flux convention; used both for the observed tau values and for the
        normalization bound of the masked region.
    mask_ns_region_above : number or None
        Bins whose normalization is not below the normalization
        corresponding to this number of signal events are set to NaN.
        `None` disables the masking.

    Returns
    -------
    tuple
        `(tau_matrix, tau_observed_matrix, pval_matrix, gammas, norms)`.
        `tau_matrix` has shape `[gammas, norms, trials]`; `pval_matrix`
        holds, per bin, the fraction of trials whose tau is smaller than
        the observed tau.
    """
    apply_correction, _ = check_correction(bias_corr_funcs, catalog)

    key_s = (catalog, apply_correction)
    tau_dict_i = tau_dict[key_s]

    # create matrix of tau values: [gammas, norm, trials]
    gammas = np.sort(list(tau_dict_i.keys()))
    norms = np.sort(list(tau_dict_i[gammas[0]].keys()))

    n_gammas = len(gammas)
    n_norms = len(norms)
    n_trial = len(tau_dict_i[gammas[0]][norms[0]])

    tau_matrix = np.zeros((n_gammas, n_norms, n_trial))
    pval_matrix = np.zeros((n_gammas, n_norms))

    # compute observed tau for each bin; E0 and unit are forwarded so that
    # the observed values use the same flux convention as the mask below
    tau_observed_matrix, _, _ = compute_tau_observed_matrix(
        catalog=catalog, bias_corr_funcs=bias_corr_funcs,
        gammas=gammas, norms=norms,
        E0=E0, unit=unit,
    )

    for i, gamma in enumerate(gammas):

        if mask_ns_region_above is None:
            valid_region = np.ones_like(norms, dtype=bool)
        else:
            norm_bound = get_flux_norm(
                ns_values=mask_ns_region_above, catalog=catalog, gamma=gamma, E0=E0, unit=unit)
            valid_region = np.array([n < norm_bound for n in norms])

        for j, (norm, valid) in enumerate(zip(norms, valid_region)):
            if valid:
                tau_matrix[i, j, :] = tau_dict_i[gamma][norm]
                pval_matrix[i, j] = np.sum(tau_matrix[i, j] < tau_observed_matrix[i, j]) * 1. / n_trial
            else:
                # masked bin: trials are not meaningful here
                tau_matrix[i, j, :] = np.nan
                pval_matrix[i, j] = np.nan

    return tau_matrix, tau_observed_matrix, pval_matrix, gammas, norms



#### Compute Matrices

In [None]:
# Collect the matrices of every catalog. The names `tau_matrix`, `gammas`
# and `norms` keep the values of the last catalog after the loop.
matrices_dict = {}
for catalog in ['unid', 'pwn', 'snr']:
#for catalog in ['unid']:
    tau_matrix, tau_observed_matrix, pval_matrix, gammas, norms = make_critical_value_plot(
        tau_dict=tau_dict, catalog=catalog, bias_corr_funcs=bias_corr_funcs)

    matrices_dict[catalog] = dict(
        tau_matrix=tau_matrix,
        tau_observed_matrix=tau_observed_matrix,
        pval_matrix=pval_matrix,
        gammas=gammas,
        norms=norms,
    )

    # Disabled: observed tau on an independent grid.
    if False:
        tau_observed_matrix_full, gammas_full, norms_full = compute_tau_observed_matrix(
            catalog=catalog, bias_corr_funcs=bias_corr_funcs,
            gammas=np.linspace(2.2, 3.35, 2), norms=np.linspace(1e-13, 13e-12, 2),
            cpus=30,
        )

        matrices_dict[catalog].update(
            tau_observed_matrix_full=tau_observed_matrix_full,
            gammas_full=gammas_full,
            norms_full=norms_full,
        )


#### Add another Wilks'-based contour?

In [None]:
#for catalog in ['unid', 'pwn', 'snr']:
for catalog in ['pwn', 'snr']:
    # Disabled: observed tau on a wider grid; the results would be stored
    # under the `*_full` keys of `matrices_dict[catalog]`.
    if False:
        tau_observed_matrix_full, gammas_full, norms_full = compute_tau_observed_matrix(
            catalog=catalog, bias_corr_funcs=bias_corr_funcs,
            gammas=np.linspace(2.2, 3.7, 24), norms=np.linspace(1e-13, 18e-12, 50),
            cpus=30,
        )

        matrices_dict[catalog].update(
            tau_observed_matrix_full=tau_observed_matrix_full,
            gammas_full=gammas_full,
            norms_full=norms_full,
        )


#### Make Plots

In [None]:
import matplotlib
from scipy.interpolate import UnivariateSpline, interp2d
from scipy import optimize


def draw_critical_value_plot(
            pval_matrix, gammas, norms, res_dict,
            E0=100, unit=1e3, unit_multiplier=1e-11,
            fig=None, ax=None,
            confidence_levels=[0.68, 0.9, 0.95],
            bounds=np.linspace(0, 1, 21),
            confidence_colors=['1.', '1.', '1.'],
            confidence_ls=['-', '--', '-.'],
            confidence_labels=None,
            cb_label=r'Fraction of $\tau_\mathrm{fit} < \tau_\mathrm{observed}$',
            cax=None,
            xlabel=r'Spectral index $\gamma$',
            ylabel='<FILL>',
            plot_best_fit=True, plot_minimum=True,
            catalog=None,
        ):
    """Draw a 2D map over (gamma, normalization) with contour lines.

    Parameters
    ----------
    pval_matrix : np.ndarray
        Values to draw, shape `(len(gammas), len(norms))`.
    gammas, norms : array-like
        Equally spaced grid points (checked by assertions).
    res_dict : dict
        Per-catalog fit results; entry `[1]` is converted with
        `to_E2dNdE` and entry `[2]` is used as the best-fit spectral index.
    E0, unit
        Flux convention passed to `to_E2dNdE`.
    unit_multiplier : 1e-11 or None
        Factor by which the normalization axis is divided. `None` means no
        scaling. Any other value raises `ValueError`.
    fig, ax
        Figure/axes to draw into. A new figure is created if `ax` is None;
        if only `ax` is given, its figure is used.
    confidence_levels, confidence_colors, confidence_ls, confidence_labels
        Levels and styling of the contour lines; labels are optional.
    bounds : array-like
        Boundaries of the discrete color scale.
    cb_label, cax
        Colorbar label and optional axes for the colorbar.
    xlabel, ylabel
        Axis labels; the placeholder '<FILL>' builds the default flux label.
    plot_best_fit, plot_minimum : bool
        Whether to mark the analysis best fit and the minimum of the map.
    catalog : str or None
        Catalog whose entry of `res_dict` is used. If None, the
        notebook-level variable `catalog` is used (previous behavior).

    Returns
    -------
    tuple
        `(fig, ax, min_gamma, min_norm)`; `min_norm` is given in unscaled
        flux units.
    """
    if catalog is None:
        # Backward compatibility: earlier versions silently read the
        # notebook-level loop variable `catalog`.
        catalog = globals().get('catalog')
        if catalog is None:
            raise ValueError('No catalog given and no global `catalog` defined.')

    # reversed viridis color map with discrete levels
    cmap = plt.cm.viridis
    cmaplist = [cmap(i) for i in range(cmap.N)[::-1]]
    cmap = mpl.colors.LinearSegmentedColormap.from_list(
        'Custom cmap', cmaplist, cmap.N)

    # define the bins and normalize
    norm = mpl.colors.BoundaryNorm(bounds, cmap.N)

    # `scale` is always a number so that the arithmetic below also works
    # for unit_multiplier=None.
    if unit_multiplier == 1e-11:
        units_label = r' $\cdot 10^{-11}$'
        scale = 1e-11
    elif unit_multiplier is None:
        units_label = ''
        scale = 1.0
    else:
        raise ValueError(unit_multiplier)
    norms = np.array(norms) / scale

    # half bin widths; the grid must be equally spaced
    gamma_width = np.diff(gammas) * 0.5
    assert np.allclose(gamma_width, gamma_width[0]), gamma_width
    gamma_width = gamma_width[0]

    norm_width = np.diff(norms) * 0.5
    assert np.allclose(norm_width, norm_width[0]), norm_width
    norm_width = norm_width[0]

    # bin edges centered on the grid points
    x_bins = np.array([g - gamma_width for g in gammas] + [gammas[-1] + gamma_width])
    y_bins = np.array([n - norm_width for n in norms] + [norms[-1] + norm_width])
    grid_x, grid_y = np.meshgrid(x_bins, y_bins)

    if ax is None:
        fig, ax = plt.subplots(figsize=(9, 6))
    elif fig is None:
        # the colorbar below needs the figure that owns `ax`
        fig = ax.figure
    pc_gamma = ax.pcolormesh(grid_x, grid_y, pval_matrix.T, cmap=cmap, norm=norm)
    contours = ax.contour(
        gammas, norms, pval_matrix.T, levels=confidence_levels, colors=confidence_colors,
        linestyles=confidence_ls,
    )
    ax.clabel(contours, inline=1, fontsize=10)
    if confidence_labels is not None:
        # invisible proxy lines that only create legend entries
        for color, ls, label, in zip(confidence_colors, confidence_ls, confidence_labels):
            ax.plot(np.nan, np.nan, color=color, ls=ls, label=label)
    if cax is None:
        cb_gamma = fig.colorbar(pc_gamma, ax=ax)
    else:
        cb_gamma = fig.colorbar(pc_gamma, cax=cax)
    cb_gamma.set_label(cb_label)

    # plot analysis best fit
    best_fit_gamma = res_dict[catalog][2]
    tr = get_catalog_tr(ana=ana, catalog=catalog, gamma=best_fit_gamma)
    best_fit_norm = tr.to_E2dNdE(res_dict[catalog][1], E0=E0, unit=unit) / scale
    if plot_best_fit:
        ax.scatter(
            best_fit_gamma, best_fit_norm, marker='x', color='red',
            label=r'$\gamma$: {:3.2f} | $\Phi$: {:3.2e} | uncorrected'.format(
                best_fit_gamma, best_fit_norm * scale),
        )

    # compute minimum of the interpolated map, starting at the best fit
    # NOTE(review): `interp2d` is deprecated in recent SciPy releases; see
    # the SciPy interpolation transition guide before upgrading SciPy.
    spline2d = interp2d(x=gammas, y=norms, z=pval_matrix.T, kind='cubic')

    def loss(args):
        gamma, norm = args
        return spline2d(gamma, norm)
    res = optimize.minimize(loss, x0=(best_fit_gamma, best_fit_norm))
    min_gamma = res.x[0]
    min_norm = res.x[1] * scale

    if plot_minimum:
        ax.scatter(
            res.x[0], res.x[1], marker='^', color='white',
            label=r'$\gamma$: {:3.2f} | $\Phi$: {:3.2e} | bias-corrected'.format(min_gamma, min_norm),
        )

    ax.set_xlabel(xlabel)
    if ylabel == '<FILL>':
        ylabel = (
            r'$\mathrm{E}^2 \cdot \mathrm{dN/dE}$'+ units_label + ' at {:.0f} TeV'.format(E0)  +
            r' [$\mathrm{TeV} \, \mathrm{s}^{-1} \, \mathrm{cm}^{-2}$]')
    ax.set_ylabel(ylabel)

    return fig, ax, min_gamma, min_norm


def find_intersection2(spline, x0, cl):
    """Find intersection points based on splines.

    Solves `spline(x) == cl` with a root finder started at `x0` (scalar or
    sequence of start values) and returns the sorted, unique solutions
    rounded to four decimals.
    """
    start_points = np.atleast_1d(x0)
    solution = optimize.root(lambda x: spline(x) - cl, x0=start_points)
    rounded_roots = np.round(solution.x, decimals=4)
    return np.sort(np.unique(rounded_roots))

def find_intersection(spline, x0, x_bounds, cl, n_steps=10000, eps=1e-4):
    """Find intersection points based on splines

    Brute force in provided range, assumes one intersection
    is left and one right of x0

    Parameters
    ----------
    spline : callable
        Vectorized function evaluated on a grid of x values.
    x0 : float
        Point between the two searched intersections.
    x_bounds : sequence of float
        Outer bounds of the search range; they are sorted internally.
    cl : float
        Level that `spline` is intersected with.
    n_steps : int
        Number of grid points between each bound and `x0`.
    eps : float
        An intersection is accepted if the minimal squared distance
        between `spline` and `cl` on the grid is below this value.

    Returns
    -------
    np.ndarray
        One entry per bound, in the order of the sorted bounds; NaN marks a
        bound for which no intersection was found. If every entry is
        finite the result is additionally sorted in ascending order.
    """
    intersections = []
    for bound in np.sort(x_bounds):
        x = np.linspace(bound, x0, n_steps)
        y = (spline(x) - cl)**2
        best = np.argmin(y)
        # a NaN minimum fails the comparison and is reported as "not found"
        intersections.append(x[best] if y[best] < eps else np.nan)

    intersections = np.array(intersections, dtype=float)

    # Sorting moves NaN to the end, which would shift a found upper
    # intersection into the slot of a missing lower one. Only sort when
    # every intersection was found.
    if np.all(np.isfinite(intersections)):
        intersections = np.sort(intersections)
    return intersections

def draw_combined_confidence_plot(
            pval_matrix, gammas, norms, res_dict,
            E0=100, unit=1e3, unit_multiplier=1e-11,
            fig=None, ax=None, figsize=(12, 9),
            confidence_levels=[0.68, 0.9, 0.95],
            confidence_names=['68% CL', '90% CL', '95% CL'],
            bounds=np.linspace(0, 1, 21),
            confidence_colors=['1.', '1.', '1.'],
            confidence_ls=['-', '--', '-.'],
            confidence_labels=None,
            cb_label=r'Fraction of $\tau_\mathrm{fit} < \tau_\mathrm{obs}$',
            xlabel=r'Spectral index $\gamma$',
            plot_spline=True,
            replace_pval_matrix_nans=1.0,
        ):
    """Draw the 2D map together with its two 1D profiles.

    The figure contains the 2D map (via `draw_critical_value_plot`), the
    profile over the normalization (minimum over gamma) on the left, the
    profile over gamma (minimum over normalization) below, a colorbar and
    a text box with the intervals found for each confidence level.

    Parameters
    ----------
    pval_matrix : np.ndarray
        Values to draw, shape `(len(gammas), len(norms))`.
    gammas, norms : array-like
        Grid points of the matrix.
    res_dict : dict
        Fit results, forwarded to `draw_critical_value_plot`.
    E0, unit
        Flux convention. Only `unit == 1e3` is supported.
    unit_multiplier : 1e-11 or None
        Factor by which the normalization axis is divided; `None` means no
        scaling. Any other value raises `ValueError`.
    fig, ax
        Accepted for interface compatibility but not used: a new figure is
        always created.
    figsize : tuple
        Size of the created figure.
    confidence_levels, confidence_names, confidence_ls
        Levels for which intervals are computed, their names in the
        legend/text box and the line styles used to mark them.
    bounds, confidence_colors, confidence_labels, cb_label
        Forwarded to `draw_critical_value_plot`.
    xlabel : str
        Label of the gamma axis.
    plot_spline : bool
        Whether to draw the spline fits of the 1D profiles.
    replace_pval_matrix_nans : float or None
        Value written into non-finite entries of a copy of `pval_matrix`.
        `None` leaves the matrix untouched.

    Returns
    -------
    tuple
        `(fig, axes)` with `axes = [ax_2d, ax_gamma, ax_norm, ax_cb, ax_text]`.
    """
    if replace_pval_matrix_nans is not None:
        # work on a copy and fill with the requested value
        pval_matrix = np.array(pval_matrix)
        pval_matrix[~np.isfinite(pval_matrix)] = replace_pval_matrix_nans

    # `scale` is always a number so that the arithmetic in this function
    # also works for unit_multiplier=None.
    if unit_multiplier == 1e-11:
        units_label = r' $\cdot 10^{-11}$'
        scale = 1e-11
    elif unit_multiplier is None:
        units_label = ''
        scale = 1.0
    else:
        raise ValueError(unit_multiplier)
    norms_mod = np.array(norms) / scale

    if unit == 1e3:
        ylabel = (
            r'$\mathrm{E}^2 \cdot \mathrm{dN/dE}$'+ units_label + ' at {:.0f} TeV'.format(E0)  +
            r' [$\mathrm{TeV} \, \mathrm{s}^{-1} \, \mathrm{cm}^{-2}$]')
    else:
        raise ValueError(unit)

    norm_bounds = (np.min(norms_mod), np.max(norms_mod))
    gamma_bounds = (np.min(gammas), np.max(gammas))

    fig = plt.figure(figsize=figsize)
    gs = matplotlib.gridspec.GridSpec(2, 3, width_ratios=[3, 10, 0.5], height_ratios=[10, 3])
    ax_2d = fig.add_subplot(gs[0, 1])
    ax_cb = fig.add_subplot(gs[0, 2])
    ax_norm = fig.add_subplot(gs[0, 0], sharey=ax_2d)
    ax_gamma = fig.add_subplot(gs[1, 1], sharex=ax_2d)
    ax_text = fig.add_subplot(gs[1, 0])
    axes = [ax_2d, ax_gamma, ax_norm, ax_cb, ax_text]

    ax_text.axis('off')
    result_str = ''

    # hide the tick labels of the 2D panel only; the shared axes of the
    # profile panels keep theirs
    plt.setp(ax_2d.get_yticklabels(), visible=False)
    plt.setp(ax_2d.get_xticklabels(), visible=False)
    #ax_2d.axes.xaxis.set_ticklabels([]) # note due to share axis, this will apply to all
    #ax_2d.axes.yaxis.set_ticklabels([]) # note due to share axis, this will apply to all

    _, _, min_gamma, min_norm = draw_critical_value_plot(
        pval_matrix=pval_matrix,
        gammas=gammas,
        norms=norms,
        res_dict=res_dict,
        E0=E0, unit=unit, unit_multiplier=unit_multiplier,
        fig=fig, ax=ax_2d, cax=ax_cb,
        confidence_levels=confidence_levels,
        bounds=bounds,
        confidence_colors=confidence_colors,
        confidence_ls=confidence_ls,
        confidence_labels=confidence_labels,
        cb_label=cb_label,
        xlabel=None,
        ylabel=None,
    )
    ax_2d.legend()


    # ----------------------------
    # 1D contour for normalization
    # ----------------------------
    matrix_norm = np.min(pval_matrix, axis=0)
    ax_norm.plot(matrix_norm, norms_mod)
    ax_norm.set_xlabel(cb_label)

    # fit spline
    spline = UnivariateSpline(x=norms_mod, y=matrix_norm, s=len(norms)*0.0002)
    if plot_spline:
        ax_norm.plot(spline(norms_mod), norms_mod, ls='--', label='Spline-Fit')

    # find intersections
    result_str += r'$\Phi_\mathrm{corrected}$ = ' + '{:3.2e}\n'.format(min_norm)
    for cl, ls, cname in zip(confidence_levels, confidence_ls, confidence_names):
        intersections = find_intersection(
            spline=spline, x0=min_norm / scale, x_bounds=norm_bounds, cl=cl)
        for intersection_i in intersections:
            ax_norm.axhline(intersection_i, color='0.8', ls=ls)
        result_str += '  {} | +{:3.2e} -{:3.2e}\n'.format(
            cname,
            (intersections[1] * scale - min_norm),
            (min_norm - intersections[0] * scale),
        )
        # off-canvas line that only creates the legend entry
        ax_norm.axhline(
            np.inf, color='0.8', ls=ls, label='{}: [{:3.2e}, {:3.2e}]'.format(
                cname, *(intersections * scale)))
    ax_norm.legend(loc='center right', fontsize=6)
    result_str += '\n'

    # ----------------------------
    # 1D contour for gamma
    # ----------------------------
    matrix_gamma = np.min(pval_matrix, axis=1)
    ax_gamma.plot(gammas, matrix_gamma)
    ax_gamma.set_ylabel(cb_label)

    # fit spline
    # NOTE(review): the smoothing factor uses len(norms) although the fit
    # runs over gammas -- confirm this is intended.
    spline = UnivariateSpline(x=gammas, y=matrix_gamma, s=len(norms)*0.00005)
    if plot_spline:
        ax_gamma.plot(gammas, spline(gammas), ls='--', label='Spline-Fit')

    # find intersections
    result_str += r'$\gamma_\mathrm{corrected}$ = ' + '{:3.2f}\n'.format(min_gamma)
    for cl, ls, cname in zip(confidence_levels, confidence_ls, confidence_names):
        intersections = find_intersection(spline=spline, x0=min_gamma, x_bounds=gamma_bounds, cl=cl)
        for intersection_i in intersections:
            ax_gamma.axvline(intersection_i, color='0.8', ls=ls)
        result_str += '  {} | +{:3.2f} -{:3.2f}\n'.format(
            cname, intersections[1] - min_gamma, min_gamma - intersections[0],
        )
        # off-canvas line that only creates the legend entry
        ax_gamma.axvline(
            np.inf, color='0.8', ls=ls, label='{}: [{:3.2f}, {:3.2f}]'.format(
                cname, *intersections))
    ax_gamma.legend(loc='upper center', fontsize=6)
    # ----------------------------

    ax_text.text(
        -.2, 0, result_str,
        ha='left', va='bottom',
        transform=ax_text.transAxes,
        fontsize=8,
        bbox=dict(facecolor='none', edgecolor='0.3', boxstyle='round,pad=1'),
    )

    ax_gamma.set_xlabel(xlabel)
    ax_norm.set_ylabel(ylabel)
    ax_norm.set_ylim(norm_bounds)
    ax_gamma.set_xlim(gamma_bounds)

    return fig, axes



In [None]:
# Settings shared by all figures of this cell.
bias_corr_funcs_kw = bias_corr_funcs if apply_correction else None
file_suffix = '_corrected' if apply_correction else ''

if add_systematics:
    file_suffix += '_sys_red_{:1.3f}_k_{}'.format(min_red_factor, max_k)

# Keyword arguments common to every plot of the observed test statistic.
wilks_kwargs = dict(
    bounds=np.linspace(0, 10, 21), confidence_levels=[2.28, 4.6, 6.],
    confidence_labels=["Wilk's 68%", "Wilk's 90%", "Wilk's 95%"],
    cb_label=r'Test-statistic $\tau$',
    res_dict=res_dict,
)

# NOTE: the loop variable must be called `catalog`; the plotting code reads
# the notebook-level variable of that name.
for catalog, res in matrices_dict.items():
    # trial-based confidence regions
    fig, axes = draw_combined_confidence_plot(
        pval_matrix=res['pval_matrix'], gammas=res['gammas'], norms=res['norms'], res_dict=res_dict)
    axes[0].set_title('Stacking Catalog: {}'.format(catalog))
    fig.savefig('{}/confidence_2d_{}{}.png'.format(plot_dir, catalog, file_suffix))

    # regions from the observed test statistic: always on the trial grid,
    # and additionally on the `*_full` grid when it was computed
    wilks_inputs = [
        ('tau_observed_matrix', 'gammas', 'norms', '{}/confidence_2d_{}{}_wilks.png'),
    ]
    if 'tau_observed_matrix_full' in res:
        wilks_inputs.append(
            ('tau_observed_matrix_full', 'gammas_full', 'norms_full',
             '{}/confidence_2d_{}{}_wilks_full.png'))

    for matrix_key, gammas_key, norms_key, path_template in wilks_inputs:
        fig, axes = draw_combined_confidence_plot(
            pval_matrix=res[matrix_key], gammas=res[gammas_key], norms=res[norms_key],
            **wilks_kwargs
        )
        axes[0].set_title("Stacking Catalog: {} | Utilizing Wilk's Theorem".format(catalog))
        axes[0].legend()
        fig.savefig(path_template.format(plot_dir, catalog, file_suffix))
        

In [None]:
# Histogram the trial tau distributions for the two largest spectral
# indices and every fifth normalization.
#
# The data is read explicitly from `matrices_dict`. The last inserted
# catalog is used, i.e. the one whose matrices the matrix-computation loop
# left behind in the notebook namespace.
hist_catalog = list(matrices_dict)[-1]
hist_gammas = matrices_dict[hist_catalog]['gammas']
hist_norms = matrices_dict[hist_catalog]['norms']
hist_tau_matrix = matrices_dict[hist_catalog]['tau_matrix']

bins = 10
fig, ax = plt.subplots(figsize=(12, 12))
for i, gamma in enumerate(hist_gammas):
    if i < len(hist_gammas) - 2:
        continue
    for j, norm in enumerate(hist_norms):
        if j % 5 != 0:
            continue
        # masked bins only contain NaN and cannot be histogrammed
        trials = hist_tau_matrix[i, j]
        trials = trials[np.isfinite(trials)]
        if trials.size == 0:
            continue
        ax.hist(
            trials, bins=bins,
            label=r'$\gamma$ = {:3.2f} | $\Phi$ = {:3.3e}'.format(gamma, norm),
            histtype='step',
        )
ax.set_xlabel(r'Test-statistic $\tau$')
ax.set_ylabel('Number of trials')
ax.set_title('Stacking Catalog: {}'.format(hist_catalog))
ax.legend(fontsize=6)
