In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

# Open question: PDFs in BDT and sindec?
import os

# Optional switch to restrict the BLAS/OpenMP backends used by scipy/numpy
# to a single thread, so that the number of CPUs is controlled by csky alone.
# NOTE: the block is currently disabled (`if False`). It is placed before the
# numpy import further down in this cell.
if False:
    os.environ['MKL_NUM_THREADS'] = "1"
    os.environ['NUMEXPR_NUM_THREADS'] = "1"
    os.environ['OMP_NUM_THREADS'] = "1"
    os.environ['OPENBLAS_NUM_THREADS'] = "1"
    os.environ['VECLIB_MAXIMUM_THREADS'] = "1"

import matplotlib as mpl
# Use a white background for figures, both on screen and when saved.
mpl.rcParams['figure.facecolor'] = 'w'
mpl.rcParams['savefig.facecolor'] = 'w'
import matplotlib.pyplot as plt
from matplotlib import colors, cm
import csky as cy
from csky import cext
import numpy as np
import astropy
#from icecube import astro
import histlite as hl
# NOTE(review): healpy is imported under two names (`healpy` and `hp`).
import healpy
import healpy as hp
import socket
import pickle
import copy
healpy.disable_warnings()
# NOTE(review): the facecolor setting repeats the mpl.rcParams assignment above.
plt.rc('figure', facecolor = 'w')
plt.rc('figure', dpi=100)

## Define Settings

In [None]:
# Version tag of the event selection that is passed to cy.get_analysis.
selection_version = 'version-001-p00'

# Input/output locations are chosen based on the machine the notebook runs on.
host_name = socket.gethostname()

if 'cobalt' in host_name:
    print('Working on Cobalts')
    data_prefix = '/data/user/ssclafani/data/cscd/final'
    ana_dir = '/data/user/ssclafani/data/analyses/'
    plot_dir = '/data/user/mhuennefeld/data/analyses/DNNCascadeCodeReview/unblinding_checks/plots/unblinding/trials_distribution_check'

else:
    # Build a single message string: passing the host name as a second
    # positional argument makes the exception render as a tuple.
    raise ValueError('Unknown host: {}'.format(host_name))

In [None]:
# Make sure every output directory exists before figures are written to it.
for dir_path in [plot_dir]:
    if not os.path.isdir(dir_path):
        print('Creating directory:', dir_path)
    # exist_ok=True keeps this safe if the directory appears between the
    # check above and the creation (e.g. a concurrently running notebook).
    os.makedirs(dir_path, exist_ok=True)

## Load Data

In [None]:
# Data repository handle and dataset specification; both are passed to
# cy.get_analysis in the next cell.
repo = cy.selections.Repository()
specs = cy.selections.DNNCascadeDataSpecs.DNNC_10yr

In [None]:
%%time

# Build the analysis object for the configured selection version and specs.
# The commented-out `gammas` argument is an optional override that is not in use.
ana = cy.get_analysis(
    repo, selection_version, specs, 
    #gammas=np.r_[0.1:6.01:0.125],
)

In [None]:
# Take the first entry of ana.anas and display its `sig` attribute
# (presumably the signal MC sample -- confirm).
a = ana.anas[0]
a.sig

In [None]:
# Display the `bg_data` attribute of `a` (presumably the data used as
# background -- confirm).
a.bg_data

## Helpers

In [None]:
from cycler import cycle
from copy import deepcopy

# Shortcut to the soft color palette shipped with csky.
soft_colors = cy.plotting.soft_colors
# Colors of matplotlib's default property cycle; used by get_color_cycler and
# as the default color of plot_bkg_trials.
# NOTE(review): this rebinds the name `colors`, which the first cell imported
# from matplotlib (`from matplotlib import colors, cm`).
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']


def get_bias_allt(tr, ntrials=200, n_sigs=np.r_[:101:10], quiet=False):
    """Run signal-injection fits for a range of injected signal strengths.

    For each value in ``n_sigs`` the trial runner's ``get_many_fits`` is
    called with ``ntrials`` trials, using the injected value itself as the
    random seed. Each result is tagged with an ``ntrue`` column holding the
    injected value, and all results are concatenated.

    Parameters
    ----------
    tr : object
        Trial runner providing ``get_many_fits``.
    ntrials : int
        Number of trials per injected signal strength.
    n_sigs : array-like of int
        Injected signal strengths to scan.
    quiet : bool
        If True, no progress output is printed.

    Returns
    -------
    The concatenation (``cy.utils.Arrays.concatenate``) of all fit results,
    including the added ``ntrue`` column.
    """
    fits_per_strength = []
    for n_inj in n_sigs:
        if not quiet:
            # progress indicator, rewritten in place on a single line
            print(f'\r{n_inj:4d} ...', end='', flush=True)
        fits = tr.get_many_fits(
            ntrials, n_sig=n_inj, logging=False, seed=n_inj)
        fits_per_strength.append(fits)

    if not quiet:
        # finish the progress line
        print()

    # remember which signal strength was injected for every trial
    for n_inj, fits in zip(n_sigs, fits_per_strength):
        fits['ntrue'] = np.repeat(n_inj, len(fits))

    return cy.utils.Arrays.concatenate(fits_per_strength)

def get_color_cycler():
    """Return an endless iterator over the module-level ``colors`` list."""
    endless_colors = cycle(colors)
    return endless_colors

def plot_ns_bias(ax, tr, allt, label=''):
    """Plot the fitted ``ns`` against the injected number of signal events.

    The trials in ``allt`` are histogrammed in (``ntrue``, ``ns``) with one
    bin per injected value along the first axis; the result of
    ``contain_project(1)`` is drawn with error bands. A dashed diagonal marks
    the unbiased expectation ``ns == n_inj``.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw on.
    tr : object
        Trial runner; not used by this function.
    allt : object
        Trials providing ``ntrue`` and ``ns`` (e.g. from ``get_bias_allt``).
    label : str
        Legend label of the plotted curve.
    """
    injected = np.unique(allt.ntrue)
    spacing = np.mean(np.diff(injected))
    # bin edges centred on the injected values
    edges = np.r_[injected - 0.5*spacing, injected[-1] + 0.5*spacing]

    hist2d = hl.hist((allt.ntrue, allt.ns), bins=(edges, 100))
    hl.plot1d(
        ax, hist2d.contain_project(1),
        errorbands=True, drawstyle='default', label=label,
    )

    # identical range on both axes; the x range reuses what set_ylim returns
    axis_range = edges[[0, -1]]
    applied_ylim = ax.set_ylim(axis_range)
    ax.set_xlim(applied_ylim)

    # expectation for an unbiased fit
    ax.plot(axis_range, axis_range, color='C0', ls='--', lw=1, zorder=-10)
    ax.set_aspect('equal')

    ax.set_xlabel(r'$n_{inj}$')
    ax.set_ylabel(r'$n_s$')
    ax.grid()

def plot_gamma_bias(ax, tr, allt, label=''):
    """Plot the fitted ``gamma`` against the injected number of signal events.

    The trials in ``allt`` are histogrammed in (``ntrue``, ``gamma``) with one
    bin per injected value along the first axis; the result of
    ``contain_project(1)`` is drawn with error bands. A dashed horizontal line
    marks the gamma of the first flux of the first signal injector of ``tr``.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw on.
    tr : object
        Trial runner; ``tr.sig_injs[0].flux[0].gamma`` is the expected gamma.
    allt : object
        Trials providing ``ntrue`` and ``gamma`` (e.g. from ``get_bias_allt``).
    label : str
        Legend label of the plotted curve.
    """
    injected = np.unique(allt.ntrue)
    spacing = np.mean(np.diff(injected))
    # bin edges centred on the injected values
    edges = np.r_[injected - 0.5*spacing, injected[-1] + 0.5*spacing]
    true_gamma = tr.sig_injs[0].flux[0].gamma

    hist2d = hl.hist((allt.ntrue, allt.gamma), bins=(edges, 100))
    hl.plot1d(
        ax, hist2d.contain_project(1),
        errorbands=True, drawstyle='default', label=label,
    )

    ax.set_xlim(edges[[0, -1]])
    ax.set_ylim(1, 4)
    # injected spectral index as reference
    ax.axhline(true_gamma, color='C0', ls='--', lw=1, zorder=-10)

    ax.set_xlabel(r'$n_{inj}$')
    ax.set_ylabel(r'$\gamma$')
    ax.grid()

def plot_bkg_trials(
            bg, fig=None, ax=None, 
            label='{} bg trials', 
            label_fit=r'$\chi^2[{:.2f}\mathrm{{dof}},\ \eta={:.3f}]$', 
            color=colors[0],
            density=False,
            bins=50,
        ):
    """Plot the TS histogram of background trials and, if available, its fit.

    Parameters
    ----------
    bg : object
        TS distribution providing ``get_hist`` and ``n_total``. If it also has
        a ``pdf`` attribute, ``pdf``, ``ndof`` and ``eta`` are used to overlay
        and label the fitted distribution.
    fig : matplotlib figure, optional
        Figure that owns ``ax``. If ``ax`` is given without ``fig``, the
        figure is taken from ``ax.figure``.
    ax : matplotlib axes, optional
        Axes to draw on. A new 6x4 figure is created if None.
    label : str or None
        Legend label of the histogram; formatted with ``bg.n_total``.
    label_fit : str or None
        Legend label of the fit; formatted with ``bg.ndof`` and ``bg.eta``.
    color : color
        Color used for both the histogram and the fit.
    density : bool
        If True, plot the normalized histogram and the bare pdf; otherwise
        plot trial counts and the pdf scaled to the histogram integral.
    bins : int or array-like
        Binning passed to ``bg.get_hist``.

    Returns
    -------
    fig, ax
        The figure and axes that were drawn on.
    """
    if ax is None:
        fig, ax = plt.subplots(figsize=(6, 4))
    elif fig is None:
        # Callers passing only `ax` previously got `None` back as figure.
        fig = ax.figure

    h = bg.get_hist(bins=bins)
    if density:
        h = h.normalize()
    if label is not None:
        label = label.format(bg.n_total)
    hl.plot1d(ax, h, crosses=True, color=color, label=label)

    # compare with the chi2 fit:
    if hasattr(bg, 'pdf'):
        x = h.centers[0]
        if label_fit is not None:
            label_fit = label_fit.format(bg.ndof, bg.eta)
        if density:
            y_fit = bg.pdf(x)
        else:
            # scale the pdf to the number of trials in the histogram
            y_fit = h.integrate().values * bg.pdf(x)
        # semilogy also switches the y axis to a log scale
        ax.semilogy(x, y_fit, lw=1, ls='--', label=label_fit, color=color)

    ax.set_xlabel(r'TS')
    if density:
        ax.set_ylabel(r'Density')
    else:
        ax.set_ylabel(r'number of trials')
    ax.legend()

    return fig, ax

## Setup Analysis

In [None]:
# Put the directory two levels above the working directory first on the
# module search path so that `config` can be imported from there
# (presumably the project's config module -- confirm).
import sys
sys.path.insert(0, '../..')

import config as cg

# Override the base directory of the config module for this notebook session.
cg.base_dir = '/data/user/mhuennefeld/data/analyses/unblinding_v1.0.0/'

#### Get bkg trials

In [None]:
# Load the pre-computed background trials and keep the entry stored under
# the 'dec' key; later cells index it by declination in degrees.
# NOTE(security): allow_pickle=True unpickles the file, which can execute
# arbitrary code -- only load files from a trusted location.
bgfile = os.path.join(cg.base_dir, 'ps/trials/DNNC/bg.dict')
bg_trials = np.load(bgfile, allow_pickle=True)['dec']
bg_trials

#### Plot ts distribution

In [None]:
# `bg` was only defined by a later cell, so this cell failed on a fresh
# kernel. Use the same declination (-43 deg) as the TS-distribution cell below.
bg = bg_trials[-43]

# Distribution of the fitted ns of the background trials.
plt.hist(bg.ns, bins=100)
plt.xlabel('ns')
plt.ylabel('number of trials')
plt.yscale('log')

#### Correlation of ns vs TS

ns is well correlated with TS when conditioned on gamma.

In [None]:
# Centres and half-width of the gamma windows used to split the trials.
gammas = np.linspace(1., 4, 7)
gamma_width = 0.25

fig, ax = plt.subplots()
for gamma in gammas:
    # trials whose fitted gamma lies within +- gamma_width of the centre
    mask = np.logical_and(
        bg.gamma > gamma - gamma_width,
        bg.gamma < gamma + gamma_width,
    )
    # raw string: '\g' and '\p' are invalid escape sequences in a normal string
    ax.scatter(
        bg.ns[mask], bg.ts[mask],
        label=r'$\gamma$ = {:3.2f} $\pm$ {:3.2f}'.format(gamma, gamma_width),
    )
ax.legend(loc='lower right')
ax.set_xlabel('NS')
ax.set_ylabel('TS');



In [None]:
from cycler import cycle

# Declinations (deg), gamma window centres/half-width, significance levels
# and ns bin edges for which the TS distributions are compared.
dec_degs = [-43]
gammas = np.linspace(1., 4, 4)
nsigmas = [3]  # [3, 5]
ns_bins = [0, np.inf]
gamma_width = 0.25

bins = np.linspace(0, 30, 50)

for dec_deg in dec_degs:

    fig, ax = plt.subplots(figsize=(9, 6))

    color_cycle = cycle(cy.plotting.soft_colors)

    # --- all background trials at this declination ---
    color = next(color_cycle)
    bg = bg_trials[dec_deg]
    bg_tsd = cy.dists.TSD(bg)
    plot_bkg_trials(bg_tsd, density=True, bins=bins, color=color, fig=fig, ax=ax)

    # TS thresholds of the full distribution, kept per significance level so
    # the conditional distributions below are compared to the right value.
    ts_thresholds = {}
    for nsigma, ls in zip(nsigmas, ['--', '-']):
        ts_val = bg_tsd.isf_nsigma(nsigma)
        ts_thresholds[nsigma] = ts_val
        ax.axvline(
            ts_val, ls=ls, lw=1, color=color,
            label='{}-sigma TS: {:3.3f}'.format(nsigma, ts_val),
        )

    # --- trials restricted to a window in fitted gamma (and ns) ---
    for gamma in gammas:
        color = next(color_cycle)
        mask_gamma = np.logical_and(
            bg.gamma > gamma - gamma_width,
            bg.gamma < gamma + gamma_width,
        )
        for j in range(len(ns_bins) - 1):
            mask_ns = np.logical_and(
                bg.ns >= ns_bins[j],
                bg.ns < ns_bins[j + 1],
            )
            ns = np.mean(ns_bins[j:j+2])
            # Combine with the gamma mask only; previously the mask was
            # narrowed cumulatively across ns bins.
            mask = np.logical_and(mask_gamma, mask_ns)
            print('ns:', np.sum(mask_ns), np.sum(mask))
            if np.sum(mask) > 100:

                bg_tsd_gamma = cy.dists.TSD(bg[mask])
                plot_bkg_trials(
                    bg_tsd_gamma, density=True, bins=bins, color=color,
                    fig=fig, ax=ax,
                    # '{} bg trials' is left unformatted here; plot_bkg_trials
                    # fills in the number of trials.
                    label='{} bg trials' + r' [$\gamma$={:3.2f} $\pm$ {:3.2f}]'.format(gamma, gamma_width),
                )

                for nsigma, ls in zip(nsigmas, ['--', '-']):
                    # Significance that the n-sigma threshold of the full
                    # distribution corresponds to in this conditional one.
                    # Previously a stale `ts_val` from the preceding gamma
                    # window was used here.
                    print('Gamma: {:3.2f} | ns: {:3.2f} | From {:3.2f} to {:3.2f}'.format(
                        gamma, ns, nsigma,
                        bg_tsd_gamma.sf_nsigma(ts_thresholds[nsigma])))
                    ts_val = bg_tsd_gamma.isf_nsigma(nsigma)
                    ax.axvline(
                        ts_val, ls=ls, lw=1, color=color,
                        label='{}-sigma TS: {:3.3f}'.format(nsigma, ts_val),
                    )

    ax.set_title('Declination: {:3.2f}°'.format(dec_deg))
    ax.set_yscale('log')
    ax.legend()
    # NOTE(review): the file name does not contain the declination, so with
    # more than one entry in `dec_degs` each figure overwrites the previous one.
    fig.savefig('{}/ts_distribution.png'.format(plot_dir))

In [None]:
# For every template: plot the background TS distribution, mark the observed
# TS (with its ns in the legend) and the 5-sigma TS threshold, and save the
# figure to plot_dir.
# NOTE(review): `bkg_dict` and `res_dict` are not defined in any of the cells
# shown above, so this cell fails on a fresh kernel -- add the cell that
# creates them. `res_dict[key]` is read as (ts, ns).
# NOTE(review): the loop rebinds the notebook-level names `bg`, `ts` and `ns`.
for key, bg in bkg_dict.items():
    bg_tsd = cy.dists.TSD(bg)
    fig, ax = plot_bkg_trials(bg_tsd)
    ts = res_dict[key][0]
    ns = res_dict[key][1]
    # observed result
    ax.axvline(
        ts, color='0.8', ls='--', lw=2,
        label='TS: {:3.3f} | ns: {:3.1f}'.format(ts, ns), 
    )
    # TS value at which the background distribution reaches 5 sigma
    ts_5sig = bg_tsd.isf_nsigma(5)
    ax.axvline(
        ts_5sig, ls='--', lw=1,
        label='5-sigma TS: {:3.3f}'.format(ts_5sig), 
    )
    ax.set_title('Template: {}'.format(key))
    ax.set_yscale('log')
    # redraw the legend so that the lines added after plot_bkg_trials appear
    ax.legend()
    fig.savefig('{}/ts_dist_{}.png'.format(plot_dir, key))