Testing the whole production using package methods

In [None]:
# from importlib import reload
import os
import math
import helper as hlp
from IPython.display import FileLink
from time import time
import numpy as np
import numpy.lib.recfunctions

import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mpldates
import matplotlib.gridspec as gridspec
from matplotlib.colors import LogNorm, Normalize
%matplotlib inline

import scipy.interpolate as sci
import scipy.optimize as sco
import scipy.integrate as scint
import scipy.stats as scs
import scipy.signal as scsignal

from tqdm import tqdm_notebook as tqdm
import json
import datetime
import pickle
from astropy.time import Time as astrotime
from corner import corner

import sklearn.neighbors as skn
import sklearn.model_selection as skms  # Newer version of grid_search
from sklearn.utils import check_random_state

from anapymods3.plots.general import (split_axis, get_binmids, dg,
                                      hist_marginalize, hist_from_counts)
from anapymods3.stats.sampling import rejection_sampling
from anapymods3.general.misc import (fill_dict_defaults,
                                     flatten_list_of_1darrays)
import anapymods3.plots.astro as amp_plt
from anapymods3.healpy import wrap_theta_phi_range

import tdepps.bg_injector as BGInj
import tdepps.bg_rate_injector as BGRateInj
import tdepps.rate_function as RateFunc
import tdepps.llh as LLH
import tdepps.analysis as Analysis
import tdepps.signal_injector as SigInj
from tdepps.utils import rejection_sampling, func_min_in_interval, rotator

# from corner_hist import corner_hist
# from skylab.utils import FitDeltaChi2
secinday = 24. * 60. * 60.

# Load data

Load IC86 data from epinat, which should be the usual IC86-I (2011) PS sample, but pull corrected and OneWeights corrected by number of events generated.

We apply a sigma cut to remove badly reconstructed events.
Use `_exp` and `_mc` for the cells below.

In [None]:
# Load the experimental (`exp`) and simulation (`mc`) event records from disk
exp = np.load("data/IC86_I_data.npy")
mc = np.load("data/IC86_I_mc.npy")
# Use the officially stated livetime, not the ones from below
# NOTE(review): the unit is presumably days -- confirm against the sample docs
livetime = 332.61

In [None]:
# Global sigma cut at 20 degrees (only drops a handful of badly reconstructed
# events); applied identically to data and simulation
sigma_cut = np.deg2rad(20)
_exp = exp[exp["sigma"] < sigma_cut]
_mc = mc[mc["sigma"] < sigma_cut]

# Plain (nevts, nfeatures) arrays for plotting, one row per event. The column
# order is fixed by `feature_names`
feature_names = ("logE", "dec", "sigma", "ra")
sample = np.vstack([_exp[name] for name in feature_names]).T
mc_sample = np.vstack([_mc[name] for name in feature_names]).T

# Test Production Modules

Test if the modules work correctly.
They contain the same code as in the main test notebook, but can be used as classes.
This should simplify production.

Currently each submodule only does a very specific task:

- `bg_injector`: Samples ("injects") background events for trials
- `bg_rate_injector`: Samples ("injects") the number of BG events to be injected per trial.
- `rate_function`: Describes the time dependence of the background rate.
- `llh`: Implements the likelihood function and signal and background PDFs.
- `signal_injector`: Same as `bg_injector` but injecting signal evts from MC.
- `analysis`: Main module pulling it all together, making trials, fitting llhs, provides methods for advanced tasks.

## BG Injector

Injects information for background-like events.

In [None]:
# Setup for tests in this chapter
n_samples = int(1e5)
# Seeded generator that is handed to the injectors below
rndgen = np.random.RandomState(7353)

# Axis labels of the 2D comparison plots; entry i belongs to `axes[i]`
xlabel = ["logE", "dec", "logE", "dec"]
ylabel = ["dec", "sigma", "sigma", "ra"]

# Column index pairs into `sample` (columns are logE, dec, sigma, ra) that
# get histogrammed against each other
axes = [[0, 1], [1, 2], [0,2], [1, 3]]

### Data Resampling

In [None]:
# Resample events directly from the experimental data set
data_inj = BGInj.DataBGInjector(random_state=rndgen)
data_inj.fit(_exp)
data_sam = data_inj.sample(n_samples)
X_names = data_inj._X_names + ["ra"]

# shape (n_samples, n_features) for plotting. np.vstack needs a real sequence;
# passing a generator is deprecated and rejected by current NumPy versions
_d_sam = np.vstack([data_sam[n] for n in X_names]).T
for i, axis in enumerate(axes):
    # Left panel: original data, right panel: resampled events
    fig, (al, ar) = hlp.hist_comp(sample[:, axis], _d_sam[:, axis])
    al.set_xlabel(xlabel[i])
    ar.set_xlabel(xlabel[i])
    al.set_ylabel(ylabel[i])
    ar.set_ylabel(ylabel[i])
    al.set_title("Data")
    ar.set_title("Data sample: {} evts from original Data".format(
        len(_d_sam)))
    plt.show()

### Adaptive Width KDE sampling

In [None]:
# Assign model from CV, which has already evaluated adaptive kernels.
# Otherwise we would have to reevaluate which takes a long time.
# This should be an official option, to set KDE values for datasets.
# NOTE: unpickling executes arbitrary code, only load files you trust.
with open("data/awKDE_CV/CV10_glob_bw_alpha_EXP_IC86I_CUT_sig.ll.20_" +
          "PARS_diag_True_pass2.pickle", "rb") as f:
    model_selector = pickle.load(f)
    print(model_selector.best_params_)

# Pickled KDE was created in a different version, so reconstruct using the
# params to avoid refitting (DON'T USE PICKLE!)
alpha = model_selector.best_params_["alpha"]
glob_bw = model_selector.best_params_["glob_bw"]
diag_cov = model_selector.best_estimator_.diag_cov

kde_inj = BGInj.KDEBGInjector(alpha=alpha, glob_bw=glob_bw,
                              diag_cov=diag_cov, random_state=rndgen)
# Reuse the already evaluated adaptive kernel values from the CV model
kde_inj._kde_model._kde_values = model_selector.best_estimator_._kde_values

# Fit doesn't take long because all adaptive kernels are set.
# Note: The original order cannot be changed now [logE, dec, sigma]
bounds = np.array([[None, None], [-np.pi / 2., np.pi / 2.], [0, None]])
kde_inj.fit(_exp, bounds)

# Sample (bounds are preventing spillover in undefined regions)
kde_sam = kde_inj.sample(n_samples)
X_names = kde_inj._X_names + ["ra"]
# np.vstack needs a real sequence; passing a generator is deprecated and
# rejected by current NumPy versions
_kde_sam = np.vstack([kde_sam[n] for n in X_names]).T

for i, axis in enumerate(axes):
    # Left panel: original data, right panel: KDE sampled events
    fig, (al, ar) = hlp.hist_comp(sample[:, axis], _kde_sam[:, axis])
    al.set_xlabel(xlabel[i])
    ar.set_xlabel(xlabel[i])
    al.set_ylabel(ylabel[i])
    ar.set_ylabel(ylabel[i])
    al.set_title("Data")
    ar.set_title("KDE sample: {} evts".format(len(_kde_sam)))
    plt.show()

### GRBLLH style

In [None]:
# If False, only sample where data was
# If True sample in global min/max bounding box
minmax = True

mrinj = BGInj.MRichmanBGInjector(random_state=rndgen)
ax0_bins, ax1_bins, ax2_bins = mrinj.fit(_exp, nbins=10, minmax=minmax)
mr_sam = mrinj.sample(n_samples=n_samples)
X_names = mrinj._X_names + ["ra"]
# np.vstack needs a real sequence; passing a generator is deprecated and
# rejected by current NumPy versions
_mr_sam = np.vstack([mr_sam[n] for n in X_names]).T

for i, axis in enumerate(axes):
    # Left panel: original data, right panel: binned pseudo sample
    fig, (al, ar) = hlp.hist_comp(sample[:, axis], _mr_sam[:, axis])
    al.set_xlabel(xlabel[i])
    ar.set_xlabel(xlabel[i])
    al.set_ylabel(ylabel[i])
    ar.set_ylabel(ylabel[i])
    al.set_title("Data")
    ar.set_title("Pseudo MR sample: {} evts".format(len(_mr_sam)))
    plt.show()

### Pseudo Data (uniform) sampling

In [None]:
# Draw pseudo events from the uniform injector (no fit step is used here)
uni_inj = BGInj.UniformBGInjector(random_state=rndgen)
uni_sam = uni_inj.sample(n_samples)
X_names = uni_inj._X_names + ["ra"]
_uni_sam = np.vstack([uni_sam[n] for n in X_names]).T

# One comparison figure per feature pair: data left, uniform sample right
uni_title = "Pseudo (uniform) sample: {} evts".format(len(_uni_sam))
for axis, xlab, ylab in zip(axes, xlabel, ylabel):
    fig, (al, ar) = hlp.hist_comp(sample[:, axis], _uni_sam[:, axis])
    for panel, title in ((al, "Data"), (ar, uni_title)):
        panel.set_xlabel(xlab)
        panel.set_ylabel(ylab)
        panel.set_title(title)
    plt.show()

## BG Rate Injector

This module injects times of background like events.

### Injector created from runlist

First step is always to fit a RateFunction to rates from detector runs.
Here we use a Sinus1yrRateFunction with fixed period.

In [None]:
# Now parse the rundict and make the fitted injector from that
def filter_runs(run):
    """
    Decide whether a run entry is kept (selection as stated in jfeintzig's
    doc).

    Parameters
    ----------
    run : dict-like
        Must provide the keys ``"good_i3"``, ``"good_it"`` and ``"run"``.

    Returns
    -------
    bool
        ``True`` if both good flags compare equal to ``True`` and the run
        number is not in the list of explicitly excluded runs, else ``False``.
    """
    excluded = (120028, 120029, 120030, 120087, 120156, 120157)
    # Evaluate all three conditions up front, nothing is short-circuited
    i3_ok = run["good_i3"] == True
    it_ok = run["good_it"] == True
    not_excluded = run["run"] not in excluded
    return bool(i3_ok & it_ok & not_excluded)
    
# Create a rate function. We fix the period to 1 year here
rndgen = np.random.RandomState(7353)
rate_func_obj = RateFunc.Sinus1yrRateFunction(random_state=rndgen)

# Let's create an injector using a goodrun list. This creates a run dict
# (`filter_runs` is handed over as the run selection callback)
runlist = "data/runlists/ic86-i-goodrunlist.json"
runlist_inj = BGRateInj.RunlistBGRateInjector(rate_func_obj, runlist,
                                              filter_runs, rndgen)

# Fit function to exp times to runlist bins
# NOTE(review): this uses the uncut `exp` times, not the sigma-cut `_exp`
# that later cells use -- confirm this is intended
times = exp["timeMJD"]
rate_func = runlist_inj.fit(T=times, x0=None, remove_zero_runs=True)

In [None]:
# Rebin (donglians proposal): merge the per-run rates into `ntbins` equally
# wide time bins spanning the whole run range
rates = runlist_inj._rate_rec
start_mjd = rates["start_mjd"]
stop_mjd = rates["stop_mjd"]

tmin, tmax = np.amin(start_mjd), np.amax(stop_mjd)
ntbins = 12
tbins = np.linspace(tmin, tmax, ntbins + 1)

# Get bin idx in which the runs fall
# This is not a 100% correct, because runs may be right over bin edges
idx = np.digitize(stop_mjd, tbins) - 1
# np.digitize uses half-open bins, so the run ending exactly at `tmax` gets
# index `ntbins` and would be dropped. Put it in the last bin instead
idx = np.minimum(idx, ntbins - 1)
# Builtin `float` replaces the `np.float` alias removed from NumPy
rates_per_bin = np.zeros(ntbins, dtype=float)

evts_in_run = rates["nevts"]
# Run durations in seconds
dts = (stop_mjd - start_mjd) * secinday
for i in range(ntbins):
    in_bin = (idx == i)
    # Rate per bin = all events / summed run time of the runs in that bin
    rates_per_bin[i] = np.sum(evts_in_run[in_bin]) / np.sum(dts[in_bin])

In [None]:
# Plot runs (zorder, because errorbar seems to have high zorder for centers)
# xerr is the half run length, so each bar spans exactly one run
xerr = 0.5 * (stop_mjd - start_mjd)
yerr = rates["rate_std"]
binmids = 0.5 * (stop_mjd + start_mjd)

plt.errorbar(binmids, rates["rate"], xerr=xerr, yerr=yerr,
             fmt=",", alpha=0.25, zorder=0)
plt.ylim(0, None);

# Plot fit
t = np.linspace(start_mjd[0], stop_mjd[-1], 1000)
y = rate_func(t)
plt.plot(t, y, zorder=5, lw=2, color="C1")

# Plot y shift dashed to see baseline or years average
avg = runlist_inj.best_pars[2]
plt.axhline(avg, 0, 1, color="C1", ls="--", label="", lw=1.5)

plt.xlim(start_mjd[0], stop_mjd[-1])
plt.xlabel("MJD")
plt.ylabel("Rate in Hz")

# Show rebinned (as expected you see nothing new). errorbar interprets xerr
# as the distance from the center, so use half the bin width
m = get_binmids([tbins])[0]
plt.errorbar(m, rates_per_bin, xerr=0.5 * np.diff(tbins),
             fmt=",", lw=2, color="C2", zorder=3)

# plt.savefig("./data/figs/time_rate_sinus_rebinned.png", dpi=200)
plt.ylim(0, 0.009)
plt.tight_layout()
plt.show()

print("Best fit params:")
for par, name in zip(runlist_inj.best_pars, ["amp", "toff", "base"]):
    print(" {:5} : {:+.3g}".format(name, par))


Sample some trials for a single src and time with the poisson=True keyword to see if we sample correctly for each trial.

Also compare with poisson=False to see if it's working correctly.

In [None]:
rates = runlist_inj._rate_rec
start_mjd = rates["start_mjd"]

# Pick some random time and time frame
t = rndgen.choice(start_mjd, size=1)
trange = np.array([-120, 220])

# This is a list of times per trial
ntrials = int(1e4)
trials = []
for i in range(ntrials):
    trial = runlist_inj.sample(t, trange, poisson=True)
    # Make one array of times, because we have only one src here
    trials.append(flatten_list_of_1darrays(trial))

nevents = np.array(list(map(len, trials)))
print("Sampled total of {:d} events in {:d} trials.".format(
        np.sum(nevents), ntrials))

# Plot poisson distribution of nevents with expectation from integral.
# `density=True` replaces the `normed` keyword removed from matplotlib
expect = runlist_inj.best_estimator_integral(t, trange)
_ = plt.hist(nevents, bins=np.arange(10), density=True)
plt.axvline(expect, 0, 1, color="C1", ls="--", lw=2, label="expect")
x = np.arange(0, 10)
y = scs.poisson.pmf(x, mu=expect)
_ = plt.plot(x, y, "C1", lw=2, drawstyle="steps-post")
plt.legend()
plt.show()

# Now the same for poisson=False as a crosscheck
trials = []
for i in range(ntrials):
    trial = runlist_inj.sample(t, trange, poisson=False)
    # Make one array of times, because we have only one src here
    trials.append(flatten_list_of_1darrays(trial))

nevents = np.array(list(map(len, trials)))
print("Sampled total of {:d} events in {:d} trials.".format(
        np.sum(nevents), ntrials))

# Plot poisson distribution of nevents with expectation from integral, here
# for comparison to the previous case only
expect = runlist_inj.best_estimator_integral(t, trange)
_ = plt.hist(nevents, bins=np.arange(10), density=True)
plt.axvline(expect, 0, 1, color="C1", ls="--", lw=2, alpha=0.5,
            label="expect")
plt.axvline(np.round(expect), 0, 1, color="C1", ls="--", lw=2,
            label="round expect")
x = np.arange(0, 10)
y = scs.poisson.pmf(x, mu=expect)
_ = plt.plot(x, y, "C1", lw=2, drawstyle="steps-post")
plt.legend()
plt.show()

Now we do the same, but with multiple sources.
Each src gets a larger time window, so the expectation gets higher and we can compare different poisson distributions at once.

In [None]:
rates = runlist_inj._rate_rec
start_mjd = rates["start_mjd"]

# Pick random times and make increasing time frames per source
nsrcs = 3
t = rndgen.choice(start_mjd, size=nsrcs)
trange = np.vstack((np.repeat([-100], nsrcs),
                    500 * np.arange(1, 3 * nsrcs + 1, 3))).T

# This is a list of times per trial
ntrials = int(1e4)
trials = []
for i in range(ntrials):
    trial = runlist_inj.sample(t, trange, poisson=True)
    # Keep the per-source arrays separate, we count events per source below
    trials.append(trial)

# The format of `trials` is list(array_src1, array_src2, ...) for each trial.
# We want the number of events sampled per src per trial
nevents = []
for i in range(nsrcs):
    nevents.append([len(trial[i]) for trial in trials])
    print("Sampled {:d} events in {:d} trials for src {:d}.".format(
          np.sum(nevents[i]), ntrials, i))

# Plot poisson distributions of nevents with expectations from integrals
expect = runlist_inj.best_estimator_integral(t, trange)
colors = ["C0", "C1", "C3"]
# One unit-width bin per count 0, ..., nmax. The edges must reach nmax + 1,
# otherwise the largest counts are merged or dropped
nmax = np.amax(nevents)
count_bins = np.arange(nmax + 2)
x = np.arange(nmax + 1)
for i in range(nsrcs):
    # `density=True` replaces the `normed` keyword removed from matplotlib
    _ = plt.hist(nevents[i], bins=count_bins, density=True,
                 color=colors[i], alpha=.25)
    plt.axvline(expect[i], 0, 1, ls="--", lw=2, label="mu src {}".format(i),
                color=colors[i])
    y = scs.poisson.pmf(x, mu=expect[i])
    _ = plt.plot(x, y, lw=2, drawstyle="steps-post", color=colors[i])

plt.legend()
plt.show()

Now we want to look at the actual sampled times in each trial.
First we sample a single source in a small time frame.
The times should be approximately uniformly distributed, i.e. not distinguishable by eye from a constant PDF, because the sine is way too broad to be resolved on such a small time scale.
We also show the bg and signal pdf for comparison.

In [None]:
# First the small time frame
# Arbitrary start date from data. Drawn from the seeded `rndgen` like all
# other random choices, not from the unseeded global numpy generator
nsrcs = 1
t0 = rndgen.choice(start_mjd, size=nsrcs)
t0_sec = t0 * secinday

# dt from t0 in seconds, clip at 4 sigma
dt = 200
nsig = 4.

# Time window in seconds relative to t0: uniform part [0, dt] plus the
# gaussian tails of nsig * sigma on each side, sigma constrained to [2, 30]s
clip = np.clip(dt, 2, 30) * nsig
trange = np.array([-clip, dt + clip]).reshape(nsrcs, 2)
ntrials = int(1e4)

# Sample times for each trial and flatten to single array with all trials
trials = []
for i in range(ntrials):
    trials += runlist_inj.sample(t0, trange, poisson=True)
trials = flatten_list_of_1darrays(trials)

# Plot them in together with the PDFs
def time_bg_pdf(t, t0, a, b):
    """
    Uniform background time PDF on the window ``[a, b]`` around ``t0``.

    Parameters
    ----------
    t : array-like
        Times in MJD days at which the PDF is evaluated.
    t0 : float or array-like
        Reference time in MJD days, must broadcast against ``t``.
    a, b : float
        Lower and upper window edge in seconds relative to ``t0``.

    Returns
    -------
    pdf : ndarray of float
        ``1 / (b - a)`` inside the window (edges included), zero outside.
    """
    # Normalize relative to t0 in seconds (first multiply avoids rounding?)
    _t = t * secinday - t0 * secinday

    # Builtin `float` replaces the `np.float` alias removed from NumPy
    pdf = np.zeros_like(_t, dtype=float)
    uni = (_t >= a) & (_t <= b)
    pdf[uni] = 1. / (b - a)
    return pdf

def time_sig_pdf(t, t0, dt, nsig=4):
    """
    Signal time PDF: uniform on ``[0, dt]`` with gaussian edges.

    The PDF is flat between ``t0`` and ``t0 + dt`` and falls off on both
    sides like a gaussian with width ``sig_t = clip(dt, 2, 30)`` seconds. The
    gaussian tails are cut at ``nsig * sig_t`` and the whole function is
    normalized to unit area.

    Parameters
    ----------
    t : array-like
        Times in MJD days at which the PDF is evaluated.
    t0 : float or array-like
        Reference time in MJD days, must broadcast against ``t``.
    dt : float
        Length of the uniform part in seconds, must not be negative.
    nsig : float, optional
        Truncation of the gaussian tails in units of ``sig_t``. (default: 4)

    Returns
    -------
    pdf : ndarray of float
        PDF values per second, zero outside the truncated window.

    Raises
    ------
    ValueError
        If ``dt`` is negative.
    """
    if dt < 0:
        raise ValueError("dt must not be negative.")

    # Normalize relative to t0 in seconds (first multiply avoids rounding?)
    _t = t * secinday - t0 * secinday

    # Constrain sig_t to [2, 30]s regardless of uniform time window
    sig_t = np.clip(dt, 2, 30)
    sig_t_clip = nsig * sig_t
    gaus_norm = (np.sqrt(2 * np.pi) * sig_t)

    # Split in def regions gaus rising, uniform, gaus falling and zero
    gr = (_t < 0) & (_t >= -sig_t_clip)
    gf = (_t > dt) & (_t <= dt + sig_t_clip)
    uni = (_t >= 0) & (_t <= dt)

    # Builtin `float` replaces the `np.float` alias removed from NumPy.
    # Allocate with the shape of `_t`, which is what the masks index
    pdf = np.zeros_like(_t, dtype=float)
    pdf[gr] = scs.norm.pdf(_t[gr], loc=0, scale=sig_t)
    pdf[gf] = scs.norm.pdf(_t[gf], loc=dt, scale=sig_t)
    # Connect smoothly with the gaussians (value of the gaussian peak)
    pdf[uni] = 1. / gaus_norm

    # Normalize whole distribution: area of both truncated gaussian halves
    # plus the area of the uniform part
    dcdf = (scs.norm.cdf(dt + sig_t_clip, loc=dt, scale=sig_t) -
            scs.norm.cdf(-sig_t_clip, loc=0., scale=sig_t))
    norm = dcdf + dt / gaus_norm

    return pdf / norm


# Plot the pdfs on a grid of MJD times spanning the sampled time window
t = np.linspace(t0_sec + trange[:, 0], t0_sec + trange[:, 1], 200) / secinday
bg_pdf = time_bg_pdf(t, t0, -clip, dt + clip)
sig_pdf = time_sig_pdf(t, t0, dt, nsig)

# Plot in normalized time
_t = t * secinday - t0 * secinday
plt.plot(_t, bg_pdf, "C0-")
plt.plot(_t, sig_pdf, "C1-")
# Mark the start (0) and the end (dt) of the uniform signal window
plt.axvline(dt, 0, 1, color="C3", ls="--")
plt.axvline(0, 0, 1, color="C2", ls="--")

# Plot injected events from all trials, relative times.
# `density=True` replaces the `normed` keyword removed from matplotlib
times = (trials - t0) * secinday
_ = plt.hist(times, bins=50, density=True, color=dg, alpha=.25)

plt.xlabel("Time relative to t0 in sec")
plt.ylim(0, None);
plt.tight_layout()

# plt.savefig("./data/figs/bg_events_time_sampled_narrow.png", dpi=200)

plt.show()

In [None]:
# Now the really large time frame, over the whole time range
t0 = start_mjd[0]
t0_sec = t0 * secinday

# Maximum dt over all runs
dt = (stop_mjd[-1] - start_mjd[0]) * secinday
nsig = 4.

# Time window in seconds relative to t0, widened by nsig * sigma on each
# side with sigma constrained to [2, 30]s
clip = np.clip(dt, 2, 30) * nsig
trange = [-clip, dt + clip]
ntrials = 100  # More trials mean smaller errors, better see the sinus shape 

# Sample times
trials = []
for i in range(ntrials):
    trials += runlist_inj.sample(t0, trange, poisson=True)
trials = flatten_list_of_1darrays(trials)

# We choose the same style as in the initial rate plot further above
h, b = np.histogram(trials, bins=1081)
m = get_binmids([b])[0]
# Counts -> rate per second per trial: divide by bin width in seconds and by
# the number of trials. Errors are sqrt(counts) scaled the same way
scale = np.diff(b) * secinday * ntrials
yerr = np.sqrt(h) / scale
h = h / scale

plt.errorbar(m, h, yerr=yerr, fmt=",")

# Plot normalized rate function to compare
t = np.linspace(start_mjd[0], stop_mjd[-1], 100)
r = runlist_inj.best_estimator(t)
plt.plot(t, r, lw=2, zorder=5)
# Dashed line at the third best fit parameter (labelled "base" further above)
plt.axhline(runlist_inj.best_pars[2], 0, 1, color="C1",
            ls="--", label="", zorder=5)

plt.xlim(start_mjd[0], stop_mjd[-1])
plt.ylim(0.004, 0.006)
plt.tight_layout()

# plt.savefig("./data/figs/bg_events_time_sampled_wide.png", dpi=200)

plt.show()

### Injector from binned rates

Here we use the sampled rate from above to create a new injector.
Everything else is staying the same.
So we just reproduce the last plot above.
Also see the note about the livetime further below.

Make up ten bins with a total live time of 100 days, but the difference between first and last event time is 365 days

In [None]:
# Time span in days between the first and the last run start
dt = 365.
# NOTE: from here on `start_mjd` / `stop_mjd` are scalars, no longer the
# per-run arrays used in the cells above
start_mjd = np.amin(_exp["timeMJD"])
stop_mjd = start_mjd + dt

nruns = 10
# Equally spaced run start times, first at `start_mjd`, last at `stop_mjd`
tbins_start = np.linspace(start_mjd, stop_mjd, nruns)
# Runs are 10 days long
tbins_stop = tbins_start + 10.

# Make approximately the same rates as in the real sample: a sine with a
# period of `dt` days evaluated at the run centers
mids = 0.5 * (tbins_start + tbins_stop)
rate = -0.0005 * np.sin(2 * np.pi / dt * (mids - start_mjd)) + 0.005

In [None]:
# Create a rate function. We fix the period to 1 year here
rndgen = np.random.RandomState(7353)
rate_func_obj = RateFunc.Sinus1yrRateFunction(random_state=rndgen)

binned_inj = BGRateInj.BinnedBGRateInjector(rate_func_obj)

# Fit function to the made-up binned rates from the cell above
# tbins holds one [start_mjd, stop_mjd] row per run, shape = (nruns, 2)
tbins = np.vstack((tbins_start, tbins_stop)).T

rate_func = binned_inj.fit(tbins, rate, x0=None)

# Livetime should be 100 days by construction
print("Livetime is: {:.2f} days".format(binned_inj.livetime))

In [None]:
# Time window in seconds relative to `start_mjd`, covering the whole span
trange = [0, (stop_mjd - start_mjd) * secinday]
ntrials = 10

trials = []
for i in range(ntrials):
    trials += binned_inj.sample(start_mjd, trange, poisson=True)
trials = flatten_list_of_1darrays(trials)

# We choose the same style as in the initial rate plot further above
h, b = np.histogram(trials, bins=1081)
m = get_binmids([b])[0]
# Counts -> rate per second per trial: divide by bin width in seconds and by
# the number of trials. Errors are sqrt(counts) scaled the same way
scale = np.diff(b) * secinday * ntrials
yerr = np.sqrt(h) / scale
h = h / scale

plt.errorbar(m, h, yerr=yerr, fmt=",")

# Plot normalized rate function to compare
t = np.linspace(start_mjd, stop_mjd, 100)
r = binned_inj.best_estimator(t)
plt.plot(t, r, lw=2, zorder=5)
plt.axhline(binned_inj.best_pars[2], 0, 1, color="C1",
            ls="--", label="", zorder=5)

# Show the input runs. errorbar interprets xerr as the distance from the
# center, so use half the run length to span exactly one run
plt.errorbar(mids, rate, xerr=0.5 * (tbins_stop - tbins_start), color="w",
             fmt=",", zorder=5, lw=3)

plt.xlim(start_mjd, stop_mjd)
plt.ylim(0.004, 0.006)
plt.tight_layout()

# plt.savefig("./data/figs/bg_events_time_sampled_wide_100days.png", dpi=200)

plt.show()

In [None]:
# First the small time frame. Here the start date is the (scalar) start of
# the made-up runs
nsrcs = 1
t0 = start_mjd
t0_sec = t0 * secinday

# dt from t0 in seconds, clip at 4 sigma
dt = 200
nsig = 4.

# Time window in seconds relative to t0: uniform part [0, dt] plus
# nsig * sigma on each side, sigma constrained to [2, 30]s
clip = np.clip(dt, 2, 30) * nsig
trange = np.array([-clip, dt + clip]).reshape(nsrcs, 2)
ntrials = int(1e4)

# Sample times for each trial and flatten to single array with all trials
trials = []
for i in range(ntrials):
    trials += binned_inj.sample(t0, trange, poisson=True)
trials = flatten_list_of_1darrays(trials)

# Plot them in together with the PDFs
def time_bg_pdf(t, t0, a, b):
    """
    Uniform background time PDF on the window ``[a, b]`` around ``t0``.

    Parameters
    ----------
    t : array-like
        Times in MJD days at which the PDF is evaluated.
    t0 : float or array-like
        Reference time in MJD days, must broadcast against ``t``.
    a, b : float
        Lower and upper window edge in seconds relative to ``t0``.

    Returns
    -------
    pdf : ndarray of float
        ``1 / (b - a)`` inside the window (edges included), zero outside.
    """
    # Normalize relative to t0 in seconds (first multiply avoids rounding?)
    _t = t * secinday - t0 * secinday

    # Builtin `float` replaces the `np.float` alias removed from NumPy
    pdf = np.zeros_like(_t, dtype=float)
    uni = (_t >= a) & (_t <= b)
    pdf[uni] = 1. / (b - a)
    return pdf

def time_sig_pdf(t, t0, dt, nsig=4):
    """
    Signal time PDF: uniform on ``[0, dt]`` with gaussian edges.

    The PDF is flat between ``t0`` and ``t0 + dt`` and falls off on both
    sides like a gaussian with width ``sig_t = clip(dt, 2, 30)`` seconds. The
    gaussian tails are cut at ``nsig * sig_t`` and the whole function is
    normalized to unit area.

    Parameters
    ----------
    t : array-like
        Times in MJD days at which the PDF is evaluated.
    t0 : float or array-like
        Reference time in MJD days, must broadcast against ``t``.
    dt : float
        Length of the uniform part in seconds, must not be negative.
    nsig : float, optional
        Truncation of the gaussian tails in units of ``sig_t``. (default: 4)

    Returns
    -------
    pdf : ndarray of float
        PDF values per second, zero outside the truncated window.

    Raises
    ------
    ValueError
        If ``dt`` is negative.
    """
    if dt < 0:
        raise ValueError("dt must not be negative.")

    # Normalize relative to t0 in seconds (first multiply avoids rounding?)
    _t = t * secinday - t0 * secinday

    # Constrain sig_t to [2, 30]s regardless of uniform time window
    sig_t = np.clip(dt, 2, 30)
    sig_t_clip = nsig * sig_t
    gaus_norm = (np.sqrt(2 * np.pi) * sig_t)

    # Split in def regions gaus rising, uniform, gaus falling and zero
    gr = (_t < 0) & (_t >= -sig_t_clip)
    gf = (_t > dt) & (_t <= dt + sig_t_clip)
    uni = (_t >= 0) & (_t <= dt)

    # Builtin `float` replaces the `np.float` alias removed from NumPy.
    # Allocate with the shape of `_t`, which is what the masks index
    pdf = np.zeros_like(_t, dtype=float)
    pdf[gr] = scs.norm.pdf(_t[gr], loc=0, scale=sig_t)
    pdf[gf] = scs.norm.pdf(_t[gf], loc=dt, scale=sig_t)
    # Connect smoothly with the gaussians (value of the gaussian peak)
    pdf[uni] = 1. / gaus_norm

    # Normalize whole distribution: area of both truncated gaussian halves
    # plus the area of the uniform part
    dcdf = (scs.norm.cdf(dt + sig_t_clip, loc=dt, scale=sig_t) -
            scs.norm.cdf(-sig_t_clip, loc=0., scale=sig_t))
    norm = dcdf + dt / gaus_norm

    return pdf / norm


# Plot the pdfs on a grid of MJD times spanning the sampled time window
t = np.linspace(t0_sec + trange[:, 0], t0_sec + trange[:, 1], 200) / secinday
bg_pdf = time_bg_pdf(t, t0, -clip, dt + clip)
sig_pdf = time_sig_pdf(t, t0, dt, nsig)

# Plot in normalized time
_t = t * secinday - t0 * secinday
plt.plot(_t, bg_pdf, "C0-")
plt.plot(_t, sig_pdf, "C1-")
# Mark the start (0) and the end (dt) of the uniform signal window
plt.axvline(dt, 0, 1, color="C3", ls="--")
plt.axvline(0, 0, 1, color="C2", ls="--")

# Plot injected events from all trials, relative times.
# `density=True` replaces the `normed` keyword removed from matplotlib
times = (trials - t0) * secinday
_ = plt.hist(times, bins=50, density=True, color=dg, alpha=.25)

plt.xlabel("Time relative to t0 in sec")
plt.ylim(0, None);
plt.tight_layout()

# plt.savefig("./data/figs/bg_events_time_sampled_narrow.png", dpi=200)

plt.show()

## Utils -- rejection_sampler

Test the utils.py rejection sampler.
We generate trials for multiple sources at once and check how fast this is.
Currently the method just loops over sources, rejection sampling for each interval, but it seems fast enough.

A short note on the test below:
We make nsrcs, each with ordered center times and increasing time windows.
Then we sample an increasing number of samples per source.
This is the same as a single trial.

In the histogram we expect 2 things:

1. If the time windows are smaller than 1 day, which is the bin size, then we just get a nice linearly increasing bin content (triangle shaped, with hard cut at the right edge).
2. If the time windows increase, we get spillover resulting in a way more spread distribution. Also the time windows are only widened to the right, so the spillover occurs only to the right edges. When the time windows are really large, we even begin to see the underlying oscillation of the sinusoidal test function we generate samples from.

In [None]:
def sample_test_sin(t):
    """
    Sine shaped test rate, similar to the fitted rate function.

    Oscillates with a period of 365 around 0.005 with an amplitude of 0.001
    and has its zero phase at ``t = 50000``.

    Parameters
    ----------
    t : float or array-like
        Times at which the function is evaluated.

    Returns
    -------
    float or ndarray
        Function values, same shape as ``t``.
    """
    phase = 2 * np.pi / 365. * (t - 50000)
    return 0.001 * np.sin(phase) + 0.005

rndgen = np.random.RandomState(7353)

# Make some srcs and increasing time windows
nsrcs = 100
# One source per day, starting at MJD 50000
t = np.arange(0, nsrcs) + 50000
scaler = 2 * secinday  # Time window scaler: Increase to see spillover
# Windows in seconds relative to each source: [0, scaler * (i + 1)]
dts = np.vstack((np.zeros(nsrcs), scaler * np.arange(1, nsrcs + 1))).T
# Convert to absolute [start, stop] in MJD, one row per source
dts = t.reshape(nsrcs, 1) + dts / secinday

# Sample increasing number of events in time windows
# NOTE: this overwrites the global `n_samples` and the data array `sample`
# defined near the top of the notebook
n_samples = 100 * np.arange(1, nsrcs + 1)
sample = rejection_sampling(sample_test_sin, dts,
                            n_samples=n_samples, rndgen=rndgen)

flatsam = flatten_list_of_1darrays(sample)

# If the time windows are larger than one day (the binning) we get spillover
_ = plt.hist(flatsam, bins=nsrcs)
plt.show()

In [None]:
# Let's look at the distribution in the largest time window.
# It should be sinusoidal.
# `density=True` replaces the `normed` keyword removed from matplotlib
_ = plt.hist(sample[-1], bins=20, density=True)
# Plot sampled function as comparison, normalized to unit area in the window
t = np.linspace(dts[-1, 0], dts[-1, 1], 100)
intgrl = scint.quad(sample_test_sin, dts[-1, 0], dts[-1, 1])[0]
y = sample_test_sin(t) / intgrl
plt.plot(t, y)
plt.show()

In [None]:
%%timeit
# Quickly check how fast we are. Set nsrcs to 100 above for many srcs
# Baseline timing: no precomputed function maxima are passed here
rejection_sampling(sample_test_sin, dts, n_samples=n_samples, rndgen=rndgen)

Cache fmax values and measure the time again. We should get significantly faster because we don't have to find the maximum in each step.

**Attention:** Be sure that the values match with the used time windows. If they're off or not matching we either don't produce the correct distribution or are getting very inefficient because we start to reject almost everything.

In [None]:
def negpdf(t):
    """Negated test function, so a minimizer finds the function maximum."""
    return -sample_test_sin(t)

rndgen = np.random.RandomState(7353)

# Per time window: `f0` is the seed value from a coarse scan, `fmax` the
# maximum found by the minimizer
f0 = []
fmax = []
for bound in dts:
    # Repeat some code from func_min_in_interval to get the seed
    # (scan 5 inner points of the window, take the one with the largest value)
    x_scan = np.linspace(bound[0], bound[1], 7)[1:-1]
    max_idx = np.argmin(negpdf(x_scan))
    x0 = x_scan[max_idx]
    f0.append(-1 * negpdf(x0))

    # Minimum of the negated function, sign flipped back to get the maximum
    fmax.append(-1. * func_min_in_interval(negpdf, bound))

fmax = flatten_list_of_1darrays(fmax)

# Time windows get so large, that the maximum is always in the windows
start_dts = dts[:, 0].flatten()
plt.plot(start_dts, sample_test_sin(start_dts), label="fun")
plt.plot(start_dts, fmax, label="fit")
plt.plot(start_dts, f0, label="seed")
plt.legend(loc="lower right")
plt.show()

In [None]:
%%timeit
# Should be faster than before
# Same call as the baseline timing, but with the cached maxima `fmax`
rejection_sampling(sample_test_sin, dts, n_samples=n_samples,
                   rndgen=rndgen, max_fvals=fmax)

## Utils -- rotator

Test correct conservation of circles

In [None]:
"""
Here do the following:

1. Target point is (ra2, dec2)
2. Points (ra3, dec3) to be rotated are random and the same as the ones
   (ra1, dec1) defining the rotation angles.
3. After rotation they should be all exactly at (ra2, dec2)
"""
plt.figure(figsize=(12, 8))

# Fresh seeded generator: the %%timeit cells above advance `rndgen` by an
# unknown number of draws, and the unseeded global numpy generator would make
# the test points differ on every run
rndgen = np.random.RandomState(7353)

npts = 10000
# Coordinates to rotate to
ra2 = np.repeat(np.deg2rad(250.), npts)
dec2 = np.repeat(np.deg2rad(30.), npts)

# Positions that shall be rotated to (ra2, dec2)
ra3 = rndgen.uniform(0, 2 * np.pi, npts)
# Get more at the poles, else use: np.arcsin(rndgen.uniform(-1, 1, npts))
dec3 = rndgen.uniform(-np.pi / 2., np.pi / 2., npts)
# Add some special cases for dec3, as it is not periodical
dec3_spec = np.deg2rad([-90, -60, -30, 0, 30, 60, 90])
ra3_spec = np.ones_like(dec3_spec) * 2 * np.pi
nspec = len(dec3_spec)
# The special cases replace the last `nspec` random points
dec3[-nspec:] = dec3_spec
ra3[-nspec:] = ra3_spec

# Here the point defining the rotation angles dra, dtheta are the same as the
# point that get rotated, so they end up all on (ra2, dec2)
ra1 = ra3
dec1 = dec3

# Rotate
ra3t, dec3t = rotator(ra1, dec1, ra2, dec2, ra3, dec3)

# Plot original points, highlight special cases
plt.plot(ra3, dec3, "C0.")
plt.plot(ra3[-nspec:], dec3[-nspec:], "kx", ms=10)

# Plot intermediate steps (only one coordinate transformed)
plt.plot(ra3t, dec3, "C2.")
plt.plot(ra3, dec3t, "C3.")

# Plot fixed target point (ra2, dec2)
plt.plot(ra2, dec2, "wo", ms=10, mec="k")

# Plot all transformed
plt.plot(ra3t, dec3t, "C4x")
plt.plot(ra3t[-nspec:], dec3t[-nspec:], "k+")

# Check if we ended up in target point
print("All points where they should be: ", np.allclose(ra3t, ra2))
print("All points where they should be: ", np.allclose(dec3t, dec2))

# ra guides
plt.axvline(0, 0, 1, color="#353132", ls="--")
plt.axvline(np.pi, 0, 1, color="#353132", ls="-")
plt.axvline(2 * np.pi, 0, 1, color="#353132", ls="--")
# dec guides
plt.axhline(-np.pi / 2., 0, 1, color="#353132", ls="--")
plt.axhline(0, 0, 1, color="#353132", ls="-")
plt.axhline(np.pi / 2., 0, 1, color="#353132", ls="--")

plt.xlabel("ra")
plt.ylabel("dec")
plt.show()

In [None]:
"""
Here do the following:

1. Target points are ra2, dec2
2. Initial point is a circle center at ra1, dec1
3. Points to be rotated are (ra3, dec3) in a circle around (ra1, dec1)
4. After rotation they should be in a circle around (ra2, dec2)

Test with multiple targets (ra2, dec2)
"""
# Fixed coordinates to rotate to. Try 5 different places
npts = 20
ntarget = 5

ra2_all = np.deg2rad(np.linspace(5, 355, ntarget))
dec2_all = np.deg2rad(np.linspace(-85, 85, ntarget))

# Center of the fixed initial circle
radius = np.deg2rad(10)
ra1 = np.repeat(np.deg2rad(90), npts)
dec1 = np.repeat(np.deg2rad(30), npts)
theta1 = np.pi / 2. - dec1

# Plot circle dots with sequential cmap to see correct order. Two extra
# colors are requested and the first and last one are dropped
cmap = plt.cm.get_cmap("viridis", npts + 2)
colors = cmap.colors[1:-1]

# Fixed initial center point. This is the only figure of this cell; an
# additional bare `plt.figure` call would just show an empty extra figure
fig, ax = plt.subplots(1, 1, figsize=(12, 8))
ax.plot(ra1, dec1, "C2o", label="orig")

# Fixed positions around (ra1, dec1) that shall be rotated around (ra2, dec2)
t = np.linspace(0, 2 * np.pi, npts)
ra3 = radius * np.cos(t) + ra1
dec3 = radius * np.sin(t) + dec1

for i in range(npts):
    ax.plot(ra3[i], dec3[i], marker=".", ls="", color=colors[i])


# Now rotate to various places
for ra2, dec2 in zip(ra2_all, dec2_all):
    # Rotate
    ra3t, dec3t = rotator(ra1, dec1,
                          np.repeat([ra2], npts), np.repeat([dec2], npts),
                          ra3, dec3)

    # Target point and an arrow from the initial center to it
    ax.plot(ra2, dec2, "C1o")
    ax.arrow(ra1[0], dec1[0], (ra2 - ra1[0]), (dec2 - dec1[0]),
             fc="C7", ec="C7", length_includes_head=True,
             head_width=0.05, head_length=0.1)

    for i in range(npts):
        ax.plot(ra3t[i], dec3t[i], marker=".", ls="", color=colors[i])

# ra guides
plt.axvline(0, 0, 1, color="C7", ls="--")
plt.axvline(np.pi, 0, 1, color="C7", ls="-")
plt.axvline(2 * np.pi, 0, 1, color="C7", ls="--")
# dec guides
plt.axhline(-np.pi / 2., 0, 1, color="C7", ls="--")
plt.axhline(0, 0, 1, color="C7", ls="-")
plt.axhline(np.pi / 2., 0, 1, color="C7", ls="--")

plt.xlabel("ra")
plt.ylabel("dec")
plt.show()

In [None]:
"""
Here do the following:

1. Target point is ra2, dec2
2. Initial point is ra1, dec1
3. Points to be rotated are in a circle around (ra1, dec1)
4. After rotation they should be in a circle around (ra2, dec2)

Test with multiple targets (ra2, dec2)
"""

# Fixed coordinates to rotate to. Try 5 different places
ntarget = 5
npts = 20

ra2_all = np.deg2rad(np.linspace(5, 355, ntarget))
dec2_all = np.deg2rad(np.linspace(-85, 85, ntarget))

# Center of the initial circle
radius = np.deg2rad(15)
ra1 = np.repeat(np.deg2rad(90), npts)
dec1 = np.repeat(np.deg2rad(30), npts)

# Plot circle dots with sequential cmap to see correct order
cmap = plt.cm.get_cmap("viridis", npts)
colors = cmap.colors
sm = amp_plt.skymap()
fig, ax = sm.figure(tex=False)

# Fixed initial center point, converted to the map's plot coordinates
x1, y1 = amp_plt.EquCoordsToMapCoords(ra1, dec1)
ax.plot(x1, y1, "C2o", label="orig")

# Fixed positions around (ra1, dec1) that shall be rotated to (ra2, dec2)
t = np.linspace(0, 2 * np.pi, npts)
ra3 = radius * np.cos(t) + ra1
dec3 = radius * np.sin(t) + dec1
x3, y3 = amp_plt.EquCoordsToMapCoords(ra3, dec3)

for i in range(npts):
    ax.plot(x3[i], y3[i], marker=".", ls="", color=colors[i])

# Now rotate to various places
for ra2, dec2 in zip(ra2_all, dec2_all):
    ra3t, dec3t = rotator(ra1, dec1,
                          np.repeat([ra2], npts), np.repeat([dec2], npts),
                          ra3, dec3)

    # Target point in map coordinates
    x2, y2 = amp_plt.EquCoordsToMapCoords(ra2, dec2)
    ax.plot(x2, y2, "C1o")

    # Arrow from the initial center to the target point
    ax.arrow(x1[0], y1[0], (x2 - x1)[0], (y2 - y1)[0], fc="C7", ec="C7",
             length_includes_head=True, head_width=0.05, head_length=0.1)

    x3t, y3t = amp_plt.EquCoordsToMapCoords(ra3t, dec3t)

    for i in range(npts):
        ax.plot(x3t[i], y3t[i], marker=".", ls="", color=colors[i])

ax.legend(loc="upper right")
plt.tight_layout()
plt.show()

## BG Rate Function

Test if fit, sample and integral work, with a simple example.
First for only a single source.

### SinusRateFunction

In [None]:
# Define parameters for the test function
period_days = 300.
b = 2 * np.pi / period_days  # 2 pi / period, in 1 / days
c = 0  # t-Offset in MJD
d = 1  # Rate offset in Hz = 1 evt/sec is average -> 86400 evts/day
a = d / 2.  # Amplitude in Hz = +- 0.5 evts / per second
pars = np.array([a, b, c, d])

sinfun = RateFunc.SinusRateFunction()

# Plot function over one full period, starting at the time offset
t0, t1 = c, c + period_days
t = np.linspace(t0, t1, 200)  # In MJD days
y = sinfun.fun(t, pars)

_ = plt.plot(t, y, lw=2, label="fun")

# Plot integral. The time window is given in seconds relative to `t0`
# (assumed convention, matching the [0, dt] * secinday windows used for the
# small-window sampling test in this notebook), so this also stays correct
# for a non-zero offset `c`.
intgrl = np.zeros_like(t)
for i, ti in enumerate(t):
    intgrl[i] = sinfun.integral(t=t0, trange=[0., (ti - t0) * secinday],
                                pars=pars)

# Scale integral, we expect 24*3600=86400 evts/day * (period_days days)
print("Expect   : ", secinday * (t1 - t0))
print("Integral : ", intgrl[-1])
_ = plt.plot(t, intgrl / 1e7, lw=2, label="integral/1e7")

# Sample from whole range and scale normed hist with time scale to match rate
nsam = [int(1e4),]
trange = np.array([[0., t1 - t0],]) * secinday
sam = sinfun.sample(t=t0, trange=trange,
                    pars=pars, n_samples=nsam)
# Note: `b` is rebound here from the frequency parameter to the bin edges.
# `pars` was already built above, so the function parameters are unaffected.
h, b = np.histogram(sam, range=[t0, t1], bins=50, density=True)
m = get_binmids([b])[0]
_ = plt.hist(m, bins=b, weights=h * (t1 - t0), color="C0",
             alpha=0.5, label="sampled")

# Finally fit the sampled points again
runtime = (t1 - t0)
p0 = None  # Test default args
bf_pars = sinfun.fit(t=m, rate=h * (t1 - t0), rate_std=None, p0=p0)
yfit = sinfun.fun(t, bf_pars)
_ = plt.plot(t, yfit, lw=2, color="C3", ls="--", label="fitted")
# Also show the seed the fit starts from when no `p0` is given
p0 = sinfun._get_default_seed(t=m, rate=h * (t1 - t0),
                              rate_std=np.ones_like(m))
yseed = sinfun.fun(t, p0)
_ = plt.plot(t, yseed, lw=2, color="C3", ls="-", alpha=0.3,
             label="default seed")

plt.xlabel("time in MJD")
plt.ylabel("rate in Hz")
_ = plt.ylim(0, None)
plt.legend()
plt.tight_layout()

# plt.savefig("data/figs/rate_function_sinus.png", dpi=200)

plt.show()

### Sinus1yrRateFunction

The same as above, but now with fixed period of 1 year.

In [None]:
# Define parameters for the test function
period_days = 365.25
c = 0  # t-Offset in MJD
d = 1  # Rate offset in Hz = 1 evt/sec is average -> 86400 evts/day
a = d / 2.  # Amplitude in Hz = +- 0.5 evts / per second
pars = np.array([a, c, d])

sinfun = RateFunc.Sinus1yrRateFunction()

# Plot function over one full period, starting at the time offset
t0, t1 = c, c + period_days
t = np.linspace(t0, t1, 200)  # In MJD days
y = sinfun.fun(t, pars)

_ = plt.plot(t, y, lw=2, label="fun")

# Plot integral. The time window is given in seconds relative to `t0`
# (assumed convention, matching the [0, dt] * secinday windows used for the
# small-window sampling test in this notebook), so this also stays correct
# for a non-zero offset `c`.
intgrl = np.zeros_like(t)
for i, ti in enumerate(t):
    intgrl[i] = sinfun.integral(t=t0, trange=[0., (ti - t0) * secinday],
                                pars=pars)

# Scale integral, we expect 24*3600=86400 evts/day * (period_days days)
print("Expect   : ", secinday * (t1 - t0))
print("Integral : ", intgrl[-1])
_ = plt.plot(t, intgrl / 1e7, lw=2, label="integral/1e7")

# Sample from whole range and scale normed hist with time scale to match rate
nsam = int(1e4)
sam = sinfun.sample(t=t0, trange=[0., (t1 - t0) * secinday], pars=pars,
                    n_samples=nsam)
h, b = np.histogram(sam, range=[t0, t1], bins=50, density=True)
m = get_binmids([b])[0]
_ = plt.hist(m, bins=b, weights=h * (t1 - t0), color="C0",
             alpha=0.5, label="sampled")

# Finally fit the sampled points again
runtime = (t1 - t0)
p0 = None  # Test default args
bf_pars = sinfun.fit(t=m, rate=h * (t1 - t0), rate_std=None, p0=p0)
yfit = sinfun.fun(t, bf_pars)
_ = plt.plot(t, yfit, lw=2, color="C3", ls="--", label="fitted")
# Also show the seed the fit starts from when no `p0` is given
p0 = sinfun._get_default_seed(t=m, rate=h * (t1 - t0),
                              rate_std=np.ones_like(m))
yseed = sinfun.fun(t, p0)
_ = plt.plot(t, yseed, lw=2, color="C3", ls="-", alpha=0.3,
             label="default seed")

plt.xlabel("time in MJD")
plt.ylabel("rate in Hz")
_ = plt.ylim(0, None)
plt.legend()
plt.tight_layout()

# plt.savefig("data/figs/rate_function_sinus1yr.png", dpi=200)

plt.show()

### ConstantRateFunction

Use constant rate function but leave the sinus to see how the fit behaves.
Otherwise it would be boring to just see 3 flat lines over another.

In [None]:
# Define sinus parameters
period_days = 365.25
c = 0  # t-Offset in MJD
d = 1  # Rate offset in Hz = 1 evt/sec is average -> 86400 evts/day
a = d / 2.  # Amplitude in Hz = +- 0.5 evts / per second
sinpars = np.array([a, c, d])

# Same for the constant function.
constpars = (d,)

sinfun = RateFunc.Sinus1yrRateFunction()
constfun = RateFunc.ConstantRateFunction()

# Plot sinus and constant function over one full period
t0, t1 = c, c + period_days
t = np.linspace(t0, t1, 200)  # In MJD days
y = sinfun.fun(t, sinpars)
yc = constfun.fun(t, constpars)

_ = plt.plot(t, y, color="C0", lw=2, ls="--")
_ = plt.plot(t, yc, lw=2, label="fun")

# Plot integral of the constant function. The time window is given in
# seconds relative to `t0` (assumed convention, matching the
# [0, dt] * secinday windows used for the small-window sampling test in this
# notebook), so this also stays correct for a non-zero offset `c`.
intgrl = np.zeros_like(t)
for i, ti in enumerate(t):
    intgrl[i] = constfun.integral(t=t0, trange=[0., (ti - t0) * secinday],
                                  pars=constpars)

# Scale integral, we expect 24*3600=86400 evts/day * (period_days days)
print("Expect   : ", secinday * (t1 - t0))
print("Integral : ", intgrl[-1])
_ = plt.plot(t, intgrl / 1e7, lw=2, label="integral/1e7")

# Sample from whole range and scale normed hist with time scale to match rate.
# The samples are deliberately drawn from the sinus function, so the constant
# fit below has a non-trivial shape to average over.
nsam = int(1e4)
sam = sinfun.sample(t=t0, trange=[0., (t1 - t0) * secinday], pars=sinpars,
                    n_samples=nsam)
h, b = np.histogram(sam, range=[t0, t1], bins=50, density=True)
m = get_binmids([b])[0]
_ = plt.hist(m, bins=b, weights=h * (t1 - t0), color="C0",
             alpha=0.5, label="sampled")

# Finally fit the sampled points again, this time with the constant function
runtime = (t1 - t0)
p0 = None  # Test default args
bf_pars = constfun.fit(t=m, rate=h * (t1 - t0), rate_std=None, p0=p0)
yfit = constfun.fun(t, bf_pars)
_ = plt.plot(t, yfit, lw=2, color="C3", ls="--", label="fitted")
# Also show the seed the fit starts from when no `p0` is given
p0 = constfun._get_default_seed(t=m, rate=h * (t1 - t0),
                                rate_std=np.ones_like(m))
yseed = constfun.fun(t, p0)
_ = plt.plot(t, yseed, lw=2, color="C3", ls="-", alpha=0.3,
             label="default seed")

plt.xlabel("time in MJD")
plt.ylabel("rate in Hz")
_ = plt.ylim(0, None)
plt.legend()
plt.tight_layout()

# plt.savefig("data/figs/rate_function_const.png", dpi=200)

plt.show()

### Sinus fit with constant sampling

The fit is done with a sinus function to describe the bg rate properly.
But the sampling is just uniform in each time window to skip rejection sampling and to mimic older analysis.

So if we choose the windows small enough, then we basically cannot differentiate between this method and the true rejection sampling, because the sinus is pretty flat within each window.

If we increase the time windows we see, that we sample truly uniformly and do not follow the sinus shape anymore.

We leave the integral and the fit out this time, because they are all derived from the Sinus1yrRateFunction.

In [None]:
# Define sinus parameters
period_days = 365.25
c = 0  # t-Offset in MJD
d = 1  # Rate offset in Hz = 1 evt/sec is average -> 86400 evts/day
a = d / 2.  # Amplitude in Hz = +- 0.5 evts / per second
sinpars = np.array([a, c, d])

rndgen = np.random.RandomState(7353)
sinfun = RateFunc.Sinus1yrConstRateFunction(random_state=rndgen)

# Plot true sinus function defining the rate
t0, t1 = c, c + period_days
t = np.linspace(t0, t1, 200)  # In MJD days
y = sinfun.fun(t, sinpars)
_ = plt.plot(t, y, lw=2, label="fun")

# 1. Sample from whole range: See the uniform samples averaging the sine.
# The window is given in seconds relative to `t0`, in the same way as the
# small windows `dts` below, so it stays correct for a non-zero offset `c`.
nsam = int(1e4)
sam = sinfun.sample(t=t0, trange=[0., (t1 - t0) * secinday], pars=sinpars,
                    n_samples=[nsam])
h, b = np.histogram(sam, range=[t0, t1], bins=50, density=True)
m = get_binmids([b])[0]
_ = plt.hist(m, bins=b, weights=h * (t1 - t0), color="C0",
             alpha=0.5, label="full range")

# 2. Make finer sampling windows: See how the rate matches the sines better
t0s = np.linspace(t0, t1, 20)[:-1]
dts = np.repeat([[0., np.diff(t0s)[0]]], len(t0s), axis=0) * secinday
# Get the integral to see how many we must sample (as in bg_rate_injector)
expect = sinfun.integral(t0s, dts, sinpars)
expect = np.round(expect).astype(int)  # Keep it easy: No poisson sampling

fine_sam = sinfun.sample(t=t0s, trange=dts, pars=sinpars, n_samples=expect)
fine_sam = flatten_list_of_1darrays(fine_sam)
h, b = np.histogram(fine_sam, range=[t0, t1], bins=50, density=True)
m = get_binmids([b])[0]
# Use a second color, otherwise the two histograms can't be told apart
_ = plt.hist(m, bins=b, weights=h * (t1 - t0), color="C1",
             alpha=0.5, label="small windows")


plt.xlabel("time in MJD")
plt.ylabel("rate in Hz")
_ = plt.ylim(0, None)
plt.legend()
plt.tight_layout()

# plt.savefig("data/figs/rate_function_sinus_uniform_sample.png", dpi=200)

plt.show()

See how it gets faster with uniform sampling

In [None]:
def fun(t):
    """Evaluate the rate function `sinfun` at times `t` using `sinpars`."""
    rate = sinfun.fun(t, sinpars)
    return rate


# Single sampling window; the function value at its upper edge is handed to
# the rejection sampler as the maximum function value
bound = [[0, 10]]
fmax = fun(bound[0][-1])

In [None]:
%%timeit
# Baseline timing: draw 100 samples from `fun` on `bound` by rejection sampling
rejection_sampling(fun, bounds=bound, n_samples=100, max_fvals=fmax, rndgen=rndgen)

In [None]:
%%timeit
# Timing of the same request via the `sample` method of the rate function
sinfun.sample(0., bound, sinpars, n_samples=[100])

## LLH

Test the LLH module.

It contains all functions for a specific LLH we want to use in our analysis.
Currently GRBLLH is implemented.

In [None]:
# Binning in sin(dec) and in logE used by the PDFs below
sin_dec_bins = np.linspace(-1, 1, 50)

# Fixed logE range instead of the data / MC extrema:
#   min(np.amin(_exp["logE"]), np.amin(mc["logE"]))
#   max(np.amax(_exp["logE"]), np.amax(mc["logE"]))
min_logE, max_logE = 1, 10
logE_bins = np.linspace(min_logE, max_logE, 40)

# Settings for the three PDF parts, passed on to the LLH object
spatial_pdf_args = {
    "bins": sin_dec_bins,
    "k": 3,
    "kent": True,
}

energy_pdf_args = {
    "bins": [sin_dec_bins, logE_bins],
    "gamma": 2.,
    "fillval": "col",
    "interpol_log": False,
}

# Keep this key order: a later cell unpacks `time_pdf_args.values()`
time_pdf_args = {
    "nsig": 4.,
    "sigma_t_min": 2.,
    "sigma_t_max": 30.,
}

grbllh = LLH.GRBLLH(
    X=_exp,
    MC=mc,
    spatial_pdf_args=spatial_pdf_args,
    energy_pdf_args=energy_pdf_args,
    time_pdf_args=time_pdf_args,
)

### Time PDF Ratio

Reproduce the paper plot.

Note that we get the PDFs for all srcs at once.
Their times are just all the same here.

In [None]:
# Reproduce the paper plot: time S/B ratio for several time windows.
# Each window is [dt0, dt1] in seconds relative to the source time, the
# gaussian edges are clipped at `nsig` sigma.
dts = np.array([[-1, 5], [-5, 50], [-20, 200]])
nsrcs = len(dts)
nsig = 4.

# Arbitrary start date in MJD, shared by all sources so that they fit in
# a single plot
t0 = 50500.
t0_sec = t0 * secinday

# Plot range in seconds relative to t0: total extent of all windows, widened
# on both sides by the clipped extent times nsig
min_dt, max_dt = np.amin(dts), np.amax(dts)
dt_tot = max_dt - min_dt
clip = np.clip(dt_tot, 2, 30) * nsig
plt_range = np.array([min_dt - clip, max_dt + clip])

# Absolute times in MJD handed to the LLH, and the same times in seconds
# relative to t0 used as x-axis
npts = 1000
t = np.linspace(t0_sec + 1.2 * plt_range[0],
                t0_sec + 1.2 * plt_range[1],
                npts) / secinday
_t = t * secinday - t0 * secinday

# Mark t0 = 0 = rel. src time
plt.axvline(0, 0, 1, c="k", ls="--", lw=2, alpha=0.8)

# Ratios for all sources are obtained with a single call
SoB = grbllh._soverb_time(t=t, src_t=np.repeat([t0], nsrcs), dt=dts)
assert len(SoB) == nsrcs

# One curve per source, each with its own color and window label
colors = ["C0", "C3", "C2"]
for _sob, _col, _win in zip(SoB, colors, dts):
    _lbl = r"$T_\mathrm{{uni}}$: {:>3d}s, {:>3d}s".format(*_win)
    plt.plot(_t, _sob, lw=2, c=_col, label=_lbl)

# Make it look like the paper plot, but with slightly extended borders
plt.xlim(1.2 * plt_range)
plt.ylim(0, np.amax(SoB) * 1.05)
plt.xlabel("t - t0 in sec")
plt.ylabel("S / B")
plt.legend(loc="upper right")
plt.grid(ls="--", lw=1)

# plt.savefig("./data/figs/time_pdf_ratio.png", dpi=200)

plt.show()

Get the injection time window.
This is needed for the injector, so only events in regions with non-zero PDF are injected.

In [None]:
print("dts:\n", dts)
print("\nclip range:\n", np.hstack((dts[:, [0]] - clip, dts[:, [1]] + clip)))
print("\nsigma clips:\n", grbllh.time_pdf_def_range(np.repeat([t0], nsrcs),
                                                    dts))

In [None]:
# Compare manually: widen each time window on both sides by nsig times the
# window length, where the window length is clipped to [sig_min, sig_max]
dts = np.array([[-1, 5], [-5, 50], [-20, 200]], dtype=float)
# Look the settings up by key instead of relying on the dict value order
nsig = time_pdf_args["nsig"]
sig_min = time_pdf_args["sigma_t_min"]
sig_max = time_pdf_args["sigma_t_max"]
clip = np.clip(np.diff(dts, axis=1), sig_min, sig_max) * nsig
# `clip` has shape (nsrcs, 1), flatten to subtract / add per source
dts[:, 0] -= clip.ravel()
dts[:, 1] += clip.ravel()

dts

### Spatial background spline

This is the same technique as used in skylab, but with an extra step of adding the outermost bin edges to the spline gridpoints.
This way, the spline behaves reasonable at the edges and doesn't overshoot.

We could extend this by using the KDE integrated over every variable and then fitting a spline to that.
Or we could sample from the KDE and bin finely and fit a spline again.

For now we leave only the option to use data directly.
The spline fit is depending on the binning anyway.
Only the finely binned KDE version could resolve that issue.

In [None]:
# Evaluate the spline slightly beyond [-1, 1] to see how it behaves at the
# edges. The spline values are exponentiated before plotting.
sin_dec = np.linspace(-1.05, 1.05, 200)
y = np.exp(grbllh._spatial_bg_spl(sin_dec))
# `density=True` replaces the removed `normed=True` keyword
_ = plt.hist(np.sin(_exp["dec"]), bins=50, density=True)
plt.plot(sin_dec, y, lw=2)

### Spatial background pdf

Should be identical to calling the spline directly, except that the BG PDF is normalized to the whole sphere.
So we multiply the values by 2pi to account for that.

Here we see the difference to just calling the spline directly: The PDF is zero outside the definition range, whereas the spline extrapolates.
We raise an error if that happens, because we can expect that this is not wanted and caused by carelessness of the user.

In [None]:
# Data histogram in the binning used by the LLH.
# `density=True` replaces the removed `normed=True` keyword.
_ = plt.hist(np.sin(_exp["dec"]), bins=grbllh.spatial_pdf_args["bins"],
             density=True)
# sin_dec = np.linspace(-1.05, 1.05, 200)  # Will throw an error as wanted
sin_dec = np.linspace(-1., 1., 200)
# Multiply by 2pi to compare the PDF to the histogram in sin(dec) only
y = 2 * np.pi * grbllh._pdf_spatial_background(ev_sin_dec=sin_dec)
plt.plot(sin_dec, y, lw=2)

### Spatial signal PDF

Compare signal and BG pdf.

First we create multiple sources and a single event and scan the event PDF by moving the event along the declination axis.
All PDFs have the same height, because the same sigma is used.

Note that BG is here usually very small compared to the signal, because we sample the ev positions within 1 sigma around the source.

In [None]:
LOG = False

nsrcs = 4
# Choose the event sigma from data
ev_sigma = np.random.choice(_exp["sigma"], size=1)
# Scalar copy for the plot range, so that `np.linspace` below returns a 1D
# array independent of the numpy version
_sig = float(ev_sigma[0])

# Make nsrcs, same ra, but different dec. decs are distributed uniformly in
# the range of the largest sigma from the events (for illustration only)
src_dec = np.random.uniform(-ev_sigma, ev_sigma, size=nsrcs)
src_ra = np.ones_like(src_dec) * np.pi
plt_rnge = [np.amin(src_dec) - _sig, np.amax(src_dec) + _sig]

# Scan signal PDF for event declination. `ev_dec` is the declination itself,
# the sine is taken only once for `ev_sin_dec`.
ev_dec = np.linspace(plt_rnge[0], plt_rnge[1], 200)
ev_sin_dec = np.sin(ev_dec)
ev_ra = src_ra[0] * np.ones_like(ev_sin_dec)
ev_sig = np.ones_like(ev_sin_dec) * ev_sigma

# y has shape (nsrcs, nevts), where nevts are the ev_sin_dec values here (scan)
y = grbllh._pdf_spatial_signal(src_ra, src_dec, ev_ra, ev_sin_dec, ev_sig)

# In log mode use a logarithmic y-axis instead of transforming the values, so
# that signal and BG PDF are shown in the same units. The lower limit must
# then be positive.
if LOG:
    plt.yscale("log")
y_lo = 1e-5 if LOG else 0.

plt.plot(ev_dec, y.T, lw=2)
plt.vlines(src_dec, y_lo, np.amax(y), color="C7", linestyles="--", lw=2,
           label="srcs pos")

# Plot BG PDF to compare
bg = grbllh._pdf_spatial_background(ev_sin_dec=ev_sin_dec)
plt.plot(ev_dec, bg, lw=2, label="BG")

plt.xlim(*plt_rnge)
plt.ylim(y_lo, 1.1 * np.amax(y))

plt.xlabel("dec")
plt.ylabel("PDF per src")
plt.legend()

plt.tight_layout()

Here we use multiple events with different sigmas and scan again in declination by moving a single possible src position.
We get different heights, because of the different sigmas.

The PDFs each peak where the event position is.
If we had a single source, we would just read off the values at that position.

In [None]:
LOG = True
# Make nevts, same ra, but different dec. sigmas chosen from data
nevts = 4
ev_sigma = np.random.choice(_exp["sigma"], size=nevts)
# Sample some evt decs uniformly around the horizon with spread of the largest
# sigma to get some variation
max_sig = np.amax(ev_sigma)
ev_dec = np.random.uniform(-max_sig, max_sig, size=nevts)
ev_sin_dec = np.sin(ev_dec)
ev_ra = np.ones_like(ev_dec) * np.pi

# Plot margin PDF scanned for each src position for each event position
src_dec = np.linspace(-2. * max_sig, 2 * max_sig, 200)
src_ra = ev_ra[0] * np.ones_like(src_dec)

# This has shape (nsrcs, nevts)
y = grbllh._pdf_spatial_signal(src_ra, src_dec, ev_ra, ev_sin_dec, ev_sigma)

# In log mode use a logarithmic y-axis instead of transforming the values.
# Before, the signal was plotted as log10(PDF) while the BG PDF was plotted
# in linear units on the same axis. The lower limit must be positive on a log
# axis.
if LOG:
    plt.yscale("log")
y_lo = 1e-5 if LOG else 0.

plt.plot(src_dec, y, lw=2)

plt.vlines(ev_dec, y_lo, np.amax(y) * 1.1, color="C7",
           linestyles="--", label="evts pos")

# Plot BG PDF to compare
bg = grbllh._pdf_spatial_background(ev_sin_dec=np.sin(src_dec))
plt.plot(src_dec, bg, lw=2, label="BG")

plt.xlim(src_dec[[0, -1]])
plt.ylim(y_lo, 1.1 * np.amax(y))

plt.xlabel("src dec")
plt.ylabel("PDF per evt")
plt.legend()
plt.tight_layout()

### Spatial PDF ratio

In [None]:
def plot_dec_vs_signal(S, ev_dec, src_ra, src_dec, weights, ax=None,
                       xlim=(-91., 91.)):
    """
    Plot the per source signal values against the event declination and
    overlay the weighted ("stacked") sum over all sources.

    Parameters
    ----------
    S : array-like
        Signal (or signal over background) values, indexed as ``S[i]`` per
        source; each row is plotted against ``ev_dec``.
    ev_dec : array-like
        Event declinations in radian, shown in degrees on the x-axis.
    src_ra, src_dec : array-like
        Source positions in radian. Only ``src_dec`` is drawn (as tick marker
        at y = -10); ``src_ra`` only limits the number of plotted sources.
    weights : array-like
        One weight per source, must broadcast against ``S``.
    ax : matplotlib axes, optional
        Axes to draw on. If ``None`` a new figure is created.
    xlim : tuple, optional
        x-axis limits in degrees. The default is the full declination range
        with a margin of 1 degree, which was previously derived from the
        notebook globals ``smin = -90`` and ``smax = 90``.

    Returns
    -------
    ax : matplotlib axes
        The axes that were drawn on.
    """
    if ax is None:
        _, ax = plt.subplots(1, 1)

    ev_dec_deg = np.rad2deg(ev_dec)

    # Plot signal per source for each event
    for sig_i, _, sdec in zip(S, src_ra, src_dec):
        ax.plot(ev_dec_deg, sig_i, ls="-")
        ax.plot(np.rad2deg(sdec), -10, "k|")

    # Simulate a simple stacking, one weight per source
    stacked = np.sum(weights * S, axis=0) / np.sum(weights)
    ax.plot(ev_dec_deg, stacked, ls="--", c=dg, label="stacked")

    ax.set_xlim(xlim)
    ax.set_xlabel("DEC in °")
    ax.set_ylabel("Signal pdf")
    ax.legend(loc="upper right")
    return ax

We make 4 plots to test everything:

1. [Top left] We place densely packed srcs at the declination range and scan the PDFs by varying the event declinations.
   Sigma is fixed to 1 for illustration.
   We expect just a row of gaussians along the dec range.
   The stacked signal is the weighted sum of all signal contributions at a single event dec position.
   
2. [Bottom left] We plot just the background PDF and its inverse for the dec range.
   The inverse PDF is what modulates the signal PDF.
   
3. [Top right] This modulation can be seen in this plot.
   It is basically the same as the first one, but now it's signal over background.
   So the signal peaks are modulated with the inverse BG PDF.
   
4. [Bottom right] This is the same plot as the third one, but this time we use the real data declination values instead of nicely spaced ones.
   The effect is the same but not really visible, because each event has a different sigma, so the PDFs all have different heights and widths.
   It becomes more similar when using an 1° sigma for all events (just comment that line in).

In [None]:
# Make srcs across the dec range. The hull of SoB should be shaped like the
# 1/(sinDec BG distribution). With a single source we couldn't see that,
# because it drops to zero far from the src position
smin, smax, step = -90, +90, 10
src_ra = np.deg2rad(np.arange(smin, smax + step, step))
src_dec = np.deg2rad(np.arange(smin, smax + step, step))

# Scan in dec by varying the evts dec
ev_ra = np.deg2rad(np.linspace(smin, smax, 1000))
ev_dec = np.deg2rad(np.linspace(smin, smax, 1000))
ev_sin_dec = np.sin(ev_dec)
ev_sig = np.deg2rad(np.ones_like(ev_ra))

# Some pseudo weights to simulate stacking, shape (nsrcs, 1) to broadcast
# against the (nsrcs, nevts) signal arrays. Used for all panels below.
weights = np.arange(1, len(src_dec) + 1)[:, np.newaxis]

fig, ((axtl, axtr), (axbl, axbr)) = plt.subplots(2, 2, figsize=(12, 10))

# Signal only (kent vs. gaus should look the same here)
grbllh.spatial_pdf_args["kent"] = True
S = grbllh._pdf_spatial_signal(src_ra, src_dec, ev_ra, ev_sin_dec, ev_sig)
_ = plot_dec_vs_signal(S, ev_dec, src_ra, src_dec, weights, ax=axtl)
axtl.set_xlim(-90, 90)

# Background only
bins = grbllh.spatial_pdf_args["bins"]
h, b = np.histogram(np.sin(_exp["dec"]), bins=bins, density=True)
m = 0.5 * (b[:-1] + b[1:])
# Divide by 2pi to compare the sin(dec) histogram to the PDF
_ = axbl.hist(m, bins=bins, weights=h / 2 / np.pi, alpha=0.5)
_sin_dec = np.linspace(-1, 1, 1000)
bg_pdf = grbllh._pdf_spatial_background(_sin_dec)
axbl.plot(_sin_dec, bg_pdf, lw=2, label="pdf")
axbl.set_ylim(0, 0.2)
# 1 / BG PDF on second axis
axbl2 = axbl.twinx()
axbl2.plot(_sin_dec, 1. / bg_pdf, c="C2", lw=2, ls="--", label="1/pdf")
axbl2.set_ylim(0, (1 / bg_pdf).max())
axbl.set_xlabel("sinus DEC")
axbl.set_xlim(-1, 1)
axbl.legend(loc="upper left")
axbl2.legend(loc="upper center")

# SoB on example + BG PDF
SoB = grbllh._soverb_spatial(src_ra, src_dec, ev_ra, ev_sin_dec, ev_sig)
_ = plot_dec_vs_signal(SoB, ev_dec, src_ra, src_dec, weights, ax=axtr)
axtr.plot(np.rad2deg(np.arcsin(_sin_dec)), bg_pdf, lw=3, label="BG pdf", c=dg)
axtr.set_xlim(-90, 90)
axtr.set_yscale("log")
axtr.set_ylim(np.amin(bg_pdf), 1e5)
axtr.legend(loc="upper left")

# Now with the real data. Sort first in dec to show with nice lines + BG PDF.
# Use the cut sample `_exp`, which is the one the LLH object and the BG
# histogram above are built from, not the uncut `exp`.
idx = np.argsort(_exp["dec"])
ev_ra = _exp["ra"][idx]
ev_dec = _exp["dec"][idx]
ev_sin_dec = np.sin(ev_dec)
ev_sig = _exp["sigma"][idx]
# Comment in to match the simple example (all events have sigma 1°)
# ev_sig = np.deg2rad(np.ones_like(ev_ra))
SoB = grbllh._soverb_spatial(src_ra, src_dec, ev_ra, ev_sin_dec, ev_sig)

_ = plot_dec_vs_signal(SoB, ev_dec, src_ra, src_dec, weights, ax=axbr)
axbr.plot(np.rad2deg(np.arcsin(_sin_dec)), bg_pdf,
          lw=3, label="BG pdf", c="C0")
axbr.set_yscale("log")
axbr.set_ylim(np.amin(bg_pdf), 1e5)
axbr.legend(loc="upper left")

plt.show()

### Energy ratio spline

This is the creation of the signal over background ratio for the energy PDF.
It is resolved in sinDec and logE to account for different positions on the sky and energies.

Missing values, where no data or MC is present, are filled with interpolated values, controlled by the "fillval" option.

In [None]:
# Test col vs minmax. Also try different interpolations in linear or
# logspace. Log interpolation falls off more quickly to the edge values.
fig, (al, ar) = plt.subplots(1, 2, figsize=(14,5))

energy_pdf_args = {"bins": [sin_dec_bins, logE_bins],
                   "gamma": 2., "fillval": "col", "interpol_log": False}
grbllh = LLH.GRBLLH(X=_exp, MC=mc,
                    spatial_pdf_args=spatial_pdf_args,
                    energy_pdf_args=energy_pdf_args,
                    time_pdf_args=time_pdf_args)

# Ratio spline with 'col' filling, evaluated on a fine grid that extends
# slightly beyond the binning in both directions
x = np.linspace(-1.1, 1.1, num=1000 + 1)
y = np.linspace(0.5, 10.5, num=1000 + 1)
XX, YY = np.meshgrid(x, y)
xx, yy = map(np.ravel, [XX, YY])
gpts = np.vstack((xx, yy)).T
zz = np.exp(grbllh._energy_spl(gpts))
ZZ = zz.reshape(XX.shape)
# Plotting with hist creates strange effects... Use pcolormesh instead.
# The color limits are set on the norm itself: passing `vmin` / `vmax` next
# to a norm instance is rejected by newer matplotlib versions.
img = al.pcolormesh(XX, YY, ZZ, norm=LogNorm(vmin=1e-3, vmax=1e3),
                    cmap="coolwarm")
al.set_title("Spline interpolation: 'col'")
plt.colorbar(ax=al, mappable=img)

# With 'minmax' filling. Note: The small values in the lower row are due to
# plotting in log. We interpolate in linear space, so in log, the jump is
# very steep for small values.
energy_pdf_args = {"bins": [sin_dec_bins, logE_bins],
                   "gamma": 2., "fillval": "minmax", "interpol_log": False}
grbllh = LLH.GRBLLH(X=_exp, MC=mc,
                    spatial_pdf_args=spatial_pdf_args,
                    energy_pdf_args=energy_pdf_args,
                    time_pdf_args=time_pdf_args)

zz = np.exp(grbllh._energy_spl(gpts))
ZZ = zz.reshape(XX.shape)
img = ar.pcolormesh(XX, YY, ZZ, norm=LogNorm(vmin=1e-3, vmax=1e3),
                    cmap="coolwarm")
ar.set_title("Spline interpolation: 'minmax'")
plt.colorbar(ax=ar, mappable=img)

fig.tight_layout()

In [None]:
# Test min vs minmax. Only with interpol log we see the structure, because
# otherwise only in the last bin we get below 1, so the colors are mostly
# red in the log norm color scale.
fig, (al, ar) = plt.subplots(1, 2, figsize=(14,5))

energy_pdf_args = {"bins": [sin_dec_bins, logE_bins],
                   "gamma": 2., "fillval": "min", "interpol_log": True}
grbllh = LLH.GRBLLH(X=_exp, MC=mc,
                    spatial_pdf_args=spatial_pdf_args,
                    energy_pdf_args=energy_pdf_args,
                    time_pdf_args=time_pdf_args)

# Ratio spline with 'min' filling, evaluated on a fine grid that extends
# slightly beyond the binning in both directions
x = np.linspace(-1.1, 1.1, num=1000 + 1)
y = np.linspace(0.5, 10.5, num=1000 + 1)
XX, YY = np.meshgrid(x, y)
xx, yy = map(np.ravel, [XX, YY])
gpts = np.vstack((xx, yy)).T
zz = np.exp(grbllh._energy_spl(gpts))
ZZ = zz.reshape(XX.shape)
# Plotting with hist creates strange effects... Use pcolormesh instead.
# The color limits are set on the norm itself: passing `vmin` / `vmax` next
# to a norm instance is rejected by newer matplotlib versions.
img = al.pcolormesh(XX, YY, ZZ, norm=LogNorm(vmin=1e-3, vmax=1e3),
                    cmap="coolwarm")
al.set_title("Spline interpolation: 'min'")
plt.colorbar(ax=al, mappable=img)

# With 'minmax' filling, again with `interpol_log` switched on
energy_pdf_args = {"bins": [sin_dec_bins, logE_bins],
                   "gamma": 2., "fillval": "minmax", "interpol_log": True}
grbllh = LLH.GRBLLH(X=_exp, MC=mc,
                    spatial_pdf_args=spatial_pdf_args,
                    energy_pdf_args=energy_pdf_args,
                    time_pdf_args=time_pdf_args)

zz = np.exp(grbllh._energy_spl(gpts))
ZZ = zz.reshape(XX.shape)
img = ar.pcolormesh(XX, YY, ZZ, norm=LogNorm(vmin=1e-3, vmax=1e3),
                    cmap="coolwarm")
ar.set_title("Spline interpolation: 'minmax'")
plt.colorbar(ax=ar, mappable=img)

fig.tight_layout()


### Energy PDF ratio

Here we see again the difference to the direct spline evaluation.
The ratio function sets values outside the definition range to zero probability.

In [None]:
fig, (al, ar) = plt.subplots(1, 2, figsize=(14,5))

energy_pdf_args = {"bins": [sin_dec_bins, logE_bins],
                   "gamma": 2., "fillval": "col", "interpol_log": True}
grbllh = LLH.GRBLLH(X=_exp, MC=mc,
                    spatial_pdf_args=spatial_pdf_args,
                    energy_pdf_args=energy_pdf_args,
                    time_pdf_args=time_pdf_args)

# Energy PDF ratio with 'col' filling, evaluated on a fine grid that extends
# slightly beyond the binning in both directions
x = np.linspace(-1.1, 1.1, num=1000 + 1)
y = np.linspace(0.5, 10.5, num=1000 + 1)
XX, YY = np.meshgrid(x, y)
xx, yy = map(np.ravel, [XX, YY])
gpts = np.vstack((xx, yy)).T
# This time call the ratio function instead of the bare spline
zz = grbllh._soverb_energy(xx, yy)
ZZ = zz.reshape(XX.shape)
# Plotting with hist creates strange effects... Use pcolormesh instead.
# The color limits are set on the norm itself: passing `vmin` / `vmax` next
# to a norm instance is rejected by newer matplotlib versions.
img = al.pcolormesh(XX, YY, ZZ, norm=LogNorm(vmin=1e-3, vmax=1e3),
                    cmap="coolwarm")
al.set_title("Spline interpolation: 'col'")
plt.colorbar(ax=al, mappable=img)

# With 'minmax' filling, again with `interpol_log` switched on
energy_pdf_args = {"bins": [sin_dec_bins, logE_bins],
                   "gamma": 2., "fillval": "minmax", "interpol_log": True}
grbllh = LLH.GRBLLH(X=_exp, MC=mc,
                    spatial_pdf_args=spatial_pdf_args,
                    energy_pdf_args=energy_pdf_args,
                    time_pdf_args=time_pdf_args)

zz = grbllh._soverb_energy(xx, yy)
ZZ = zz.reshape(XX.shape)
img = ar.pcolormesh(XX, YY, ZZ, norm=LogNorm(vmin=1e-3, vmax=1e3),
                    cmap="coolwarm")
ar.set_title("Spline interpolation: 'minmax'")
plt.colorbar(ax=ar, mappable=img)

fig.tight_layout()

### Detector source weights

We use the same spline method to create a spline describing the sinDec dependence of a signal MC weighted to a specific astrophysical flux model (usually an unbroken power law).

Depending on the src position, we expect more or less signal from that src.
This is equivalent to folding with the detector exposure function.

Our stacking form is described by a multi position search where the signal term gets modified to:

$$
    S^\text{tot} = \sum_{j=1}^{N_\text{srcs}} w_j S_{ij} \quad\text{with}\quad
    \sum_j w_j = 1 \quad\text{with}\quad w_j = w_j^\text{theo}\cdot w_j^\text{det}
$$

The weights are a combination of the exposure weights and a-priori fixed intrinsic source weights, eg. from a known gamma flux.

In [None]:
# Small hack to change the gamma without recreating the grbllh object
gamma_override = 2.13

grbllh.energy_pdf_args["gamma"] = gamma_override
mc_sin_dec = np.sin(mc["dec"])
mc_bins = grbllh.energy_pdf_args["bins"][0]
mc_dict = {"trueE": mc["trueE"], "ow": mc["ow"]}

# Rebuild the signal sin(dec) spline so that it uses the overridden gamma
grbllh._spatial_signal_spl = grbllh._create_sin_dec_spline(
    sin_dec=mc_sin_dec, bins=mc_bins, mc=mc_dict)

sin_dec = np.linspace(-1.05, 1.05, 200)
y = np.exp(grbllh._spatial_signal_spl(sin_dec))

# MC needs proper weighting
gamma = grbllh.energy_pdf_args["gamma"]
mc_w = mc["ow"] * mc["trueE"]**(-gamma)
mc_bins = energy_pdf_args["bins"][0]
# `density=True` replaces the removed `normed=True` keyword
h, b = np.histogram(np.sin(mc["dec"]), bins=mc_bins, weights=mc_w,
                    density=True)

# Smooth it, charge it, odd it, quick truncate it
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.savgol_filter.html
m = get_binmids([b])[0]
redux = 10  # Window len is nearest odd number to (number of bins / redux)
window_len = int(2 * np.floor((len(b) / redux) / 2) + 1)  # Must be odd
_h = scsignal.savgol_filter(h, window_len, 3, mode="mirror")
plt.hist(m, bins=b, weights=_h, histtype="step", lw=2, color="C1", ls="--")
plt.hist(m, bins=b, weights=h, histtype="step", lw=2, color="C3")

# Plot spline (fitted to unsmoothed)
plt.plot(sin_dec, y, lw=2, color="C2")

# Get weights for some srcs
src_sin_dec = np.linspace(-1, 1, 11)
src_dec = np.arcsin(src_sin_dec)
src_w_theo = np.ones_like(src_dec)
w = grbllh.src_weights(src_dec=src_dec, src_w_theo=src_w_theo)

# Revoke norm for plotting to see if weights are on the curve
src_dec_w = np.exp(grbllh._spatial_signal_spl(src_sin_dec))
_w = w * np.sum(src_dec_w * src_w_theo)

plt.plot(src_sin_dec, _w, "wo", ms=7, mew=1.5, mec="k")
plt.vlines(np.sin(src_dec), 0, 1.05 * np.amax(y), colors="C7",
           lw=1, linestyles="--")

plt.xlabel("sin(dec)")
plt.ylabel("PDF")
# Raw string: "\g" is not a valid escape sequence in a normal string.
# Two digits, so that the overridden index is not rounded away in the title.
plt.title(r"$\gamma = {:.2f}$".format(gamma))

plt.xlim(-1, 1)
plt.ylim(0, 1.05 * np.amax(y))
plt.tight_layout()

print(w)
print(w.sum())

Here we just use some ascending theoretical weights in both directions.

- w1 should resemble the sindec curve from above
- w2 should rise overall to the right (less steep or reversed from w1)
- w3 should fall overall to the right (steeper than w1)

In [None]:
# Source declinations, equidistant in sin(dec)
_src_dec = np.arcsin(np.linspace(-1, 1, 21))

# Compare for different theoretical weights
# 1. All sources weighted equally
src_w_theo = np.ones_like(_src_dec)
w1 = grbllh.src_weights(src_dec=_src_dec, src_w_theo=src_w_theo)

# 2. Weights ascending with declination: 1, 2, ..., nsrcs
src_w_theo = np.arange(1, len(_src_dec) + 1)
w2 = grbllh.src_weights(src_dec=_src_dec, src_w_theo=src_w_theo)

# 3. Weights descending with declination: nsrcs, ..., 2, 1
src_w_theo = np.arange(len(_src_dec), 0, -1)
w3 = grbllh.src_weights(src_dec=_src_dec, src_w_theo=src_w_theo)

_labels = ("theo = 1 (orig)", "theo = arange", "theo = arange[::-1]")
for _w_i, _lbl in zip((w1, w2, w3), _labels):
    plt.plot(np.sin(_src_dec), _w_i, "o", label=_lbl)
plt.legend(loc="upper left")

Check if weights are the same when using event density (skylab style) instead.
It should make no difference because the weights are normalized anyway.

**This will be included, if we get to fitting multiple years.
We can then use the same weights for the stacking and for normalizing ns per year.**

Note: The normalization and the pivot will not be included in the end, because they are constant for every source and for every dataset/year, so they get normalized out anyway.

In [None]:
# Weight with numu diffuse 6yr flux norm, but same index as above.
# NOTE(review): `livetime` is not defined in this cell; it is expected to
# come from an earlier cell and is multiplied with `secinday` below, so it is
# presumably given in days - confirm.
index = gamma_override
norm = 0.9 * 1e-18  # (GeV s sr cm^2)^-1, valid at 100 TeV = 1e5 GeV
pivot = 1e5
flux = norm * (mc["trueE"] / pivot)**(-index)
mc_w = mc["ow"] * flux * livetime * secinday
mc_bins = energy_pdf_args["bins"][0]

density = True
h, b = np.histogram(np.sin(mc["dec"]), bins=mc_bins, weights=mc_w,
                    density=density)

# Normalize (same as density=True)
if not density:
    h /= np.diff(b) * np.sum(h)

# PDF * Number of total events = Event density
_h = h * mc_w.sum()  

mids = get_binmids([mc_bins])[0]
_ = plt.hist(mids, bins=mc_bins, weights=_h)

# Make spline, use also outermost edges. The spline is fitted to the log of
# the event density, the outermost bin values are repeated at the edges.
x = np.concatenate((b[[0]], mids, b[[-1]]))
y = np.log(_h)
y = np.concatenate((y[[0]], y, y[[-1]]))
spl = sci.InterpolatedUnivariateSpline(x, y, k=3, ext="extrapolate")

# Evaluate the spline for plotting (`x` and `y` are reused here)
x = np.linspace(-1, 1, 100)
y = np.exp(spl(x))
plt.plot(x, y, "C1-")


plt.xlabel("sindec")
plt.ylabel("Event density Nevts / sindec")

# Total events by integrating _h: Ntot = sum_i (_h_i * diff(bins)_i)
plt.title("Gamma = {:.2f}: Ntot = {:.2f}".format(
        index, np.sum(_h * np.diff(mc_bins))))

# Check the weight, deviation from float error I guess.
# `src_sin_dec`, `src_dec` and `w` are taken from the detector weights cell
# above; the outermost sources at sin(dec) = -1 and +1 are left out.
src_w_dec = np.exp(spl(src_sin_dec[1:-1]))
src_w_theo = np.ones_like(src_dec[1:-1])
src_w = src_w_dec * src_w_theo / np.sum(src_w_dec * src_w_theo)

plt.plot(src_sin_dec[1:-1], src_w_dec, "wo", ms=7, mew=1.5, mec="k")
plt.tight_layout()

print("Spline from PDF only")
print(w[1:-1])

print("\nSpline from event density with livetime")
print(src_w.reshape(len(src_w), 1))

# `w` is renormalized to the reduced source set before comparing
print("\nRatio")
for ratio in w[1:-1] / np.sum(w[1:-1]) / src_w.reshape(len(src_w), 1):
    print("[ {:.6f} ]".format(*ratio))

### ln-LLH ratio

Plot llh and gradient.
The gradient is calculated analytically.
With this test, we simply want to check, if the gradient is OK and the likelihood behaves correctly.

In [None]:
# Snippet to plot lnLLH ratio and gradient next each other
def plot_llh(ns, lnllh, lnllh_grad, ns_max, xmin, xmax, lw=2):
    """
    Plot the ln-LLH ratio (left) and its gradient in ns (right) side by side.

    Parameters
    ----------
    ns : array-like
        The ns values the LLH was scanned at (x-axis of both panels).
    lnllh : array-like
        ln-LLH ratio values for each entry in ``ns``.
    lnllh_grad : array-like
        Gradient of the ln-LLH ratio in ns for each entry in ``ns``.
    ns_max : float
        ns position that is marked by a dashed vertical line in both panels.
    xmin, xmax : float
        x-axis limits, applied to both panels.
    lw : float, optional
        Line width of the curves. (default: 2)

    Returns
    -------
    fig : matplotlib figure
    (al, ar) : tuple of matplotlib axes
        Left (LLH) and right (gradient) axes.
    """
    fig, (al, ar) = plt.subplots(1, 2, figsize=(10, 4))

    # Left: ln-LLH ratio
    al.plot(ns, lnllh, lw=lw)
    al.set_xlim(xmin, xmax)
    if np.amax(lnllh) == 0.:
        # Maximum is exactly zero, so all other values are <= 0: show the
        # full negative range instead of an empty [0, 0] axis
        al.set_ylim(np.amin(lnllh), 1)
    else:
        al.set_ylim(0, 1.05 * np.amax(lnllh))
    al.axvline(ns_max, 0, 1, ls="--", lw=2, color="C7")
    al.set_title("LLH")

    # Right: gradient, with a guide line at zero
    ar.plot(ns, lnllh_grad, lw=lw)
    ar.axhline(0, 0, 1, ls="--", lw=2, color="C7")
    ar.axvline(ns_max, 0, 1, ls="--", lw=2, color="C7")
    # Limit the gradient panel too (the left axes was set twice before)
    ar.set_xlim(xmin, xmax)
    ar.set_ylim(-5, 5)
    ar.set_title("LLH gradient in ns")
    fig.tight_layout()
    return fig, (al, ar)

names = ["t", "dt0", "dt1", "ra", "dec", "w_theo"]
rndgen = np.random.RandomState(7353)

#### No Events given

Quickly check if the LLH handles an empty array of events correctly.

If no events given, then the per event terms should all be zero.
Then the test statistic, which is $\Lambda = 2 \cdot \ln(\mathcal{L}_1 / \mathcal{L}_0)$, reduces simply to $\Lambda = -2n_s$.

The gradient is then trivially $\partial_{ns}\Lambda = -2$.

In [None]:
# Make up some setup
nsrcs = 1
# NOTE: drawn from the global NumPy RNG, so the source time changes between
# runs; the expected curve plotted below (-2 * ns) does not depend on it.
src_t = np.random.choice(_exp["timeMJD"], size=nsrcs)
dt = np.array([-20, 200])

# Expected background with rate 5mHz, kind of realistic.
# Increase nb scale to test different BG expectations.
scale = 1e5
nb = 0.005 * np.diff(dt) * scale
src_ra = np.deg2rad([180])  # Arbitrarily placed single source
src_dec = np.deg2rad([10])
src_w_theo = np.ones_like(src_dec)

# Setup src record array
srcs = np.vstack((src_t, [dt[0]], [dt[1]],
                  [src_ra], [src_dec], src_w_theo))
names = ["t", "dt0", "dt1", "ra", "dec", "w_theo"]
# `np.rec` is the public home of `fromarrays`; `np.core` is private and
# deprecated in newer NumPy versions
srcs = np.rec.fromarrays(srcs, names=names,
                         formats=len(names) * ["float64"])
args = {"nb": nb, "srcs": srcs}

# Setup zero events = empty array
# Builtin `float` replaces the `np.float` alias, which was removed from NumPy
X = np.empty((0,), dtype=[(n, float) for n in _exp.dtype.names])

# Scan a single LLH for the chosen data above
n_ns = 500
xmin, xmax = 0, 20
ns = np.linspace(xmin, xmax, n_ns)
lnllh = np.empty(n_ns)
lnllh_grad = np.empty(n_ns)
for i in range(n_ns):
    _lnllh, _lnllh_grad = grbllh.lnllh_ratio(X, ns[i], args)
    # The gradient comes back as a sequence, ns is the only fit parameter
    lnllh[i], lnllh_grad[i] = _lnllh, _lnllh_grad[0]

# Manual "fit" by scanning the maximum
ns_max = ns[np.argmax(lnllh)]

_, (al, ar) = plot_llh(ns, lnllh, lnllh_grad, ns_max, xmin, xmax, lw=4)

# Cross check with expected result:
lnllh_exp = -2 * ns
lnllh_grad_exp = -2 * np.ones_like(ns)
al.plot(ns, lnllh_exp, "C1--", lw=2, label="expect")
ar.plot(ns, lnllh_grad_exp, "C1--", lw=2, label="expect")

al.legend(loc="upper right")
ar.legend(loc="upper right")
plt.show()

#### Single Source

First with only one source.

Note: We test here "super-signal-like" events. Every event is exactly at the src position and every time is exactly in the time window, where the ratio is max.
Only the energy is distributed as background.
So only for really large time windows (really large) which have insanely high background rates we drop lower than the injected ns in our prediction.
This is because the background term can only counter background-like events, which have a low signal over background ratio.
For the events injected here, this rate is super high, so we always "fit" the exact amount of injected events.

For this setup each event's SoB is equal and we can calculate the sum over the events directly by replacing it with N*SoB.
In this case, the gradient is zero at:

\begin{align}
    0 &= -1 + \sum_i \frac{S}{n_b B}\cdot \frac{1}{n_s \frac{S}{n_b B} + 1}
       = -1 + N \frac{S}{n_b B}\cdot \frac{1}{n_s \frac{S}{n_b B} + 1} \\
    \Leftrightarrow \frac{1}{N} &= \frac{1}{n_s + \frac{n_b B}{S}} \\
    \Leftrightarrow N &= n_s + \frac{n_b B}{S}
\end{align}

So now if the signal is super large (and that's what we ensured by using our super-signal-like events) the term $\frac{n_b B}{S} \rightarrow 0$ and we get $\hat{n}_S = N$ which is exactly what we observe.

Only if we set super high $n_B$, our $\hat{n}_S$ shrinks as $\frac{n_b B}{S}$ gets larger and larger and in the end $\hat{n}_S$ even turns negative, when the 1 / SoB ratio is larger than N.

In [None]:
# Make up some setup
nsrcs = 1
src_t = rndgen.choice(_exp["timeMJD"], size=nsrcs)
dt = np.array([-20, 200])

# Expected background with rate 5mHz, kind of realistic.
# Increase nb scale to see ns best fit shrink.
scale = 1e4
nb = 0.005 * np.diff(dt) * scale
src_ra = np.deg2rad([180])  # Arbitrarily placed single source
src_dec = np.deg2rad([10])
src_w_theo = np.ones_like(src_dec)

# Setup src record array
srcs = np.vstack((src_t, [dt[0]], [dt[1]],
                  [src_ra], [src_dec], src_w_theo))
# `np.rec` is the public home of `fromarrays`; `np.core` is private and
# deprecated in newer NumPy versions
srcs = np.rec.fromarrays(srcs, names=names,
                         formats=len(names) * ["float64"])
args = {"nb": nb, "srcs": srcs}

# Set the events artificially where the srcs are in space and nicely spaced
# times inside the search window, where time sob is large. Otherwise the llh
# is almost always peaked at 0
N = 10
# Window edges in MJD. Unpacking into two values only works for nsrcs = 1
mint, maxt = src_t + dt / secinday  # In MJD
timeMJD = np.linspace(mint, maxt, N)
X = rndgen.choice(_exp, size=N)  # Only to copy the recarray structure
X["timeMJD"] = timeMJD
X["ra"] = np.ones_like(timeMJD) * src_ra
X["sinDec"] = np.ones_like(timeMJD) * np.sin(src_dec)
X["sigma"] = np.deg2rad(np.ones_like(timeMJD))

# Scan a single LLH for the chosen data above
n_ns = 500
xmin, xmax = 0, 2 * N
ns = np.linspace(xmin, xmax, n_ns)
lnllh = np.empty(n_ns)
lnllh_grad = np.empty(n_ns)
for i in range(n_ns):
    _lnllh, _lnllh_grad = grbllh.lnllh_ratio(X, ns[i], args)
    # The gradient comes back as a sequence, ns is the only fit parameter
    lnllh[i], lnllh_grad[i] = _lnllh, _lnllh_grad[0]

# Manual "fit" by scanning the maximum
ns_max = ns[np.argmax(lnllh)]

plot_llh(ns, lnllh, lnllh_grad, ns_max, xmin, xmax)
plt.show()

#### Multiple Sources -- All at same position

This time we use multiple sources, but all at the exact same location and with the exact same properties.
We expect the very same result as in the single source case above, because the weighted sum of the signal terms reduces to

\begin{align}
    S^\text{tot} &= \sum_{j=1}^{N_\text{srcs}} w_j S_{ij}
                 = S_{i} \sum_{j=1}^{N_\text{srcs}} \frac{1}{N_\text{srcs}}
                 = S_i \\
    \Lambda &= -2\ln\left(\frac{\mathcal{L}_0}{\mathcal{L}_1}\right)
             = 2\left[-n_S + \sum_{i=1}^N\ln\left(\frac{n_S\ S^\text{tot}}{\langle n_B\rangle B_i} + 1\right)\right]
             = 2\left[-n_S + \sum_{i=1}^N\ln\left(\frac{n_S\ S_i}{\langle n_B\rangle B_i} + 1\right)\right]
\end{align}

as all signal terms are exactly the same and no further background locations are introduced.

We only have to scale the BG manually because we do not treat overlapping windows correctly in the code.
So we just scale the nb expectation down by 1/nsrcs manually before feeding it into the llh.

In [None]:
# Repeat sources exactly as the single one from above
# Reuses `src_t`, `dt`, `src_ra`, `src_dec`, `scale`, `X`, `xmin` and `xmax`
# from the single source cell, so that cell must have been run before.
nsrcs = 100
_src_t = np.repeat(src_t, repeats=nsrcs, axis=0)
_dt = np.repeat(dt.reshape(1, 2), axis=0, repeats=nsrcs)
# Attention here: 100% overlapping windows so total BG is unchanged. To work
# in the stacking framework, we just split the expectation equally

# Increase nb scale as in single src case above to see ns best fit shrink
_nb = 0.005 * np.diff(_dt, axis=1).flatten() / nsrcs * scale

_src_ra = np.repeat(src_ra, repeats=nsrcs, axis=0)
_src_dec = np.repeat(src_dec, repeats=nsrcs, axis=0)
_src_w_theo = np.ones_like(_src_dec)

# Setup src record array
# The stacked array is passed on directly, so the single source record array
# `srcs` from above is no longer overwritten by an intermediate 2D array.
# `np.rec` is the public home of `fromarrays` (`np.core` is private).
_srcs = np.rec.fromarrays(
    np.vstack((_src_t, _dt[:, 0], _dt[:, 1],
               _src_ra, _src_dec, _src_w_theo)),
    names=names, formats=len(names) * ["float64"])
_args = {"nb": _nb, "srcs": _srcs}

# Also use the very same events for all sources here
_X = np.copy(X)

# Scan a single LLH for the chosen data above
n_ns = 500
ns = np.linspace(xmin, xmax, n_ns)
_lnllh = np.empty(n_ns)
_lnllh_grad = np.empty(n_ns)
for i in range(n_ns):
    __lnllh, __lnllh_grad = grbllh.lnllh_ratio(_X, ns[i], _args)
    # The gradient comes back as a sequence, ns is the only fit parameter
    _lnllh[i], _lnllh_grad[i] = __lnllh, __lnllh_grad[0]

# Manual "fit" by scanning the maximum
_ns_max = ns[np.argmax(_lnllh)]

plot_llh(ns, _lnllh, _lnllh_grad, _ns_max, xmin, xmax)
plt.show()

#### Multiple Sources -- Different Right-Ascensions

Now the almost same thing, but with changed src right ascensions.
We distribute them equally around a fixed declination.
Everything else is left as before, except for giving an equally distributed number of events the same right ascension as the sources.

This case is a bit more tricky, and we don't expect the same TS here, because:
We inject the same number of events (N) but we get nsrcs times the BG (because the windows don't overlap anymore).
Each event only contributes to the window where it is spatially placed, so per source only N / nsrcs events (we chose them so the numbers distribute nicely) have a SoB > 0.

This means that the total signal term is reduced by a factor of nsrcs, as the zero signal terms can't compensate the unaffected background, which is still the same for all events.

So even though our stacked signal term is reduced by a factor of nsrcs  we still get the same fit result for super signal like events, because the signal term is still huge and we still satisfy the condition $\frac{n_b B}{S}\rightarrow 0$.
But we need slightly less cranked up background rate to let the best fit ns shrink as in the previous cases because the signal term is reduced by 1/Nsrc.

So with the same scale factor as above the TS should end up lower in every case.

In [None]:
# Repeat sources exactly as the single one from above
# Reuses `src_t`, `dt`, `src_dec`, `scale`, `N` and `X` from the single
# source cell, so that cell must have been run before.
nsrcs = 5
_src_t = np.repeat(src_t, repeats=nsrcs, axis=0)
_dt = np.repeat(dt.reshape(1, 2), axis=0, repeats=nsrcs)

# Windows don't overlap anymore, so use full BG for each window
# Increase nb scale to see ns best fit shrink
# NOTE(review): this has shape (nsrcs, 1), while the cell with all sources
# at the same position passes a flattened (nsrcs,) array -- confirm which
# shape the LLH expects.
_nb = 0.005 * np.diff(_dt) * scale

# Handpick to let regions not overlap
_src_ra = np.deg2rad([0, 30, 60, 90, 120])
_src_dec = np.repeat(src_dec, repeats=nsrcs, axis=0)
_src_w_theo = np.ones_like(_src_dec)

# Setup src record array
# The stacked array is passed on directly, so the single source record array
# `srcs` from above is no longer overwritten by an intermediate 2D array.
# `np.rec` is the public home of `fromarrays` (`np.core` is private).
_srcs = np.rec.fromarrays(
    np.vstack((_src_t, _dt[:, 0], _dt[:, 1],
               _src_ra, _src_dec, _src_w_theo)),
    names=names, formats=len(names) * ["float64"])
_args = {"nb": _nb, "srcs": _srcs}

# We used 5 srcs and 10 events, so we just repeat the ras once
# This is not very obvious on how to scale to arbitrary Ns and nsrcs
# I'm not very sure here, how many events to inject to exactly match the cases
# above.
# Here we just have 2 evts per window and still have ns of 10, even though
# signal should get downweighted to 1/5 of the two cases above per source.
_X = np.copy(X)
# N // nsrcs events per source (= 2 for N = 10 events and 5 sources); N must
# be a multiple of nsrcs for the assignment to match the number of events
_X["ra"] = np.repeat(_src_ra, repeats=N // nsrcs)

# Scan a single LLH for the chosen data above
n_ns = 500
xmin, xmax = 0, 2 * N
ns = np.linspace(xmin, xmax, n_ns)
_lnllh = np.empty(n_ns)
_lnllh_grad = np.empty(n_ns)
for i in range(n_ns):
    __lnllh, __lnllh_grad = grbllh.lnllh_ratio(_X, ns[i], _args)
    # The gradient comes back as a sequence, ns is the only fit parameter
    _lnllh[i], _lnllh_grad[i] = __lnllh, __lnllh_grad[0]

# Manual "fit" by scanning the maximum
_ns_max = ns[np.argmax(_lnllh)]

plot_llh(ns, _lnllh, _lnllh_grad, _ns_max, xmin, xmax)
plt.show()

#### SoB Thresholds

See if the absolute and relative thresholds are working to reduce the amount of points we need to calculate in the LLH.

In [None]:
# Just see the absolute threshold = 0.001 in action. Everything below is cut
# so with high enough nb the ratio drops below that value. When nb=2e9 all
# Events are gone, try it.
# NOTE: both thresholds are written into `grbllh.llh_args` and stay set for
# all cells that run after this one.
thresh = 1e-3
grbllh.llh_args["sob_abs_eps"] = thresh

# Just vary this between [0, 1]. The closer to 1 the less evts survive.
# At 1 only the highest sob evt survives the relative cut. At 0 all survive
grbllh.llh_args["sob_rel_eps"] = 1e-2

nsurvive = []
sobs = []
# Stays NaN if no scan point leaves exactly one surviving event. Before, the
# title below raised a NameError in that case.
last_sob = np.nan
nbs = np.linspace(6, 10, 100)
for _nb in nbs:
    # Scan on a shallow copy, so the shared `args` dict keeps its original
    # background expectation after this cell
    thresh_args = dict(args, nb=np.array([[int(10**_nb)]]))
    _sob = grbllh._soverb(X, thresh_args)
    sobs.append(_sob)
    nsurvive.append(len(_sob))
    if len(_sob) == 1:
        # Overwritten on each match: ends up as the SoB at the largest nb
        # that still leaves exactly one event
        last_sob = _sob[0]
    
plt.plot(nbs, nsurvive)
plt.hlines(np.arange(1, 11, 1), 6, 10, linestyles="--", colors="C7")
plt.xlabel("log10(nb)")
plt.ylabel("nr. of surviving evts")
plt.title("Highest surviving sob: {:.5f} > {:.5f} (abs thresh)".format(
    last_sob, thresh))
plt.xlim(6, 10)
plt.ylim(0, 10.5)
plt.show()

# Now show the actual sob values dropping below abs thresh.
# See the dashed dropping below the threshold being at the same nb as the
# steps in the first plot.
for i, _nb in enumerate(nbs):
    x = np.ones(nsurvive[i]) * _nb
    y = np.sort(np.log10(sobs[i]))
    plt.plot(x, y, marker=".", ls="", color="C7")
plt.axhline(np.log10(thresh), 0, 1)
plt.xlim(6, 10)
plt.title("sob values dropping below thresh when nb is increased")
plt.show()

### Getting the Test Statistic

The LLH evaluates its own test statistic.
Even if this is not quite the logical way to go here, it improves speed, because trivial cases (no, 1 or 2 evts given) can be solved analytically to skip the minimization.

It would be nicer to have that in the analysis module, so we could choose which hypotheses to test, but we fit only one parameter anyway (ns) so here that doesn't really matter, if we put everything in the LLH.
Things just don't have to be that flexible.

In [None]:
# Snippet to plot lnLLH ratio and gradient next each other
def plot_llh(ns, lnllh, lnllh_grad, ns_max, xmin, xmax, lw=2):
    """
    Plot a scanned ln-LLH ratio (left) and its gradient in ns (right).

    Parameters
    ----------
    ns : array-like
        Scan points in ns, used as x values in both panels.
    lnllh : array-like
        ln-LLH ratio values at the scan points.
    lnllh_grad : array-like
        Gradient of the ln-LLH ratio in ns at the scan points.
    ns_max : float
        Position that is marked with a dashed vertical line in both panels.
    xmin, xmax : float
        x-axis limits applied to both panels.
    lw : float, optional
        Linewidth of the two curves. (default: 2)

    Returns
    -------
    fig : matplotlib figure
    (al, ar) : tuple of axes
        Left (LLH) and right (gradient) axes.
    """
    fig, (al, ar) = plt.subplots(1, 2, figsize=(10, 4))
    al.plot(ns, lnllh, lw=lw)
    al.set_xlim(xmin, xmax)
    # If the maximum is exactly 0 the curve is never positive, so show the
    # negative range instead of an empty [0, 0] window
    if np.amax(lnllh) == 0.:
        al.set_ylim(np.amin(lnllh), 1)
    else:
        al.set_ylim(0, 1.05 * np.amax(lnllh))
    al.axvline(ns_max, 0, 1, ls="--", lw=2, color="C7")
    al.set_title("LLH")

    ar.plot(ns, lnllh_grad, lw=lw)
    ar.axhline(0, 0, 1, ls="--", lw=2, color="C7")
    ar.axvline(ns_max, 0, 1, ls="--", lw=2, color="C7")
    # Fix: limits were set on the left axis a second time, leaving the
    # gradient panel with autoscaled x-limits
    ar.set_xlim(xmin, xmax)
    ar.set_ylim(-5, 5)
    ar.set_title("LLH gradient in ns")
    fig.tight_layout()
    return fig, (al, ar)

# Field names of the source record arrays built in the cells below
names = ["t", "dt0", "dt1", "ra", "dec", "w_theo"]
# Seeded generator for the reproducible draws in this section
rndgen = np.random.RandomState(7353)

# Fit bounds for ns: non-negative, no upper limit
bounds = [[0, None]]

#### Analytic ns best fit and TS for nevts = [0, 1, 2].

Compare the analytic best fits with the ones from the fitter (here just scanned LLH) just to be sure.

Just choose some upper number for nevts and see how the fitter and the scan are just the same.

In [None]:
def _get_best_fit(sob):
    """
    Copied from code, otherwise we'd have to setup sources again and wouldn't
    have direct control on the SoB values.
    """
    def _neglnllh(ns):
        lnllh, lnllh_grad = grbllh._lnllh_ratio(ns, sob)
        return -1. * lnllh, -1. * lnllh_grad

    # Get the best fit parameter and TS. Analytic cases are handled:
    # For nevts = [1 | 2] we get a [linear | quadratic] equation to solve.
    nevts = len(sob)
    # Test again, because we applied some threshold cuts
    if nevts == 0:
        return 0., 0.
    if nevts == 1:
        # Use scalar math functions, they're faster than numpy
        sob = sob[0]
        ns = 1. - (1. / sob)
        if ns <= 0:
            return 0., 0.
        else:
            TS = -ns + math.log(sob)
        return ns, 2. * TS
    elif nevts == 2:
        a = 1. / (sob[0] * sob[1])
        c = (sob[0] + sob[1]) * a
        ns = 1. - 0.5 * c + math.sqrt(c * c / 4. - a + 1.)
        if ns <= 0:
            return 0., 0.
        else:
            TS, _ = grbllh._lnllh_ratio(ns, sob)
        return ns, TS
    else:
        # Fit other cases
        res = sco.minimize(fun=_neglnllh, x0=[10], jac=True, bounds=bounds)

        return res.x[0], -1. * res.fun[0]

In [None]:
# Just use a sob value directly (see how ns gets more accurate with high sob)
_sob = 8.1

nevts = np.arange(0, 6 + 1)
nscan = 100
# The scan grid is the same for every number of events
ns = np.linspace(0, 1.5 * np.amax(nevts), nscan)

for ni in nevts:
    # ni events with identical SoB. `np.full` states the repetition
    # explicitly instead of multiplying a list with a numpy integer.
    sob = np.full(ni, _sob)
    # Builtin `float` replaces the `np.float` alias, which was removed from
    # NumPy
    TS = np.empty(nscan, dtype=float)
    grad = np.empty(nscan, dtype=float)
    for j, nsj in enumerate(ns):
        TS[j], grad_j = grbllh._lnllh_ratio(nsj, sob)
        grad[j] = grad_j[0]

    # Scanned maximum (C7 line from plot_llh) vs. `_get_best_fit` (C1 line)
    ns_bf = ns[np.argmax(TS)]
    _, ax = plot_llh(ns, TS, grad, ns_bf, ns[0], ns[-1])
    for axi in ax:
        axi.set_xlabel("ns")
        axi.axvline(_get_best_fit(sob)[0], 0, 1, ls="--", lw=3, color="C1")
    plt.suptitle("Number of events: {:d}. SoB = {:.1f}".format(ni, _sob))
    plt.show()

#### Single source

First we test if the module simply wraps the LLH module correctly.
This should reproduce same results (not regarding random fluctuations of course) as in the section ln-llh ratio, as we test the same setup as above here.

In [None]:
# Make up some setup
nsrcs = 1
# Draw from the seeded `rndgen` defined at the top of this section instead of
# the global NumPy RNG, so the cell gives the same result on every fresh run
src_t = rndgen.choice(_exp["timeMJD"], size=nsrcs)
dt = np.array([-20, 200])

# Expected background with rate 5mHz, kind of realistic.
# Increase nb scale to see ns best fit shrink.
scale = 1e5
nb = 0.005 * np.diff(dt) * scale
src_ra = np.deg2rad([180])  # Arbitrarily placed single source
src_dec = np.deg2rad([10])
src_w_theo = np.ones_like(src_dec)

# Setup src record array
srcs = np.vstack((src_t, [dt[0]], [dt[1]],
                  [src_ra], [src_dec], src_w_theo))
names = ["t", "dt0", "dt1", "ra", "dec", "w_theo"]
# `np.rec` is the public home of `fromarrays`; `np.core` is private and
# deprecated in newer NumPy versions
srcs = np.rec.fromarrays(srcs, names=names,
                         formats=len(names) * ["float64"])
args = {"nb": nb, "srcs": srcs}

# Set the events artificially where the srcs are in space and nicely spaced
# times inside the search window, where time sob is large. Otherwise the llh
# is almost always peaked at 0
N = 10
# Window edges in MJD. Unpacking into two values only works for nsrcs = 1
mint, maxt = src_t + dt / secinday  # In MJD
timeMJD = np.linspace(mint, maxt, N)
X = rndgen.choice(_exp, size=N)  # Only to copy the recarray structure
X["timeMJD"] = timeMJD
X["ra"] = np.ones_like(timeMJD) * src_ra
X["sinDec"] = np.ones_like(timeMJD) * np.sin(src_dec)
X["sigma"] = np.deg2rad(np.ones_like(timeMJD))

# Scan a single LLH for the chosen data above
n_ns = 500
xmin, xmax = 0, 2 * N
ns = np.linspace(xmin, xmax, n_ns)
lnllh = np.empty(n_ns)
lnllh_grad = np.empty(n_ns)
for i in range(n_ns):
    # The gradient comes back as a sequence (ns is the only fit parameter),
    # so pick its single entry explicitly as in the ln-LLH ratio section
    lnllh[i], grad_i = grbllh.lnllh_ratio(X, ns[i], args)
    lnllh_grad[i] = grad_i[0]

# Manual "fit" by scanning the maximum
ns_max = ns[np.argmax(lnllh)]

_, ax = plot_llh(ns, lnllh, lnllh_grad, ns_max, xmin, xmax)
ax[0].set_title("LLH. ns best fit {:.3f}".format(ns_max))
plt.show()

In [None]:
# Also let's quickly see, how the times are distributed within the time PDF
# Definition range of the time PDF for the source from the cell above.
# NOTE(review): the range is divided by `secinday` below, so it is presumably
# given in seconds relative to the source time -- confirm.
inj_trange = grbllh.time_pdf_def_range(src_t=srcs["t"], dt=dt)
# Absolute range in MJD around the source time
inj_trange = src_t + inj_trange.flatten() / secinday

# Time signal over background ratio on a fine grid over the full range
x = np.linspace(inj_trange[0], inj_trange[1], 100)
y = grbllh._soverb_time(t=x, src_t=srcs["t"], dt=dt)

plt.plot(x, y.reshape(len(x)))
# Dashed lines mark the artificial event times set in the cell above
plt.vlines(X["timeMJD"], 0, np.amax(y), colors="C7", linestyles="--", lw=2)
plt.show()

Test if we can do the same as above, but using the scipy fitter this time to get the maximum.
The LLH curve and the maximum should be identical to the ones above (except for small errors in scanning vs fitting ns).

In [None]:
# Seed for the fitter
ns0 = 1

# Fit ns with the class method, using the events, args and bounds from above.
# NOTE(review): the last, empty dict presumably holds extra minimizer
# options -- confirm with the `fit_lnllh_ratio` signature.
ns_bf, TS = grbllh.fit_lnllh_ratio(X, ns0, args, bounds, {})

# Re-plot the scanned curves from the cell above, now marking the fitted ns
_, ax = plot_llh(ns, lnllh, lnllh_grad, ns_bf, xmin, xmax)
ax[0].set_title("LLH. ns best fit {:.3f}".format(ns_bf))
plt.show()

#### Multiple Sources -- All at same position

In [None]:
# Repeat sources exactly as the single one from above
# Reuses `src_t`, `dt`, `src_ra`, `src_dec`, `N` and `X` from the single
# source cell of this section, so that cell must have been run before.
nsrcs = 5
_src_t = np.repeat(src_t, repeats=nsrcs, axis=0)
_dt = np.repeat(dt.reshape(1, 2), axis=0, repeats=nsrcs)
# Attention here: 100% overlapping windows so total BG is unchanged. To work
# in the stacking framework, we just split the expectation equally

# Increase nb scale to see ns best fit shrink
scale = 1e5
nb = 0.005 * np.diff(_dt, axis=1).flatten() / nsrcs * scale

_src_ra = np.repeat(src_ra, repeats=nsrcs, axis=0)
_src_dec = np.repeat(src_dec, repeats=nsrcs, axis=0)
src_w_theo = np.ones_like(_src_dec)

# Setup src record array
srcs = np.vstack((_src_t, _dt[:, 0], _dt[:, 1], _src_ra, _src_dec, src_w_theo))

# `np.rec` is the public home of `fromarrays`; `np.core` is private and
# deprecated in newer NumPy versions
srcs = np.rec.fromarrays(srcs, names=names,
                         formats=len(names) * ["float64"])
args = {"nb": nb, "srcs": srcs}

# Scan a single LLH for the chosen data above
n_ns = 500
xmin, xmax = 0, 2 * N
ns = np.linspace(xmin, xmax, n_ns)
lnllh = np.empty(n_ns)
lnllh_grad = np.empty(n_ns)
for i in range(n_ns):
    # The gradient comes back as a sequence (ns is the only fit parameter),
    # so pick its single entry explicitly as in the ln-LLH ratio section
    lnllh[i], grad_i = grbllh.lnllh_ratio(X, ns[i], args)
    lnllh_grad[i] = grad_i[0]

# Manual "fit" by scanning the maximum
ns_max = ns[np.argmax(lnllh)]

_, ax = plot_llh(ns, lnllh, lnllh_grad, ns_max, xmin, xmax)
ax[0].set_title("LLH. ns best fit {:.3f}".format(ns_max))
plt.show()

Again check using the class function

In [None]:
# Seed for the fitter
ns0 = 1

# Fit ns with the class method, using the events, args and bounds from above.
# NOTE(review): the last, empty dict presumably holds extra minimizer
# options -- confirm with the `fit_lnllh_ratio` signature.
ns_bf, TS = grbllh.fit_lnllh_ratio(X, ns0, args, bounds, {})

# Re-plot the scanned curves from the cell above, now marking the fitted ns
_, ax = plot_llh(ns, lnllh, lnllh_grad, ns_bf, xmin, xmax)
ax[0].set_title("LLH. ns best fit {:.3f}".format(ns_bf))
plt.show()

#### Multiple Sources -- Different Right-Ascensions

In [None]:
# Repeat sources exactly as the single one from above
# Reuses `src_t`, `dt`, `src_dec`, `N` and `X` from the single source cell of
# this section, so that cell must have been run before.
nsrcs = 5
_src_t = np.repeat(src_t, repeats=nsrcs, axis=0)
_dt = np.repeat(dt.reshape(1, 2), axis=0, repeats=nsrcs)

# Windows don't overlap anymore, so use full BG for each window
# Increase nb scale to see ns best fit shrink
scale = 1e5
nb = 0.005 * np.diff(_dt) * scale

# Do not let windows overlap
# Fix: the grid [0, 2pi) is already in radians. The former extra
# `np.deg2rad` squeezed all sources into about 6 degrees of right ascension,
# so they were not spread around the sky as intended.
src_ra = np.linspace(0, 2 * np.pi, nsrcs + 1)[:-1]
_src_dec = np.repeat(src_dec, repeats=nsrcs, axis=0)
src_w_theo = np.ones_like(_src_dec)

# Setup src record array
srcs = np.vstack((_src_t, _dt[:, 0], _dt[:, 1], src_ra, _src_dec, src_w_theo))

# `np.rec` is the public home of `fromarrays`; `np.core` is private and
# deprecated in newer NumPy versions
srcs = np.rec.fromarrays(srcs, names=names,
                         formats=len(names) * ["float64"])
args = {"nb": nb, "srcs": srcs}

# We used 5 srcs and 10 events, so we just repeat the ras once
# This is not very obvious on how to scale to arbitrary Ns and nsrcs
# I'm not very sure here, how many events to inject to exactly match the cases
# above.
# Here we just have 2 evts per window and still have ns of 10, even though
# signal should get downweighted to 1/5 of the two cases above per source.
# NOTE: modifies the events `X` from the single source cell in place.
# N // nsrcs events per source (= 2 for N = 10 events and 5 sources).
X["ra"] = np.repeat(src_ra, repeats=N // nsrcs)

# Scan a single LLH for the chosen data above
n_ns = 500
xmin, xmax = 0, 2 * N
ns = np.linspace(xmin, xmax, n_ns)
lnllh = np.empty(n_ns)
lnllh_grad = np.empty(n_ns)
for i in range(n_ns):
    # The gradient comes back as a sequence (ns is the only fit parameter),
    # so pick its single entry explicitly as in the ln-LLH ratio section
    lnllh[i], grad_i = grbllh.lnllh_ratio(X, ns[i], args)
    lnllh_grad[i] = grad_i[0]

# Manual "fit" by scanning the maximum
ns_max = ns[np.argmax(lnllh)]

_, ax = plot_llh(ns, lnllh, lnllh_grad, ns_max, xmin, xmax)
ax[0].set_title("LLH. ns best fit {:.3f}".format(ns_max))
plt.show()

Again check with the class function

In [None]:
# Seed for the fitter
ns0 = 1

# Fit ns with the class method, using the events, args and bounds from above.
# NOTE(review): the last, empty dict presumably holds extra minimizer
# options -- confirm with the `fit_lnllh_ratio` signature.
ns_bf, TS = grbllh.fit_lnllh_ratio(X, ns0, args, bounds, {})

# Re-plot the scanned curves from the cell above, now marking the fitted ns
_, ax = plot_llh(ns, lnllh, lnllh_grad, ns_bf, xmin, xmax)
ax[0].set_title("LLH. ns best fit {:.3f}".format(ns_bf))
plt.show()

## Signal Injector

In [None]:
# Define a simple stripped version of injector for single method tests
dec = np.deg2rad([-60., -30., 0, 30., 60.])
sin_dec = np.sin(dec)
# Source record array with the declination as its only field.
# `np.rec` is the public home of `fromarrays`; `np.core` is private and
# deprecated in newer NumPy versions
srcs = np.rec.fromarrays([dec], names="dec")

# Only set the src params we need here manually
sig_inj = SigInj.SignalInjector(gamma=2., mode="band",
                               inj_width=np.deg2rad(5.))
# Private attributes are set directly instead of going through `fit`, so the
# single methods can be tested without MC
sig_inj._srcs = srcs
sig_inj._nsrcs = len(srcs)

### Injection bands and omega calculation

#### Mode "band"

In [None]:
# Select band mode with 5 degree width and let the injector recompute its
# band edges and solid angles for the sources set above
sig_inj._inj_width = np.deg2rad(5.)
sig_inj._mode = "band"
sig_inj._set_solid_angle()

_min_dec = sig_inj._min_dec
_max_dec = sig_inj._max_dec
_omega = sig_inj._omega

# Left: bands in declination. Right: the same bands in sin(declination)
fig, (axl, axr) = plt.subplots(1, 2, figsize=(10, 4))

# Dec
# Solid lines are the source declinations, dashed lines the band edges
axl.hlines(dec, 0, 2 * np.pi, linestyles="-", colors="C1")
axl.hlines(_min_dec, 0, 2 * np.pi, linestyles="--", colors="#353132")
axl.hlines(_max_dec, 0, 2 * np.pi, linestyles="--", colors="#353132")

# Shade the region between the lower and upper edge of each band
_x = np.array([0, 2 * np.pi])
for mind, maxd in zip(_min_dec, _max_dec):
    axl.fill_between(_x, [mind, mind], [maxd, maxd], color="C7", alpha=.25)

axl.set_xlim(0, 2 * np.pi)
axl.set_ylim(-np.pi / 2., np.pi / 2.)
axl.set_title("dec bands")

# Sindec
# Same as on the left, with all declinations transformed to sin(dec)
axr.hlines(np.sin(dec), 0, 2 * np.pi, linestyles="-", colors="C1")
axr.hlines(np.sin(_min_dec), 0, 2 * np.pi, linestyles="--", colors="#353132")
axr.hlines(np.sin(_max_dec), 0, 2 * np.pi, linestyles="--", colors="#353132")

_x = np.array([0, 2 * np.pi])
for mind, maxd in zip(np.sin(_min_dec), np.sin(_max_dec)):
    axr.fill_between(_x, [mind, mind], [maxd, maxd], color="C7", alpha=.25)

axr.set_xlim(0, 2 * np.pi)
axr.set_ylim(-1., 1.)
axr.set_title("sin dec bands")

plt.tight_layout()
plt.show()

#### Mode "circle"

In [None]:
# Only for printing the values below, plot is not able to test correctness
r = np.deg2rad(15.)
sig_inj._mode = "circle"
sig_inj._inj_width = r
sig_inj._set_solid_angle()
_omega = sig_inj._omega

# Now make the circles with wrapping
fig, ax = plt.subplots(1, 1)
# Spread the sources equally in right ascension, for plotting only
ra = np.linspace(0, 2. * np.pi, len(dec))

# Source declinations and the lines one radius below and above them. They are
# the same for every source, so they are drawn once instead of once per loop
# pass. The unused circle coordinates `t`, `x`, `y` were dropped, the circles
# are drawn by the helper below.
ax.hlines(dec, 0, 2 * np.pi, linestyles="--", colors="C1")
ax.hlines(dec - r, 0, 2 * np.pi, linestyles="--", colors="#353132")
ax.hlines(dec + r, 0, 2 * np.pi, linestyles="--", colors="#353132")

for deci, rai in zip(dec, ra):
    hlp.circle_on_skymap(rai, deci, r, ax, flat=True, color="C0", lw=3)

ax.set_xlim(0, 2 * np.pi)
ax.set_ylim(-np.pi / 2., np.pi / 2.)
ax.set_title("circles around sources")

plt.tight_layout()
plt.show()

# Compare to the solid angle of a spherical cap with opening angle r
print("Omegas are\n", _omega)
print("  in percent of the sky\n", _omega / 4. / np.pi * 100, "%")
test = 2 * np.pi * (1. - np.cos(r))
print("Omegas should be\n", test)
print("  in percent of the sky\n", test / 4. / np.pi * 100, "%")

#### Sun and Moon

Test sun, moon and obvious cases of half and full sphere.
Sun and moon from https://en.wikipedia.org/wiki/Solid_angle#Sun_and_Moon.

In [None]:
# Circle mode: `_inj_width` is used as the circle radius (see the circle mode
# cell above), so the values below are set to half of each diameter
sig_inj._mode = "circle"

print("")
# Half of 9.35e-3 rad. NOTE(review): presumably the sun's angular diameter
# from the linked Wikipedia article -- confirm.
sun = 9.35e-3 / 2.
sig_inj._inj_width = sun
sig_inj._set_solid_angle()
# The same width applies to all sources, so only the first omega is printed
print("Sun has ", sig_inj._omega[0])
print("Wiki says 6.87×10−5 sr")
# Half of 9.22e-3 rad. NOTE(review): presumably the moon's angular diameter
# from the same article -- confirm.
moon = 9.22e-3 / 2.
sig_inj._inj_width = moon
sig_inj._set_solid_angle()
print("Moon has ", sig_inj._omega[0])
print("Wiki says 6.67×10−5 sr")

# Half and full sphere
# Printed as a fraction of 4pi, so 0.5 and 1 are expected
half = np.pi / 2.
full = np.pi
sig_inj._inj_width = half
sig_inj._set_solid_angle()
print("Half sphere 4pi", sig_inj._omega[0] / 4. / np.pi)
sig_inj._inj_width = full
sig_inj._set_solid_angle()
print("Full sphere in 4pi", sig_inj._omega[0] / 4. / np.pi)

### Time signal sampling

#### Time sampling - Check multiple window sampling

In [None]:
# Make some src times and tranges (repeated to get multiple times in there)
# Ten windows [0, 100], [400, 500], ..., [3600, 3700], one row per window
dt = 100. * np.vstack((np.arange(0, 39, 4), np.arange(1, 40, 4))).T
# One source time per window, spaced by 100, each repeated twice
src_t = np.repeat(50000. + 100 * np.arange(0, len(dt)), 2)
dt = np.repeat(dt, 2, axis=0)
# NOTE(review): `rndgen` is not passed to `_sample_times` below -- confirm
# whether the injector uses it at all.
rndgen = check_random_state(3537)

# One sampled time per (src_t, dt) row
times = sig_inj._sample_times(src_t, dt)

print("dt.T\n", dt.T)
print("src_t\n", src_t)
print("sampled times\n", times)

#### Time sampling - Check against time signal pdf

**Attention:** Signal is sampled in the uniform region only.
The sampling is defined only in the uniform region of the signal PDF.
So the normalized heights don't really match because the Gaussian edges steal normalization.

This is OK, as the time PDF has no real separation power.
It acts more like a theta function, cutting out the region around a source.
Only if we would assume that the signal PDF had a different shape from uniform (and thus from the BG PDF) would we have real separation power in subregions of the on-time interval.

In [None]:
# The copy-pasted multi-window setup (`dt`, `src_t`) was dropped here: `dt` is
# overwritten below and `src_t` is not used in this cell.
# NOTE(review): `rndgen` is not passed to `_sample_times` below -- confirm
# whether the injector uses it at all.
rndgen = check_random_state(3537)

# Arbitrary start date from data
nsrcs = 1
t0 = 50000.
t0_sec = t0 * secinday

# dt from t0 in seconds, clip at 4 sigma
dt = 200
nsig = 4.

# Make t values for plotting in MJD around t0
clip = np.clip(dt, 2, 30) * nsig
trange = np.array([-clip, dt + clip]).reshape(nsrcs, 2)
ntrials = int(1e4)

# Sample one time per trial, always from the same window [0, dt] around t0
_src_t = np.repeat(t0, ntrials)
_dt = np.repeat([[0., dt]], ntrials, axis=0)
trials = sig_inj._sample_times(_src_t, _dt)

# Plot them in together with the PDFs
def time_bg_pdf(t, t0, a, b):
    """
    Uniform background time PDF in the window [a, b] around t0.

    Parameters
    ----------
    t : array-like
        Times in MJD.
    t0 : float
        Reference time in MJD.
    a, b : float
        Lower and upper window edge in seconds relative to ``t0``.

    Returns
    -------
    pdf : array-like
        ``1 / (b - a)`` inside the window, zero outside.
    """
    # Normalize relative to t0 in seconds (first multiply avoids rounding?)
    _t = t * secinday - t0 * secinday
  
    # Builtin `float` replaces the removed `np.float` alias
    pdf = np.zeros_like(_t, dtype=float)
    uni = (_t >= a) & (_t <= b)
    pdf[uni] = 1. / (b - a)
    return pdf

def time_sig_pdf(t, t0, dt, nsig=4):
    """
    Signal time PDF: uniform in [0, dt] with Gaussian edges on both sides.

    Parameters
    ----------
    t : array-like
        Times in MJD.
    t0 : float
        Reference time in MJD.
    dt : float
        Length of the uniform part in seconds, must not be negative.
    nsig : float, optional
        The Gaussian edges are cut off at ``nsig`` sigma, the PDF is zero
        beyond that. (default: 4)

    Returns
    -------
    pdf : array-like
        PDF values, normalized over the whole definition range.

    Raises
    ------
    ValueError
        If ``dt`` is negative.
    """
    if dt < 0:
        raise ValueError("dt must not be negative.")

    # Normalize relative to t0 in seconds (first multiply avoids rounding?)
    _t = t * secinday - t0 * secinday
    
    # Constrain sig_t to [2, 30]s regardless of uniform time window
    sig_t = np.clip(dt, 2, 30)
    sig_t_clip = nsig * sig_t
    gaus_norm = (np.sqrt(2 * np.pi) * sig_t)
    
    # Split in def regions gaus rising, uniform, gaus falling and zero
    gr = (_t < 0) & (_t >= -sig_t_clip)
    gf = (_t > dt) & (_t <= dt + sig_t_clip)
    uni = (_t >= 0) & (_t <= dt)
    
    # Builtin `float` replaces the removed `np.float` alias
    pdf = np.zeros_like(t, dtype=float)
    pdf[gr] = scs.norm.pdf(_t[gr], loc=0, scale=sig_t)
    pdf[gf] = scs.norm.pdf(_t[gf], loc=dt, scale=sig_t)
    # Connect smoothly with the gaussians
    pdf[uni] = 1. / gaus_norm
    
    # Normalize whole distribution: area below the two cut off Gaussian
    # halves plus the area below the uniform part
    dcdf = (scs.norm.cdf(dt + sig_t_clip, loc=dt, scale=sig_t) -
            scs.norm.cdf(-sig_t_clip, loc=0., scale=sig_t))
    norm = dcdf + dt / gaus_norm
    
    return pdf / norm


# Plot the pdfs
t = np.linspace(t0_sec + trange[:, 0], t0_sec + trange[:, 1], 200) / secinday
bg_pdf = time_bg_pdf(t, t0, -clip, dt + clip)
sig_pdf = time_sig_pdf(t, t0, dt, nsig)

# Plot in normalized time
_t = t * secinday - t0 * secinday
plt.plot(_t, bg_pdf, "C0-")
plt.plot(_t, sig_pdf, "C1-")
plt.axvline(dt, 0, 1, color="C3", ls="--")
plt.axvline(0, 0, 1, color="C2", ls="--")

# Plot injected events from all trials, relative times
times = (trials - t0) * secinday
# `density=True` replaces the `normed=True` keyword, which newer Matplotlib
# versions no longer accept
_ = plt.hist(times, bins=50, density=True, color=dg, alpha=.25)

plt.xlabel("Time relative to t0 in sec")
plt.ylim(0, None);
plt.tight_layout()

# plt.savefig("./data/figs/signal_events_time_sampled.png", dpi=200)

plt.show()

### Compare to mhuber raw_flux and event selection

**We need to change the kernel to Python 2 here, to load skylab**

No modifications have been done to skylab code.

The conversion from flux to fluence is simply the lack of livetime in the weights.
So we obtain skylab fluence by just repeating the steps from the code, but dropping the multiplication with the livetime.

The event selection is not affected by the livetime, so we don't have to modify the code itself.

In [None]:
# Five test sources, spread in right ascension, declination and time
nsrc = 5
src_ra = np.linspace(0, 2. * np.pi, nsrc)
src_dec = np.deg2rad([-60., -30., 0, 30., 60.])
src_t = np.linspace(50000, 50300, nsrc)
# Same time window [0, 200] for every source.
# Builtin `float` replaces the `np.float` alias, which was removed from NumPy
dt0 = np.zeros(nsrc, dtype=float)
dt1 = np.zeros(nsrc, dtype=float) + 200.
w_theo = np.ones(nsrc)
src_names = ["ra", "dec", "t", "dt0", "dt1", "w_theo"]
# `np.rec` is the public home of `fromarrays`; `np.core` is private and
# deprecated in newer NumPy versions
srcs = np.rec.fromarrays(np.vstack((src_ra, src_dec, src_t,
                                    dt0, dt1, w_theo)),
                         names=src_names)

# Only set the src params we need here manually
sig_inj = SigInj.SignalInjector(gamma=2., mode="band",
                               inj_width=np.arcsin(0.1))

# Simulate skylab's band selection (debug flag only)
sig_inj._skylab_band = True
# Different sample sizes
mc_dict = {0: mc, 1: mc[::10]}
# NOTE: this rebinds the global `livetime` to a dict with one entry per
# sample. Cells that use `livetime` as a number can't be re-run after this
# one without redefining it.
livetime = {0: 340., 1: 34.}
sig_inj.fit(srcs, mc_dict, exp.dtype.names)

print("\nRaw Fluence: {}".format(sig_inj._raw_fluence))

In [None]:
# Show the string representation of the injector fitted in the cell above
print(sig_inj)

In [None]:
# Imports are kept local to this cell: skylab needs the Python 2 kernel
# (see the note above), so it can't be imported at the top of the notebook.
import os
import sys
# The location of the skylab checkout can be overridden with the environment
# variable SKYLAB_PATH. The default is the original, machine specific path.
sys.path.insert(1, os.environ.get(
    "SKYLAB_PATH", "/Users/tmenne/icecube/software/skylab-mhuber"))

from skylab.ps_injector import StackingPointSourceInjector as psinj
inj = psinj(gamma=2.)
inj.fill(src_dec, mc_dict, livetime)

# Repeat skylab's weight calculation per sample, but without the livetime,
# which gives the raw fluence instead of the raw flux.
# Builtin `float` replaces the `np.float` alias, which was removed from NumPy
ow = np.empty(0, dtype=float)
omega = (inj._omega / inj.w_theo)[inj.mc_arr["src_idx"]]
print("")
for enum in mc_dict.keys():
    # Indices of the selected events belonging to the current sample
    idx = inj.mc_arr[inj.mc_arr["enum"] == enum]["idx"]
    # OneWeight times the unnormalized power law flux, per solid angle
    _ow = (inj.mc[enum]["ow"] * inj.mc[enum]["trueE"]**(-inj.gamma))[idx]
    _ow /= omega[inj.mc_arr["enum"] == enum][idx]
    ow = np.append(ow, _ow)
    print("Raw Fluence at {} : {}".format(enum, np.sum(_ow)))

print("\nRaw Fluence: {}".format(np.sum(ow)))

In [None]:
# Cross-check: do both injectors select the same number of events and end up
# with the same summed raw fluence?
same_n_selected = len(sig_inj.mc_arr) == len(inj.mc_arr)
same_raw_fluence = np.allclose(sig_inj._raw_fluence, np.sum(ow))
print("Selected events equal : {}".format(same_n_selected))
print("Raw fluences equal    : {}".format(same_raw_fluence))

### Fill Events - Plot positions

#### First: Band mode

In [None]:
src_dec = np.deg2rad([-60., -30., 0, 30., 60.])
nsrc = len(src_dec)
src_ra = np.linspace(0, 2. * np.pi, nsrc)
src_t = np.linspace(50000, 50300, nsrc)
# Builtin `float` replaces the `np.float` alias, which was removed from NumPy
dt0 = np.zeros(nsrc, dtype=float)
dt1 = np.zeros(nsrc, dtype=float) + 200.
w_theo = np.ones(nsrc)
src_names = ["ra", "dec", "t", "dt0", "dt1", "w_theo"]
# `np.rec` is the public entry point to the record array helpers
srcs = np.rec.fromarrays(np.vstack((src_ra, src_dec, src_t,
                                    dt0, dt1, w_theo)),
                         names=src_names)

# Only set the src params we need here manually
bandwidth = 0.1
gamma = 2.
sig_inj = SigInj.SignalInjector(gamma=gamma, mode="band",
                               inj_width=np.arcsin(bandwidth))
skip = 50  # Just that it plots faster
sig_inj.fit(srcs, {0: mc[::skip], 1: mc[::skip]}, exp.dtype.names)


fig, (axl, axr) = plt.subplots(1, 2, figsize=(12, 5))

# Plot each src in a different color. `plt.get_cmap` is used because
# `plt.cm.get_cmap` is no longer available in recent matplotlib versions.
# The first and last color of the resampled map are dropped.
cmap = plt.get_cmap("viridis", nsrc + 2)
colors = cmap.colors[1:-1]
bins = np.linspace(-1., 1., 101)

# Plot all MC: Selected parts must be equally filled
weights = mc["ow"] * mc["trueE"]**(-sig_inj.gamma)
h, b, _ = axl.hist(np.sin(mc[::skip]["trueDec"]), bins=bins,
                   weights=weights[::skip], alpha=0.2, color="C7")

for i in range(nsrc):
    # Get all events from all samples
    for enum, mc_i in sig_inj._MC.items():
        _enum = sig_inj.mc_arr[sig_inj.mc_arr["enum"] == enum]
        _src_m = _enum["src_idx"] == i
        _ev_idx = _enum["ev_idx"]

        # Events of sample `enum` that were selected for source `i`
        _mc_i = mc_i[_ev_idx][_src_m]
        weights = _mc_i["ow"] * _mc_i["trueE"]**(-sig_inj.gamma)
        axl.hist(np.sin(_mc_i["trueDec"]), bins=bins, weights=weights,
                 color=colors[i])
        axr.scatter(_mc_i["trueRa"], _mc_i["trueDec"], c=colors[i])

    # Source position (dashed) and band borders (solid)
    axl.axvline(np.sin(src_dec[i]), 0, 1, color="k", ls="--")
    axl.axvline(np.sin(sig_inj._min_dec[i]), 0, 1, color="k")
    axl.axvline(np.sin(sig_inj._max_dec[i]), 0, 1, color="k")
    axr.axhline(src_dec[i], 0, 1, color="k", ls="--")
    axr.axhline(sig_inj._min_dec[i], 0, 1, color="k")
    axr.axhline(sig_inj._max_dec[i], 0, 1, color="k")

# Plot outline to show that nothing is hidden
m = 0.5 * (b[:-1] + b[1:])
axl.hist(m, bins=bins, weights=h, color="k", histtype="step", linewidth=2)

axl.set_xlim(-1., 1.)
axr.set_xlim(0., 2. * np.pi)
axr.set_ylim(-np.pi/2., np.pi/2.)

axl.set_xlabel("sin dec")
axr.set_xlabel("right-ascension")
axr.set_ylabel("dec")

axl.set_title("gamma = {:.2f}".format(gamma))

fig.tight_layout()
# fig.savefig("data/figs/sig_inj_ev_selection_bands.png", dpi=200)
plt.show()

#### Second: Circle mode

In [None]:
nsrc = 5
src_ra = np.linspace(0, 2. * np.pi, nsrc)
src_dec = np.deg2rad([-80., -30., 0, 30., 80.])
src_t = np.linspace(50000, 50300, nsrc)
# Builtin `float` replaces the `np.float` alias, which was removed from NumPy
dt0 = np.zeros(nsrc, dtype=float)
dt1 = np.zeros(nsrc, dtype=float) + 200.
w_theo = np.ones(nsrc)
src_names = ["ra", "dec", "t", "dt0", "dt1", "w_theo"]
# `np.rec` is the public entry point to the record array helpers
srcs = np.rec.fromarrays(np.vstack((src_ra, src_dec, src_t,
                                    dt0, dt1, w_theo)),
                         names=src_names)

# Only set the src params we need here manually
inj_width = np.deg2rad(10)
gamma = 2.
sig_inj = SigInj.SignalInjector(gamma=gamma, mode="circle",
                               inj_width=inj_width)
skip = 50  # Just that it goes faster
sig_inj.fit(srcs, {0: mc[::skip], 1: mc[::skip]}, exp.dtype.names)


fig, (axl, axr) = plt.subplots(1, 2, figsize=(12, 5))

# `plt.get_cmap` is used because `plt.cm.get_cmap` is no longer available in
# recent matplotlib versions. First and last color are dropped.
cmap = plt.get_cmap("viridis", nsrc + 2)
colors = cmap.colors[1:-1]
# Defined here so this cell does not depend on the band mode cell having run
bins = np.linspace(-1., 1., 101)

# Plot all MC: Selected parts must be equally filled
weights = mc["ow"] * mc["trueE"]**(-sig_inj.gamma)
h, b, _ = axl.hist(np.sin(mc[::skip]["trueDec"]), bins=bins, log=True,
                   weights=weights[::skip], alpha=0.2, color="C7")

# Plot each src in a different color
for i in range(nsrc):
    # Get all events from all samples
    for enum, mc_i in sig_inj._MC.items():
        _enum = sig_inj.mc_arr[sig_inj.mc_arr["enum"] == enum]
        _src_m = _enum["src_idx"] == i
        _ev_idx = _enum["ev_idx"]

        # Events of sample `enum` that were selected for source `i`. They are
        # drawn only once per sample in the scatter plot.
        _mc_i = mc_i[_ev_idx][_src_m]
        weights = _mc_i["ow"] * _mc_i["trueE"]**(-sig_inj.gamma)
        axl.hist(np.sin(_mc_i["trueDec"]), bins=bins, weights=weights,
                 log=True, color=colors[i])
        axr.scatter(_mc_i["trueRa"], _mc_i["trueDec"], c=colors[i])

    # Source position (dashed) and declination extent of the circle (solid)
    axl.axvline(np.sin(src_dec[i]), 0, 1, color="k", ls="--")
    axl.axvline(np.sin(src_dec[i] - inj_width), 0, 1, color="k")
    axl.axvline(np.sin(src_dec[i] + inj_width), 0, 1, color="k")

    # Declination extent in the right plot, clipped at the poles
    axr.axhline(max(-np.pi / 2., srcs["dec"][i] - sig_inj.inj_width),
                0, 1, color="C7", ls="--")
    axr.axhline(min(np.pi / 2., srcs["dec"][i] + sig_inj.inj_width),
                0, 1, color="C7", ls="--")

    hlp.circle_on_skymap(srcs["ra"][i], srcs["dec"][i], inj_width, axr,
                         flat=True, color="k", ls="-", marker="")

axr.scatter(src_ra, src_dec, color="k", marker="o", edgecolor="k",
            facecolor="w")

# Plot outline to show that nothing is hidden
m = 0.5 * (b[:-1] + b[1:])
axl.hist(m, bins=bins, weights=h, color="k", histtype="step",
         linewidth=2, log=True)

axl.set_xlim(-1., 1.)
axr.set_xlim(0., 2. * np.pi)
axr.set_ylim(-np.pi/2., np.pi/2.)

axl.set_xlabel("sin dec")
axr.set_xlabel("right-ascension")
axr.set_ylabel("dec")
axl.set_title("gamma = {:.2f}".format(gamma))

fig.tight_layout()
# fig.savefig("data/figs/sig_inj_ev_selection_circle.png", dpi=200)
plt.show()

In [None]:
nsrc = 5
src_ra = np.linspace(0, 2. * np.pi, nsrc)
# Shuffle the right-ascension values among the sources. This uses the
# unseeded global numpy state, so the plot differs from run to run.
np.random.shuffle(src_ra)
src_dec = np.deg2rad([-60., -30., 0, 30., 60.])
src_t = np.linspace(50000, 50300, nsrc)
# Builtin `float` replaces the `np.float` alias, which was removed from NumPy
dt0 = np.zeros(nsrc, dtype=float)
dt1 = np.zeros(nsrc, dtype=float) + 200.
w_theo = np.ones(nsrc)
src_names = ["ra", "dec", "t", "dt0", "dt1", "w_theo"]
# `np.rec` is the public entry point to the record array helpers
srcs = np.rec.fromarrays(np.vstack((src_ra, src_dec, src_t,
                                    dt0, dt1, w_theo)),
                         names=src_names)

# Only set the src params we need here manually
inj_width = np.deg2rad(11)
gamma = 2.
sig_inj = SigInj.SignalInjector(gamma=gamma, mode="circle",
                               inj_width=inj_width)
skip = 50  # Just that it goes faster
sig_inj.fit(srcs, {0: mc[::skip], 1: mc[::skip]}, exp.dtype.names)


sm = amp_plt.skymap()
fig, ax = sm.figure(tex=False)

# `plt.get_cmap` is used because `plt.cm.get_cmap` is no longer available in
# recent matplotlib versions. First and last color are dropped.
cmap = plt.get_cmap("viridis", nsrc + 2)
colors = cmap.colors[1:-1]

# Plot each src in a different color
for i in range(nsrc):
    # Get all events from all samples
    for enum, mc_i in sig_inj._MC.items():
        _enum = sig_inj.mc_arr[sig_inj.mc_arr["enum"] == enum]
        _src_m = _enum["src_idx"] == i
        _ev_idx = _enum["ev_idx"]

        # Events of sample `enum` that were selected for source `i`
        _mc_i = mc_i[_ev_idx][_src_m]
        x, y = amp_plt.EquCoordsToMapCoords(_mc_i["trueRa"],
                                            _mc_i["trueDec"])
        ax.scatter(x, y, c=colors[i])

    hlp.circle_on_skymap(srcs["ra"][i], srcs["dec"][i], inj_width, ax,
                         flat=False, color="k", ls="-", marker="")

x, y = amp_plt.EquCoordsToMapCoords(src_ra, src_dec)
ax.scatter(x, y, color="k", marker="o", edgecolor="k",
           facecolor="w")

ax.set_title("gamma = {:.2f}".format(gamma))

# fig.savefig("data/figs/sig_inj_ev_selection_circle_skymap.png", dpi=200)

fig.tight_layout()
plt.show()

### Inject events

In [None]:
src_dec = np.deg2rad([-80., -30., 0, 30., 80.])
nsrcs = len(src_dec)
src_ra = np.linspace(0, 2. * np.pi, nsrcs)
src_t = np.linspace(50000, 50300, nsrcs)
# Use start = 0 only, plotter can't handle negative start times.
# Each row of `dt` is [0, 100 * (i + 1)] for source i.
dt = np.vstack((np.repeat([0.], nsrcs), 100. * np.arange(1, nsrcs + 1))).T
dt0 = dt[:, 0]
dt1 = dt[:, 1]
w_theo = np.ones(nsrcs)
src_names = ["ra", "dec", "t", "dt0", "dt1", "w_theo"]
# `np.rec` is the public entry point to the record array helpers
srcs = np.rec.fromarrays(np.vstack((src_ra, src_dec, src_t,
                                    dt0, dt1, w_theo)),
                         names=src_names)

# Only set the src params we need here manually
gamma = 2.
mode = "band"  # Change here to switch circle vs. band mode below
if mode == "band":
    inj_width = np.deg2rad(5.)
elif mode == "circle":
    inj_width = np.deg2rad(10.)
else:
    # Fail early instead of silently reusing an `inj_width` that is still
    # defined from one of the cells above
    raise ValueError("Choose 'band' or 'circle'.")
sig_inj = SigInj.SignalInjector(gamma=gamma, mode=mode,
                               inj_width=inj_width)
sig_inj.fit(srcs, mc, exp.dtype.names)

# Generator yielding the injected events of one trial per `next` call
ngen = 10000
gen = sig_inj.sample(ngen, poisson=False)

#### Spatial - Compare using band and circle with the same bandwidth

In circle mode it looks like we selected way more events than in band mode, but we have way more MC events than we select.
So we just select the same number of events, but from a much denser region, so the plot looks crowded in the circle case.

In [None]:
# Draw one trial and split its injected events by source
ninj, inj, idx = next(gen)

# One boolean mask per source, selecting the injected events assigned to it
src_masks = [idx["src_idx"] == j for j in range(nsrcs)]
mc_pool = sig_inj._MC[-1]

# Injected (measured) events and the matching MC truth entries per source
evts_per_src = [inj[_mask] for _mask in src_masks]
mc_truth_per_src = [mc_pool[idx[_mask]["ev_idx"]] for _mask in src_masks]
trueRa_per_src = [_truth["trueRa"] for _truth in mc_truth_per_src]
trueDec_per_src = [_truth["trueDec"] for _truth in mc_truth_per_src]

print("[true] Injected per src: ", [len(_ra) for _ra in trueRa_per_src])
print("[meas] Injected per src: ", [len(_ev) for _ev in evts_per_src])

In [None]:
# Declination borders of the injection region per source. `mode` is checked
# first, so an invalid value does not leave an empty figure behind.
if mode == "band":
    _min_dec = sig_inj._min_dec
    _max_dec = sig_inj._max_dec
elif mode == "circle":
    _min_dec = src_dec - np.repeat(sig_inj.inj_width, nsrcs)
    _max_dec = src_dec + np.repeat(sig_inj.inj_width, nsrcs)
else:
    raise ValueError("Choose 'band' or 'circle'.")

# `plt.get_cmap` is used because `plt.cm.get_cmap` is no longer available in
# recent matplotlib versions. First and last color are dropped.
cmap = plt.get_cmap("viridis", nsrcs + 2)
colors = cmap.colors[1:-1]

fig, (axl, axr) = plt.subplots(1, 2, figsize=(12, 5))

for i in range(nsrcs):
    trueRa = trueRa_per_src[i]
    trueDec = trueDec_per_src[i]
    _ev = evts_per_src[i]
    n_sel = len(_ev)

    # Rotate the true directions again to check that all truths end up at
    # the src position.
    # NOTE(review): assumes `rotator` applies the rotation that maps the first
    # coordinate pair onto the second one to the third pair -- confirm.
    ra3, dec3 = rotator(trueRa, trueDec,
                        np.repeat(src_ra[i], n_sel),
                        np.repeat(src_dec[i], n_sel),
                        trueRa, trueDec)

    # Left: injected (measured) positions, right: MC truth positions
    axl.scatter(_ev["ra"], _ev["dec"], marker=".", color=colors[i], alpha=0.1)
    axr.scatter(trueRa, trueDec, marker=".", color=colors[i], alpha=0.1)

    axl.scatter(ra3, dec3, marker=".", color="w", edgecolor="k", s=100)
    axr.scatter(ra3, dec3, marker=".", color="w", edgecolor="k", s=100)

    # Source declination (dashed) and injection region borders (solid)
    axl.axhline(src_dec[i], 0, 1, color="k", ls="--")
    axl.axhline(_min_dec[i], 0, 1, color="k")
    axl.axhline(_max_dec[i], 0, 1, color="k")
    axr.axhline(src_dec[i], 0, 1, color="k", ls="--")
    axr.axhline(_min_dec[i], 0, 1, color="k")
    axr.axhline(_max_dec[i], 0, 1, color="k")

axl.set_xlabel("right-ascension")
axl.set_ylabel("dec")
axr.set_xlabel("right-ascension")
axr.set_ylabel("dec")
axl.set_title("Rotated measured and true positions")
axr.set_title("Rotated true and true positions")

fig.tight_layout()
# fig.savefig("data/figs/signal_events_radec_sampled_" +
#             "rotated_{}.png".format(mode), dpi=200)
plt.show()

In [None]:
def cos_dist(src_ra, src_dec, ev_ra, ev_dec):
    """
    Cosine of the great circle distance between two sets of directions.

    Parameters
    ----------
    src_ra, src_dec : float or array-like
        Right-ascension and declination of the first direction(s) in radian.
    ev_ra, ev_dec : float or array-like
        Right-ascension and declination of the second direction(s) in radian.

    Returns
    -------
    cos_dist : float or array-like
        Cosine of the space angle, clipped to [-1, 1] so that rounding errors
        can not push it out of the valid range of `arccos`.
    """
    cos_psi = np.cos(src_ra - ev_ra)
    cos_psi = cos_psi * np.cos(src_dec) * np.cos(ev_dec)
    cos_psi = cos_psi + np.sin(src_dec) * np.sin(ev_dec)
    return np.clip(cos_psi, -1., 1.)


fig, ax = plt.subplots(1, 1, figsize=(10, 5))

# Pool of MC event to select from
ra = sig_inj._MC[-1]["ra"]
dec = sig_inj._MC[-1]["dec"]
trueRa = sig_inj._MC[-1]["trueRa"]
trueDec = sig_inj._MC[-1]["trueDec"]
w = sig_inj._sample_w
dist = cos_dist(trueRa, trueDec, ra, dec)
_ = plt.hist(np.rad2deg(np.arccos(dist)), bins=100, weights=w,
             range=[0., 180.], log=True, label="All selected")

# All currently injected events
ev_idx = idx["ev_idx"]  # Select only injected from all
ra = ra[ev_idx]
dec = dec[ev_idx]
trueRa = trueRa[ev_idx]
trueDec = trueDec[ev_idx]
w = w[ev_idx]
dist = cos_dist(trueRa, trueDec, ra, dec)
_ = plt.hist(np.rad2deg(np.arccos(dist)), bins=100, weights=w,
             range=[0., 180.], log=True, label="Injected")
plt.title("Space angle distribution between true and measured direction." +
          " Currently injected: {} evts".format(ngen))

plt.xlabel("delta Psi in degree")
plt.ylabel("sum of sample weights per bin")
plt.legend(loc="upper right")
plt.tight_layout()
# `dpi` belongs to `savefig`; as an argument of `str.format` it is ignored
plt.savefig("data/figs/signal_events_delta_psi_comp_{}.png".format(mode),
            dpi=150)
plt.show()

#### Times

**Note:** The number of times per window (= events drawn per source) are distributed like the signal declination distribution, because we made up sources which are distributed linearly ascending in time and in declination, so they simply correlate.
It's a nice double check in plot.
But also compare to the histograms in the "Fill Event" cells above.

In [None]:
# Build a GRBLLH object; below only its time PDF is used
sin_dec_bins = np.linspace(-1, 1, 50)
logE_bins = np.linspace(1, 10, 40)

# Settings for the single PDFs and for the LLH itself
spatial_pdf_args = dict(bins=sin_dec_bins, k=3, kent=True)
energy_pdf_args = dict(bins=[sin_dec_bins, logE_bins], gamma=2.,
                       fillval="col", interpol_log=False)
time_pdf_args = dict(nsig=4., sigma_t_min=2., sigma_t_max=30.)
llh_args = dict(sob_rel_eps=0., sob_abs_eps=1e-4)

grbllh = LLH.GRBLLH(
    X=_exp,
    MC=_mc,
    spatial_pdf_args=spatial_pdf_args,
    energy_pdf_args=energy_pdf_args,
    time_pdf_args=time_pdf_args,
    llh_args=llh_args,
)

In [None]:
# Draw the next trial and collect the injected event times per source
ninj, inj, idx = next(gen)

times_per_src = [inj[idx["src_idx"] == j]["timeMJD"] for j in range(nsrcs)]

print("[true] Injected per src: ", [len(_times) for _times in times_per_src])


# Show the times of each source in its own centered time frame in seconds,
# together with the signal PDFs
vline_scale = 20.

# Plot range per source: the time window, extended on both sides by `nsig`
# times the window length, where the length is clipped to [2, 30] first
nsig = 4.
clip = nsig * np.clip(np.diff(dt, axis=1), 2, 30)
trange = np.hstack((dt[:, [0]] - clip, dt[:, [1]] + clip))

for i, ti in enumerate(times_per_src):
    src_color = "C{:1d}".format(i)
    t_sec = ti * secinday - src_t[i] * secinday
    _t = np.linspace(trange[i, 0], trange[i, 1], 100)
    _t_mjd = _t / secinday + src_t[i]
#     _pdf = time_sig_pdf(_t_mjd, src_t[i],
#                         dt[i, 1] - dt[i, 0]).flatten()
    _pdf = grbllh._soverb_time(_t_mjd, src_t[i], dt[i]).ravel()
    # The first source fixes the y scale that is used for all sources
    if i == 0:
        _max = 1.05 * np.amax(_pdf)
    # Small ticks for event positions below the 0 line, one row per source
    plt.vlines(t_sec, -i * _max / vline_scale, -(i+1) * _max / vline_scale,
               linestyles="-", colors=src_color)
    # Dotted lines mark the borders of the time window
    plt.vlines(dt[i], _max, -nsrcs * _max / vline_scale, linestyles=":",
               colors=src_color)
    plt.plot(_t, _pdf, src_color + "-", label="{:d} evts".format(len(ti)))

plt.axhline(0, 0, 1, color="C7")
plt.xlim(1.1 * trange[1, 0], trange[-1, 1] - 0.1 * trange[-1, 0])
plt.ylim(-nsrcs * _max / vline_scale, _max)
plt.xlabel("time in sec, centered at src t0")
plt.title("Injected evts per window, total of {} signal evts.".format(ninj))
plt.legend(loc="upper right")
plt.tight_layout()
# plt.savefig("data/figs/signal_events_time_sampled_multi.png", dpi=200)
plt.show()

#### Check expected vs injected evts per src

We inject events and compare the fraction of events injected per source with the total src weight, which is the product of the detector acceptance (signal weighted) and the intrinsic weight.
If we sample in a narrow band around each src we expect the fraction of sampled events to match the total source weights.

**Note:** Circle mode doesn't match when the radius is very small.
That is because we inject from less events and are very dominated by initial mc fluctuations, inserting only a tiny fraction of events.
If we increase the circle size (0.1 in sindec) we get similarly stable results as in band mode.

In [None]:
src_dec = np.deg2rad([-80., -60., -45., -30., -15., 0,
                      15., 30., 45., 60., 80.])
nsrcs = len(src_dec)
src_ra = np.linspace(0, 2. * np.pi, nsrcs)
src_t = np.linspace(50000, 50300, nsrcs)
# Use start = 0 only, plotter can't handle negative start times
dt = np.vstack((np.repeat([0.], nsrcs), 100. * np.arange(1, nsrcs + 1))).T
dt0 = dt[:, 0]
dt1 = dt[:, 1]

w_theo = np.ones(nsrcs)
# Try to see the deviation from the MC, but not from injected evts
# w_theo = np.arange(nsrcs, dtype=float) + 1.

src_names = ["ra", "dec", "t", "dt0", "dt1", "w_theo"]
# `np.rec` is the public entry point to the record array helpers
srcs = np.rec.fromarrays(np.vstack((src_ra, src_dec, src_t,
                                    dt0, dt1, w_theo)),
                         names=src_names)
gamma = 1.8
mode = "band"
# Make it narrow so only events in close proximity to the srcs are selected
# so the fraction matches the total src weight.
inj_width = np.arcsin(0.02)
sig_inj = SigInj.SignalInjector(gamma=gamma, mode=mode,
                               inj_width=inj_width)
sig_inj.fit(srcs, mc, exp.dtype.names)

ngen = 10000
gen = sig_inj.sample(ngen, poisson=False)

# Extract injected events per source
ninj, inj, idx = next(gen)

# Use the true declinations, as the variable name says. Only the number of
# events per source is used below, so the counts are the same as before.
trueDec_per_src = []
for j in range(nsrcs):
    per_src_ev_ids = idx[idx["src_idx"] == j]["ev_idx"]
    trueDec_per_src.append(sig_inj._MC[-1][per_src_ev_ids]["trueDec"])

evts_per_src = list(map(len, trueDec_per_src))
print("[true] Injected per src: ", evts_per_src)

In [None]:
# Get the src weights (detector * theo weight) from a GRBLLH object, which
# uses the same spectral index as the signal injector
_sin_dec_edges = np.linspace(-1., 1., 50)
_logE_edges = np.linspace(1., 10., 50)
_energy_bins = np.vstack((_sin_dec_edges, _logE_edges))

grbllh = LLH.GRBLLH(
    exp, mc,
    spatial_pdf_args={"bins": _sin_dec_edges},
    energy_pdf_args={"bins": _energy_bins, "gamma": sig_inj.gamma},
)

_src_w = grbllh.src_weights(srcs["dec"], src_w_theo=w_theo)
norm_src_w = _src_w.ravel()

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(8, 5))
sin_dec = np.linspace(-1., 1., 201)
bins = np.linspace(-1., 1., 201)

# Normed src weights, sum=1
ax.plot(np.sin(srcs["dec"]), norm_src_w, ls="", marker="o", color="#353132",
        label="norm. src_w = acc_w * theo_w")

# Spline must be pseudo weighted to match the single weights (PMF style)
pseudo_norm = np.sum(np.exp(grbllh._spatial_signal_spl(np.sin(srcs["dec"]))))
y = np.exp(grbllh._spatial_signal_spl(sin_dec)) / pseudo_norm
ax.plot(sin_dec, y, color="#353132")

# Make MC hist (origin of spline). `density` replaces the `normed` keyword,
# which was removed from `np.histogram`; for the equal width bins used here
# both yield the same normalization.
weights = mc["ow"] * mc["trueE"]**(-sig_inj.gamma)
h, b = np.histogram(np.sin(mc["trueDec"]), bins=bins, weights=weights,
                    density=True)
m = 0.5 * (b[:-1] + b[1:])
_ = ax.hist(m, bins=b, weights=h / pseudo_norm, alpha=0.2, color="C7")

# Make fraction of sampled events per band
frac = np.asarray(evts_per_src) / np.sum(evts_per_src)
ax.plot(np.sin(srcs["dec"]), frac, ls="", marker="_",
        color="C1", mew=2, ms=10, label="Fraction of inj. events")
ax.vlines(np.sin(srcs["dec"]), np.zeros_like(frac), frac, zorder=5,
          color="C1", linestyle="-", lw=3)

ax.set_xlabel("sin_dec")
ax.set_ylabel("fraction of inj events")
ax.set_xlim(-1., 1.)
# ax.semilogy()
ax.legend()
ax.set_title("{} injected events, mode='{}'".format(ngen, mode))

# plt.savefig("./data/figs/sig_inj_src_w_vs_inj_fraction.png", dpi=150)
fig.tight_layout()
plt.show()

## Analysis

The analysis module grabs all the stuff from before and creates trial calculations from it.
So we test here, if everything wrapped up correctly and if we get OK looking test statistics from our trials.

A general note on how our experimental data is handled:

Before we start our analysis, we split our data into off-time and on-time data.
On-time data is data within an a-priori fixed time frame around the sources we want to test.
We exclude this data until the very end, because we don't want to bias ourselves, as there is the possibility that the signal we want to find is in that on-time data.

The off-time data is everything else and is assumed to not contain the sought after signal.
The on-time time frame should be chosen large enough to account for that.
It should definitely be larger than the time frames we test for in our analysis.

### BG only Trials

Using all the modules from above we can run trials with pure background now.

A trial is a single pseudo experiment we perform and evaluate to get an idea of the underlying statistical distribution and to build our test statistic, from which we can infer the significance of real data later.
For that we need to generate sets of pseudo-data which has background-like properties.
The `bg_injector`, `bg_rate_injector` and `rate_function` classes are used to generate these properties.

So each trial consist of the following steps, in which the source positions are always fixed and a-priori known:

1. Determine the expected number background events per source time window we test.
   This is derived from the `bg_rate_injector` which returns a list of sampled times for each time window.
   It knows the expected rate from the given `rate_function`
2. In addition to our sampled times, we need all the other event features we have on real data (positions, energy, uncertainty) , because our sampled pseudo-data should have the same properties as real data.
   These missing properties are generated by the `bg_injector` class.
3. When we sampled our pseudo-events we need to fit the LLH to this set of events and see what best fit we get.
4. We do that a lot of times and see how our best fits are distributed, which gives us a so-called test statistic that describes the distribution of LLH fits using BG only.

On background-like events we expect to get a null fit result most of the times, because no signal is present.
But out of chance, we sometimes get a combination of background-like events, that has very signal-like properties.

The test statistic built this way is then used to see how unlikely the single fit to our on-time data was and how lucky we'd have to get to observe that result by chance from pure background.

In [None]:
def _percentile_nzeros(vals, nzeros, q):
    """
    Returns the percentile q for a dataset with `vals` > 0 and `nzeros`
    entries that are zero.
    
    Parameters
    ----------
    vals : array-like
        Non-zero values.
    nzeros : int
        Number of zero trials.
    q : float
        Percentile in [0, 100].
    """
    q /= 100.
    nonzero = len(vals)
    ntot = nonzero + nzeros
    idx = int(q * ntot) - nzeros - 1
    vals = np.sort(vals)
        
    if idx < 0:
        return 0.
    else:
        return vals[idx]

In [None]:
def plot_trials(vals, ntrials, bins, ax=None,
                CHI2=True, GEN=False, SIGMA=True, **kwargs):
    """
    Plot the normalized, log-scaled histogram of trial results, where the
    trials without an entry in `vals` are added to the first bin.

    Parameters
    ----------
    vals : array-like
        Values of the non-zero trials.
    ntrials : int
        Total number of trials. ``ntrials - len(vals)`` trials are counted
        as zero trials.
    bins : array-like
        Bin edges, passed to ``np.histogram``. If `CHI2` is True, the last
        edge is used as the upper end of the plotted chi2 curves.
    ax : matplotlib axes, optional
        Axes to draw on. If None, a new figure is created. (default: None)
    CHI2 : bool, optional
        If True, fit a chi2 PDF with fixed ``loc=0`` to `vals` and plot it
        together with a chi2 PDF with one degree of freedom, both scaled by
        the fraction of non-zero trials. (default: True)
    GEN : bool, optional
        If True, replace `vals` with the same number of values sampled from
        a chi2 distribution with one degree of freedom. (default: False)
    SIGMA : bool, optional
        If True, draw horizontal lines for the 3, 4 and 5 sigma levels.
        (default: True)
    kwargs
        Passed to ``ax.hist``.

    Returns
    -------
    ax : matplotlib axes
        The axes that was drawn on.

    Raises
    ------
    ValueError
        If `vals` is empty.
    """
    if len(vals) == 0:
        raise ValueError("No values given. Maybe sample more trials.")

    def prop_in_1d_sigma(sigma):
        # Probability of a standard normal distribution within +-sigma,
        # used to draw the sigma lines
        if sigma < 0:
            raise ValueError("'sigma' must be >= 0")
        return scs.norm.cdf(sigma) - scs.norm.cdf(-sigma)

    # Compare with generated chi2 with df=1 and same nzeros. The size is taken
    # from the given values, so no variable from outside is needed.
    if GEN:
        vals = np.random.chisquare(df=1, size=len(vals))

    if ax is None:
        _, ax = plt.subplots(1, 1)

    # Add nzeros to first bin manually
    nzeros = ntrials - len(vals)
    h, b = np.histogram(vals, bins=bins)
    h[0] += nzeros

    # Draw the precomputed counts; `density` replaces the `normed` keyword,
    # which is no longer available in recent matplotlib versions
    m = 0.5 * (b[:-1] + b[1:])
    _ = ax.hist(m, b, weights=h, density=True, log=True, **kwargs)

    if CHI2:
        # Fit delta-chi2 PDF to TS
        (df, loc, scale) = scs.chi2.fit(vals, floc=0)
        chi2fit = scs.chi2(df, loc, scale)
        # Fraction of non-zero trials scales the continuous part of the PDF
        eta = len(vals) / ntrials
        x = np.linspace(0.1, bins[-1], 200)
        y = chi2fit.pdf(x) * eta
        ydf1 = scs.chi2.pdf(x, df=1, loc=0, scale=1) * eta

        # Plot fitted and dof=1 chi2 for comparison
        ax.plot(x, y, "C1-")
        ax.plot(x, ydf1, "C2--")

    # Plot sigmas with increasing shade
    if SIGMA:
        sigmas = np.arange(3, 6)
        sig_vals = [prop_in_1d_sigma(i) for i in sigmas]
        for sig, sigval in zip(sigmas, sig_vals):
            ax.axhline(1 - sigval, 0, 1, color=dg, ls="--",
                       label="{} sigma".format(sig),
                       alpha=(1 - 0.5 * (np.amax(sigmas) - sig) /
                              (np.amax(sigmas) - np.amin(sigmas))))
        ax.legend(loc="best")

    ax.set_ylabel("PDF")

    if CHI2:
        ax.set_title("Percent non-zero trials: " +
                     "{:.1f} (=eta). df={:.2f}, scale={:.2f}".format(
                      100 * len(vals) / ntrials, df, scale))
    else:
        ax.set_title("Percent non-zero trials: {:.1f} (=eta).".format(
            100 * len(vals) / ntrials))

    return ax

#### Setup

We're going through the full steps here:

1. Create bg_rate_injector with a specific rate_function.
2. Create a bg_injector injecting random data events.
3. Create LLH which is used to test our hypothesis.
4. Create some src hypotheses to test against.

In [None]:
# Seeded random state, handed to the rate function and injector objects
# created in the cells below
rndgen = np.random.RandomState(7353)

In [None]:
# Create some srcs we want to test, with some different properties.
# We don't use randomness here to have full control
nsrcs = 5
# Each row of `dt` is the time window [-20, 100 * (i + 1)] for source i
dt = np.vstack((np.repeat([-20.], nsrcs), 100. * np.arange(1, nsrcs + 1))).T

# Builtin `float` replaces the `np.float` alias, which was removed from NumPy
names = ["t", "dt0", "dt1", "ra", "dec", "w_theo"]
types = len(names) * [float]
dtype = [(_n, _t) for _n, _t in zip(names, types)]
srcs = np.empty((nsrcs, ), dtype=dtype)

# Choose times equally spaced, but away from borders
mint, maxt = np.amin(_exp["timeMJD"]), np.amax(_exp["timeMJD"])
srcs["t"] = np.linspace(mint, maxt, nsrcs + 2)[1:-1]

srcs["dt0"] = dt[:, 0]
srcs["dt1"] = dt[:, 1]

# Don't let them overlap by choosing 0 and 2pi
srcs["ra"] = np.linspace(0, 2 * np.pi, nsrcs + 1)[:-1]

# Same as with time here, do not select directly at poles
srcs["dec"] = np.arcsin(np.linspace(-1, 1, nsrcs + 2)[1:-1])

# These are just ones, they shouldn't cause problems
srcs["w_theo"] = np.ones(nsrcs, dtype=float)

print("t   : " + ", ".join("{:.2f}".format(_t) for _t in srcs["t"]))
print("dt0 : ", srcs["dt0"])
print("dt1 : ", srcs["dt1"])
print("RA  : " + ", ".join("{:.2f}".format(_ra) for _ra
                           in np.rad2deg(srcs["ra"])))
print("DEC : " + ", ".join("{:.2f}".format(_dec) for _dec
                           in np.rad2deg(srcs["dec"])))
print("wt  : ", srcs["w_theo"])

In [None]:
# Build the GRBLLH object that holds all PDF settings for the analysis
sin_dec_bins = np.linspace(-1, 1, 50)

# Fixed logE range instead of the range spanned by data and MC:
#   min(np.amin(_exp["logE"]), np.amin(mc["logE"]))
#   max(np.amax(_exp["logE"]), np.amax(mc["logE"]))
min_logE, max_logE = 1, 10
logE_bins = np.linspace(min_logE, max_logE, 40)

# Settings for the single PDFs and for the LLH itself
spatial_pdf_args = dict(bins=sin_dec_bins, k=3, kent=True)
energy_pdf_args = dict(bins=[sin_dec_bins, logE_bins], gamma=2.,
                       fillval="col", interpol_log=False)
time_pdf_args = dict(nsig=4., sigma_t_min=2., sigma_t_max=30.)
llh_args = dict(sob_rel_eps=0., sob_abs_eps=1e-4)

grbllh = LLH.GRBLLH(
    X=_exp,
    MC=_mc,
    spatial_pdf_args=spatial_pdf_args,
    energy_pdf_args=energy_pdf_args,
    time_pdf_args=time_pdf_args,
    llh_args=llh_args,
)

print(grbllh)

In [None]:
# Create a bg rate injector model
def filter_runs(run):
    """
    Decide if a run is kept, following the selection in jfeintzig's doc.

    Parameters
    ----------
    run : dict-like
        Single run entry providing the keys ``"good_i3"``, ``"good_it"``
        and ``"run"``.

    Returns
    -------
    keep : bool
        ``True`` if both good flags compare equal to ``True`` and the run
        number is not one of the explicitly excluded runs, else ``False``.
    """
    excluded = (120028, 120029, 120030, 120087, 120156, 120157)
    # All three conditions are evaluated up front, no short-circuiting
    i3_ok = run["good_i3"] == True
    it_ok = run["good_it"] == True
    not_excluded = run["run"] not in excluded
    return bool(i3_ok & it_ok & not_excluded)

# Create an injector using a goodrun list. The rate function model is
# selected below.
runlist = "data/runlists/ic86-i-goodrunlist.json"

# Choose your rate function
RFUNC = "CONST"  # "SIN", "SIN1YR", "CONST"
print("Choosing '{}' function".format(RFUNC))

if RFUNC == "SIN" or RFUNC == "SIN1YR":
    # Give fixed srcs, to use caching
    t = srcs["t"]
    trange = grbllh.time_pdf_def_range(src_t=srcs["t"], dt=dt)
    if RFUNC == "SIN":
        rate_func_obj = RateFunc.SinusRateFunction(t, trange, rndgen)
    else:
        rate_func_obj = RateFunc.Sinus1yrRateFunction(t, trange, rndgen)
elif RFUNC == "CONST":
    rate_func_obj = RateFunc.ConstantRateFunction(rndgen)
else:
    # Fail loudly, otherwise a stale `rate_func_obj` from a previous cell
    # execution (or none at all) would be used silently below
    raise ValueError("Unknown rate function '{}'. ".format(RFUNC) +
                     "Choose one of 'SIN', 'SIN1YR', 'CONST'.")

bg_rate_inj = BGRateInj.RunlistBGRateInjector(rate_func_obj, runlist,
                                              filter_runs, rndgen)

# Fit the injector to make it usable
times = _exp["timeMJD"]
rate_func = bg_rate_inj.fit(T=times, x0=None, remove_zero_runs=True)

if RFUNC != "CONST":
    print("RateFunction uses cached fmax vals:\n  - ", rate_func_obj._fmax)

In [None]:
# Choose your bg injector
BGINJ = "DATA"  # "DATA", "KDE", "UNI", "MR"
print("Choosing '{}' injector".format(BGINJ))

if BGINJ == "DATA":
    bg_inj = BGInj.DataBGInjector(rndgen)
    bg_inj.fit(_exp)

elif BGINJ == "KDE":
    # We use precached KDE values, because the model is fixed
    # Note: The original order cannot be changed now [logE, dec, sigma]
    # NOTE(security): pickle.load can execute arbitrary code, only load
    # files from a trusted source here
    with open("data/awKDE_CV/CV10_glob_bw_alpha_EXP_IC86I_CUT_sig.ll.20_" +
              "PARS_diag_True_pass2.pickle", "rb") as f:
        model_selector = pickle.load(f)
        print(model_selector.best_params_)

    bg_inj = BGInj.KDEBGInjector(rndgen)
    bg_inj.kde_model = model_selector.best_estimator_

    # We could still change the alpha, but the global bandwidth must stay fixed
    # bg_inj.kde_model.alpha = 0.3

    # Fit doesn't take long because all adaptive kernels are set.
    # NOTE(review): the fit call below is disabled, so `bounds` is currently
    # unused - confirm whether the injector works without it
    bounds = np.array([[None, None], [-np.pi / 2. , np.pi / 2.], [0, None]])
    # bg_inj.fit(_exp, bounds)

elif BGINJ == "UNI":
    bg_inj = BGInj.UniformBGInjector(rndgen)

elif BGINJ == "MR":
    bg_inj = BGInj.MRichmanBGInjector(rndgen)
    bg_inj.fit(_exp)

else:
    # Fail loudly, otherwise a stale `bg_inj` from a previous cell execution
    # (or none at all) would be used silently in the cells below
    raise ValueError("Unknown bg injector '{}'. ".format(BGINJ) +
                     "Choose one of 'DATA', 'KDE', 'UNI', 'MR'.")

In [None]:
# And now the analysis object. It gets the source record array `srcs` and
# the GRBLLH instance `grbllh`, which is later accessed as `ana.llh`.
ana = Analysis.TransientsAnalysis(srcs=srcs, llh=grbllh)

#### Manual steps

Make a single trial fit in the end while looking at all the intermediate steps.

Need to run the setup cells above.

**Here are some time background plots that should get their own category, but we leave them here for now.**

In [None]:
# Fixed seed for the random state that is handed to the samplers below via
# `random_state=rndgen`
rndgen = np.random.RandomState(7353)

In [None]:
# Fixed per-source parameters that are handed to the injectors
src_t = srcs["t"]
src_dt = np.vstack((srcs["dt0"], srcs["dt1"])).T

# Step one: injection time window per source, in seconds and centered around
# each source position
trange = ana.llh.time_pdf_def_range(src_t, src_dt)

print("dt in sec\n", dt)
print("")
print("trange in sec\n", trange)

# Cross-check by hand using the settings that were given to the LLH: each
# window edge is moved outwards by
#   nsig * clip(dt1 - dt0, sigma_t_min, sigma_t_max)
print("")
nsig = time_pdf_args["nsig"]
clip = np.clip(np.diff(dt, axis=1).ravel(),
               time_pdf_args["sigma_t_min"],
               time_pdf_args["sigma_t_max"])
_trange = np.column_stack((dt[:, 0] - clip * nsig,
                           dt[:, 1] + clip * nsig))
print("manual\n", _trange)

In [None]:
# Expected number of background events per time window. The
# bg_rate_injector obtains it from the integral over the rate_function.
nb = bg_rate_inj.get_nb(src_t, trange)
args = {"nb": nb}

print("nb from integral\n", nb)
print("")
print("args wrapper\n", args)

# Compare with a naive integration trange * mean_rate, where the mean rate is
# read directly from the fit params of the rate_function. Both are
# approximately the same as long as the time windows are not too large.
# Lookup: rate function name -> (message, index of mean rate in best_pars)
_mean_rate_lookup = {
    "SIN": ("\n# Used a SinusRateFunction", 3),
    "SIN1YR": ("\n# Used a Sinus1yrRateFunction", 2),
    "CONST": ("\n# Used a ConstantRateFunction, values must be "
              "excatly the same", 0),
}
if RFUNC in _mean_rate_lookup:
    _msg, _idx = _mean_rate_lookup[RFUNC]
    print(_msg)
    mean_rate = bg_rate_inj.best_pars[_idx]

intgrls = mean_rate * np.diff(trange)
print("")
print("Naive mean_rate * diff(trange)\n", intgrls.flatten())

In [None]:
# Now we sample times for a few trials, just to get some more events
ntrials = 100
times = [bg_rate_inj.sample(src_t, trange, poisson=True)
         for _ in range(ntrials)]

# This is now a list of lists of arrays: merge all trials per source
flat_times_per_src = []
for i in range(nsrcs):
    flat_times_per_src.append(flatten_list_of_1darrays(
        [ti[i] for ti in times]))

flat_times = flatten_list_of_1darrays(flat_times_per_src)

# First all times in MJD in the global position. Won't see much, just to
# check if no stray times are generated.
# `trange` holds offsets in seconds relative to the source time, so the
# window borders in MJD are src_t + trange / secinday. Subtracting would
# mirror the borders around the source time.
plt.vlines(flat_times, 0, 1, linestyles="--", colors="C7")
plt.vlines(src_t + trange[:, 0] / secinday, 0, 0.25,  # only 1/4 to see all
           linestyles="--", colors="C3")
plt.vlines(src_t + trange[:, 1] / secinday, 0.25, 0.5,  # only 1/4 to see all
           linestyles="--", colors="C2")
plt.xlim(mint, maxt)
plt.xlabel("Each time sample in MJD")
plt.ylabel("Just vlines, no real unit")
plt.title("Time windows too small to see evts, just to check for stray evts")
plt.tight_layout()

# plt.savefig("data/figs/background_events_time_sampled_large.png", dpi=200)

plt.show()

# Per source: sampled times in seconds relative to the source time, drawn
# together with the time signal-over-background curve of that source
vline_scale = 20.
for i, ti in enumerate(flat_times_per_src):
    _color = "C{:1d}".format(i)
    t_sec = ti * secinday - src_t[i] * secinday
    _t = np.linspace(*trange[i], 100)
    _t_mjd = _t / secinday + src_t[i]
    _pdf = ana.llh._soverb_time(_t_mjd, src_t[i], dt[i]).flatten()
    # The y scale of the whole plot is fixed by the first source only
    if i == 0:
        _max = 1.05 * np.amax(_pdf)
    # Small ticks for event positions below the 0 line, one row per source
    plt.vlines(t_sec, -i * _max / vline_scale, -(i+1) * _max / vline_scale,
               linestyles="-", colors=_color)
    # Dotted lines mark the borders of this source's time window
    plt.vlines(trange[i], 0, -nsrcs * _max / vline_scale, linestyles=":",
               colors=_color)
    plt.plot(_t, _pdf, _color + "-", label="{:d} evts".format(len(ti)))

plt.axhline(0, 0, 1, color="C7")
# x range follows the window of the last source plus a small margin
plt.xlim(1.1 * trange[-1, 0], trange[-1, 1] - 0.1 * trange[-1, 0])
plt.ylim(-nsrcs * _max / vline_scale, _max)
plt.xlabel("time in sec, centered at src t0")
plt.title(
    "Individual samples in each window, combined of {} trials.".format(
        ntrials))
plt.legend(loc="upper right")
plt.tight_layout()

# plt.savefig("data/figs/background_events_time_sampled_windows.png", dpi=200)

plt.show()

In [None]:
# Just sample a bit more to have enough events
ntrials = 10000
times = [bg_rate_inj.sample(src_t, trange, poisson=True,
                            random_state=rndgen)
         for _ in range(ntrials)]

# Merge the sampled times of all trials per source and then over all sources
flat_times_per_src = [
    flatten_list_of_1darrays([ti[i] for ti in times])
    for i in range(nsrcs)]
flat_times = flatten_list_of_1darrays(flat_times_per_src)


# Next up, sample all other features than time:
# ra, dec (sinDec), logE, sigma
nevts_per_source = np.array(list(map(len, flat_times_per_src)))
nevts = len(flat_times)

# One record array per source, with the sampled times appended as the
# "timeMJD" field. Builtin `float` replaces the `np.float` alias, which is
# deprecated since NumPy 1.20 and removed in 1.24.
X = []
for i in range(nsrcs):
    X_ = bg_inj.sample(nevts_per_source[i], random_state=rndgen)
    X.append(numpy.lib.recfunctions.append_fields(
        X_, "timeMJD", flat_times_per_src[i],
        dtypes=float, usemask=False))

# Sky positions of the sampled events of all trials, one color per source
skymap = amp_plt.skymap()
fig, ax = skymap.figure(tex=False, gal_plane=True)
for i in range(nsrcs):
    x, y = skymap.EquCoordsToMapCoords(X[i]["ra"], X[i]["dec"])
    ax.plot(x, y, ls="", marker=".", ms=2, color="C{:1d}".format(i),
            alpha=0.5)
plt.show()

# 1D distributions of the sampled features, one step histogram per source.
# The declination is shown via the "sinDec" field.
# Entries are: (field name, number of bins, histogram range, x label)
_hist_setup = [
    ("ra", 10, [0, 2 * np.pi], "ra"),
    ("sinDec", 20, [-1, +1], "sin(dec)"),
    ("logE", 20, [2, 6], "logE"),
]
for _field, _nbins, _range, _xlabel in _hist_setup:
    for i in range(nsrcs):
        plt.hist(X[i][_field], bins=_nbins, range=_range, histtype="step",
                 color="C{:1d}".format(i))
    plt.xlabel(_xlabel)
    plt.ylabel("number of events")
    plt.show()

In [None]:
# Now the fit itself. On every cell execution we do a single trial
times = bg_rate_inj.sample(src_t, trange, poisson=True,
                           random_state=rndgen)
flat_times = flatten_list_of_1darrays(times)
nevts_per_src = list(map(len, times))
nevts = len(flat_times)

print("nevts total\n", nevts)
print("nevts per src\n", nevts_per_src)
print("times\n", flat_times)

# Sample the remaining features and append the times as the "timeMJD" field.
# Builtin `float` replaces the `np.float` alias, which is deprecated since
# NumPy 1.20 and removed in 1.24.
X = bg_inj.sample(nevts, random_state=rndgen)
X = numpy.lib.recfunctions.append_fields(X, "timeMJD", flat_times,
                                         dtypes=float, usemask=False)

print("X['ra'] degrees\n", np.rad2deg(X["ra"]))
print("X['dec'] degrees\n", np.rad2deg(X["dec"]))
print("X['sinDec']\n", X["sinDec"])
print("X['logE']\n", X["logE"])
print("X['sigma'] degress\n", np.rad2deg(X["sigma"]))

# Fit ns within [0, 2 * nevts], starting at ns0
args = {"nb": nb, "srcs": srcs}
ns0 = 1.
bounds = [[0., 2. * nevts]]
res = grbllh.fit_lnllh_ratio(X, ns0, args, bounds=bounds, minimizer_opts={})
res

#### Local Test

Everything we did above, wrapped in a single method.
Trials up to 1e3 - 1e4 run fast enough for a quick check.
Everything else needs some more cluster time, see cell below.
Trials are zero a lot of times, so you really need a lot of trials to get a good enough sampled TS.

Note on the ns distribution:
As we sample the number of events per trial from a poisson distribution, we see the fitted ns peaked at integer numbers.
The main peak is of course at 0 because most of our events are not even close to the sources.
The second peak is the next likely number of events, 1 and so on.

In [None]:
# %%prun
# Uncomment the magic above to profile the run. There are some unoptimized
# things due to the generality of the code.

ns0 = 1  # The closer to small number of evts the faster
minopts = {"bounds": [[0, None],]}
ntrials = int(1e4)

# Run background-only trials (no signal injector given) and stop the
# wall-clock time. Returns the trial results and a count `nzeros`.
t0 = time()
res, nzeros = ana.do_trials(ntrials, ns0,
                            bg_inj=bg_inj, bg_rate_inj=bg_rate_inj,
                            minimizer_opts=minopts)
t1 = time()
print("Elapsed time; {:.2f}s".format(t1 - t0))

In [None]:
# Plot the test statistic distribution of the trials from the cell above.
# `plot_trials` is a helper defined elsewhere in this notebook.
name = "TS"
ax = plot_trials(res[name], ntrials=ntrials)
ax.set_xlabel(name)
plt.show()

### Signal trials

Setup signal injector.
Use the same srcs as in the analysis setup.

In [None]:
# Only set the src params we need here manually
gamma = 2.
mode = "band"  # Change here to switch circle vs. band mode below
if mode == "band":
    inj_width = np.deg2rad(5.)
elif mode == "circle":
    inj_width = np.deg2rad(10.)
else:
    # Fail loudly, otherwise a stale `inj_width` from a previous cell
    # execution (or none at all) would be used silently below
    raise ValueError("Unknown injection mode '{}'. ".format(mode) +
                     "Choose one of 'band', 'circle'.")
sig_inj = SigInj.SignalInjector(gamma=gamma, mode=mode,
                                inj_width=inj_width)
# Use the cut samples `_mc` and `_exp`, consistent with the LLH and the bg
# injectors, which are all built from the cut data. The uncut `mc` would
# inject events that the rest of the analysis never sees.
sig_inj.fit(srcs, _mc, _exp.dtype.names)

# Number of signal events to inject, `gen` is handed to `ana.do_trials`
ngen = 2
gen = sig_inj.sample(ngen, poisson=True)

In [None]:
# %%prun
# Uncomment the magic above to profile the run.

ns0 = ngen + 1  # presumably the start value of the ns fit - TODO confirm
minopts = {"bounds": [[0, None],]}
ntrials = int(1e5)

# Same trials as in the background only case, but with the signal sampler
# `gen` passed as `signal_inj`. The wall-clock time is printed at the end.
t0 = time()
res, nzeros = ana.do_trials(ntrials, ns0,
                            bg_inj=bg_inj, bg_rate_inj=bg_rate_inj,
                            signal_inj=gen,
                            minimizer_opts=minopts)
t1 = time()
print("Elapsed time; {:.2f}s".format(t1 - t0))

In [None]:
# Cross-check: the custom percentile helper must agree with numpy's
# percentile on the TS values with the `nzeros` zero trials filled in
_median_custom = _percentile_nzeros(res["TS"], nzeros, 50)
_ts_zero_filled = np.concatenate((np.zeros(nzeros), res["TS"]))
_median_numpy = np.percentile(_ts_zero_filled, 50, interpolation="lower")
equal = (_median_custom == _median_numpy)
print("Both percentiles are equal: {}".format(equal))

# TS (left) and ns (right) distribution side by side
fig, (axl, axr) = plt.subplots(1, 2, figsize=(14, 6))
_plot_opts = dict(CHI2=False, SIGMA=False)

name = "TS"
axl = plot_trials(res[name], ntrials=ntrials, xmax=max(res[name]), ax=axl,
                  **_plot_opts)
# Mark the 10% and 50% percentiles
axl.axvline(_percentile_nzeros(res[name], nzeros, 10), 0, 1, color="C1",
            label="10%")
axl.axvline(_percentile_nzeros(res[name], nzeros, 50), 0, 1, color="C1",
            ls="--", label="50%")
axl.set_title(axl.get_title() + " n_inj = {}".format(ngen))
axl.set_xlabel(name)
axl.legend(loc="upper right")

name = "ns"
axr = plot_trials(res[name], ntrials=ntrials, xmax=max(res[name]), ax=axr,
                  **_plot_opts)
axr.axvline(_percentile_nzeros(res[name], nzeros, 50), 0, 1, color="C1",
            ls="--")
axr.set_title("ntrials = {}, n_inj = {}".format(ntrials, ngen))
axr.set_xlabel(name)

fig.tight_layout()
_figname = "data/figs/SIGT_ntrials={}_ninj={}.png".format(ntrials, ngen)
plt.savefig(_figname, dpi=150)
plt.show()

### Do BG + Signal in same plot

First some BG trials

In [None]:
# Background only trials (no signal injector given). Results are stored
# under their own names, so the signal trials below don't overwrite them.
ns0 = 1.
minopts = {"bounds": [[0, None],]}
n_bg_trials = int(1e5)

bg_res, bg_nzeros = ana.do_trials(n_bg_trials, ns0,
                                  bg_inj=bg_inj, bg_rate_inj=bg_rate_inj,
                                  minimizer_opts=minopts)

In [None]:
# TS distribution of the background trials in 50 bin edges from 0 to 20.
# The return value is assigned to `_` to suppress the cell output.
_ = plot_trials(bg_res["TS"], n_bg_trials, np.linspace(0, 20, 50),
                CHI2=True, SIGMA=True)

Now signal trials for multiple number of injected events

In [None]:
# Only set the src params we need here manually
gamma = 2.
mode = "band"
inj_width = np.deg2rad(5.)
sig_inj = SigInj.SignalInjector(gamma=gamma, mode=mode, inj_width=inj_width)
# Use the cut samples `_mc` and `_exp`, consistent with the LLH and the bg
# injectors, which are all built from the cut data. The uncut `mc` would
# inject events that the rest of the analysis never sees.
sig_inj.fit(srcs, _mc, _exp.dtype.names)


minopts = {"bounds": [[0, None],]}
n_sig_trials = int(1e4)

# One entry per number of injected events, same order as `n_inj`
sig_res = []
sig_nzeros = []

n_inj = [0.01, 0.1, 1]
for n_inj_i in n_inj:
    print("{} Trials with {} injected events.".format(n_sig_trials, n_inj_i))
    # A fresh signal sampler for each number of injected events
    gen = sig_inj.sample(n_inj_i, poisson=True)
    res_i, nzeros_i = ana.do_trials(n_sig_trials, n_inj_i + 1.,
                                    bg_inj=bg_inj, bg_rate_inj=bg_rate_inj,
                                    signal_inj=gen,
                                    minimizer_opts=minopts)
    sig_res.append(res_i)
    sig_nzeros.append(nzeros_i)

print("Done")

In [None]:
# eta: percentage of the zero trials counted in `nzeros`, for the
# background only case and for each number of injected signal events
_eta_bg = bg_nzeros / n_bg_trials * 100
print("BG only      -> eta = {:.3f}%".format(_eta_bg))

for n_inj_i, nzerosi in zip(n_inj, sig_nzeros):
    _eta_sig = nzerosi / n_sig_trials * 100
    print("n_inj = {:.2f} -> eta = {:.3f}%".format(n_inj_i, _eta_sig))

In [None]:
# Overlay the TS distributions of all signal trial sets and the background
fig, ax = plt.subplots(1, 1)
# One color per signal trial set, so no set is dropped when `n_inj` changes
colors = ["C{}".format(i) for i in range(len(sig_res))]
bins = np.linspace(0, 70, 70)

for resi, nzerosi, ci in zip(sig_res, sig_nzeros, colors):
    _ = plot_trials(resi["TS"], n_sig_trials, bins, ax=ax,
                    CHI2=False, SIGMA=False, color=ci,
                    histtype="step")
    # Dashed line marks the median TS of this signal trial set
    ax.axvline(_percentile_nzeros(resi["TS"], nzerosi, 50), 0, 1,
               color=ci, ls="--")

_ = plot_trials(bg_res["TS"], n_bg_trials, bins, ax=ax,
                CHI2=False, SIGMA=True, color="C7",
                histtype="step")

ax.set_title("")
# Only remove the legend if one was created, `legend_` is None otherwise
if ax.legend_ is not None:
    ax.legend_.remove()

fig.tight_layout()
# Build the file name from the used `n_inj` values, so it can't get out of
# sync with the plotted content. [0.01, 0.1, 1] gives "ninj=0.01_0.1_1".
_ninj_tag = "_".join("{}".format(_n) for _n in n_inj)
plt.savefig("data/figs/SIGT_BGT_ninj={}.png".format(_ninj_tag), dpi=150)
plt.show()

## Misc

#### NOTE on performance

1. soverb time is slow due to heavy broadcasting
2. replace signal pdf cos_dist with C++ snippet

Test again

In [None]:
%timeit grbllh.fit_lnllh_ratio(X, ns0, args, bounds=bounds, minimizer_opts={})

In [None]:
%prun grbllh.fit_lnllh_ratio(X, ns0, args, bounds=bounds, minimizer_opts={})

In [None]:
%%timeit rndgen = np.random.RandomState(32423423)
check_random_state(rndgen)

**Is if faster than function call with pass? -> Yes, use if**

In [None]:
%%timeit  a = "Not None"
if a is not None:
    pass

In [None]:
%%timeit def test(): pass
test()

**Is inline if faster than outline? -> No, use normal if**

In [None]:
%%timeit sobmax = 1.
if sobmax > 0.:
    pass

In [None]:
%%timeit sobmax = 1.
sobmax = 1 if sobmax == 0 else sobmax

**Is math faster than numpy for scalars? -> Yes, use math for scalars**

In [None]:
%%timeit a = 15.
math.exp(a)

In [None]:
%%timeit a = 15.
np.exp(a)

**Single scalar to list or to ndarray (for gradient)? -> Use single element list**

In [None]:
%%timeit a = 15.
out = [a]

In [None]:
%%timeit a = 15.
out = np.array(a)

In [None]:
%%timeit a = 15.
out = np.atleast_1d(a)

**numpy reshape vs bracket notation? -> Use `[:, None]` instead of a function call or `np.newaxis`**

In [None]:
%%timeit a = np.random.uniform(size=10000)
b = a[:, None]

In [None]:
%%timeit a = np.random.uniform(size=10000)
b = a[:, np.newaxis]

In [None]:
%%timeit a = np.random.uniform(size=10000)
a = a.reshape(10000, 1)

**shape vs len in ndarrays -> Use len**

In [None]:
%%timeit a = np.random.uniform(size=10000)[:, None]
len(a)

In [None]:
%%timeit a = np.random.uniform(size=10000)[:, None]
a.shape[0]

**numpy clip vs two masks? -> Equal in both cases, use clip, it's clearer to read**

In [None]:
%timeit a = np.random.uniform(size=10000)
a = np.clip(a, 0.1, 0.9)

In [None]:
%timeit a = np.random.uniform(size=10000)
a[a < 0.1] = 0.1
a[a > 0.9] = 0.9

**np.concatenate vs ndarray.tolist and then merging lists -> Don't use the list version with functions that convert to array anyway, otherwise use list**

In [None]:
%%timeit a = np.arange(100)
out = np.concatenate((a[[0]], a, a[[-1]]))

In [None]:
%%timeit a = np.arange(100)
out = [a[0]] + a.tolist() + [a[-1]]

In [None]:
%%timeit a = np.arange(100)
out = np.array([a[0]] + a.tolist() + [a[-1]])

In [None]:
%%timeit a = np.arange(100) + 1
out = [a[0]] + a.tolist() + [a[-1]]
sci.InterpolatedUnivariateSpline(out, out)

In [None]:
%%timeit a = np.arange(100) + 1
out = np.concatenate((a[[0]], a, a[[-1]]))
sci.InterpolatedUnivariateSpline(out, out)

#### Cluster Trials - How To

Same one as above but with many trials run on a remote machine.
Here we generate the DAGMan scripts used to start the jobs.
DAGMan system operates on a code which gets called with different arguments specified in a argument list file.
For each job it just plugs in the arguments and runs the job.

To get the jobs running do:

1. Put all scripts in a folder on iccobalt
2. Adapt path to script in `onejob.submit` file
3. Generate DAGMan argument file with: `python job_options_generator.py`
4. Submit using: `condor_submit_dag -config dagman.config job_options.dag `

When jobs are finished you should receive a mail.

Check job status on cluster using `<command>`.

##### `DAGMan Configuration file`

Steers how many jobs run in parallel.

`dagman.config`

In [None]:
FileLink("./data/bgtrials_ic86I/dagman.config")

##### `Submit File for a single Job`

Shell script that makes a single program call with the parameters plugged in and is the one called for each job.
It plugs the arguments from `job_options.dag` into the real job script `onejob.py`.

`onejob.submit`

In [None]:
FileLink("./data/bgtrials_ic86I/onejob.submit")

##### `Script for a single Job`

This code gets executed with different args per job and is our main program.

`onejob.py`

In [None]:
FileLink("./data/bgtrials_ic86I/onejob.py")

##### `Job Argument Generator`

We need to generate arguments for each job and write them to `job_options.dag`.
In this generator we specify how many trials and how many jobs we want to run.
`job_options.dag` has the following structure for each job:

```bash
JOB <jobname> <path/to/onejob.submit>
VARS <jobname> -arg1="arg1" -argn "argn"
```

Here the generation script:

`job_options_generator.py`

In [None]:
FileLink("./data/bgtrials_ic86I/job_options_generator.py")

This is the generated argument list file:

`job_options.dag`

In [None]:
FileLink("./data/bgtrials_ic86I/job_options.dag")

## Prepare Data - On/Offtime Split

Here we practice splitting the data into an on- and an off-time part, like it would be done later in the analysis.

To be safe we scramble the times for exp, so that we don't get blindness issues here.
We do this after the PDFs from data have been built, because otherwise we'd get flat rate functions.
The injectors draw random times anyway and we use made up src positions, so it's not critical anyway...

In [None]:
nsrcs = 5

# Invent some sources: each time window starts at -20 and ends at
# 100, 200, ..., nsrcs * 100 relative to the source time. The values are
# divided by `secinday` further below, so they are given in seconds.
dt = np.vstack((np.full(nsrcs, -20.), 100. * np.arange(1, nsrcs + 1))).T

# Source times equally spaced in the data time range, without both borders.
# Stored as a column, shape (nsrcs, 1), to broadcast against `dt`.
mint = np.amin(_exp["timeMJD"])
maxt = np.amax(_exp["timeMJD"])
src_t = np.linspace(mint, maxt, nsrcs + 2)[1:-1].reshape(nsrcs, 1)

In [None]:
# Seed chosen so that we get some evts in the windows
rndgen = np.random.RandomState(seed=5)
_t_all = _exp["timeMJD"]
mint, maxt = np.amin(_t_all), np.amax(_t_all)
# Work on a copy and replace all times with uniform ones from the data range
exp_rnd = np.copy(_exp)
exp_rnd["timeMJD"] = rndgen.uniform(low=mint, high=maxt, size=len(_exp))

In [None]:
# Time windows in MJD, then mask the data
dt_MJD = src_t + dt / secinday
evt_t = exp_rnd["timeMJD"]

# One row per source, True where an event time is inside that window.
# This assumes no time window overlap.
_win_lo, _win_hi = dt_MJD[:, [0]], dt_MJD[:, [1]]
ontime_per_src = (evt_t >= _win_lo) & (evt_t <= _win_hi)
# Event is on-time if it falls into the window of any source
ontime_tot = ontime_per_src.any(axis=0)

nevts_per_src = ontime_per_src.sum(axis=1)
nevts = ontime_tot.sum()

print("Ontime events in each dt\n", nevts_per_src)
print("Total Ontime events\n", nevts)

In [None]:
# This always plots all event times but places dt as the plot lims.
# So we can double check the number of on time events.
# squeeze=False always returns a 2D axes array, so this also works for
# nsrcs == 1, where a bare Axes object would be returned otherwise.
fig, ax = plt.subplots(nsrcs, 1, squeeze=False)
ax = ax[:, 0]
y = np.zeros_like(evt_t)
for i, axi in enumerate(ax):
    axi.plot(evt_t, y, color="C{}".format(i), ls="", marker="|",
             mew=2, ms=10, label="{} evts".format(nevts_per_src[i]))
    # Zoom to the time window of source i
    axi.set_xlim(dt_MJD[i])
    axi.set_ylim(-1, 1)
    axi.set_yticklabels([])
    axi.set_xticklabels([])

    # Steal space for legend. Stackoverflow: 4700614 :+1:
    box = axi.get_position()
    axi.set_position([box.x0, box.y0, box.width * 0.8, box.height])
    axi.legend(loc='center left', bbox_to_anchor=(1, 0.5),
               title="window {}".format(i))

ax[-1].set_xlabel("Time MJD")

In [None]:
# Cut events out and see they're gone from the ontime windows
_exp_cut = exp_rnd[~ontime_tot]

# Now all events should be gone.
# squeeze=False always returns a 2D axes array, so this also works for
# nsrcs == 1, where a bare Axes object would be returned otherwise.
fig, ax = plt.subplots(nsrcs, 1, squeeze=False)
ax = ax[:, 0]
y = np.zeros_like(_exp_cut["timeMJD"])
for i, axi in enumerate(ax):
    axi.plot(_exp_cut["timeMJD"], y, color="C{}".format(i), ls="", marker="|",
             mew=2, ms=10, label="had {} evts".format(nevts_per_src[i]))
    # Zoom to the time window of source i
    axi.set_xlim(dt_MJD[i])
    axi.set_ylim(-1, 1)
    axi.set_yticklabels([])
    axi.set_xticklabels([])

    # Steal space for legend. Stackoverflow: 4700614 :+1:
    box = axi.get_position()
    axi.set_position([box.x0, box.y0, box.width * 0.8, box.height])
    axi.legend(loc='center left', bbox_to_anchor=(1, 0.5),
               title="window {}".format(i))

ax[-1].set_xlabel("Time MJD")
fig.suptitle("All gone")