In [None]:
import os
import sys
import json
import math
from astropy.time import Time as astrotime
from tqdm import tqdm_notebook as tqdm

import numpy as np
from numpy.lib.recfunctions import drop_fields

import matplotlib
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
%matplotlib inline

import scipy.interpolate as sci
import scipy.optimize as sco
import scipy.integrate as scint
import scipy.stats as scs

import anapymods3.plots.astro as amp_plt
from anapymods3.plots import (split_axis, get_binmids, hist_marginalize,
                              hist_from_counts, dg)
from anapymods3.stats import rejection_sampling, json2kde, sigma2prob
from anapymods3.general import fill_dict_defaults, total_size
from anapymods3.healpy import wrap_theta_phi_range

import tdepps.bg_injector as BGInj
import tdepps.bg_rate_injector as BGRateInj
import tdepps.rate_function as RateFunc
import tdepps.llh as LLH
import tdepps.analysis as Analysis
import tdepps.signal_injector as SigInj
from tdepps.utils import rejection_sampling, func_min_in_interval, rotator

# Number of seconds in one day
secinday = 24. * 60. * 60.
# Print the time at which this cell was executed
print("Executed on ", astrotime.now())

Test the multi-year analysis, which is built on top of the single-year one.

# Setup GRB samples

In [None]:
# Fixed-seed random number generators, one per sample, so that repeated runs
# of the notebook draw the same random numbers.
rndgen_86II = np.random.RandomState(735005)  # Google
rndgen_86III = np.random.RandomState(7353)   # Esel

### IC86III (2013)

In [None]:
# Load the IC86III inputs: experimental data, nugen simulation, GRB list and
# the JSON run list (per-run livetime and on-time flag are read from it below).
data_86III = np.load("data/GRB_IC86III/data.npy")
sim_86III = np.load("data/GRB_IC86III/sim_IC86III_nugen_10634.npy")
grblist_86III = np.load("data/GRB_IC86III/grblist.npy")
with open("data/GRB_IC86III/runlist.json", "r") as infile:
    runlist_86III = json.load(infile)

# Number of files is different in both years, see weight check later.
# The number of generated events is files times events per file.
nfiles_86III = 956
nevents = 5000
ngen_86III = float(nfiles_86III * nevents)

# Keep only events above the score cut, in data and simulation alike
score_cut_86III = 0.5
data_86III = data_86III[data_86III["score"] > score_cut_86III]
sim_86III = sim_86III[sim_86III["score"] > score_cut_86III]

# Per-run livetimes and their total; the total is also converted to days
run_ids = runlist_86III.keys()
livetimes = [runlist_86III[key]["livetime"] for key in run_ids]
livetimes_86III = np.array(livetimes)
livetime_86III = np.sum(livetimes)
livetime_days_86III = livetime_86III / 24. / 3600.

In [None]:
def is_on_time(run_id):
    """Return True if the run `run_id` is flagged as on-time in the IC86III run list.

    The run list is keyed by the run number as a string, so `run_id` is
    converted to int and then to str before the lookup.
    """
    run_info = runlist_86III[str(int(run_id))]
    return bool(run_info["ontime"])

# Flag every event by whether its run is marked as on-time in the run list.
# The explicit bool dtype keeps `~on_mask` valid even for an empty sample.
on_mask = np.array([is_on_time(run_id) for run_id in data_86III["run_id"]],
                   dtype=bool)
assert np.sum(on_mask) + np.sum(~on_mask) == len(data_86III)

on_data_86III = data_86III[on_mask]
off_data_86III = data_86III[~on_mask]

# Get livetime split
on_livetime_86III = np.sum([run["livetime"] if run["ontime"] else 0.
                            for run in runlist_86III.values()])
off_livetime_86III = np.sum([run["livetime"] if not run["ontime"] else 0.
                             for run in runlist_86III.values()])
# The three sums are accumulated separately, so compare with a floating point
# tolerance instead of exact equality.
assert np.isclose(livetime_86III, on_livetime_86III + off_livetime_86III)

print("Off livetime   : {:6.2f} days".format(off_livetime_86III / 24. / 3600.))
print("On livetime    : {:6.2f} days".format(on_livetime_86III / 24. / 3600.))
print("Total livetime : {:6.2f} days".format(livetime_days_86III))

In [None]:
# Build the source record array for IC86III from the GRB list.
# Note: the builtin `float` replaces the `np.float` alias, which was removed
# from NumPy; both denote the same 64 bit float type.
nsrcs = len(grblist_86III)
names = ["t", "dt0", "dt1", "ra", "dec", "w_theo", "sigma"]
types = len(names) * [float]
dtype = [(name, typ) for name, typ in zip(names, types)]

grb_srcs_IC86III = np.empty((nsrcs, ), dtype=dtype)

# Time window per source runs from dt0 = 0 to dt1 = t100, relative to `t`
grb_srcs_IC86III["t"] = grblist_86III["timeMJD"]
grb_srcs_IC86III["dt0"] = np.zeros(nsrcs, dtype=float)
grb_srcs_IC86III["dt1"] = grblist_86III["t100"]
grb_srcs_IC86III["ra"] = grblist_86III["ra"]
grb_srcs_IC86III["dec"] = grblist_86III["dec"]
# All sources get the same weight of one
grb_srcs_IC86III["w_theo"] = np.ones(nsrcs, dtype=float)
grb_srcs_IC86III["sigma"] = grblist_86III["sigma"]

In [None]:
# Build the goodrun list: one dict per run holding a copy of the run list
# entry plus the integer run number and ISO formatted start / stop times.
runs = []
for key, item in runlist_86III.items():
    d = item.copy()
    tstart = astrotime(item["tStart"], format="mjd").iso
    tstop = astrotime(item["tStop"], format="mjd").iso
    d["run"] = int(key)
    d["good_tstart"] = tstart
    d["good_tstop"] = tstop
    runs.append(d)

goodruns_86III = {"runs": runs}
print("Number of all runs : ", len(runs))
# Count with a generator expression: in Python 3 `filter` returns a lazy
# iterator, which has no len().
off_len = sum(1 for runi in runs if not runi["ontime"])
on_len = sum(1 for runi in runs if runi["ontime"])
print("Number of off runs : ", off_len)
print("Number of on runs  : ", on_len)

assert off_len + on_len == len(runs)

In [None]:
# Binning and event selection for the IC86III PDFs (the LLH object itself is
# created further below).
# Note: Range derived from sinDec hist in NRT wiki
sin_dec_bins = np.linspace(-0.1, 1, 17 + 1)

# Range is derived from hist, dropping some wrongly reconstructed events
min_logE = -1.
max_logE = 10.
logE_bins = np.linspace(min_logE, max_logE, 40)

# Remove wrongly reconstructed events outside range from off data & sim
logE_mask = (sim_86III["logE"] > min_logE) & (sim_86III["logE"] < max_logE)
sim_pdf_full_86III = sim_86III[logE_mask]
print("Removed {} events in logE for simulation.".format(np.sum(~logE_mask)))
logE_mask = ((off_data_86III["logE"] > min_logE) &
             (off_data_86III["logE"] < max_logE))
data_pdf_full_86III = off_data_86III[logE_mask]
print("Removed {} events in logE for data.".format(np.sum(~logE_mask)))

# Later 3 events show up, that have times outside the run times. These are
# removed too - still strange.
# The result must be stored back in `data_pdf_full_86III`, because that array
# is the one handed on below; previously it went to an unused name and the
# events were never dropped.
# NOTE(review): the indices are positional in the score- and logE-filtered off
# data, so they are only valid for exactly the cuts above - confirm on change.
time_idx = [39023, 52880, 63524]
data_pdf_full_86III = np.delete(data_pdf_full_86III, time_idx)
# Keep the previously assigned name available for any code still using it
data_pdf_full = data_pdf_full_86III

# Check weights for E2 signal against built-in weights
gamma = 2.
type_w = 0.5         # Nugen defaults: 50:50 nu, anti-nu
norm_per_type = 0.5  # Astro just splits 50:50
# Norm to OW per type and "sim-livetime". This modifies the "ow" field of
# `sim_pdf_full_86III` in place; boolean indexing above made it a copy, so
# `sim_86III` is left untouched.
sim_pdf_full_86III["ow"] /= ngen_86III * type_w
w = (norm_per_type * sim_pdf_full_86III["trueE"]**(-gamma) *
     sim_pdf_full_86III["ow"])

print("Included and self made weights ar the same: ",
      np.allclose(sim_pdf_full_86III["weight_E2"], w))

# Now strip off unneeded fields so data and sim are compatible for tdepps
data_pdf_86III = drop_fields(data_pdf_full_86III,
                             ["run_id", "event_id", "score", "azi", "zen"])
sim_pdf_86III = drop_fields(sim_pdf_full_86III,
                            ["azi", "zen", "score", "weight_E2",
                             "weight_honda", "trueAzi", "trueZen"])

### IC86II (2012)

In [None]:
# Load the IC86II inputs: experimental data, nugen simulation, GRB list and
# the JSON run list (per-run livetime and on-time flag are read from it below).
data_86II = np.load("data/GRB_IC86II/data.npy")
sim_86II = np.load("data/GRB_IC86II/sim_IC86II_nugen_10634.npy")
grblist_86II = np.load("data/GRB_IC86II/grblist.npy")
with open("data/GRB_IC86II/runlist.json", "r") as infile:
    runlist_86II = json.load(infile)

# Number of files is different in both years, see weight check later.
# This sample has five files more than IC86III.
nfiles_86II = nfiles_86III + 5
nevents = 5000
ngen_86II = float(nfiles_86II * nevents)

# Keep only events above the score cut, in data and simulation alike
score_cut_86II = 0.4
data_86II = data_86II[data_86II["score"] > score_cut_86II]
sim_86II = sim_86II[sim_86II["score"] > score_cut_86II]

# Per-run livetimes and their total; the total is also converted to days
run_ids = runlist_86II.keys()
livetimes = [runlist_86II[key]["livetime"] for key in run_ids]
livetimes_86II = np.array(livetimes)
livetime_86II = np.sum(livetimes)
livetime_days_86II = livetime_86II / 24. / 3600.

In [None]:
def is_on_time(run_id):
    """Return True if the run `run_id` is flagged as on-time in the IC86II run list.

    The run list is keyed by the run number as a string, so `run_id` is
    converted to int and then to str before the lookup. Defining this
    function rebinds the name `is_on_time` to the IC86II run list.
    """
    run_info = runlist_86II[str(int(run_id))]
    return bool(run_info["ontime"])

# Flag every event by whether its run is marked as on-time in the run list.
# The explicit bool dtype keeps `~on_mask` valid even for an empty sample.
on_mask = np.array([is_on_time(run_id) for run_id in data_86II["run_id"]],
                   dtype=bool)
assert np.sum(on_mask) + np.sum(~on_mask) == len(data_86II)

on_data_86II = data_86II[on_mask]
off_data_86II = data_86II[~on_mask]

# Get livetime split
on_livetime_86II = np.sum([run["livetime"] if run["ontime"] else 0.
                           for run in runlist_86II.values()])
off_livetime_86II = np.sum([run["livetime"] if not run["ontime"] else 0.
                            for run in runlist_86II.values()])
# The three sums are accumulated separately, so compare with a floating point
# tolerance instead of exact equality.
assert np.isclose(livetime_86II, on_livetime_86II + off_livetime_86II)

print("Off livetime   : {:6.2f} days".format(off_livetime_86II / 24. / 3600.))
print("On livetime    : {:6.2f} days".format(on_livetime_86II / 24. / 3600.))
print("Total livetime : {:6.2f} days".format(livetime_days_86II))

In [None]:
# Build the source record array for IC86II from the GRB list.
# Note: the builtin `float` replaces the `np.float` alias, which was removed
# from NumPy; both denote the same 64 bit float type.
nsrcs = len(grblist_86II)
names = ["t", "dt0", "dt1", "ra", "dec", "w_theo", "sigma"]
types = len(names) * [float]
dtype = [(name, typ) for name, typ in zip(names, types)]

grb_srcs_IC86II = np.empty((nsrcs, ), dtype=dtype)

# Time window per source runs from dt0 = 0 to dt1 = t100, relative to `t`
grb_srcs_IC86II["t"] = grblist_86II["timeMJD"]
grb_srcs_IC86II["dt0"] = np.zeros(nsrcs, dtype=float)
grb_srcs_IC86II["dt1"] = grblist_86II["t100"]
grb_srcs_IC86II["ra"] = grblist_86II["ra"]
grb_srcs_IC86II["dec"] = grblist_86II["dec"]
# All sources get the same weight of one
grb_srcs_IC86II["w_theo"] = np.ones(nsrcs, dtype=float)
grb_srcs_IC86II["sigma"] = grblist_86II["sigma"]

In [None]:
# Build the goodrun list: one dict per run holding a copy of the run list
# entry plus the integer run number and ISO formatted start / stop times.
runs = []
for key, item in runlist_86II.items():
    d = item.copy()
    tstart = astrotime(item["tStart"], format="mjd").iso
    tstop = astrotime(item["tStop"], format="mjd").iso
    d["run"] = int(key)
    d["good_tstart"] = tstart
    d["good_tstop"] = tstop
    runs.append(d)

goodruns_86II = {"runs": runs}
print("Number of all runs : ", len(runs))
# Count with a generator expression: in Python 3 `filter` returns a lazy
# iterator, which has no len().
off_len = sum(1 for runi in runs if not runi["ontime"])
on_len = sum(1 for runi in runs if runi["ontime"])
print("Number of off runs : ", off_len)
print("Number of on runs  : ", on_len)

assert off_len + on_len == len(runs)

In [None]:
# Binning and event selection for the IC86II PDFs
# Note: Range derived from sinDec hist in NRT wiki
sin_dec_bins = np.linspace(-0.1, 1, 18)

# Range is derived from hist, dropping some wrongly reconstructed events
min_logE, max_logE = -1., 10.
logE_bins = np.linspace(min_logE, max_logE, 40)

# Remove wrongly reconstructed events outside range from off data & sim
logE_mask = np.logical_and(sim_86II["logE"] > min_logE,
                           sim_86II["logE"] < max_logE)
sim_pdf_full_86II = sim_86II[logE_mask]
print("Removed {} events in logE for simulation.".format(np.sum(~logE_mask)))
logE_mask = np.logical_and(off_data_86II["logE"] > min_logE,
                           off_data_86II["logE"] < max_logE)
data_pdf_full_86II = off_data_86II[logE_mask]
print("Removed {} events in logE for data.".format(np.sum(~logE_mask)))

# Later 3 events show up, that have times outside the run times. These are
# removed too - still strange.
# time_idx = [39023, 52880, 63524]
# data_pdf_full = np.delete(data_pdf_full_86II, time_idx)

# Check weights for E2 signal against built-in weights
gamma = 2.
type_w = 0.5         # Nugen defaults: 50:50 nu, anti-nu
norm_per_type = 0.5  # Astro just splits 50:50
# Norm to OW per type and "sim-livetime" (in-place on the filtered copy)
sim_pdf_full_86II["ow"] /= ngen_86II * type_w
w = (norm_per_type * sim_pdf_full_86II["trueE"]**(-gamma) *
     sim_pdf_full_86II["ow"])

print("Included and self made weights ar the same: ",
      np.allclose(sim_pdf_full_86II["weight_E2"], w))

# Now strip off unneeded fields so data and sim are compatible for tdepps
data_pdf_86II = drop_fields(
    data_pdf_full_86II, ["run_id", "event_id", "score", "azi", "zen"])
sim_pdf_86II = drop_fields(
    sim_pdf_full_86II, ["azi", "zen", "score", "weight_E2", "weight_honda",
                        "trueAzi", "trueZen"])

### Done:

In [None]:
# Marker output: reaching this cell means the sample setup cells above ran
print("Setup done")

# tdepps setup

In [None]:
# Spatial, time and LLH settings are shared by both samples
spatial_pdf_args = dict(bins=sin_dec_bins, k=3, kent=True)

# The energy PDF settings differ only in the MC background weights, which are
# taken from the "weight_honda" field of the respective simulation sample.
energy_pdf_args_86III = dict(
    bins=[sin_dec_bins, logE_bins],
    gamma=2.,
    fillval="col",
    interpol_log=False,
    mc_bg_weights=sim_pdf_full_86III["weight_honda"],
    smooth_sigma=[[0., 0.], [0., 0.]],
    logE_asc=True,
)
energy_pdf_args_86II = dict(
    bins=[sin_dec_bins, logE_bins],
    gamma=2.,
    fillval="col",
    interpol_log=False,
    mc_bg_weights=sim_pdf_full_86II["weight_honda"],
    smooth_sigma=[[0., 0.], [0., 0.]],
    logE_asc=True,
)

time_pdf_args = dict(nsig=4., sigma_t_min=2., sigma_t_max=30.)

llh_args = dict(sob_rel_eps=0., sob_abs_eps=1e-4)

# One single-sample LLH per year, built from the stripped data and simulation
grbllh_ic86III = LLH.GRBLLH(
    X=data_pdf_86III,
    MC=sim_pdf_86III,
    spatial_pdf_args=spatial_pdf_args,
    energy_pdf_args=energy_pdf_args_86III,
    time_pdf_args=time_pdf_args,
    llh_args=llh_args,
)

grbllh_ic86II = LLH.GRBLLH(
    X=data_pdf_86II,
    MC=sim_pdf_86II,
    spatial_pdf_args=spatial_pdf_args,
    energy_pdf_args=energy_pdf_args_86II,
    time_pdf_args=time_pdf_args,
    llh_args=llh_args,
)

### Test plots

In [None]:
# Compare the spatial background PDFs of both samples over the sin(dec) range
sindec = np.linspace(-0.1, 1., 100)

yIII = grbllh_ic86III._pdf_spatial_background(ev_sin_dec=sindec)
yII = grbllh_ic86II._pdf_spatial_background(ev_sin_dec=sindec)

plt.plot(sindec, yIII, label="IC86III")
plt.plot(sindec, yII, label="IC86II")
plt.xlabel("sin(dec)")
plt.ylabel("spatial background PDF")
# Without the legend the curve labels set above are never shown
plt.legend()
plt.show()

In [None]:
fig, ax = plt.subplots(1, 2, figsize=(14, 6))

# Evaluation grid: 101 points per axis between the outermost bin edges
bins = [sin_dec_bins, logE_bins]
x = np.linspace(bins[0][0], bins[0][-1], num=101)
y = np.linspace(bins[1][0], bins[1][-1], num=101)

XX, YY = np.meshgrid(x, y)
xx, yy = np.ravel(XX), np.ravel(YY)

# LLH object and score cut per sample; one panel is drawn for each entry
values = {"III": {"llh": grbllh_ic86III, "cut": score_cut_86III},
          "II": {"llh": grbllh_ic86II, "cut": score_cut_86II}}

for axi, key in zip(ax, values):
    # Evaluate the energy ratio on the flattened grid and restore the shape
    zz = values[key]["llh"]._soverb_energy(xx, yy)
    ZZ = zz.reshape(XX.shape)
    img = axi.pcolormesh(XX, YY, np.log10(ZZ),
                         cmap="coolwarm", vmin=-6, vmax=6)

    cbar = plt.colorbar(mappable=img, ax=axi)
    cbar.set_label("log10(ratio)")

    axi.set_xlabel("sin(dec)")
    axi.set_ylabel("muex in log10(GeV)")
    axi.set_title("Energy PDF for IC86{}. ".format(key) +
                  "score_cut: > {:.2f}".format(values[key]["cut"]))

fig.tight_layout()
# plt.savefig("./fig_mc_energy_pdf/score_cut_" +
#             "{:.2f}_mc_energy_pdf.png".format(score_cut), dpi=150)
plt.show()

## Test LLH evaluation

In [None]:
# Create data to evaluate from both on-datasets: copy only the listed fields
# into fresh float record arrays.
# Note: the builtin `float` replaces the `np.float` alias, which was removed
# from NumPy; both denote the same 64 bit float type.
names = ["timeMJD", "ra", "sinDec", "logE", "sigma"]
types = len(names) * [float]
dtype = [(name, typ) for name, typ in zip(names, types)]
X_86III = np.empty((len(on_data_86III)), dtype=dtype)
X_86II = np.empty((len(on_data_86II)), dtype=dtype)

for name in names:
    X_86III[name] = on_data_86III[name]
    X_86II[name] = on_data_86II[name]

In [None]:
# Approximate BG expectation per GRB: background rate * (dt1 - dt0).
# The rate is estimated separately for each sample as the number of off-data
# events divided by the off livetime (an earlier note quoted roughly 5 mHz).
rate_86III = len(off_data_86III) / off_livetime_86III
rate_86II = len(off_data_86II) / off_livetime_86II

nb_86III = rate_86III * (grb_srcs_IC86III["dt1"] - grb_srcs_IC86III["dt0"])
nb_86II = rate_86II * (grb_srcs_IC86II["dt1"] - grb_srcs_IC86II["dt0"])

Two samples (IC86III and IC86II)

In [None]:
# Multi-sample LLH holding both single-sample LLHs. The sample names used in
# `add_sample` are the same strings used as keys of the two dicts below.
multillh2 = LLH.MultiSampleGRBLLH()
multillh2.add_sample("86III", grbllh_ic86III)
multillh2.add_sample("86II", grbllh_ic86II)
# Per-sample on-time data and per-sample LLH arguments (BG expectation and
# source record array)
X_dict = {"86III": X_86III, "86II": X_86II}
args_dict = {"86III": {"nb": nb_86III, "srcs": grb_srcs_IC86III},
             "86II": {"nb": nb_86II, "srcs": grb_srcs_IC86II}}

In [None]:
%%time
ns = np.linspace(0, 10, 100)
llh2, grad2 = np.array([multillh2.lnllh_ratio(X_dict, nsi, args_dict)
                        for nsi in ns]).T

In [None]:
# LLH ratio scan of the two-sample LLH
plt.plot(ns, llh2)
plt.xlabel("ns")
plt.ylabel("lnllh_ratio")
plt.show()
# Returned gradient against finite differences of the scanned values,
# evaluated at the midpoints between neighbouring ns values
plt.plot(ns, grad2, label="returned gradient")
plt.plot(0.5 * (ns[:-1] + ns[1:]), np.diff(llh2) / np.diff(ns), ls="--",
         label="finite differences")
plt.xlabel("ns")
plt.ylabel("gradient of lnllh_ratio")
plt.legend()
plt.show()

One sample (IC86III only)

In [None]:
# Multi-sample LLH holding only the IC86III sample.
# NOTE: `X_dict` and `args_dict` are rebound here and from now on contain the
# IC86III entries only.
multillh1 = LLH.MultiSampleGRBLLH()
multillh1.add_sample("86III", grbllh_ic86III)
X_dict = {"86III": X_86III}
args_dict = {"86III": {"nb": nb_86III, "srcs": grb_srcs_IC86III}}

In [None]:
ns = np.linspace(0, 10, 100)
# Evaluate the one-sample multi LLH for every ns (with a progress bar) and
# split the returned (value, gradient) pairs into two sequences
llh1, grad1 = zip(*[multillh1.lnllh_ratio(X_dict, nsi, args_dict)
                    for nsi in tqdm(ns)])

# Stack values and flattened gradients, then unpack them as two arrays
llh1, grad1 = np.vstack((llh1, np.ravel(grad1)))

In [None]:
# LLH ratio scan of the multi LLH with one added sample
plt.plot(ns, llh1)
plt.xlabel("ns")
plt.ylabel("lnllh_ratio")
plt.show()
# Returned gradient against finite differences of the scanned values,
# evaluated at the midpoints between neighbouring ns values
plt.plot(ns, grad1, label="returned gradient")
plt.plot(0.5 * (ns[:-1] + ns[1:]), np.diff(llh1) / np.diff(ns), ls="--",
         label="finite differences")
plt.xlabel("ns")
plt.ylabel("gradient of lnllh_ratio")
plt.legend()
plt.show()

The single-sample LLH must give the same result as the multi-sample LLH when only a single sample has been added.

In [None]:
# LLH arguments for the single-sample IC86III LLH; same content as the
# "86III" entry used for the multi-sample LLH
args = {"nb": nb_86III, "srcs": grb_srcs_IC86III}

In [None]:
ns = np.linspace(0, 10, 100)
# Evaluate the single-sample IC86III LLH for every ns (with a progress bar)
# and split the returned (value, gradient) pairs into two sequences
llh0, grad0 = zip(*[grbllh_ic86III.lnllh_ratio(X_86III, nsi, args)
                    for nsi in tqdm(ns)])

# Stack values and flattened gradients, then unpack them as two arrays
llh0, grad0 = np.vstack((llh0, np.ravel(grad0)))

In [None]:
# Single-sample LLH scan on top of the multi LLH scan with one added sample;
# both curves are expected to coincide
plt.plot(ns, llh0, label="single-sample LLH")
plt.plot(ns, llh1, ls="--", label="multi LLH, one sample")
plt.xlabel("ns")
plt.ylabel("lnllh_ratio")
plt.legend()
plt.show()
# Gradients: returned by the single-sample LLH, finite differences of its
# scanned values, and returned by the multi LLH
plt.plot(ns, grad0, label="single-sample gradient")
plt.plot(0.5 * (ns[:-1] + ns[1:]), np.diff(llh0) / np.diff(ns), ls="--",
         label="finite differences (single-sample)")
plt.plot(ns, grad1, ls=":", lw=1, label="multi LLH gradient")
plt.xlabel("ns")
plt.ylabel("gradient of lnllh_ratio")
plt.legend()
plt.show()

## Signal Injector tests

In [None]:
# Signal injector with gamma=2, fitted to the simulation and sources of both
# samples. The dict keys name the samples; `exp_names` passes the field names
# of the stripped experimental data.
siginj = SigInj.SignalInjector(gamma=2.)
mcs = {"86III": sim_pdf_86III, "86II": sim_pdf_86II}
srcs = {"86III": grb_srcs_IC86III, "86II": grb_srcs_IC86II}
siginj.fit(srcs=srcs, MC=mcs, exp_names=data_pdf_86III.dtype.names)
# Print the value returned by mu2flux for an argument of 1
print("1 event flux: {:.6f} GeV^-1cm^-2".format(siginj.mu2flux(1)))

### Tests

We expect the same result when plugging in the same sample and the same sources twice:
each event then carries half the weight but appears twice, so the two effects cancel.

In [None]:
# Injector fitted to the IC86III simulation and sources registered twice,
# once under each sample name.
# NOTE: `mcs` and `srcs` are rebound here.
siginj_2 = SigInj.SignalInjector(gamma=2.)
mcs = {"86III": sim_pdf_86III, "86II": sim_pdf_86III}
srcs = {"86III": grb_srcs_IC86III, "86II": grb_srcs_IC86III}
siginj_2.fit(srcs=srcs, MC=mcs, exp_names=data_pdf_86III.dtype.names)
print("1 event flux: {:.6f} GeV^-1cm^-2".format(siginj_2.mu2flux(1)))

In [None]:
# Reference injector fitted to the IC86III simulation and sources only.
# NOTE: `mcs` and `srcs` are rebound here.
siginj_1 = SigInj.SignalInjector(gamma=2.)
mcs = {"86III": sim_pdf_86III}
srcs = {"86III": grb_srcs_IC86III}
siginj_1.fit(srcs=srcs, MC=mcs, exp_names=data_pdf_86III.dtype.names)
print("1 event flux: {:.6f} GeV^-1cm^-2".format(siginj_1.mu2flux(1)))

In [None]:
# Compare the mu2flux(1) values of the single-sample injector and the
# injector holding the same sample twice (np.isclose default tolerances)
print("Both fluxes equal: ", np.isclose(siginj_1.mu2flux(1),
                                        siginj_2.mu2flux(1)))