In [None]:
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import scipy.interpolate as sci
import scipy.stats as scs
from astropy.time import Time as astrotime

import tdepps.model_injection as Inj
from tdepps.model_toolkit import (create_run_dict, SignalFluenceInjector,
                                  make_rate_records, rebin_rate_rec,
                                  UniformTimeSampler, SinusFixedConstRateFunction)
from tdepps.utils import make_spl_edges

# Number of seconds in one day
secinday = 60. * 60. * 24.
# Row index into the time window table that is read in the setup cell
tw_id = 20

# Seeded random number generator, handed to the injector further down
rndgen = np.random.RandomState(seed=42439462)
print("Loaded: ", astrotime.now())

## Setup

In [None]:
# ###### Load data for each sample
# Experimental off-data: one structured array per sample name
p = "/Users/tmenne/git/phd/hese_tdepps/data/proc"
names = ["79", "86I", "86II", "86III"]
exp = {}
# Only the first entry of ``names`` is loaded at the moment
for key in names[:1]:
    exp_file = os.path.join(p, "offdata/{}.npy".format(key))
    exp[key] = np.load(exp_file)
    print("Loaded exp {}.".format(exp_file))

# Remember the record field names of each loaded data sample
exp_names = dict((key, exp_i.dtype.names) for key, exp_i in exp.items())

# Monte Carlo: one structured array per sample name
p = "/Users/tmenne/git/phd/hese_tdepps/data/raw"
mc_file = os.path.join(p, "IC79/IC79_corrected_MC.npy")
mc = {"79": np.load(mc_file)}
print("Loaded mc from: {}".format(mc_file))
# mc["86I"] = np.load(os.path.join(p, "IC86_2011/IC86_corrected_MC.npy"))
# print("Loaded mc from: {}".format(
#     os.path.join(p, "IC79/IC79_corrected_MC.npy")))
# mc["86II"] = np.load(os.path.join(p, "IC86_2012/IC86-2012_corrected_MC.npy"))
# mc["86III"] = mc["86II"]  # MC is the same here
# print("Loaded mc from: {} for 86II & 86III".format(
#     os.path.join(p, "IC86_2012/IC86-2012_corrected_MC.npy")))

# We need "timeMJD" as a key, so rename the "time" field in place where the
# MC record does not have a "timeMJD" field yet
for mci in mc.values():
    fields = mci.dtype.names
    if "timeMJD" in fields:
        continue
    idx = fields.index("time")
    mci.dtype.names = fields[:idx] + ("timeMJD",) + fields[idx + 1:]


# ###### Get prepared run time lists
p = "/Users/tmenne/git/phd/hese_tdepps/data/proc"
rundict = {}

# Read inside a context manager so the file handle is closed right after
# the JSON content has been parsed
with open(os.path.join(p, "goodruns/IC79v24.json")) as goodrun_file:
    rundict["79"] = json.load(goodrun_file)
# rundict["86I"] = json.load(open(os.path.join(p, "goodruns/IC86_2011.json")))
# rundict["86II"] = json.load(open(os.path.join(p, "goodruns/IC86_2012.json")))
# rundict["86III"] = json.load(open(os.path.join(p, "goodruns/IC86_2013.json")))

# The files are read from the "goodruns" sub directory of ``p``
print("Loaded goodrun dicts from: " + p + "/goodruns")


# ###### Load HESE tracks locations
p = "/Users/tmenne/git/phd/hese_tdepps/data/raw"
path = os.path.join(p, "public_data_release/All_HESE_Events_4_years_tracks.txt")
# Columns 1, 2, 3 of the text file are read as time, declination and right
# ascension; both angles are converted from degrees to radians
src_t, src_dec, src_ra = np.loadtxt(path, usecols=[1, 2, 3], unpack=True)
src_ra = np.deg2rad(src_ra)
src_dec = np.deg2rad(src_dec)

# Record layout of the source arrays. The builtin ``float`` is used because
# the ``np.float`` alias is deprecated and no longer exists in current NumPy
# releases; both describe the same 64 bit float type.
names = ["t", "dt0", "dt1", "ra", "dec", "w_theo"]
types = len(names) * [float]
dtype = [(name, typ) for name, typ in zip(names, types)]

# Build one source record array per sample, keeping only the sources whose
# time lies between the earliest good run start and the latest good run stop
srcs = {}
for key, rl in rundict.items():
    runs = rl["runs"]
    tmin = np.amin([astrotime(r["good_tstart"]).mjd for r in runs])
    tmax = np.amax([astrotime(r["good_tstop"]).mjd for r in runs])
    t_mask = (src_t >= tmin) & (src_t <= tmax)

    src_ti = src_t[t_mask]
    nsrcs_i = len(src_ti)

    srcs_i = np.empty((nsrcs_i, ), dtype=dtype)

    srcs_i["t"] = src_ti
    # Leave empty for now, time window is set explicitly below
    srcs_i["dt0"] = np.zeros(nsrcs_i, dtype=float)
    srcs_i["dt1"] = np.zeros(nsrcs_i, dtype=float)
    srcs_i["ra"] = src_ra[t_mask]
    srcs_i["dec"] = src_dec[t_mask]
    # All sources get the same weight of one
    srcs_i["w_theo"] = np.ones(nsrcs_i, dtype=float)
    srcs[key] = srcs_i

print("Made HESE srcs from: " + path)


# ###### Setup time window for all srcs
fname = "/Users/tmenne/git/phd/hese_tdepps/data/proc/time_windows/time_windows.txt"
# Pick row ``tw_id`` from the table of time windows
all_time_windows = np.loadtxt(fname=fname)
time_window = all_time_windows[tw_id]
tw_length = np.diff(time_window)[0]
print("Using time window {:2d}: {:.2f}s".format(tw_id, tw_length))
# Every source of every sample gets the same lower and upper window edge
for key, srcs_i in srcs.items():
    srcs_i["dt0"] = np.full(len(srcs_i), time_window[0])
    srcs_i["dt1"] = np.full(len(srcs_i), time_window[1])
    print(srcs_i)

## Done

In [None]:
# Marker cell: prints a fixed message once execution reaches this point
# (presumably to signal that the setup cells above have finished).
print("Done!")

## Start

In [None]:
# Work with the "79" sample only: data, MC, sources and run information
sample = "79"
exp_ = exp[sample]
MC_ = mc[sample]
srcs_ = srcs[sample]
rate_func = create_run_dict(rundict[sample]["runs"])

# sin(dec) binning: 5 bins south, 10 bins around the horizon, 5 bins north.
# Shared edges at -hor and +hor are merged by ``np.unique``.
hor = 0.25
south = np.linspace(-1., -hor, 5 + 1)
horizon = np.linspace(-hor, +hor, 10 + 1)
north = np.linspace(+hor, 1., 5 + 1)
sindec_bins = np.unique(np.concatenate([south, horizon, north]))

# 12 equidistant time bin edges from the first to the last data event
exp_times = exp_["timeMJD"]
rate_rebins = np.linspace(exp_times.min(), exp_times.max(), 12)

bg_inj_args = dict(sindec_bins=sindec_bins, rate_rebins=rate_rebins)
grb_inj = Inj.GRBModelInjector(bg_inj_args=bg_inj_args, rndgen=rndgen)

In [None]:
# Rate records from all data events, rebinned to the coarse time binning
recs = make_rate_records(T=exp_["timeMJD"], run_dict=rate_func)
rates, new_bins, stddev, _ = rebin_rate_rec(
    rate_rec=recs, bins=rate_rebins, ignore_zero_runs=True)
mids = (new_bins[:-1] + new_bins[1:]) / 2.

# First fit with only the period fixed, weighting each bin by 1 / stddev
rf_free = SinusFixedConstRateFunction(p_fix=365.)
inv_stddev = 1. / stddev
allsky_res = rf_free.fit(rate=rates, srcs=srcs_, t=mids, w=inv_stddev)

# The second fit parameter is taken as the time offset t0
t0_fix = allsky_res.x[1]
t0_offset = t0_fix - exp_["timeMJD"].min()
print("Best fit t0 before first event: ", t0_offset, "days")

# From here on ``rf`` has both the period and the time offset fixed
rf = SinusFixedConstRateFunction(p_fix=365., t0_fix=t0_fix)

In [None]:
def signal_flux_model(trueE, E0=1., gamma=2.):
    """
    Unnormalized power law ``(trueE / E0)**(-gamma)``.

    Parameters
    ----------
    trueE : float or array-like
        Energy value(s) the power law is evaluated at.
    E0 : float, optional
        Energy ``trueE`` is divided by before exponentiation. (Default: 1.)
    gamma : float, optional
        Positive spectral index, applied with a negative sign. (Default: 2.)

    Returns
    -------
    flux : float or array-like
        Power law value(s), same shape as ``trueE``.
    """
    scaled_energy = trueE / E0
    return scaled_energy**(-gamma)

# NOTE(review): the sampler is created with ``random_state=None`` and not with
# the seeded ``rndgen`` -- confirm whether this is intended.
ts = UniformTimeSampler(random_state=None)

# Field names handed to the signal injector fit as ``exp_names``
sig_exp_names = ["timeMJD", "dec", "ra", "sigma", "logE"]
sig_inj = SignalFluenceInjector(signal_flux_model, time_sampler=ts)
sig_inj.fit(srcs_, MC=MC_, exp_names=sig_exp_names)

In [None]:
# Fit the injector with the data, the sources, the run information and the
# already fitted signal injector
grb_inj.fit(
    X=exp_,
    srcs=srcs_,
    run_dict=rate_func,
    sig_inj=sig_inj,
)

In [None]:
# Name of the fit parameter to show
n = "amp"
x = np.linspace(-1, 1, 100)

# Bin centers of the injector's sin(dec) binning
bins = grb_inj._sin_dec_bins
mids = (bins[:-1] + bins[1:]) / 2.
vals = grb_inj._best_pars[n]
err = np.copy(grb_inj._best_stddevs[n])
spline_vals = grb_inj._param_splines[n](x)

# Best fit values per bin (dashed line and points with errors) and the
# spline built by the injector on top
plt.plot(mids, grb_inj._best_pars[n], color="C7", ls="--")
plt.errorbar(mids, vals, yerr=err, fmt="o", color="C1")
plt.plot(x, spline_vals, color="k")

plt.show()

In [None]:
# Normalized sin(dec) histogram of the data with the injector's data spline
x = np.linspace(-1, 1, 200)
bins = 40
sindec_data = np.sin(exp_["dec"])
plt.hist(sindec_data, bins=bins, density=True)
plt.plot(x, grb_inj._data_spl(x))
plt.show()

In [None]:
# Draw 100 samples from the injector and stack them into a single array
n_trials = 100
trial_samples = [grb_inj.get_sample() for _ in range(n_trials)]
sam = np.concatenate(trial_samples)

In [None]:
x = np.linspace(-1, 1, 200)
bins = 40
# bins = np.linspace(-1, 1, 17 + 1)
# One figure per entry in ``grb_inj._bg_inj``: the data histogram and data
# spline, overlaid with that entry's histogram and its sin(dec) spline
for j, ev in enumerate(grb_inj._bg_inj):
    sindec_data = np.sin(exp_["dec"])
    plt.hist(sindec_data, bins=bins, density=True)
    plt.plot(x, grb_inj._data_spl(x))

    sindec_ev = np.sin(ev["dec"])
    plt.hist(sindec_ev, bins=bins, density=True, histtype="step", lw=2.5)
    plt.plot(x, grb_inj._sin_dec_splines[j](x))

    plt.show()

### Test LLH scan instead of hess_inv from fitres

In [None]:
def plot_llh_scan(bfs, stds, llh, grid):
    """
    Plot the llh scan with errors and contours

    Parameters
    ----------
    bfs : array-like, shape (2)
        Best fit result parameters, around which the LLH was scanned.
    stds : array-like, shape (2)
        Approximate standard deviations (symmetric) for each fit parameter,
        obtained using Wilks' theorem on the scanned space.
    llh : array-like, shape (nbins, nbins)
        Scanned LLH values.
    grid : list
        X, Y grid, same shape as ``llh``.
    """
    bf_a, bf_d = bfs
    std_a, std_d = stds
    a, d = grid

    # Plot scan
    fig, ax = plt.subplots(1, 1)
    img = ax.pcolormesh(a, d, llh)
    fig.colorbar(img, ax=ax)

    # Plot 1, 2, 3 sigma contours. The levels are the scan minimum plus the
    # negative chi2 (2 degrees of freedom) log survival function at 1, 4, 9
    vals = np.amin(llh) - scs.chi2.logsf([1**2, 2**2, 3**2], df=2)
    ax.contour(a, d, llh, vals, linestyles=["--", "-.", "--"], colors="w")

    # Plot best fit with symmetric errors
    ax.errorbar(bf_a, bf_d, xerr=std_a, yerr=std_d, fmt="o", c="w", capsize=5)

    # Labels must go through the setter methods; assigning to ``ax.xlabel``
    # only creates a new attribute and leaves the axes unlabeled
    ax.set_xlabel("amplitude")
    ax.set_ylabel("baseline")
    plt.show()

def get_stddev_from_scan(mids, rates, weights, bfs, rngs, rate_func, nbins=100):
    """
    Scan the rate_func chi2 fit LLH to get stddevs for the best fit params a, d.
    Using matplotlib contours and averaging to approximately get the variances.
    Note: This is not a true LLH profile scan in both variables.

    The scan range is doubled until the one sigma contour is a single closed
    line inside the scanned grid. Afterwards the scan is repeated twice on a
    range of 1.1 times the current stddev estimate to refine the result.

    Parameters
    ----------
    mids : array-like
        Time points (x) used in the original fit.
    rates : array-like, shape (len(mids))
        Rate values (y) used in the original fit.
    weights : array-like, shape (len(mids))
        Weights used in the original chi2 fit.
    bfs : array-like, shape (2)
        Best fit result parameters.
    rngs : list
        Parameter ranges ``[rng_x, rng_y]`` to scan. Only the absolute values
        are used.
    rate_func : RateFunction instance
        Rate function used to do the original fit. Its ``_lstsq`` method is
        evaluated on every grid point.
    nbins : int, optional
        Number of bins in each dimension to scan. (Default: 100)

    Returns
    -------
    stds : array-like, shape (2)
        Approximate standard deviations (symmetric) for each fit parameter,
        obtained using Wilks' theorem on the scanned space.
    llh : array-like, shape (nbins, nbins)
        Scanned LLH values.
    grid : list
        X, Y grid, same shape as ``llh``.

    Raises
    ------
    RuntimeError
        If no closed contour is found after the maximum number of range
        doublings, or if a refined scan contains no contour at all.
    """
    # Upper limit for the range doubling, so that a scan which can never
    # produce a closed contour (e.g. a range of zero) ends with an error
    # instead of looping forever
    max_doublings = 30

    def _scan_llh(bf_a, rng_a, bf_d, rng_d):
        """ Scan LLH and return the one sigma contour segments """
        a_bins = np.linspace(bf_a - rng_a, bf_a + rng_a, nbins)
        d_bins = np.linspace(bf_d - rng_d, bf_d + rng_d, nbins)
        a, d = np.meshgrid(a_bins, d_bins)
        AA, DD = map(np.ravel, [a, d])
        llh = np.empty_like(AA)
        for i, (ai, di) in enumerate(zip(AA, DD)):
            # Use the rate function that was passed in, not a global one
            llh[i] = rate_func._lstsq((ai, di), mids, rates, weights)
        llh = llh.reshape(a.shape)
        one_sigma = np.amin(llh) - scs.chi2.logsf([1**2], df=2)
        # Let matplotlib trace the contour on a scratch figure, which is
        # closed right away so that nothing gets displayed
        fig, ax = plt.subplots(1, 1)
        try:
            cont = ax.contour(a, d, llh, levels=one_sigma)
            # ``allsegs[0]`` holds one (N, 2) vertex array per contour line
            # of the first (and only) level. Skip degenerate segments.
            segs = [np.asarray(seg) for seg in cont.allsegs[0]
                    if len(seg) > 1]
        finally:
            plt.close(fig)
        return segs, llh, [a, d]

    def _get_stds_from_segs(segs):
        """ Create symmetric stddevs from the contour vertices """
        verts = np.concatenate(segs, axis=0)
        x = verts[:, 0]
        y = verts[:, 1]
        # Average asymmetries in both directions
        a_min, a_max = np.amin(x), np.amax(x)
        d_min, d_max = np.amin(y), np.amax(y)
        return 0.5 * (a_max - a_min), 0.5 * (d_max - d_min)

    bf_a, bf_d = bfs
    # Negative ranges would yield a negative closure tolerance below
    rng_a, rng_d = np.abs(rngs)

    # Scan the LLH, adapt scan range if contour is not closed
    closed = False
    n_doublings = 0
    while not closed:
        # Get contour from scanned LLH space
        segs, llh, grid = _scan_llh(bf_a, rng_a, bf_d, rng_d)

        # We want the contour to be fully contained. Means there is only one
        # segment and the first and last point are close.
        if len(segs) == 1:
            seg = segs[0]
            max_bin_dist = np.amax([rng_a / float(nbins),
                                    rng_d / float(nbins)])
            closed = np.allclose(seg[0], seg[-1], atol=max_bin_dist)
        if not closed:
            if n_doublings >= max_doublings:
                raise RuntimeError(
                    "Found no closed one sigma contour after doubling the "
                    "scan range {} times.".format(max_doublings))
            # Otherwise make the scan range twice as large and retry
            # TODO: Is there a mechanism to decide if only x OR y needs scaling?
            rng_a *= 2
            rng_d *= 2
            n_doublings += 1

    # Refine: rescan twice on a range just enclosing the current estimate
    for _ in range(2):
        std_a, std_d = _get_stds_from_segs(segs)
        rng_a = std_a * 1.1
        rng_d = std_d * 1.1
        segs, llh, grid = _scan_llh(bf_a, rng_a, bf_d, rng_d)
        if not segs:
            raise RuntimeError(
                "Found no one sigma contour in the refined scan range.")

    stds = np.array(_get_stds_from_segs(segs))
    return stds, llh, grid

In [None]:
# ``matplotlib._cntr`` is a private module that newer matplotlib releases no
# longer ship. Guard the import so that a missing module does not abort a full
# run of the notebook; ``contour`` is ``None`` in that case.
try:
    import matplotlib._cntr as contour
except ImportError:
    import warnings
    contour = None
    warnings.warn("matplotlib._cntr is not available in this matplotlib "
                  "installation, 'contour' is set to None.")

In [None]:
# Fit the rate function per sin(dec) bin and estimate the parameter errors
# from an LLH scan around each best fit.
# Uses the objects defined in the cells above (``exp_``, ``rate_func``,
# ``srcs_``) so that the cell also runs on a fresh kernel. sin(dec) is
# computed from the "dec" field, as in the histogram cells above.
sindec = np.sin(exp_["dec"])
t_ = np.linspace(exp_["timeMJD"].min(), exp_["timeMJD"].max(), 200)

allres = []
errs = []
for j, (lo, hi) in enumerate(zip(sindec_bins[:-1], sindec_bins[1:])):
    # Select the events in the current sin(dec) bin (both edges inclusive)
    mask = (sindec >= lo) & (sindec <= hi)

    recs = make_rate_records(T=exp_["timeMJD"][mask], run_dict=rate_func)
    rates, new_bins, stddev, _ = rebin_rate_rec(rate_rec=recs, bins=rate_rebins,
                                                ignore_zero_runs=True)
    new_mids = 0.5 * (new_bins[:-1] + new_bins[1:])
    weights = 1. / stddev
    res = rf.fit(rate=rates, srcs=srcs_, t=new_mids, w=weights)
    allres.append(res)

    # Per run rates, the rebinned rates as steps and the fitted rate function
    plt.errorbar(recs["start_mjd"], recs["rate"], yerr=recs["rate_std"],
                 fmt=",", alpha=0.2, color="C0")
    plt.plot(recs["start_mjd"], recs["rate"], marker=".", ls="", color="C0")
    plt.plot(new_bins, np.r_[rates[0], rates], drawstyle="steps-pre", color="k")
    plt.plot(t_, rf.fun(t=t_, pars=res.x), color="C3")

    plt.show()

    bfs = np.array([res.x[0], res.x[1]])
    # Empirical estimates for amplitude and baseline scan range
    rngs = np.array([bfs[0], bfs[1] / 10.])
    stds, llh, grid = get_stddev_from_scan(
        new_mids, rates, weights, bfs=bfs, rngs=rngs, rate_func=rf)

    plot_llh_scan(bfs, stds, llh, grid)

    errs.append(stds)

errs = np.array(errs)

In [None]:
# Index of the fit parameter to show
idx = 1
x = np.linspace(-1, 1, 100)
mids = (sindec_bins[:-1] + sindec_bins[1:]) / 2.
norm = sindec_bins[1:] - sindec_bins[:-1]

# Best fit values and scanned errors per bin, divided by the bin width
best_fits = np.array([res.x[idx] for res in allres])
vals = best_fits / norm
err_ = errs[:, idx] / norm

vals, pts, err_ = make_spl_edges(vals, sindec_bins, err_)
# The first and last point both get the largest error as their error
err_[0] = err_[-1] = np.amax(err_)
w = 1. / err_
spl = sci.UnivariateSpline(pts, vals, w=w, s=len(vals) - 2)

plt.errorbar(pts, vals, yerr=err_, fmt=".", color="C7")
plt.axhline(0, 0, 1, color="C7", ls="--")
plt.plot(x, spl(x))

plt.show()