Here are some more side tests to clarify / justify details, that would clutter the main test notebook.

In [None]:
import helper as hlp

import numpy as np

import matplotlib.pyplot as plt
import matplotlib.dates as mpldates
import matplotlib.gridspec as gridspec
from matplotlib.colors import LogNorm
%matplotlib inline

import scipy.interpolate as sci
import scipy.optimize as sco
import scipy.integrate as scint
import scipy.stats as scs

from tqdm import tqdm_notebook as tqdm
import json
import datetime
import pickle
from astropy.time import Time as astrotime
from corner import corner

import sklearn.neighbors as skn
import sklearn.model_selection as skms  # Newer version of grid_search

from corner_hist import corner_hist
from anapymods3.plots.general import split_axis, get_binmids, hist_marginalize

# Load data

Load the IC86 data from epinat. This should be the usual IC86-I (2011) PS sample, but pull-corrected and with the OneWeights corrected by the number of generated events.

In [None]:
# Load experimental data, simulation and the sample livetime through the
# project helper. NOTE(review): livetime is later multiplied by seconds per
# day, so it is presumably given in days -- confirm in helper.load_data.
exp, mc, livetime = hlp.load_data()

# Data livetime comparison to v1.4

Let's compare to the v1.4 list, as used by jfeintzig.
Oddly, we have 0.2 days less livetime than he had.
The number of runs is correct, though.

In [None]:
# New livetime from iclive
# Fetch the run list and convert it into a dict with the project helpers
run_list = hlp.get_run_list()
run_dict = hlp.get_run_dict(run_list)
# Select the good runs: yields the array of included runs and their summed
# livetime (presumably in days, as it is compared to day values below --
# TODO confirm in helper.get_good_runs)
inc_run_arr, ic_livetime = hlp.get_good_runs(run_dict)

print("Total runs from iclive     : ", len(inc_run_arr))
print("IC86-I livetime from iclive: ", ic_livetime)

In [None]:
# For comparison, also parse the v1.4 list
# Should be: 1081 runs, with a total livetime of 332.61 days.
with open("data/Prelim_IC86-I_v1.4a.txt", 'r') as f:
    # Iterate the file lazily and strip the newline of every line
    data = [line.replace('\n', '') for line in f]

# Skip to beginning of run info
data = data[73:]

# Split at white space
data = [d.split() for d in data]

# Use the builtin int / float: the np.int / np.float aliases were removed
# from NumPy and were only ever aliases of the builtins.
dtype = [("runID", int), ("duration", float), ("IT", "|S2"),
         ("CONF", "|S7"), ("FLAG", "|S6")]
runlist = np.empty((len(data),), dtype=dtype)

# Columns 0, 3, 5, 6, 7 of each row hold run ID, duration and the flags
runlist["runID"] = np.array([int(d[0]) for d in data])
runlist["duration"] = np.array([float(d[3]) for d in data])
runlist["IT"] = np.array([d[5] for d in data])
runlist["CONF"] = np.array([d[6] for d in data])
runlist["FLAG"] = np.array([d[7] for d in data])

# Now filter: Include IT=it, CONF=full, FLAG=GOOD, exclude strange rate runs
exclude_rate = [120028, 120029, 120030, 120087, 120156, 120157]
# The "|S" fields store bytes, so they must be compared to bytes literals;
# comparing against str never matches.
itgood = runlist["IT"] == b"IT"
confgood = runlist["CONF"] == b"full"
flaggood = runlist["FLAG"] == b"GOOD"
ratebad = np.in1d(runlist["runID"], exclude_rate)

include = itgood & confgood & flaggood & ~ratebad
runlist_inc = runlist[include]

# Get the livetime of the sample in days (summed durations are divided by
# the hours per day)
hoursindays = 24.
secinday = hoursindays * 60. * 60.
old_livetime = np.sum(runlist_inc["duration"]) / hoursindays

print("Total runs from v1.4     : ", len(runlist_inc))
print("Total livetime from v1.4 : ", old_livetime)

Let's see, if the 120 extra runs in the new runlist make up for the difference of about 10 days in livetime.

In [None]:
# Find the iclive runs that do not appear in the filtered v1.4 run list
iclive_in_old = np.in1d(inc_run_arr["runID"], runlist_inc["runID"])
not_in_old = inc_run_arr[~iclive_in_old]

# Livetime of those extra runs: sum of (stop - start), both given in MJD
start = not_in_old["start_mjd"]
stop = not_in_old["stop_mjd"]
missing_livetime = np.sum(stop - start)

print("\nOfficial IC86-I PS livetime: ", livetime)
print("Total livetime from v1.4   : ", old_livetime)
print("IC86-I livetime from iclive: ", ic_livetime)

print("\nMissing runs in old: ", len(not_in_old))
print("Livetime icliv - old :", ic_livetime - old_livetime)

print("\nDiff from summing missing runs           : ", missing_livetime)
print("New iclive livetime with same runs as old: ",
      ic_livetime - missing_livetime)

# Average rate: number of events divided by the livetime converted to seconds
print("\nTotal rate [Hz] over total livetime: ",
      len(exp) / (livetime * secinday))

All runs from the new run list that contain zero events account for the runs missing in the old run list, so this is consistent.

I don't know, though, where the missing 0.2 days come from. Probably some run times have shifted a little, creating some extra livetime in the new list.

In [None]:
# Store events in bins with run borders
exp_times = exp["timeMJD"]
start_mjd = inc_run_arr["start_mjd"]
stop_mjd = inc_run_arr["stop_mjd"]

# Count the events of every run directly into an array that is aligned with
# the run arrays. Filling the counts from the dict values afterwards would
# silently misalign them if run IDs repeat or the dict order differs.
tot = 0
evts_in_run = {}
h = np.zeros(len(start_mjd), dtype=float)
runs = zip(start_mjd, stop_mjd, inc_run_arr["runID"])
for i, (start, stop, runid) in enumerate(runs):
    # Half-open interval [start, stop) so no event is counted twice
    mask = (exp_times >= start) & (exp_times < stop)
    n_in_run = np.sum(mask)
    evts_in_run[runid] = exp[mask]
    h[i] = n_in_run
    tot += n_in_run

# Crosscheck, if we got all events and counted nothing double
print("Do we have all events? ", tot == len(exp))
print("  Events selected : ", tot)
print("  Events in exp   : ", len(exp))

# Bin mids are the centers of the runs
binmids = 0.5 * (start_mjd + stop_mjd)

m = (h > 0)
print("Runs with 0 events :", np.sum(~m))
print("Runtime in those runs: ", np.sum(inc_run_arr["stop_mjd"][~m] -
                                        inc_run_arr["start_mjd"][~m]))

# Remove all zero event runs (artifacts from new run list) and calc the rate
# in Hz: events per run divided by the run duration in seconds
stop_mjd, start_mjd = stop_mjd[m], start_mjd[m]
h = h[m] / ((stop_mjd - start_mjd) * secinday)
binmids = binmids[m]

# Time dependent rate function

**Note: I think it is unnecessary to use a time and declination dependent rate. The spatial part is injected from the data BG via the KDE anyway. So we just need the rate to determine how many events we inject all-sky.**

The rate is time dependent because of seasonal variation.
We take this variation into account by fitting a periodic function to the time-resolved rate.

The data is built by calculating the rate in each run as seen before.
This rate is correctly normalized and smooths local fluctuations.

### Periodic function with a weighted least squares fit

See side_test for comparison to spline fits.
The function is a simple sine, scaled by 4 parameters to fit the shape of the rates:

$$
    f(x) = a\cdot \sin(b\cdot(x - c)) + d
$$

The least squares loss function is

$$
    R = \sum_i (w_i(y_i - f(x_i)))^2
$$

Weights are the inverse standard deviations from the Poisson histogram errors.

$$
    w_i = \frac{1}{\sigma_i}
$$

Seed values are estimated from the plot of rate vs. time.

- The period should be 365 days (MJD) because we have one year of data, so we choose $b_0 = 2\pi/365$.
- The amplitude is about $a_0=-0.0005$, because the sine seems to start with negative values.
- The x-offset is chosen as the first start date, to get the right order of magnitude.
- The y-axis intersection $d$ should be close to the weighted average, so we take this as a seed.

The bounds are motivated as follows (and if we don't hit them, it's OK to use them).

- Amplitude $a$ should be positive; this also resolves a degeneracy with the x-axis offset.
- The period $b$ should scatter around one year; a deviation of more than $\pm$ half a year is unphysical.
- The x-offset $c$ cannot deviate from its initial value by more than $\pm$ one period, because we have a periodic function.
- The y-axis offset $d$ is arbitrarily constrained, but as seen from the plot it should not exceed 0.1.

## Proposed was something like this

The rate is time dependent because of seasonal variation and declination dependent because the detector acceptance is declination dependent.
A correlation should not exist or be very small.

So we express the rate as a function of time and declination as

$$
    R(t,\delta) = R_T(t)\cdot R_\delta(\delta)
$$

with independent parts in declination and time each.

The time part is constructed by fitting a periodic function.
Then we separately fit a spline to the total $\sin(\delta)$ distribution and normalize it over the declination range of the sample.
For a given time and declination the smooth function $R(t, \delta)$ gives the correct detector rate.

## Splinefit to sinDec

First we fit a spline to the sinDec distribution of all events.
This is equivalent to what is done in skylab to estimate the per signal background PDF from data.

In [None]:
# Histogram sinDec of all events, mirroring skylab's background PDF
# construction
sinDec_range = [-1, 1]
sinDec_hist, sinDec_bins = np.histogram(exp["sinDec"], bins=25,
                                        range=sinDec_range, density=True)

m = get_binmids([sinDec_bins])[0]

# The logarithm is taken below, so no bin may be empty
empty_bins = (sinDec_hist <= 0.)
if np.any(empty_bins):
    raise ValueError(("Declination hist bins empty, this must not happen. "
                      + "Empty bin idx: {}".format(
                          np.arange(len(m))[empty_bins])))

# Interpolate the logarithm of the histogram to avoid ringing. The spline
# extrapolates when evaluated outside the range of the bin mids.
sinDec_spline = sci.InterpolatedUnivariateSpline(m, np.log(sinDec_hist),
                                                 k=3, ext="extrapolate")


def sinDec_pdf_(x):
    """Unnormalized sinDec density: exponential of the log-spline at x."""
    return np.exp(sinDec_spline(x))


# Normalize to area on whole sky = 1, so norm = 2pi * integral(exp(spl))
norm = scint.quad(sinDec_pdf_, -1, 1)[0] * 2. * np.pi


def sinDec_pdf(x):
    """sinDec density at x, normalized to unit area on the full sphere."""
    return sinDec_pdf_(x) / norm


area = scint.quad(sinDec_pdf, -1, 1)[0] * 2 * np.pi
print("SinDec pdf has area on 4pi = ", area)

In [None]:
# Evaluate the normalized sinDec PDF between the outermost bin edges
x = np.linspace(sinDec_bins[0], sinDec_bins[-1], 100)
y = sinDec_pdf(x)

plt.plot(x, y)

In [None]:
# Try to hist dec dependence of single run
# As expected we have not enough statistic to see anything
run = 50
start = start_mjd[run]
stop = stop_mjd[run]
# Select the events inside the half-open run interval [start, stop)
mask = (exp_times >= start) & (exp_times < stop)
# Sine of the declination of the selected events
_sinDec_run = np.sin(exp["dec"][mask])

plt.hist(_sinDec_run, range=[-1, 1], bins=10)

## Spline to rate distribution

Choosing spline weights according to scipy.interpolate.UnivariateSpline manual:

    If None (default), s = len(w) which should be a good value if 1/w[i] is an estimate of
    the standard deviation of y[i].

Internally it is doing a weighted least squares fit with $\sum_i(w_i(y_i-\text{spl}(x_i)))^2 \leq s$.
We leave $s$ as the default because we have an estimate for the stddevs: $\sigma_i = \sqrt{h_i}$.
To match the definition of the weights we use:

$$
    \frac{1}{w_i} \stackrel{!}{=} \sigma_i = \sqrt{h_i} \Leftrightarrow w_i 
                               = \frac{1}{\sqrt{h_i}}
$$

Because we scaled $h_i$ to get the rate in events per s, we need to scale the errors too:

$$
    \tilde{h}_i = \frac{h_i}{s_i} \Rightarrow \tilde{\sigma}_i = \frac{\sqrt{h_i}}{s_i} 
                = \frac{\sqrt{s_i\tilde{h}_i}}{s_i} 
                = \sqrt{\frac{\tilde{h}_i}{s_i}} = \frac{1}{w_i}
$$

The smoothing condition `s` chooses the support knots based on the weights.
Because we have an oscillating pattern due to seasonal variations (period ~1 yr), a quadratic spline function is not enough.
So we choose the next higher order, a cubic spline, which is able to oscillate up and down exactly once.

**Note:** If a weight is zero, the corresponding point doesn't contribute at all.
So we might consider using $w_i = \sigma_i$ instead.
Then points with high Poisson statistics are preferred over low-statistics bins.
It doesn't seem to make a huge difference though.

Below we try both weights and the unweighted case.
For the 'correctly' weighted case with $w_i = 1. / \sigma_i$ the spline oscillates strongly.
So we better try a true periodic function.

In [None]:
# h is already scaled, so we need to scale the errors too
# sqrt(rate / duration in seconds) is the Poisson error of the rate
yerr = np.sqrt(h) / np.sqrt((stop_mjd - start_mjd) * secinday)
w = 1. / yerr
# Cubic smoothing spline weighted with w = 1 / stddev
rate_spline = sci.UnivariateSpline(binmids, h, k=3, w=w,
                                   s=None, ext="extrapolate")

# Same spline, but weighted with the inverse weights (w = stddev)
rate_spline_inv = sci.UnivariateSpline(binmids, h, k=3, w=1. / w,
                                       s=None, ext="extrapolate")

# Same spline without weights
rate_spline_unw = sci.UnivariateSpline(binmids, h, k=3, w=None,
                                       s=None, ext="extrapolate")

In [None]:
# Plot runs
xerr = 0.5 * (stop_mjd - start_mjd)
plt.errorbar(binmids, h, xerr=0, yerr=yerr, fmt=",")
plt.ylim(0, None);

# Plot spline fitted with w = 1 / stddev
x = np.linspace(start_mjd[0], stop_mjd[-1], 200)
y = rate_spline(x)
plt.plot(x, y, zorder=5, lw=2, color="k", label="w=1/std")

# Plot the matching weighted average. A least squares fit of a constant
# with weights w_i minimizes sum((w_i * (y_i - c))**2) and yields
# c = sum(w_i**2 * y_i) / sum(w_i**2). For w_i = 1 / stddev the averaging
# weights therefore are the inverse variances.
avg = np.average(h, weights=1. / yerr**2)
plt.axhline(avg, 0, 1, color="k", ls="--", zorder=5)

# Plot unweighted mean and spline
y = rate_spline_unw(x)
plt.plot(x, y, zorder=5, lw=2, color="r")

avg = np.mean(h)
plt.axhline(avg, 0, 1, color="r", ls="--", zorder=5, label="w=1")

# Plot with inverse weights (w = stddev). By the same argument as above the
# matching averaging weights are the variances.
y = rate_spline_inv(x)
plt.plot(x, y, zorder=5, lw=2, color="g", label="w=std")
avg = np.average(h, weights=yerr**2)
plt.axhline(avg, 0, 1, color="g", ls="--", zorder=5)

plt.legend()
plt.tight_layout()
plt.savefig("splines.png", dpi=200)

## Periodic function fit

Try a periodic function with a weighted least squares fit.

$$
    f(x) = a\cdot \sin(b\cdot(x - c)) + d
$$

The least squares loss function is

$$
    R = \sum_i (w_i(y_i - f(x_i)))^2
$$

Weights are the inverse standard deviations from the Poisson histogram errors.

$$
    w_i = \frac{1}{\sigma_i}
$$

Seed values are estimated from the plot of rate vs. time.
The period should be 365 days (MJD) because we have one year of data, so we choose $b_0 = 2\pi/365$.
The amplitude is about $a_0=-0.0005$, because the sine seems to start with negative values.
The x-offset is chosen as the first start date, to get the right order of magnitude.
The y-axis intersection $d$ should be close to the weighted average, so we take this as a seed.

The bounds are motivated as follows (and if we don't hit them, it's OK to use them).
Amplitude $a$ should be positive; this also resolves a degeneracy with the x-axis offset.
The period $b$ should scatter around one year; a deviation of more than $\pm$ half a year is unphysical.
The x-offset $c$ cannot deviate from its initial value by more than $\pm$ one period, because we have a periodic function.
The y-axis offset $d$ is arbitrarily constrained, but as seen from the plot it should not exceed 0.1.

In [None]:
def f(x, args):
    """
    Sine model a * sin(b * (x - c)) + d.

    `args` holds the four parameters (a, b, c, d) in that order.
    """
    amp, freq, shift, offset = args
    phase = freq * (x - shift)
    return amp * np.sin(phase) + offset

def lstsq(pars, *args):
    """
    Weighted least squares loss sum((wi * (yi - fi))**2).

    `pars` are the fitted sine parameters (a, b, c, d); the first three
    entries of `args` are the fixed data x, y and the weights w.
    """
    xdata, ydata, weights = args[:3]
    amp, freq, shift, offset = pars[:4]
    # Sine model evaluated at the data points
    model = amp * np.sin(freq * (xdata - shift)) + offset
    # Weighted residuals, squared and summed
    resid = weights * (ydata - model)
    return np.sum(resid**2)

In [None]:
# Seed values from consideration above.
a0 = -0.0005
b0 = 2. * np.pi / 365.
c0 = np.amin(start_mjd)
# The fit below uses weights 1 / yerr, so the matching weighted average of a
# constant model uses inverse variance weights.
d0 = np.average(h, weights=1. / yerr**2)

x0 = [a0, b0, c0, d0]
# Bounds as explained above. Note that b is the angular frequency, so the
# x-offset has to be bounded by the period 2 * pi / b0 (= 365 days), not by
# b0 itself, which would pin c to within ~0.02 days around its seed.
period0 = 2. * np.pi / b0
bounds = [[None, None], [0.5 * b0, 1.5 * b0],
          [c0 - period0, c0 + period0], [0, 0.01]]
# x, y values, weights
args = (binmids, h, 1. / yerr)

res = sco.minimize(fun=lstsq, x0=x0, args=args, bounds=bounds)

for i, name in enumerate(["Amplitude a", "Period b", "x-Shift c", "y-axis d"]):
    print(name, " : ", res.x[i])

In [None]:
# Plot runs
# NOTE: xerr is computed but the errorbar call below passes xerr=0
xerr = 0.5 * (stop_mjd - start_mjd)
plt.errorbar(binmids, h, xerr=0, yerr=yerr, fmt=",")
plt.ylim(0, None);

# Plot fit: evaluate the sine model with the best fit parameters
pars = res.x
x = np.linspace(start_mjd[0], stop_mjd[-1], 1000)
y = f(x, pars)
plt.plot(x, y, zorder=5)

# Plot y shift dashed to see baseline or years average
plt.axhline(res.x[3], 0, 1, color="C1", ls="--")

plt.xlim(start_mjd[0], stop_mjd[-1])

plt.savefig("sinfit.png", dpi=200)

## Combine both to make a time-dec rate function

Multiply the rate function of time with the PDF in sinDec.
This gives the rate per solid angle.
Integrated over the whole sphere, we recover the total rate at that time.
Integrating further over the whole time range, taking the dead times of the detector into account, we recover the total number of events in all runs of this sample.
We can approximate this by taking the fitted y-axis offset, which is approximately the mean rate, and multiplying it with the livetime.
We recover the total number of events to good approximation.

In [None]:
# Approximation: fitted baseline rate d times the livetime in seconds
print("Number of events from approx : ", res.x[3] * livetime * secinday)
print("True number of events        : ", len(exp))

# Simply integrating doesn't respect the downtimes
# Integrate the fitted rate over the full MJD range and convert days to sec
wrong = scint.quad(f, start_mjd[0], stop_mjd[-1], args=res.x)[0] * secinday
print("Integrating over whole year  : ", wrong)

In [None]:
# Function of sinDec and time to get the rate per solid angle at that point.
# The rate does not depend on right-ascension.
def time_sinDec_rate(sinDec, t):
    """Product of the sinDec PDF and the fitted time dependent rate."""
    spatial = sinDec_pdf(sinDec)
    temporal = f(t, res.x)
    return spatial * temporal

In [None]:
# This should yield ~1. The ratio of the fitted average d and the integral
# of the rate function over the whole sky at a time approximately at rate=d
# Integrate over sinDec in [-1, 1] at t=55700 and multiply by 2pi for RA
_i = 2. * np.pi * scint.quad(time_sinDec_rate, -1, 1, args=55700)[0] / res.x[3]
print("1D and mukltiply by 2pi : ",_i)

# We can also use a 2D integrator to integrate RA as well (same result)
# dblquad passes the inner integration variable first, here ra in [0, 2pi];
# the outer variable is sinDec in [-1, 1]. ra itself is not used.
def fullsky_rate(ra, sinDec, t):
    return sinDec_pdf(sinDec) * f(t, res.x)
_i = scint.dblquad(fullsky_rate, -1, 1, lambda x: 0, lambda x: 2.*np.pi,
                   args=(55700,))[0] / res.x[3]
print("2D over dec and ra      : ", _i)

# Time PDF ratio

Background is uniformly distributed in the time window.
The signal distribution falls off Gaussian-like at both edges, so its normalization is different.
So the ratio S/B is simply the signal PDF divided by the uniform normalization $1 / (t_1 - t_0)$ in the time frame.

To get finite support we truncate the Gaussian edges at n sigma.
Though the cutoff is introduced arbitrarily to let the PDF run smoothly to zero, its concrete value doesn't really matter (say 4, 5, 6 sigma, etc.).
This is because in the LLH we get the product of $\langle b_B \rangle B_i$.
A larger cutoff lowers the BG PDF value (its normalization range grows), but at the same time raises the number of expected BG events by the same linear factor.
So as long as we choose a cutoff which ensures that $S \approx 0$ outside, we're good to go.

In [None]:
secinday = 24. * 60. * 60.

def time_bg_pdf(t, t0, a, b):
    """
    BG is uniform for t in [t0 + a, t0 + b] and 0 outside.

    Times t and t0 are given in MJD, the range is given relative to t0
    in seconds. t are the times we return pdf values for, t0 is the time of
    the source event around which the time frame is defined.

    The PDF is normed to time in seconds!

    Parameters
    ----------
    t : float or array-like
        Times in MJD for which the PDF is evaluated.
    t0 : float
        Time of the source event in MJD.
    a, b : float
        Lower and upper border of the uniform range in seconds relative to
        t0.

    Returns
    -------
    pdf : ndarray
        PDF values with the same shape as t: 1 / (b - a) inside the range,
        0 outside.
    """
    # Normalize relative to t0 in seconds (first multiply avoids rounding?)
    _t = np.asarray(t, dtype=float) * secinday - t0 * secinday
    # Work on at least 1D so that boolean masking also works for scalars
    shape = np.shape(_t)
    _t = np.atleast_1d(_t)

    # Builtin float: the np.float alias was removed from NumPy
    pdf = np.zeros_like(_t, dtype=float)
    uni = (_t >= a) & (_t <= b)
    pdf[uni] = 1. / (b - a)
    return pdf.reshape(shape)

def time_sig_pdf(t, t0, dt, nsig=4):
    """
    Signal falls of with gaussian with sigma = sig_t outside uniform range dt.

    Times t, t0 are in MJD, dt is in seconds.
    t are the times we return pdf values for, t0 is the time of the source
    event around which the time frame is defined.
    dt is the time window starting from t0 in which signal is uniform.

    The PDF is normed to time in seconds!

    Parameters
    ----------
    t : float or array-like
        Times in MJD for which the PDF is evaluated.
    t0 : float
        Time of the source event in MJD.
    dt : float
        Length of the uniform part in seconds. Must not be negative.
    nsig : float, optional
        The gaussian edges are truncated at nsig * sig_t, where sig_t is dt
        clipped to [2, 30] seconds. (default: 4)

    Returns
    -------
    pdf : ndarray
        Normalized PDF values with the same shape as t.

    Raises
    ------
    ValueError
        If dt is negative.
    """
    if dt < 0:
        raise ValueError("dt must not be negative.")

    # Defined locally (as in time_soverb) so no notebook global is required
    secinday = 24. * 60. * 60.

    # Normalize relative to t0 in seconds (first multiply avoids rounding?)
    _t = np.asarray(t, dtype=float) * secinday - t0 * secinday
    # Work on at least 1D so that boolean masking also works for scalars
    shape = np.shape(_t)
    _t = np.atleast_1d(_t)

    # Constrain sig_t to [2, 30]s regardless of uniform time window
    sig_t = np.clip(dt, 2, 30)
    sig_t_clip = nsig * sig_t
    gaus_norm = (np.sqrt(2 * np.pi) * sig_t)

    # Split in def regions gaus rising, uniform, gaus falling and zero
    gr = (_t < 0) & (_t >= -sig_t_clip)
    gf = (_t > dt) & (_t <= dt + sig_t_clip)
    uni = (_t >= 0) & (_t <= dt)

    # Builtin float: the np.float alias was removed from NumPy
    pdf = np.zeros_like(_t, dtype=float)
    pdf[gr] = scs.norm.pdf(_t[gr], loc=0, scale=sig_t)
    pdf[gf] = scs.norm.pdf(_t[gf], loc=dt, scale=sig_t)
    # Connect smoothly with the gaussians: plateau at the gaussian peak value
    pdf[uni] = 1. / gaus_norm

    # Normalize whole distribution: area of both truncated gaussian halves
    # plus the area of the uniform plateau
    dcdf = (scs.norm.cdf(dt + sig_t_clip, loc=dt, scale=sig_t) -
            scs.norm.cdf(-sig_t_clip, loc=0., scale=sig_t))
    norm = dcdf + dt / gaus_norm

    return (pdf / norm).reshape(shape)

def time_soverb(t, t0, dt, nsig):
    """
    Time signal over background PDF.

    Signal and background PDFs are each normalized over seconds.
    Signal PDF has gaussian edges to smoothly let it fall of to zero, the
    stddev sig_t is dt when dt is in [2, 30]s, otherwise the nearest edge.

    To ensure finite support, the edges are truncated after nsig * sig_t.

    Parameters
    ----------
    t : array-like
        Times given in MJD for which we want to evaluate the ratio.
    t0 : float
        Time of the source event.
    dt : float
        Time window in seconds starting from t0 in which the signal pdf is
        assumed to be uniform. Must not be negative.
    nsig : float
        Clip the gaussian edges at nsig * sig_t

    Returns
    -------
    ratio : ndarray
        Signal over background ratio with the same shape as t. It is zero
        outside the truncated signal support.

    Raises
    ------
    ValueError
        If dt is negative.
    """
    if dt < 0:
        raise ValueError("dt must not be negative.")

    secinday = 24. * 60. * 60.

    # Normalize relative to t0 in seconds (first multiply avoids rounding?)
    _t = np.asarray(t, dtype=float) * secinday - t0 * secinday
    # Work on at least 1D so that boolean masking also works for scalars
    shape = np.shape(_t)
    _t = np.atleast_1d(_t)

    # Create signal PDF
    # Constrain sig_t to [2, 30]s regardless of uniform time window
    sig_t = np.clip(dt, 2, 30)
    sig_t_clip = nsig * sig_t
    gaus_norm = (np.sqrt(2 * np.pi) * sig_t)

    # Split in def regions gaus rising, uniform, gaus falling
    gr = (_t < 0) & (_t >= -sig_t_clip)
    gf = (_t > dt) & (_t <= dt + sig_t_clip)
    uni = (_t >= 0) & (_t <= dt)

    # Builtin float: the np.float alias was removed from NumPy
    pdf = np.zeros_like(_t, dtype=float)
    pdf[gr] = scs.norm.pdf(_t[gr], loc=0, scale=sig_t)
    pdf[gf] = scs.norm.pdf(_t[gf], loc=dt, scale=sig_t)
    # Connect smoothly with the gaussians
    pdf[uni] = 1. / gaus_norm

    # Normalize signal distribution: area of both truncated gaussian halves
    # plus the area of the uniform plateau
    dcdf = (scs.norm.cdf(dt + sig_t_clip, loc=dt, scale=sig_t) -
            scs.norm.cdf(-sig_t_clip, loc=0., scale=sig_t))
    norm = dcdf + dt / gaus_norm
    pdf /= norm

    # Calculate the ratio. The background is uniform over the whole signal
    # support, which has the length dt + 2 * sig_t_clip.
    bg_pdf = 1. / (dt + 2 * sig_t_clip)
    ratio = pdf / bg_pdf
    return ratio.reshape(shape)

In [None]:
# Arbitrary start date from data
t0 = start_mjd[100]
t0_sec = t0 * secinday

# dt from t0 in seconds, clip at 4 sigma
dt = 0
nsig = 4.

# Make t values for plotting in MJD around t0
# Same truncation as used inside the PDFs: nsig times dt clipped to [2, 30]
clip = np.clip(dt, 2, 30) * nsig
plt_rng = [-clip, dt + clip]
t = np.linspace(t0_sec + plt_rng[0], t0_sec + plt_rng[1], 200) / secinday

bg_pdf = time_bg_pdf(t, t0, -clip, dt + clip)
sig_pdf = time_sig_pdf(t, t0, dt, nsig)

# Plot in normalized time
_t = t * secinday - t0 * secinday
plt.plot(_t, bg_pdf, "C0-")
plt.plot(_t, sig_pdf, "C1-")
# Mark the end (dt) and the start (0) of the uniform signal window
plt.axvline(dt, 0, 1, color="C7", ls="--")
plt.axvline(0, 0, 1, color="C1", ls="--")

plt.xlabel("Time relative to t0 in sec")
plt.ylim(0, None);
plt.show()

In [None]:
# Integrate both pdf over time range to show they are correctly normalized
# Note that PDFs are defined in second so we multiply by secinday 
# The integration variable is the time in MJD, running over the plot range
bg_int = scint.quad(time_bg_pdf, t[0], t[-1],
                    args=(t0, -clip, dt + clip))[0] * secinday
sig_int = scint.quad(time_sig_pdf, t[0], t[-1],
                    args=(t0, dt, nsig))[0] * secinday

print("BG integral     : ", bg_int)
print("Signal integral : ", sig_int)

In [None]:
# Make a plot with ratios for different time windows as in the paper
# Arbitrary start date from data
t0 = start_mjd[100]
t0_sec = t0 * secinday

# dt from t0 in seconds, clip at 4 sigma
dts = [5, 50, 200]
nsig = 4

# Make t values for plotting in MJD around t0, fitting all in one plot
# The range is set by the largest window and extended by 20% on both sides
max_dt = np.amax(dts)
clip = np.clip(max_dt, 2, 30) * nsig
plt_rng = np.array([-clip, max_dt + clip])
t = np.linspace(t0_sec + 1.2 *plt_rng[0],
                t0_sec + 1.2 * plt_rng[1], 1000) / secinday
_t = t * secinday - t0 * secinday

# Mark event time
plt.axvline(0, 0, 1, c="#353132", ls="--", lw=2)

colors = ["C0", "C3", "C2"]
for i, dt in enumerate(dts):
    # Plot ratio S/B
    SoB = time_soverb(t, t0, dt, nsig)
    plt.plot(_t, SoB, lw=2, c=colors[i],
             label=r"$T_\mathrm{{uni}}$: {:>3d}s".format(dt))
    # Fill uniform part, might look nicely
    # fbtw = (_t > 0) & (_t < dt)
    # plt.fill_between(_t[fbtw], 0, SoB[fbtw], color="C7", alpha=0.1)

# Make it look like the paper plot, but with slightly extended borders, so
# nothing breaks outside the total time frame
plt.xlim(1.2 * plt_rng)
plt.ylim(0, 3)
plt.xlabel("t - t0 in sec")
plt.ylabel("S / B")
plt.legend(loc="upper right")
plt.grid()
plt.show()

# Let's make the BG pdf

## Marginalize KDE by integration

Instead of sampling and reducing to 2D histograms, we can try to truly integrate one dimension of the KDE to be able to plot also the tails of the distribution, where events usually end up only in large samples.

In [None]:
# KDE CV is running on cluster and pickles the GridSearchCV
fname = "./kde_cv/KDE_model_selector_20_exp_IC86_I_followup_2nd_pass.pickle"
# NOTE: pickle.load can execute arbitrary code, only load files you created
# yourself. The file handle is deliberately not named `f`, because that
# would overwrite the rate function `f` defined earlier in this notebook.
with open(fname, "rb") as pickle_file:
    model_selector = pickle.load(pickle_file)

kde = model_selector.best_estimator_
bw = model_selector.best_params_["bandwidth"]
print("Best bandwidth : {:.3f}".format(bw))

# We maybe just want to stick with the slightly overfitting kernel to
# be as close as possible to data
OVERFIT = True
if OVERFIT:
    bw = 0.1
    kde = skn.KernelDensity(bandwidth=bw, kernel="gaussian", rtol=1e-8)
print("Used bandwidth : {:.3f}".format(bw))

# KDE sample must be cut in sigma before fitting, similar to range in hist
_exp = exp[exp["sigma"] <= np.deg2rad(5)]

# Scale factors applied to each KDE dimension before fitting
fac_logE = 1.5
fac_dec = 2.5
fac_sigma = 2.

_logE = fac_logE * _exp["logE"]
_sigma = fac_sigma * np.rad2deg(_exp["sigma"])
_dec = fac_dec * _exp["dec"]

# Columns of the KDE sample: scaled logE, dec and sigma (in degree)
kde_sample = np.vstack((_logE, _dec, _sigma)).T

# Fit KDE best model to sample
kde.fit(kde_sample)

# Make some samples
nsamples_kde = int(1e7)
bg_samples = kde.sample(n_samples=nsamples_kde)

### 1D case

Integrate out 2 axes with a double integral to show a 1D marginal distribution.
This takes very long, about 5 minutes per point.
But it can be parallelized pretty simply if needed.
Code to create the values is on phobos.

**Summary:** Sampling many values and simply binning in 1D is much better, needs less time and is probably more accurate.

In [None]:
# Compare to data hist
# `density=True` replaces the removed `normed` keyword
_ = plt.hist(exp["logE"] * fac_logE, bins=100, density=True, label="data")

# Compare 'true' integration with the histogram of the KDE sample in the
# first (scaled logE) dimension
h, b = np.histogram(bg_samples[:, 0], bins=200, range=[2, 10], density=True)
m = 0.5 * (b[:-1] + b[1:])
_ = plt.plot(m, h, label="sample")

# Precomputed integration result: row 0 holds the x values, row 1 the
# integrated PDF values
bins_and_vals = np.load("data/2d_integrate_kde/bins_and_vals.npy")
x = bins_and_vals[0]
vals = bins_and_vals[1]
_ = plt.plot(x, vals, label="integrated")

plt.legend()
plt.savefig("./kde.png", dpi=200)

### 2D PDF

Integrate out only one axis (here sigma) to show the 2D marginalized PDF.

In [None]:
# Choose axis in which the PDF is differential
xax = 0  # logE
yax = 1  # dec
# NOTE(review): xrng and yrng are not used below, the grid is built from
# the hard coded bin edges instead -- confirm which range is intended.
xrng = np.array([2, 7]) * fac_logE
yrng = np.array([-np.pi / 2., np.pi / 2.]) * fac_dec

# Integrate range
irng = np.array([0, 5]) * fac_sigma

# Create the integration grid on the bin mids
nbinsx, nbinsy = 10, 10
xbins = np.linspace(2, 10, nbinsx + 1)
ybins = np.linspace(-np.pi, np.pi, nbinsy + 1)
x, y = get_binmids([xbins, ybins])
xx, yy = map(np.ravel, np.meshgrid(x, y))

# Scans x line by x line with increasing y
grid_pts = np.vstack((xx, yy)).T

def pdf(x, *args):
    """
    KDE PDF value at a single point.

    x is the coordinate on the integrated (last) axis, args holds the
    x- and y-coordinate of the grid point on the two remaining axes.
    """
    # Make single point to evaluate the KDE at
    xgridpt, ygridpt = args
    pt = [xgridpt, ygridpt, x]
    pt = np.array(pt)[np.newaxis, :]

    # score_samples returns the log density
    return np.exp(kde.score_samples(X=pt))

# Just integrate over last axis at every gridpoint
integrals = np.zeros(len(grid_pts))
# Pass the total explicitly, the enumerate iterator has no length for tqdm
for i, gp in tqdm(enumerate(grid_pts), total=len(grid_pts)):
    intgrl = scint.quad(pdf, irng[0], irng[1], args=(gp[0], gp[1]))
    integrals[i] = intgrl[0]


# Compare hist from sample to integration in the same bins
fig, ax = plt.subplots(1, 2, figsize=(10, 4))

# `density=True` replaces the removed `normed` keyword
_, _, _, img0 = ax[0].hist2d(bg_samples[:, xax], bg_samples[:, yax],
                             bins=[xbins, ybins], density=True)

# Don't multiply with binwidth. The midpoint is already the "average".
# A better estimation would be to integrate over the bin and divide by the
# binwidths, but it would yield a similar result.
_, _, _, img1 = ax[1].hist2d(xx, yy, bins=[xbins, ybins],
                             weights=integrals)

cax0 = split_axis(ax[0], "right", cbar=True)
cax1 = split_axis(ax[1], "right", cbar=True)

plt.colorbar(cax=cax0, mappable=img0)
plt.colorbar(cax=cax1, mappable=img1)

fig.tight_layout()

## Justify the sigma cut

Only a few higher-energy events from the southern sky are excluded (see cut=10).
But really badly reconstructed events tend to have higher energies (see cut=90).
Still, it should be OK to remove those > 10 because they carry little spatial information.

In [None]:
# Show the leftover events after a sigma cut
# sig_cut is given in degree, the sigma values in exp are in radian
sig_cut = 10
m = exp["sigma"] > np.deg2rad(sig_cut)

# 2D histogram in logE and declination (in degree) of the cut away events
_ = plt.hist2d(exp["logE"][m], np.rad2deg(exp["dec"][m]),
               bins=30, cmap="inferno")
plt.colorbar()
plt.title("Total Evts w sigma > {:d}°: {:d} ({:.3f}%)".format(
        sig_cut, np.sum(m), np.sum(m) / len(exp) * 100))
plt.xlabel("logE")
plt.ylabel("dec in °")
plt.show()

# Show the skewed sigma distribution with the cut applied and mean vs median

## Test the marginalize_hist method

It should be equivalent to use one of the following methods to create a 1D histogram from the original 3D data pdf in logE, dec and sigma:

1. Simply use the original 1D data in any variable, e.g. simply histogram logE
2. Create the complete 3D histogram and marginalize by summing over remaining dimensions.

When using unnormalized hists, 2. is simply summing up all other counts.

When using normalized hists, we need to sum with respect to the binwidths in the current dimension to keep the normalization intact.
This is only useful, when only the histogram is available and not the original sample.

We want to compare whether both methods are equivalent.
As we can see, all ratios are one, so the methods are equal.

In [None]:
def make_hist_ratio(h1, h2):
    """
    Return the ratio h1 / h2. Return 0 where h2 is 0.

    The ratio is always computed as float, so integer valued histograms
    are not truncated to whole numbers.

    Parameters
    ----------
    h1, h2 : array-like
        Histogram values of the same shape.

    Returns
    -------
    ratio : ndarray
        Float array h1 / h2, with 0 in every bin where h2 is not positive.
    """
    h1 = np.asarray(h1, dtype=float)
    h2 = np.asarray(h2, dtype=float)
    # Bins failing the condition keep the initial value 0 of the out array
    ratio = np.zeros_like(h1)
    np.divide(h1, h2, out=ratio, where=(h2 > 0))
    return ratio

### Unnormalized
First the unnormalized version. Simply sum over the other axes of the 3D hist.

In [None]:
# Plot each variable in a single plot and the ratios separately
fig, [[axtl, axtr], [axbl, axbr]] = plt.subplots(2, 2, figsize=(10, 8))

# We also make a cut < 10° in sigma, because there are some outliers
m = exp["sigma"] <= np.deg2rad(10)
sigma = np.rad2deg(exp["sigma"][m])
logE = exp["logE"][m]
# NOTE: despite its name, `dec` holds the sine of the declination
dec = np.sin(exp["dec"][m])

logE_nbins = 50
dec_nbins = 40
sigma_nbins = 30

# Make the 3D hist
sample = np.vstack((logE, dec, sigma)).T
nbins = [logE_nbins, dec_nbins, sigma_nbins]
h, b = np.histogramdd(sample, bins=nbins,)

# Get binmids for plotting
m = get_binmids(b)

# Common hist settings
h1 = {"lw": 2, "color": "k", "histtype": "step"}
h2 = {"lw": 2, "color": "r", "histtype": "step", "alpha": 0.5}

# logE
# 1D hist of the data vs. the 3D hist summed over the two other axes
logE_h, logE_b, _ = axtl.hist(logE, bins=logE_nbins, **h1)
logE_hm = np.sum(h, axis=(1, 2))
_ = axtl.hist(m[0], bins=b[0], weights=logE_hm, **h2)
# Ratio plot below
axtl_sec = split_axis(axtl, "bottom", "20%", cbar=False)
axtl_sec.hist(m[0], b[0], weights=make_hist_ratio(logE_h, logE_hm), **h2)
axtl_sec.axhline(1, 0, 1, color="k")
axtl_sec.set_ylim(0, 2)

# dec
dec_h, dec_b, _ = axbl.hist(dec, bins=dec_nbins, **h1)
dec_hm = np.sum(h, axis=(0, 2))
_ = axbl.hist(m[1], bins=b[1], weights=dec_hm, **h2)

axbl_sec = split_axis(axbl, "bottom", "20%", cbar=None)
axbl_sec.hist(m[1], b[1], weights=make_hist_ratio(dec_h, dec_hm), **h2)
axbl_sec.axhline(1, 0, 1, color="k")
axbl_sec.set_ylim(0, 2)

# sigma
sigma_h, sigma_b, _ = axtr.hist(sigma, bins=sigma_nbins, **h1)
sigma_hm = np.sum(h, axis=(0, 1))
_ = axtr.hist(m[2], bins=b[2], weights=sigma_hm, **h2)

axtr_sec = split_axis(axtr, "bottom", "20%", cbar=None)
axtr_sec.hist(m[2], b[2], weights=make_hist_ratio(sigma_h, sigma_hm), **h2)
axtr_sec.axhline(1, 0, 1, color="k")
axtr_sec.set_ylim(0, 2)

# Only three panels are needed, hide the fourth
axbr.set_visible(False)

fig.suptitle("Black: 1D, Red: Margin", fontsize=15);

### Normalized
Sum over the other axes of the 3D hist and multiply by bin widths.

In [None]:
# Plot each variable in a single plot and the ratios separately
fig, [[axtl, axtr], [axbl, axbr]] = plt.subplots(2, 2, figsize=(10, 8))

# Now make it normed. `density=True` replaces the removed `normed` keyword
# of numpy and matplotlib throughout this cell.
h, b = np.histogramdd(sample, bins=nbins, density=True)

# Get binmids for plotting
m = get_binmids(b)

# logE
# Normalized 1D hist of the data vs. the marginalized normalized 3D hist
logE_h, logE_b, _ = axtl.hist(logE, bins=logE_nbins, density=True, **h1)
logE_hm = hist_marginalize(h=h, bins=b, axes=(1, 2))[0]
_ = axtl.hist(m[0], bins=b[0], weights=logE_hm, **h2)
# Ratio plot below
axtl_sec = split_axis(axtl, "bottom", "20%", cbar=False)
axtl_sec.hist(m[0], b[0], weights=make_hist_ratio(logE_h, logE_hm), **h2)
axtl_sec.axhline(1, 0, 1, color="k")
axtl_sec.set_ylim(0, 2)

# dec
dec_h, dec_b, _ = axbl.hist(dec, bins=dec_nbins, density=True, **h1)
dec_hm = hist_marginalize(h=h, bins=b, axes=(0, 2))[0]
_ = axbl.hist(m[1], bins=b[1], weights=dec_hm, **h2)

axbl_sec = split_axis(axbl, "bottom", "20%", cbar=None)
axbl_sec.hist(m[1], b[1], weights=make_hist_ratio(dec_h, dec_hm), **h2)
axbl_sec.axhline(1, 0, 1, color="k")
axbl_sec.set_ylim(0, 2)

# sigma
sigma_h, sigma_b, _ = axtr.hist(sigma, bins=sigma_nbins, density=True, **h1)
sigma_hm = hist_marginalize(h=h, bins=b, axes=(0, 1))[0]
_ = axtr.hist(m[2], bins=b[2], weights=sigma_hm, **h2)

axtr_sec = split_axis(axtr, "bottom", "20%", cbar=None)
axtr_sec.hist(m[2], b[2], weights=make_hist_ratio(sigma_h, sigma_hm), **h2)
axtr_sec.axhline(1, 0, 1, color="k")
axtr_sec.set_ylim(0, 2)

# Only three panels are needed, hide the fourth
axbr.set_visible(False)

fig.suptitle("Black: 1D, Red: Margin", fontsize=15);