In [None]:
from importlib import reload

import helper as hlp

import numpy as np

import matplotlib.pyplot as plt
import matplotlib.dates as mpldates
import matplotlib.gridspec as gridspec
from matplotlib.colors import LogNorm
%matplotlib inline

import scipy.interpolate as sci
import scipy.optimize as sco
import scipy.stats as scs
import scipy.integrate as scint

import json
import datetime
import pickle
from astropy.time import Time as astrotime

import sklearn.neighbors as skn
import sklearn.model_selection as skms  # Newer version of grid_search

from corner_hist import corner_hist
from anapymods3.plots.general import (split_axis, get_binmids,
                                      hist_marginalize, dg)
from anapymods3.stats.sampling import rejection_sampling
import anapymods3.stats.KDE as KDE

# Some globals
hoursindays = 24.
secinday = hoursindays * 60. * 60. 

# Load data

Load IC86 data from epinat, which should be the usual IC86-I (2011) PS sample, but pull corrected and OneWeights corrected by number of events generated.

In [None]:
# Load experimental data, simulation and livetime via the project helper.
# Later cells access `exp` by field name ("timeMJD", "logE", "dec", "sigma").
# NOTE(review): the exact return types are defined in `helper.load_data`.
exp, mc, livetime = hlp.load_data()

# Get data livetime

Generate from good run list as stated here:
- http://icecube.wisc.edu/~coenders/html/build/html/ic86-bdt/muonL3.html
- https://wiki.icecube.wisc.edu/index.php/IC86_I_Point_Source_Analysis/Data_and_Simulation

It should be 332.61 days as stated by jfeintzeig and scoenders.
We create one bin per included run, with exactly that width.
Excluded runs are those with too high/low rate and without everything marked "good".

The livetime is a bit higher, because we used a newer runlist from iclive instead of the old non-json v1.4.
See side test for that comparison.

Problem is also, that this runlist includes runs with zero events, so they are probably cut out due to the old runlist in the original selection.

In [None]:
def filter_runs(run):
    """
    Decide whether a run from the run list is used in the analysis.

    A run is kept if both its "good_i3" and "good_it" flags are set and its
    run number is not in the list of explicitly excluded runs (as stated in
    jfeintzeig's documentation).

    Parameters
    ----------
    run : dict-like
        Single run entry providing the keys "good_i3", "good_it" and "run".

    Returns
    -------
    keep : bool
        True if the run passes the selection, False otherwise.
    """
    excluded = (120028, 120029, 120030, 120087, 120156, 120157)
    flags_ok = (run["good_i3"] == True) & (run["good_it"] == True)
    if flags_ok & (run["run"] not in excluded):
        return True
    return False

In [None]:
# Build the good-run dictionary from the iclive JSON run list. Only runs
# accepted by `filter_runs` are meant to be kept; the helper also returns the
# livetime it derived from that list.
goodrun_dict, _livetime = hlp.create_goodrun_dict(
    runlist="data/runlists/ic86-i-goodrunlist.json", filter_runs=filter_runs)

# We don't use this livetime, but the "official" one from jfeintzeig's page.
# It is only printed here for comparison.
print("IC86-I livetime from iclive: ", _livetime)

# Bin BG according to runlist

Each run is one bin in the bg rate vs time plot.
The rate is normed to Hertz by dividing through the bin sizes in seconds.

In [None]:
# Bin the experimental event times run by run. The result `h` is read in the
# following cells via the fields "start_mjd", "stop_mjd", "rate", "rate_std".
# NOTE(review): remove_zero_runs=True presumably drops runs without any
# events - confirm in `helper._create_runtime_bins`.
h = hlp._create_runtime_bins(exp["timeMJD"], goodrun_dict=goodrun_dict,
                             remove_zero_runs=True)

In [None]:
# Plot the rate per run: one errorbar point per run, the x error spans the
# run duration, the y error is the rate uncertainty from the run bins.
fig, ax = plt.subplots(1, 1)

start, stop = h["start_mjd"], h["stop_mjd"]
rate = h["rate"]
xerr = 0.5 * (stop - start)
yerr = h["rate_std"]
binmids = 0.5 * (stop + start)
ax.errorbar(binmids, rate, xerr=xerr, yerr=yerr, fmt=",")

# Setup main axis
ax.set_xlim(start[0], stop[-1])
ax.set_ylim(0, None)
ax.set_xlabel("MJD")
ax.set_ylabel("Rate in Hz")
# Rotate bottom labels if needed
# def xlabels(x):
#     return ["{:5d}".format(int(xi)) for xi in x]
# ax.set_xticklabels(xlabels(ax.get_xticks()), rotation=60,
#                    horizontalalignment="right")

# Second xaxis on top with month and year.
# Convert MJD to datetimes, make dates for every month and convert to mjd
# http://stackoverflow.com/questions/22696662/ \
#   python-list-of-first-day-of-month-for-given-period
datetimes = astrotime(binmids, format="mjd").to_datetime()
dt, end = datetimes[0], datetimes[-1]
datetimes_ticks = []
# Step from the first bin centre to the first day of each following month.
# The last tick is the first month start at or after `end`.
while dt < end:
    # `dt.month % 12` is zero only for December -> roll over to next year
    if not dt.month % 12:
        dt = datetime.datetime(dt.year + 1, 1, 1)
    else:
        dt = datetime.datetime(dt.year, dt.month + 1, 1)
    datetimes_ticks.append(dt)
mjd_ticks = astrotime(datetimes_ticks, format="datetime").mjd

# New axis on top, make sure, we use the same range
ax2 = ax.twiny()
ax2.set_xlim(ax.get_xlim())
ax2.set_xticks(mjd_ticks)
# Tick labels like "Jun '11"
ax2.set_xticklabels([dtt.strftime("%b '%y") for dtt in datetimes_ticks],
                    rotation=60, horizontalalignment="left")

fig.tight_layout()
plt.show()

# Time dependent rate function

**Note: I think it is unnecessary to use a time and declination dependent rate. The spatial part is injected from the data BG from KDE anyways. So we just need to have the rate to determine how much events we inject allsky.**

The rate is time dependent because of seasonal variation.
We take this variation into account by fitting a periodic function to the time resolved rate.

The data is built by calculating the rate in each run as seen before.
This rate is correctly normalized and smoothes local fluctuations.

### Periodic function with a weighted least squares fit

See side_test for comparison to spline fits.
The function is a simple sine with 4 free parameters to fit the shape of the rates:

$$
    f(x) = a\cdot \sin(b\cdot(x - c)) + d
$$

The least squares loss function is

$$
    R = \sum_i (w_i(y_i - f(x_i)))^2
$$

Weights are the inverse standard deviations of the Poisson histogram errors.

$$
    w_i = \frac{1}{\sigma_i}
$$

Seed values are estimated from plot rate vs time.

- Period should be 365 days (MJD) because we have one year of data so we choose $b_0 = 2\pi/365$.
- Amplitude is about $a_0=-0.0005$, because the sine seems to start with negative values.
- The x-offset is chosen as the first start date, to get the right order of magnitude.
- The y-axis intersection $d$ should be close to the weighted average, so we take this as a seed.

The bounds are motivated as follows (and if we don't hit them, it's OK to use them).

- Amplitude $a$ should be positive, this also resolves a degeneracy with the x-axis offset $c$ (flipping the sign of $a$ is equivalent to shifting $c$ by half a period).
- The period $b$ should scatter around one year, a period larger than +-1 half a year is unphysical.
- The x-offset $c$ cannot be greater than the initial +- the period because we have a periodic function.
- The y-axis offset $d$ is arbitrarily constrained, but as seen from the plot it should not exceed 0.1. 

In [None]:
def f(x, pars):
    """
    Sinusoidal rate model ``a * sin(b * (x - c)) + d``.

    Parameters
    ----------
    x : float or array-like
        Time(s) in MJD at which the model is evaluated.
    pars : sequence of 4 floats
        Model parameters ``(a, b, c, d)``: amplitude, angular frequency,
        x-offset and y-offset.

    Returns
    -------
    rate : float or array-like
        Model value at each ``x``.
    """
    amp, omega, t_off, base = pars
    phase = omega * (x - t_off)
    return amp * np.sin(phase) + base

def lstsq(pars, *args):
    """
    Weighted least squares loss ``sum((w_i * (y_i - f(x_i)))**2)``.

    Parameters
    ----------
    pars : sequence of 4 floats
        Parameters handed to the model function ``f``.
    *args : tuple
        Fixed data ``(x, y, w)``: x-values, y-values and weights.

    Returns
    -------
    loss : float
        Sum of the squared weighted residuals.
    """
    # Data x, y-values and weights stay fixed during the minimization
    xvals, yvals, weights = args
    residuals = weights * (yvals - f(xvals, pars))
    return np.sum(residuals**2)

In [None]:
# Fit the sinusoidal rate model to the per-run rates with a weighted least
# squares fit. Data points are the run centres with the run rates; weights
# are the inverse rate uncertainties.
rate = h["rate"]
rate_std = h["rate_std"]
X = exp["timeMJD"]
binmids = 0.5 * (h["start_mjd"] + h["stop_mjd"])

# Seed values from consideration above.
a0 = 0.5 * (np.amax(rate) - np.amin(rate))
b0 = 2. * np.pi / 365.  # Angular frequency of a one year period in 1 / days
c0 = np.amin(X)
# Inverse-variance weighted mean: the best constant for the weights
# 1 / rate_std used in the loss below (weighting with the variance itself
# would favour the least precise runs).
d0 = np.average(rate, weights=1. / rate_std**2)

x0 = [a0, b0, c0, d0]
# Bounds as explained above. The x-offset may move by one full period
# (2 pi / b0 = 365 days) around its seed. b0 itself is an angular frequency
# (~0.017 / day) and using it as the width would effectively freeze c.
bounds = [[None, None], [0.5 * b0, 1.5 * b0],
          [c0 - 2. * np.pi / b0, c0 + 2. * np.pi / b0], [0, 0.01]]
# x, y values, weights
args = (binmids, rate, 1. / rate_std)

res = sco.minimize(fun=lstsq, x0=x0, args=args, bounds=bounds)
bf_pars = res.x
if not res.success:
    # Don't silently continue with parameters of a failed fit
    print("Warning: rate fit did not converge: ", res.message)

print("Amplitude   : {: 13.5f} in Hz".format(res.x[0]))
print("Period (d)  : {: 13.5f} in days".format(2 * np.pi / res.x[1]))
print("Offset (MJD): {: 13.5f} in MJD".format(res.x[2]))
print("Avg. rate   : {: 13.5f} in Hz".format(res.x[3]))

In [None]:
# Define the rate function:
def rate_fun(t):
    """
    Returns the rate at a given time in MJD.

    Evaluates the sinusoidal model ``f`` with the best fit parameters
    ``res.x`` from the weighted least squares fit.

    Parameters
    ----------
    t : array-like
        Time in MJD.

    Returns
    -------
    rate : array-like
        The rate of background events in Hz.
    """
    # `f` expects the parameters as one sequence `pars`. Unpacking them with
    # `*res.x` would pass five positional arguments and raise a TypeError.
    return f(t, res.x)

In [None]:
# Plot the per-run rates again, this time overlaid with the fitted rate
# function and its constant baseline.
start, stop = h["start_mjd"], h["stop_mjd"]
rate = h["rate"]
xerr = 0.5 * (stop - start)
yerr = h["rate_std"]
binmids = 0.5 * (stop + start)

plt.errorbar(binmids, rate, xerr=xerr, yerr=yerr, fmt=",")
plt.ylim(0, None);

# Plot fit, evaluated on a fine grid over the whole data taking period
t = np.linspace(start[0], stop[-1], 1000)
y = rate_fun(t)
plt.plot(t, y, zorder=5)

# Plot y shift dashed to see baseline or years average
# (bf_pars[3] is the y-offset d of the fitted function)
plt.axhline(bf_pars[3], 0, 1, color="C1", ls="--", label="")

plt.xlim(start[0], stop[-1])
plt.xlabel("MJD")
plt.ylabel("Rate in Hz")

# plt.savefig("./data/figs/time_rate_sinus.png", dpi=200)
# Fixed upper y limit, overrides the automatic one set above
plt.ylim(0, 0.009)
plt.tight_layout()
plt.show()

# Draw Number of Background Events 

The fitted rate function gives the expected background rate at a given time.
Draw the actual number of events to inject per BG trial within a given time window using a poisson distribution with the mean obtained by integrating the rate function over the window.

Classically the events drawn are then assigned a random time within the time window.
But as we have the rate function, we can sample times from that function using a rejection sampling.
This will only affect larger intervals, where the curvature can be seen.

## Sample number of events in frame

In [None]:
def _prep_times(t, trange):
    """
    Bring source times and time windows into a common array layout.

    Parameters
    ----------
    t : float or array-like
        Source time(s) in MJD.
    trange : array-like, shape (2,) or (nsources, 2)
        Time window(s) in seconds relative to ``t``. A single window is
        repeated for every source.

    Returns
    -------
    t : array, shape (nsources, 1)
        The source times as a column.
    trange : array, shape (nsources, 2)
        Absolute window bounds in MJD, ``t + trange / secinday``.
    """
    src_times = np.atleast_1d(t)
    n_src = len(src_times)
    src_times = src_times.reshape(n_src, 1)

    windows = np.array(trange)
    if windows.ndim == 1:
        # One window for all sources: repeat it once per source
        windows = np.repeat(windows.reshape(1, 2), repeats=n_src, axis=0)

    # Seconds relative to the source time -> absolute MJD
    return src_times, src_times + windows / secinday

def get_num_of_bg_events(t, trange, ntrials, pars):
    """
    Draw the number of background events per source and trial.

    The expectation per source is the analytic integral of the sinusoidal
    rate function over the source's time window. The event numbers are then
    drawn from a poisson distribution with that mean.

    Parameters
    ----------
    t : array-like
        Times of the occurrence of each source event in MJD.
    trange : [float, float] or array_like, shape (len(t), 2)
        Time window(s) in seconds relative to the given time(s) t.
        - If [float, float], the same window [lower, upper] is used for every
          source.
        - If array-like, lower [i, 0] and upper [i, 1] bounds of the time
          window per source.
    ntrials : int
        Number of background trials for which event numbers are drawn.
    pars : array-like
        Best fit parameters (a, b, c, d) of the rate function
        a * sin(b * (x - c)) + d.

    Returns
    -------
    nevents : array-like, shape (len(t), ntrials)
        The number of events to inject for each trial for each source.
    """
    def rate_integral(trange, pars):
        """
        Integral of the rate function over each window [t0, t1] (in MJD).

        The antiderivative yields units of Hz * days, so the result is
        multiplied by secinday (seconds per day) to get event counts.
        Returns an array with shape (len(trange), 1).
        """
        amp, omega, t_off, base = pars

        # Keep lower / upper bounds as columns, shape (nsources, 1)
        t_lo = trange[:, :1]
        t_hi = trange[:, 1:2]

        periodic = amp / omega * (np.cos(omega * (t_lo - t_off)) -
                                  np.cos(omega * (t_hi - t_off)))
        linear = base * (t_hi - t_lo)

        return (periodic + linear) * secinday

    t, trange = _prep_times(t, trange)

    # Expectation is the integral in the time frame, broadcast over trials
    expect = rate_integral(trange, pars)

    return np.random.poisson(lam=expect, size=(len(t), ntrials))

In [None]:
# Quick check: draw event numbers for four example source times (taken from
# run start times) with a common window of [-120, 220] seconds.
start, stop = h["start_mjd"], h["stop_mjd"]

t = start[100:104]
trange = np.array([-120, 220])
ntrials = 10

# Result has one row per source and one column per trial
nevts = get_num_of_bg_events(t, trange, ntrials, res.x)
nevts

## Now the sampling of random times in the time frame

First we want to see, that all BG injected events stay in the correct time frame and make a uniform distribution for small time frames.

Then we make the time window really big and the events should follow the rate function PDF.

In [None]:
def get_times_in_frame(t, trange, nsamples):
    """
    Sample background event times in a time window from the rate function.

    Parameters
    ----------
    t : float
        Time of the occurrence of the source event in MJD.
    trange : [float, float]
        Time window in seconds relative to the given time t.
    nsamples : array-like, type int
        Number of events to sample per trial. The number of trials is given
        by the length of nsamples.

    Returns
    -------
    times : list, length len(nsamples)
        For each entry nsamples[i], the first return value of
        ``rejection_sampling`` called with the rate function, the window
        bounds in MJD and ``n=nsamples[i]``.
        NOTE(review): presumably an array of nsamples[i] times in MJD -
        confirm against ``anapymods3.stats.sampling.rejection_sampling``.
    """
    # Window bounds in MJD. The reshaped source time itself is not needed.
    _, frame = _prep_times(t, trange)
    counts = np.atleast_1d(nsamples)

    return [rejection_sampling(rate_fun, bounds=frame, n=n_i)[0]
            for n_i in counts]

In [None]:
# First the small time frame
# Arbitrary start date from data
t0 = start[100]
t0_sec = t0 * secinday

# dt from t0 in seconds, clip at 4 sigma
dt = 200
nsig = 4.

# Make t values for plotting in MJD around t0.
# `clip` is the width of each truncated gaussian edge: nsig times the signal
# sigma, where sigma is dt constrained to [2, 30] seconds.
clip = np.clip(dt, 2, 30) * nsig
plt_rng = [-clip, dt + clip]
trange = plt_rng
ntrials = 10000

# Sample times: first the number of events per trial ([0] selects the single
# source), then the event times for each trial.
nevts = get_num_of_bg_events(t=t0, trange=trange, ntrials=ntrials,
                             pars=res.x)[0]
times = get_times_in_frame(t0, trange, nevts)

# Plot them in together with the PDFs
def time_bg_pdf(t, t0, a, b):
    """
    Uniform background time PDF in the window [a, b] around t0.

    Parameters
    ----------
    t : array-like
        Times in MJD at which the PDF is evaluated.
    t0 : float
        Reference (source) time in MJD.
    a, b : float
        Lower and upper window edge in seconds relative to t0.

    Returns
    -------
    pdf : array-like
        1 / (b - a) inside the window (edges included) and zero outside,
        i.e. normalized per second.
    """
    # Normalize relative to t0 in seconds (first multiply avoids rounding?)
    _t = t * secinday - t0 * secinday

    # Builtin float instead of np.float, which was removed in NumPy 1.24
    pdf = np.zeros_like(_t, dtype=float)
    uni = (_t >= a) & (_t <= b)
    pdf[uni] = 1. / (b - a)
    return pdf

def time_sig_pdf(t, t0, dt, nsig=4):
    """
    Signal time PDF: uniform in [0, dt] with truncated gaussian edges.

    The gaussian width sigma is dt constrained to [2, 30] seconds. The edges
    are cut off nsig * sigma before 0 and after dt, the PDF is zero outside.

    Parameters
    ----------
    t : array-like
        Times in MJD at which the PDF is evaluated.
    t0 : float
        Time of the source event in MJD.
    dt : float
        Length of the uniform part in seconds, starting at t0. Must not be
        negative.
    nsig : float, optional
        Truncate the gaussian edges after nsig * sigma. (default: 4)

    Returns
    -------
    pdf : array-like
        Signal PDF values, normalized per second over the full support.

    Raises
    ------
    ValueError
        If dt is negative.
    """
    if dt < 0:
        raise ValueError("dt must not be negative.")

    # Normalize relative to t0 in seconds (first multiply avoids rounding?)
    _t = t * secinday - t0 * secinday

    # Constrain sig_t to [2, 30]s regardless of uniform time window
    sig_t = np.clip(dt, 2, 30)
    sig_t_clip = nsig * sig_t
    gaus_norm = (np.sqrt(2 * np.pi) * sig_t)

    # Split in def regions gaus rising, uniform, gaus falling and zero
    gr = (_t < 0) & (_t >= -sig_t_clip)
    gf = (_t > dt) & (_t <= dt + sig_t_clip)
    uni = (_t >= 0) & (_t <= dt)

    # Builtin float instead of np.float, which was removed in NumPy 1.24
    pdf = np.zeros_like(t, dtype=float)
    pdf[gr] = scs.norm.pdf(_t[gr], loc=0, scale=sig_t)
    pdf[gf] = scs.norm.pdf(_t[gf], loc=dt, scale=sig_t)
    # Connect smoothly with the gaussians: plateau at the gaussian maximum
    pdf[uni] = 1. / gaus_norm

    # Normalize whole distribution: both truncated gaussian halves plus the
    # area of the uniform plateau
    dcdf = (scs.norm.cdf(dt + sig_t_clip, loc=dt, scale=sig_t) -
            scs.norm.cdf(-sig_t_clip, loc=0., scale=sig_t))
    norm = dcdf + dt / gaus_norm

    return pdf / norm


# Plot the pdfs on a grid of times (in MJD) spanning the full time window
t = np.linspace(t0_sec + plt_rng[0], t0_sec + plt_rng[1], 200) / secinday
bg_pdf = time_bg_pdf(t, t0, -clip, dt + clip)
sig_pdf = time_sig_pdf(t, t0, dt, nsig)

# Plot in normalized time, i.e. seconds relative to t0
_t = t * secinday - t0 * secinday
plt.plot(_t, bg_pdf, "C0-")
plt.plot(_t, sig_pdf, "C1-")
plt.axvline(dt, 0, 1, color="C7", ls="--")
plt.axvline(0, 0, 1, color="C1", ls="--")

# Plot injected events from all trials.
# Join all trials in one go; appending trial by trial copies the growing
# array every time. The leading empty array keeps the float dtype and covers
# the case of no trials.
T = np.concatenate([np.array([])] + [np.ravel(ti) for ti in times])
T = (T - t0) * secinday

# `density` replaces the `normed` keyword removed from matplotlib's hist
_ = plt.hist(T, bins=50, density=True, color=dg, alpha=.25)

plt.xlabel("Time relative to t0 in sec")
plt.ylim(0, None);
plt.tight_layout()

# plt.savefig("./data/figs/bg_events_time_sampled_narrow.png", dpi=200)

plt.show()

In [None]:
# Now the really large time frame, over the whole time range
t0 = start[0]
t0_sec = t0 * secinday

# dt from t0 in seconds, clip at 4 sigma
dt = (stop[-1] - start[0]) * secinday
nsig = 4.

# Make t values for plotting in MJD around t0
clip = np.clip(dt, 2, 30) * nsig
plt_rng = [-clip, dt + clip]
trange = plt_rng
ntrials = 1

# Sample times ([0] selects the single source)
nevts = get_num_of_bg_events(t=t0, trange=trange, ntrials=ntrials,
                             pars=res.x)[0]
times = get_times_in_frame(t0, trange, nevts)

# Plot injected events from all trials
T = np.array([])
for ti in times:
    T = np.append(T, ti)  

# Histogram the sampled times (in MJD) and convert the counts per bin to a
# rate in Hz by dividing by the bin width in seconds and the number of trials.
# NOTE(review): this overwrites the global `h` (run time bins) as well as `m`
# and `yerr`. Cells above that read h["rate"] etc. cannot be re-run after
# this cell without re-creating `h` first.
h, b = np.histogram(T, bins=1081)
m = get_binmids([b])[0]
scale = np.diff(b) * secinday * ntrials
yerr = np.sqrt(h) / scale
h = h / scale

plt.errorbar(m, h, yerr=yerr, fmt=",")

# Plot normalized rate function to compare
t = np.linspace(start[0], stop[-1], 100)
r = rate_fun(t=t)
plt.plot(t, r, lw=2, zorder=5)
# Dashed baseline at the fitted y-offset d
plt.axhline(res.x[3], 0, 1, color="C1", ls="--", label="", zorder=5)

plt.xlim(start[0], stop[-1])
plt.ylim(0, 0.009)
plt.tight_layout()

# plt.savefig("./data/figs/bg_events_time_sampled_wide.png", dpi=200)

plt.show()

# Create the BG PDF from data

Proceeding to section 6.3.1 Randomized BG Injection, p. 113.
Mrichmann draws events by:

1. Get number of bg events to be injected from a poisson distribution with expectation values drawn from the previously build bg temporal distribution.
   $$
   P_{\langle n_B\rangle}(N_m) = \frac{\langle n_B\rangle^{N_m}}{N_m!}\cdot \exp(-\langle n_B\rangle)
   $$
2. These events are then drawn from a 3D pdf in energy proxy, zenith proxy and sigma proxy.
   He does it by dividing 10x10x10 bins, first selecting energy, then zenith in that energy bin, then sigma in that zenith bin.
   
Here we create a smooth PDF using a kernel density estimator and obtain a sample by running a MCMC chain to create a sample a priori.
The bandwidth is set globally and cross validated to be robust.

**Some note on `numpy.histogramdd`:**

The input must be an array with shape (nDim, len(data)).

Shape of h is the same as the number of bins in each dim: (50, 40, 10)
So the first dimension picks a single logE slice -> h[i].shape = (40, 10)
Second dim picks a dec slice -> h[:, i].shape = (50, 10)
3rd picks a sigma slice -> h[:, :, i].shape = (50, 40)

This is important: meshgrid repeats in second axis on first array xx.
For the second array, the first axis is repeated.
But h iterates over energy in 1st axis. So if we don't transpose, we have the whole histogram flipped! Compare to the plot in mrichman's thesis (cos(zen)).

**Some notes on KDE:**

Sebastian has already made a tool for adaptive and asymmetric KDE.
1. The Kernel is the covariance matrix of the whole data set to regard different scales
    + Note: This may only be a problem, if one dim is spread with peaks, while the other is wide spread only. Then we cannot scale the Kernel to small to fit the peaks because the smooth dimension is preventing that.
2. Use Silvermans or Scotts rule as a first guess.
3. Run a second pass and vary the local bandwidth according to the first guess local density.

We could replace 1 and 2 by scaling the data with the inverse covariance and then using a cross validation to find the first guess bandwidth.
Then using a second pass to vary locally.

## 3D histogram of BG data
First we make a 3D histogram to better compare to mrichmann and to get an overview over the distribution.

In [None]:
# Cut sigmas in the sample to obtain smooth tail from KDE and remove outliers
m = exp["sigma"] < np.deg2rad(10)
_logE = exp["logE"][m]
_dec = exp["dec"][m]
_sigma = exp["sigma"][m]
# Sample must match with the one used in training here
sample = np.vstack((_logE, _dec, _sigma)).T

# Binning is rather arbitrary because we don't calc stuff with the hist
bins = [50, 50, 50]

# Plot in degrees and in sinDec
_sam = np.vstack((_logE, np.sin(_dec), np.rad2deg(_sigma))).T

# Plain counts. The `normed` keyword was removed from np.histogramdd
# (NumPy 1.24), unnormalized counts are the default anyway.
# `bins` is replaced by the list of bin edges per dimension, which is reused
# for all following histograms.
h, bins = np.histogramdd(sample=_sam, bins=bins)

# Make a nice corner plot
label = ["logE", "sinDdec", "sigma deg"]
fig, ax = corner_hist(h, bins=bins,
                      label=label,
                      hist2D_args={"cmap": "inferno", "norm": LogNorm()},
                      hist_args={"color":"C1", "alpha": 0.5, "log": True})

# plt.savefig("./data/figs/bg_corner_scaled.png", dpi=200)

## Kernel Density Estimation

Use adaptive width KDE to describe BG data and be able to smoothly draw new events from it.
We fitted one set of params to the full data and stored it to avoid lengthy (~60 mins.) refitting when testing.
A optimal set of parameters gets determined in a cross validation.

In [None]:
# Assign model from CV, which has already evaluated adaptive kernels.
# NOTE: pickle.load can execute arbitrary code, only load trusted files.
# The file handle is deliberately not named `f`: that would overwrite the
# rate model function `f` and break `rate_fun` for all following cells.
with open("data/awKDE_CV/CV10_a_b_EXP_IC86I_CUT_sig.ll.90_PARS_" +
          "diag_True_alpha_0.5.pickle", "rb") as infile:
    model_selector = pickle.load(infile)
print("Model selector used alpha : ", model_selector.best_estimator_.alpha)

# We could still change the alpha, but the global bandwidth must stay fixed
kde_inj = model_selector.best_estimator_
kde_inj.alpha = .5
print("Actually used alpha       : ", kde_inj.alpha)

# Sample with bounds, because KDEs spillover
# NOTE(review): `bounds` is defined but not handed to `sample` below, so the
# sample is currently drawn without bounds - confirm whether that is intended.
bounds = np.array([[None, None], [-np.pi / 2. , np.pi / 2.], [0, None]])
n_samples = int(1e6)
kde_sam = kde_inj.sample(n_samples)


# Plot in degrees and in sinDec
_sam_kde = np.vstack((kde_sam[:, 0],
                      np.sin(kde_sam[:, 1]),
                      np.rad2deg(kde_sam[:, 2]))).T

# Plain counts in the same binning as the data histogram. The `normed`
# keyword was removed from np.histogramdd (NumPy 1.24).
h, _ = np.histogramdd(sample=_sam_kde, bins=bins)
fig, ax = corner_hist(h, bins=bins,
                      label=label,
                      hist2D_args={"cmap": "inferno", "norm": LogNorm()},
                      hist_args={"color":"C1", "alpha": 0.5, "log": True})

# plt.savefig("./data/figs/bg_corner_scaled.png", dpi=200)

## Compare KDE to original data

Make a ratio histogram of the KDE sample and the original data sample.

### 2D marginalization

In [None]:
# Create 2D hists, by leaving out one parameter
xlabel = [label[0], label[0], label[1]]
ylabel = [label[1], label[2], label[2]]

for i, axes in enumerate([[0, 1], [0, 2], [1, 2]]):
    _b = np.array(bins)
    # Density histograms of data and KDE sample in the same binning.
    # `density` replaces the `normed` keyword removed in NumPy 1.24.
    h_exp, b_exp = np.histogramdd(_sam[:, axes],
                                  bins=_b[axes], density=True)
    h_kde, b_kde = np.histogramdd(_sam_kde[:, axes],
                                  bins=_b[axes], density=True)
    
    # KDE is expectation, but sampled with much more events.
    # Weights would simply scale the total number of KDE events to match the
    # number of original events. That would be the mean for the poisson
    # distribution in each bin. So to get OK KDE expectation sqrt(n) errors
    # in each bin, we divide not by the number of drawn KDE but by the number
    # of original events.
    # Expected counts per bin are density * N * bin area, so the error on the
    # density is sqrt(density / (N * area)). The area must be built from the
    # edges of the two selected axes (dx * dy) and N is the number of events
    # in the histogrammed (cut) data sample. The outer product has the same
    # (nx, ny) layout as the histograms.
    bin_area = np.outer(np.diff(b_kde[0]), np.diff(b_kde[1]))
    norm_kde = len(_sam) * bin_area
    sigma_kde = np.sqrt(h_kde / norm_kde)

    # Make 3 different diff/ratio hists to estimate KDE quality in
    # 2D marginalization.
    m = (h_exp > 0.)
    ratio_h = np.zeros_like(h_exp)
    ratio_h[m] = h_kde[m] / h_exp[m]

    diff_h = h_kde - h_exp

    m = (sigma_kde > 0.)
    sigma_ratio_h = np.zeros_like(h_exp)
    sigma_ratio_h[m] = (h_exp[m] - h_kde[m]) / sigma_kde[m]

    # Bin mids and hist grid. The hists are drawn below by filling one
    # weighted entry per bin mid. Meshgrid and hist shapes are transposed,
    # hence the `.T` on the weights.
    _b = b_exp
    m = get_binmids(_b)
    xx, yy = map(np.ravel, np.meshgrid(m[0], m[1]))
    
    
    # Big plot on the left and three right
    fig = plt.figure(figsize=(10, 6))
    gs = gridspec.GridSpec(3, 3)
    axl = fig.add_subplot(gs[:, :2])
    axrt = fig.add_subplot(gs[0, 2])
    axrc = fig.add_subplot(gs[1, 2])
    axrb = fig.add_subplot(gs[2, 2])
    
    # Steal space for colorbars
    caxl = split_axis(axl, "right")
    caxrt = split_axis(axrt, "left")
    caxrc = split_axis(axrc, "left")
    caxrb = split_axis(axrb, "left")

    # Unset top and center xticklabels as they are shared with the bottom plot
    axrt.set_xticklabels([])
    axrc.set_xticklabels([])
        
    # Left: Difference over KDE sigma
    # cbar_extr = max(np.amax(sigma_ratio_h),  # Center colormap to min/max
    #                         abs(np.amin(sigma_ratio_h)))
    _, _, _, imgl = axl.hist2d(xx, yy, bins=_b, weights=sigma_ratio_h.T.ravel(),
                               cmap="seismic", vmax=5, vmin=-5)
    cbarl = plt.colorbar(cax=caxl, mappable=imgl)
    axl.set_xlabel(xlabel[i])
    axl.set_ylabel(ylabel[i])
    axl.set_title("(exp - kde) / sigma_kde")
    
    # Right top: Ratio
    _, _, _, imgrt = axrt.hist2d(xx, yy, bins=_b, weights=ratio_h.T.ravel(),
                                 cmap="seismic", vmax=2, vmin=0);
    cbarrt = plt.colorbar(cax=caxrt, mappable=imgrt)
    axrt.set_title("kde / exp")

    # Right center: Data hist
    _, _, _, imgrc = axrc.hist2d(xx, yy, bins=_b, weights=h_exp.T.ravel(),
                                 cmap="inferno", norm=LogNorm());
    cbarrc = plt.colorbar(cax=caxrc, mappable=imgrc)
    axrc.set_title("exp logscale")

    # Right bottom: KDE hist, same colorbar scale as on data
    _, _, _, imgrb = axrb.hist2d(xx, yy, bins=_b, weights=h_kde.T.ravel(),
                                 cmap="inferno", norm=LogNorm());
    # Set with same color limits as on data. The limits are read from the
    # data image, because Colorbar.get_clim was removed from matplotlib.
    imgrb.set_clim(imgrc.get_clim())
    cbarrb = plt.colorbar(cax=caxrb, mappable=imgrb)
    axrb.set_title("kde logscale")
    
    # Set tick and label positions
    for ax in [caxrt, caxrc, caxrb]:
        ax.yaxis.set_label_position("right")
        ax.yaxis.tick_left()
    
    fig.tight_layout()
    plt.savefig("./data/figs/kde_data_2d_{}_{}.png".format(
                    xlabel[i], ylabel[i]),
                dpi=200)
    plt.show()

### 1D marginalization

In [None]:
# Pseudo smooth marginalization is done by sampling many points from the KDE
# and using a finely binned 1D histogram, so it looks smooth
xlabel = label

for i, axes in enumerate([0, 1, 2]):
    _b = np.array(bins)
    # Density histograms of data and KDE sample in the same binning.
    # `density` replaces the `normed` keyword removed in NumPy 1.24.
    h_exp, b_exp = np.histogram(_sam[:, axes],
                                bins=_b[axes], density=True)
    h_kde, b_kde = np.histogram(_sam_kde[:, axes],
                                bins=_b[axes], density=True)
    
#     h_exp, b_exp = hist_marginalize(h, bins, axes=axes)
#     h_kde, b_kde = hist_marginalize(bg_h, bg_bins, axes=axes)
      
    # KDE errorbars as in 2D case: poisson error of the expected counts
    # density * N * bin width, scaled back to a density. N is the number of
    # events in the histogrammed (cut) data sample.
    norm_kde = len(_sam) * np.diff(b_kde)
    sigma_kde = np.sqrt(h_kde / norm_kde)

    # Make 3 different diff/ratio hists to estimate KDE quality in
    # 1D marginalization.
    m = (h_exp > 0.)
    ratio_h = np.zeros_like(h_exp)
    ratio_h[m] = h_kde[m] / h_exp[m]

    diff_h = h_kde - h_exp

    m = (sigma_kde > 0.)
    sigma_ratio_h = np.zeros_like(h_exp)
    sigma_ratio_h[m] = (h_exp[m] - h_kde[m]) / sigma_kde[m]

    # Bin mids. The step hists are drawn below by filling one weighted entry
    # per bin mid.
    _b = b_exp
    m = get_binmids([_b])[0]
    
    # Plot both and the ratio normed. Big plot on the left and three right
    fig = plt.figure(figsize=(10, 6))
    gs = gridspec.GridSpec(3, 3)
    axl = fig.add_subplot(gs[:, :2])
    axrt = fig.add_subplot(gs[0, 2])
    axrc = fig.add_subplot(gs[1, 2])
    axrb = fig.add_subplot(gs[2, 2])

    axrt.set_xticklabels([])
    axrc.set_xticklabels([])

    # Set ticks and labels right
    for ax in [axrt, axrc, axrb]:
        ax.yaxis.set_label_position("right")
        ax.yaxis.tick_right()

    # Limits
    for ax in [axl, axrt, axrc, axrb]:
        ax.set_xlim(_b[0], _b[-1])
        
    # Main plot:
    # Plot more dense to mimic a smooth curve
    __h, __b = np.histogram(_sam_kde[:, i], bins=500,
                            range=[_b[0], _b[-1]], density=True)
    __m = get_binmids([__b])[0]
    axl.plot(__m, __h, lw=3, alpha=0.5)
    
    _ = axl.hist(m, bins=_b, weights=h_exp, label="exp", histtype="step",
                 lw=2, color="k")
    _ = axl.errorbar(m, h_kde, yerr=sigma_kde, fmt=",", color="r")
    _ = axl.hist(m, bins=_b, weights=h_kde, label="kde", histtype="step",
                 lw=2, color="r")    
    
    axl.set_xlabel(xlabel[i])
    axl.legend(loc="upper right")

    # Top right: Difference
    _ = axrt.axhline(0, 0, 1, color="k", ls="-")
    _ = axrt.hlines([-.02, -.01, .01, .02], _b[0], _b[-1],
                    colors='#353132', linestyles='dashed')
    _ = axrt.hist(m, bins=_b, weights=diff_h, histtype="step", lw=2, color="r")
    axrt.set_ylim(-.05, +.05)
    axrt.set_ylabel("kde - exp")

    # Center right: Ratio
    _ = axrc.axhline(1, 0, 1, color="k", ls="-")
    _ = axrc.hlines([0.8, 0.9, 1.1, 1.2], _b[0], _b[-1],
                    colors='#353132', linestyles='dashed')
    _ = axrc.hist(m, bins=_b, weights=ratio_h, histtype="step", lw=2, color="r")
    axrc.set_ylim(.5, 1.5)
    axrc.set_ylabel("kde / exp")

    # Bottom right: Ratio of diff to sigma of expectation
    _ = axrb.axhline(0, 0, 1, color="k", ls="-")
    _ = axrb.hlines([-2, -1, 1, 2], _b[0], _b[-1],
                    colors='#353132', linestyles='dashed')
    _ = axrb.hist(m, bins=_b, weights=sigma_ratio_h, histtype="step", lw=2, color="r")
    axrb.set_ylim(-3, +3)
    axrb.set_ylabel("(exp-kde)/sigma_kde")
    
    plt.savefig("./data/figs/kde_data_1d_{}.png".format(
            xlabel[i]), dpi=200)
    plt.show()

# Define the Likelihoods

Here we define our Likelihoods.
We are given a source event occurance (can be GRB, GW, HESE or anything else) at a given position in space and time.
We want to search for a significant contribution of other events, within a predefined region in time and space around the source events.
For this we need to derive the expected signal and background contributions in that frame.

The Likelihood that describes this scenario can be derived from counting statistics.
If we expect $n_S$ signal and $n_B$ background events in the given frame, then the probability of observing $N$ events is given by a poisson pdf:

$$
    P_\text{Poisson}(N\ |\ n_S + n_B) = \mathcal{L}(N | n_S, n_B) = \frac{(n_S + n_B)^{N}}{N!}\cdot \exp(-(n_S + n_B))
$$

We want to fit for the number of signal events $n_S$ in the frame.
But each event doesn't have the same probability of contributing to either signal or background, because we don't have that information on a per event basis.
So we include prior information on a per event basis to account for that.

$$
    \mathcal{L}(N | n_S, n_B) = \frac{(n_S + n_B)^{N}}{N!}\cdot \exp(-(n_S + n_B)) \cdot \prod_{i=1}^N P_i
$$

Also the simple poisson pdf above only has one parameter, the total number of events, which can be fit for.
So we need to resolve this degeneracy in $n_S$, $n_B$ by giving additional information.
For that we include a weighted combination of the probability for an event to be signal, denoted by the PDF $S_i$, and for it to be background, denoted by $B_i$.
Because the simple counting probabilities are $n_S / (n_S + n_B)$ to count a signal event and likewise $n_B / (n_S + n_B)$ to count a background event we construct the per event prior $P_i$ as:

$$
    P_i = \frac{n_S}{n_S + n_B}\cdot S_i + \frac{n_B}{n_S + n_B}\cdot B_i
        = \frac{n_S \cdot S_i + n_B \cdot B_i}{n_S + n_B}
$$

Note that for equal probabilities $S_i$ and $B_i$, we simply end up with the normal poisson counting statistic.

Plugging that back into the likelihood we get:

$$
    \mathcal{L}(N | n_S, n_B) = \frac{(n_S + n_B)^{N}}{N!}\cdot \exp{(-(n_S + n_B))} \cdot \prod_{i=1}^N \frac{n_S \cdot S_i + n_B \cdot B_i}{n_S + n_B}
$$

Taking the natural logarithm to get the log-likelihood we arrive at:

$$
    \ln\mathcal{L}(N | n_S, n_B) = -(n_S + n_B) -\ln(N!) + \sum_{i=1}^N \ln((n_S + n_B) P_i)
$$

If we weight up $n_S$ then every events signal PDF is contributing a bit more than the background pdf.
So the fitter tries to find the combination of $n_S$ and $n_B$ that maximizes the likelihood.

To further simplify, we can use a measured and fixed background expectation rate $\langle n_B\rangle$ and fit only for the number of signal events.
Then we only fit for the number of signal events $n_S$.
The fixed background rate can be extracted from data by using the pdf of a larger timescale and average over that (or fit a function) to ensure that local fluctuations don't matter.

Then we end up with our full Likelihood (the denominator in $P_i$ cancels with the term from the poisson PDF):

$$
    \ln\mathcal{L}(N | n_S) = -(n_S + \langle n_B\rangle) -\ln(N!) + \sum_{i=1}^N \ln(n_S S_i + \langle n_B\rangle B_i)
$$

For the test statistic we want to test the hypothesis of having no signal $n_S=0$ vs. the alternative with a free parameter $n_S$:

$$
    \Lambda = \ln\frac{\mathcal{L}(\hat{n}_S)}{\mathcal{L}(n_S=0)}
            = \left[-(\hat{n}_S + \langle n_B\rangle) -\ln(N!) + \sum_{i=1}^N \ln(\hat{n}_S S_i + \langle n_B\rangle B_i)\right] - \left[-\langle n_B\rangle -\ln(N!) + \sum_{i=1}^N \ln(\langle n_B\rangle B_i)\right]
            = -\hat{n}_S + \sum_{i=1}^N \ln\left( \frac{\hat{n}_S S_i}{\langle n_B\rangle B_i} + 1 \right)
$$

The per event PDFs $S_i$ and $B_i$ can depend on arbitrary parameters.
The common choice here is to use a time, energy proxy and spatial proxy dependency, which has the most separation power:

$$
    S_i(x_i, t_i, E_i) = S_T(t_i) \cdot S_S(x_i) \cdot S_E(E_i) \\ 
    B_i(x_i, t_i, E_i) = B_T(t_i) \cdot B_S(x_i) \cdot B_E(E_i) 
$$

Because the Likelihood only contains ratios of the PDF, we only have to construct functions of the signal to background ratio for each time, spatial and energy distribution.

For the energy PDFs $S_E, B_E$ we use a 2D representation in reconstructed energy and declination because this has the most separation power (see coenders & skylab models).
The spatial part $S_S, B_S$ only depends on the distance from source to event, not on the absolute position on the sphere.
The time part $S_T, B_T$ is equivalent to that, only using the distance in time between source event and event.

**Note: It seems that in mrichman's analysis only a 1D energy-only PDF has been used. This lacks separation power when using both hemispheres, as in the southern sky the energy threshold is much higher.**

## Time PDF ratio

Background is uniformly distributed in the time window.
The signal distribution is falling off gaussian-like at both edges, so its normalization is different.
So the ratio $S_T / B_T$ is simply the signal pdf divided by the uniform background pdf $1 / (b - a)$ in the time frame.

The signal PDF written out explicitly, where $t_0$ is the source event's time and $t$ the event's time:

$$
    N \cdot S_T(t, t_0) = \begin{cases}
                     \frac{1}{\sqrt{2\pi}\sigma_T}\exp\left(-\frac{(t-T_0)^2}{2\sigma_T^2}
                     \right)&\quad\mathrm{, if }\ t \in [a, T_0]\\                
                     \frac{1}{\sqrt{2\pi}\sigma_T}&\quad\mathrm{, if }\ t \in [T_0, T_1]\\
                     \frac{1}{\sqrt{2\pi}\sigma_T}\exp\left(-\frac{(t-T_1)^2}
                     {2\sigma_T^2}\right)&\quad\mathrm{, if }\ t \in [T_1, b]\\ 
                    0 &\quad\mathrm{, else}
                  \end{cases}
$$

where $a, b$ are the bounds of the total time window, $T_0, T_1$ are the part, in which the signal is assumed to be uniformly distributed in time and $\sigma_T$ is the width of the gaussian edges.
The gaussian width $\sigma_T$ is as wide as the interval $T_1-T_0$ but constrained to the nearest value in $[2, 30]$ seconds if the frame gets too large or too small.
The total normalization $N$ is given by integrating over $S_T$ in $[a, b]$, resulting in:

$$
    N = \Phi_{T_1}(b) - \Phi_{T_0}(a) + \frac{T_1-T_0}{\sqrt{2\pi}\sigma_T}
$$

where

$$
    \Phi_{\mu}(x) = \int_{-\infty}^{x}\frac{1}{\sqrt{2\pi}\sigma_T}
      \exp\left(-\frac{(t-\mu)^2}{2\sigma_T^2}\right)\mathrm{d}t
$$
is the CDF of the gaussian PDF centered at $\mu$ (the rising edge is centered at $T_0$, the falling edge at $T_1$).

The background PDF respectively is simply:

$$
    B_T(t, t_0) = \begin{cases}
                     \frac{1}{b-a}&\quad\mathrm{, if }\ t \in [a, b]\\ 
                    0 &\quad\mathrm{, else}
                  \end{cases}    
$$

To get finite support we truncate the gaussian edges at $n\cdot\sigma_T$.
Though arbitrarily introduced, the concrete value of the cutoff doesn't really matter (so say 4, 5, 6 sigma, etc).

This is because in the LLH we get the product $\langle n_B \rangle B_i$.
A larger cutoff makes the normalization $(b - a)$ of the BG pdf larger, but at the same time makes the number of expected BG events grow in the same linear fashion.
So as long as we choose a cutoff which ensures that $S \approx 0$ outside, we're good to go.

In [None]:
def time_soverb(t, t0, dt, nsig):
    """
    Time signal over background PDF.

    Signal and background PDFs are each normalized over seconds.
    The signal PDF is uniform in [t0, t0 + dt] and has gaussian edges to
    smoothly let it fall off to zero. The stddev ``sig_t`` of the edges is dt
    when dt is in [2, 30]s, otherwise the nearest edge of that interval.

    To ensure finite support, the edges are truncated after nsig * sig_t.
    The background PDF is uniform over the resulting total time window.

    Parameters
    ----------
    t : array-like
        Times given in MJD for which we want to evaluate the ratio.
    t0 : float
        Time of the source event, in the same unit as t.
    dt : float
        Time window in seconds starting from t0 in which the signal pdf is
        assumed to be uniform. Must not be negative.
    nsig : float
        Clip the gaussian edges at nsig * sig_t.

    Returns
    -------
    ratio : ndarray
        Signal over background ratio for each time in t. Zero outside of
        the total time window [-nsig * sig_t, dt + nsig * sig_t] around t0.

    Raises
    ------
    ValueError
        If dt is negative.
    """
    if dt < 0:
        raise ValueError("dt must not be negative.")

    secinday = 24. * 60. * 60.

    # Accept any array-like (lists included) and always work in float
    t = np.asarray(t, dtype=float)

    # Normalize relative to t0 in seconds (first multiply avoids rounding?)
    _t = t * secinday - t0 * secinday

    # Create signal PDF
    # Constrain sig_t to [2, 30]s regardless of uniform time window
    sig_t = np.clip(dt, 2, 30)
    sig_t_clip = nsig * sig_t
    gaus_norm = (np.sqrt(2 * np.pi) * sig_t)

    # Split in def regions gaus rising, uniform, gaus falling
    gr = (_t < 0) & (_t >= -sig_t_clip)
    gf = (_t > dt) & (_t <= dt + sig_t_clip)
    uni = (_t >= 0) & (_t <= dt)

    # Builtin float instead of the alias np.float, which newer NumPy removed
    pdf = np.zeros_like(_t, dtype=float)
    pdf[gr] = scs.norm.pdf(_t[gr], loc=0, scale=sig_t)
    pdf[gf] = scs.norm.pdf(_t[gf], loc=dt, scale=sig_t)
    # Connect smoothly with the gaussians (value of the gaussian at its peak)
    pdf[uni] = 1. / gaus_norm

    # Normalize signal distribution: area of both truncated gaussian halves
    # plus the area of the uniform part
    dcdf = (scs.norm.cdf(dt + sig_t_clip, loc=dt, scale=sig_t) -
            scs.norm.cdf(-sig_t_clip, loc=0., scale=sig_t))
    norm = dcdf + dt / gaus_norm
    pdf /= norm

    # Calculate the ratio, background is uniform in the total time window
    bg_pdf = 1. / (dt + 2 * sig_t_clip)
    ratio = pdf / bg_pdf
    return ratio

In [None]:
# S/B ratio curves for several uniform time windows, as in the paper.
# Any source time works here, so take one start time from the data
t0 = start_mjd[100]
t0_sec = t0 * secinday

# Uniform window lengths from t0 in seconds, gaussian edges cut at 4 sigma
dts = [5, 50, 200]
nsig = 4

# Time axis in MJD around t0, wide enough to show the largest window
max_dt = np.amax(dts)
clip = nsig * np.clip(max_dt, 2, 30)
plt_rng = np.array([-clip, max_dt + clip])
t_lo, t_hi = t0_sec + 1.2 * plt_rng
t = np.linspace(t_lo, t_hi, 1000) / secinday
_t = t * secinday - t0 * secinday

# Dashed vertical line at the source event time
plt.axvline(x=0, ymin=0, ymax=1, c="#353132", ls="--", lw=2)

colors = ["C0", "C3", "C2"]
for i, (dt, color) in enumerate(zip(dts, colors)):
    # One S/B curve per uniform window length
    SoB = time_soverb(t, t0, dt, nsig)
    label = r"$T_\mathrm{{uni}}$: {:>3d}s".format(dt)
    plt.plot(_t, SoB, lw=2, c=color, label=label)

# Same look as the paper plot, but with slightly extended borders to show
# that the ratio stays zero outside of the total time frame
plt.xlim(1.2 * plt_rng)
plt.ylim(0, 3)
plt.xlabel("t - t0 in sec")
plt.ylabel("S / B")
plt.legend(loc="upper right")
plt.grid(ls="--", lw=1)

plt.savefig("./data/figs/time_pdf_ratio.png", dpi=200)

plt.show()

## Spatial Pdf

The spatial pdf is holding information on how close the event was to the source position.
Close events are more likely to originate from the source.

To model this behaviour we use a Kent distribution (gaussian correctly normalized on a sphere).

$$
    S_S(x_\mathrm{evt}; x_S, \kappa) = \frac{\kappa}{4\pi \sinh{\kappa}}\cdot \exp(\kappa\cos(\psi))
$$

where $x_\mathrm{evt}$ is the directional vector of the event, $x_S$ is the directional vector of the source, $\psi$ is the angle between the two and $\kappa$ describes the uncertainty in the event reconstruction.

$\kappa$ is connected to the more familiar $\sigma$ error by $\kappa = 1 /\sigma^2$, which is valid up to $\sigma\approx 40^\circ$.

Classically the background pdf is constructed from data.
It is assumed to be uniform in right-ascension and the declination dependence is modeled with a spline fitted to a histogram in sinDec.
Then the PDF is given by:

$$
    B_S(x_\mathrm{evt}) = \frac{1}{2\pi}\cdot p(\sin\delta)
$$

But we already did the work of creating a smooth KDE of our data in logE, declination and sigma.
So we can use that KDE to get the values of our declination distribution.
Because integrating out the KDE is slow, we just use our previous sample from the KDE, bin it finely (quasi continuously) and interpolate it with a spline to also get values in between the bins.
This way we do not depend on a binning of the data itself, but can use the available validated KDE PDF.

In [None]:
def spatial_signal(src_ra, src_dec, ev_ra, ev_dec, ev_sig, kent=True):
    """
    Spatial distance PDF between source position(s) and event positions.

    Signal is assumed to cluster around source position(s).
    The PDF is a convolution of a delta function for the localized sources
    and a Kent (gaussian on a sphere) distribution with the events
    positional reconstruction error as width.

    Multiple source positions can be given, to use it in a stacked
    search.

    Parameters
    -----------
    src_ra : array-like
        Src positions in equatorial RA in radian: [0, 2pi].
    src_dec : array-like
        Src positions in equatorial DEC in radian: [-pi/2, pi/2].
    ev_ra : array-like
        Event positions in equatorial RA in radian: [0, 2pi].
    ev_dec : array-like
        Event positions in equatorial DEC in radian: [-pi/2, pi/2].
    ev_sig : array-like
        Event positional reconstruction error in radian (eg. Paraboloid).
    kent : bool, optional
        If True use the Kent distribution with kappa = 1 / ev_sig**2,
        otherwise a symmetric 2D gaussian in the angular distance.
        (default: True)

    Returns
    --------
    S : array-like, shape(n_sources, n_events)
        Spatial signal probability for each event and each source.

    """
    # Shape (n_sources, 1), suitable for 1 src or multiple srcs
    src_ra = np.atleast_1d(src_ra)[:, np.newaxis]
    src_dec = np.atleast_1d(src_dec)[:, np.newaxis]

    # Event values are documented as array-like, so convert explicitly.
    # Plain lists would otherwise fail in `ev_sig**2` below.
    ev_ra = np.asarray(ev_ra)
    ev_dec = np.asarray(ev_dec)
    ev_sig = np.asarray(ev_sig)

    # Dot product in polar coordinates
    cosDist = (np.cos(src_ra - ev_ra) *
               np.cos(src_dec) * np.cos(ev_dec) +
               np.sin(src_dec) * np.sin(ev_dec))

    # Handle possible floating precision errors
    cosDist = np.clip(cosDist, -1, 1)

    if kent:
        # Stabilized version for possibly large kappas: the factor
        # exp(kappa) is pulled out of sinh(kappa) and merged into the
        # exponential, so no large exponentials are evaluated
        kappa = 1. / ev_sig**2
        S = (kappa / (2. * np.pi * (1. - np.exp(-2. * kappa))) *
             np.exp(kappa * (cosDist - 1.)))
    else:
        # Otherwise use standard symmetric 2D gaussian
        dist = np.arccos(cosDist)
        ev_sig_2 = 2 * ev_sig**2
        S = np.exp(-dist**2 / (ev_sig_2)) / (np.pi * ev_sig_2)

    return S
    
def create_spatial_bg_spline(sin_dec, bins=100, range=None, k=3):
    """
    Fit an interpolating spline to a normalized histogram of sin(dec).

    The spline is fitted to the logarithm of the histogram, to avoid ringing.
    Normalization is done by normalizing the hist.

    Parameters
    ----------
    sin_dec : array-like
        Sine of the declination coordinate of each event, [-1, 1].
    bins : int or array-like
        Binning passed to `np.histogram`. (default: 100)
    range : array-like
        Lower and upper boundary for the histogram. (default: None)
    k : int
        Order of the spline. (default: 3)

    Returns
    -------
    spl : scipy.interpolate.InterpolatedUnivariateSpline
        Spline object interpolating the log of the histogram at the bin mids.
        Must be evaluated with sin(dec) and exponentiated to give the
        correct values. The spline is extrapolating outside its definition
        range (`ext=0`).

    Raises
    ------
    ValueError
        If any histogram bin is empty or if data lies outside the outermost
        bin edges.
    """
    density, edges = np.histogram(sin_dec, bins=bins, range=range,
                                  density=True)

    # The log of an empty bin is undefined, so refuse to continue
    is_empty = density <= 0.
    if np.any(is_empty):
        empty_ids = np.arange(len(edges) - 1)[is_empty]
        raise ValueError("Declination hist bins empty, this must not happen. "
                         "Empty bins: {0}".format(empty_ids))

    # Events not covered by the binning are not represented by the spline
    too_low = sin_dec < edges[0]
    too_high = sin_dec > edges[-1]
    if np.any(too_low | too_high):
        raise ValueError("Data outside of declination bins!")

    centers = 0.5 * (edges[:-1] + edges[1:])
    log_density = np.log(density)
    return sci.InterpolatedUnivariateSpline(centers, log_density, k=k, ext=0)

def spatial_background(ev_sin_dec, sindec_log_bg_spline):
    """
    Evaluate the spatial background PDF for each event.

    The declination dependence is taken from a previously created spline of
    the log of the sin(dec) distribution, the right-ascension dependence is
    a constant factor 1 / (2 pi).

    Parameters
    ----------
    ev_sin_dec : array-like
        Sine of the declination coordinate of each event, [-1, 1].
    sindec_log_bg_spline : callable
        Returns the logarithm of the bg PDF at given sin_dec values, eg. the
        spline created by `create_spatial_bg_spline`.

    Returns
    -------
    B : array-like
        The value of the background PDF for each event.
    """
    uniform_in_ra = 1. / 2. / np.pi
    sindec_pdf = np.exp(sindec_log_bg_spline(ev_sin_dec))
    return uniform_in_ra * sindec_pdf


def spatial_SoB(src_ra, src_dec, ev_ra, ev_dec, ev_sig,
                sindec_log_bg_spline, kent=True):
    """
    Spatial signal over background ratio for each source and each event.

    Parameters
    ----------
    src_ra, src_dec : array-like
        Source positions, passed on to `spatial_signal`.
    ev_ra, ev_dec, ev_sig : array-like
        Event positions and positional errors, passed on to
        `spatial_signal`. The sine of `ev_dec` is used to evaluate the
        background PDF.
    sindec_log_bg_spline : callable
        Returns the logarithm of the bg PDF at given sin(dec) values, passed
        on to `spatial_background`.
    kent : bool, optional
        Passed on to `spatial_signal`. (default: True)

    Returns
    -------
    SoB : array-like
        Ratio S / B with the same shape as the signal array returned by
        `spatial_signal`. Entries where the background PDF is not positive
        are set to zero.
    """
    S = spatial_signal(src_ra, src_dec, ev_ra, ev_dec, ev_sig, kent)
    # Derive sin(dec) from the given events instead of reading a notebook
    # global, so the result depends on the arguments only
    B = spatial_background(np.sin(ev_dec), sindec_log_bg_spline)

    SoB = np.zeros_like(S)
    # One copy of the background values per source, matching the shape of S
    B = np.repeat(B[np.newaxis, :], repeats=S.shape[0], axis=0)
    # Avoid division by zero, those entries keep the initial zero
    m = B > 0
    SoB[m] = S[m] / B[m]

    return SoB

### Signal PDF

In [None]:
def plot_dec_vs_signal(S, ev_dec, src_ra, src_dec, weights, ax=None):
    """
    Plot per-source values and their weighted stack against declination.

    Parameters
    ----------
    S : array-like
        Values to plot, indexed as S[i] for the i-th source, each with one
        entry per event.
    ev_dec : array-like
        Event declinations in radian, used as x values (shown in degree).
    src_ra, src_dec : array-like
        Source positions in radian. Only the declinations are drawn, as
        tick markers.
    weights : array-like
        One weight per source, must broadcast against S.
    ax : matplotlib axes, optional
        Axes to draw on. If None a new figure is created. (default: None)

    Returns
    -------
    ax : matplotlib axes
        The axes that were drawn on.
    """
    if ax is None:
        _, ax = plt.subplots(1, 1)

    ev_dec_deg = np.rad2deg(ev_dec)
    src_dec_deg = np.rad2deg(np.atleast_1d(src_dec))

    # Plot signal per source for each event
    for i, (_sra, sdec) in enumerate(zip(src_ra, src_dec)):
        ax.plot(ev_dec_deg, S[i], ls="-")
        # Tick marker at the source declination
        ax.plot(np.rad2deg(sdec), -10, "k|")

    # Simulate a simple stacking, one weight per source
    ax.plot(ev_dec_deg, np.sum(weights * S, axis=0) / np.sum(weights),
            ls="--", c=dg, label="stacked")

    # Show all sources with 1 degree margin. The limits are derived from
    # the given sources instead of from notebook globals.
    ax.set_xlim([np.amin(src_dec_deg) - 1, np.amax(src_dec_deg) + 1])
    ax.set_xlabel("DEC in °")
    ax.set_ylabel("Signal pdf")
    ax.legend(loc="upper right")
    return ax

# Toy case: sources every `step` degrees on the diagonal RA = DEC and events
# on the same diagonal in the same range, but with tighter spacing
smin, smax, step = -5, 5, 2

src_grid_deg = np.arange(smin, smax + step, step)
src_ra = np.deg2rad(src_grid_deg)
src_dec = np.deg2rad(src_grid_deg)

ev_grid_deg = np.linspace(smin, smax, 1000)
ev_ra = np.deg2rad(ev_grid_deg)
ev_dec = np.deg2rad(ev_grid_deg)
# Every toy event gets a positional error of 1 degree
ev_sig = np.deg2rad(np.ones_like(ev_ra))

S = spatial_signal(src_ra, src_dec, ev_ra, ev_dec, ev_sig, kent=True)

# Increasing weight per source, as column to broadcast against S
weights = np.arange(1, len(src_dec) + 1)[:, np.newaxis]
_ = plot_dec_vs_signal(S, ev_dec, src_ra, src_dec, weights)
plt.show()

# Same sources with the real data. Sort in dec first to get nice lines
idx = np.argsort(exp["dec"])
ev_ra, ev_dec = exp["ra"][idx], exp["dec"][idx]
# Use np.deg2rad(np.ones_like(ev_ra)) instead to match the toy case
ev_sig = exp["sigma"][idx]

S = spatial_signal(src_ra, src_dec, ev_ra, ev_dec, ev_sig, kent=True)

# Equal weights for all sources this time
weights = np.ones_like(weights)
ax = plot_dec_vs_signal(S, ev_dec, src_ra, src_dec, weights)
ax.set_yscale("log")
ax.set_ylim(1, 1e4)
plt.show()

### Background PDF

In [None]:
# Fit a 3D KDE in (logE, dec, sigma) to the data and draw a large sample from
# it to get a smooth declination distribution for the background PDF.
# KDE CV is running on cluster and pickles the GridSearchCV
# NOTE(review): pickle.load executes arbitrary code from the file, only load
# pickles from a trusted source.
fname = "./data/kde_cv/KDE_model_selector_20_exp_IC86_I_followup_2nd_pass.pickle"
with open(fname, "rb") as f:
    model_selector = pickle.load(f)

kde = model_selector.best_estimator_
bw = model_selector.best_params_["bandwidth"]
print("Best bandwidth : {:.3f}".format(bw))

# We maybe just want to stick with the slightly overfitting kernel to
# be as close as possible to data. If set, the estimator from the cross
# validation is replaced by a fresh one with a fixed, hand-picked bandwidth
OVERFIT = True
if OVERFIT:
    bw = 0.075
    kde = skn.KernelDensity(bandwidth=bw, kernel="gaussian", rtol=1e-8)
print("Used bandwidth : {:.3f}".format(bw))

# KDE sample must be cut in sigma before fitting, similar to range in hist
_exp = exp[exp["sigma"] <= np.deg2rad(5)]

# Scaling factor per dimension, applied before fitting and divided out
# again after sampling
fac_logE = 1.5
fac_dec = 2.5
fac_sigma = 2.

# Note: sigma is converted to degree here, dec is scaled as stored in exp
_logE = fac_logE * _exp["logE"]
_sigma = fac_sigma * np.rad2deg(_exp["sigma"])
_dec = fac_dec * _exp["dec"]

# One row per event, columns in the order (logE, dec, sigma)
kde_sample = np.vstack((_logE, _dec, _sigma)).T

# Fit KDE best model to sample
kde.fit(kde_sample)

# Generate some BG samples to compare to the original data hist.
# Use more statistics, histograms get normalized and we want the best estimate
# for the pdf
# NOTE(review): no random_state is passed, so the drawn sample presumably
# differs from run to run - confirm whether that is intended
nsamples_kde = int(1e7)
bg_samples = kde.sample(n_samples=nsamples_kde)

# Restore the original scaling and cut away spillovers from the finite width:
# only samples with dec inside (-pi/2, pi/2) are kept (column 1 is dec)
bg_dec = bg_samples[:, 1] / fac_dec
m = (bg_dec > -np.pi / 2.) & (bg_dec < np.pi / 2.)
bg_dec = bg_dec[m]
bg_sin_dec = np.sin(bg_dec)

In [None]:
fig, (axl, axr) = plt.subplots(1, 2, figsize=(12, 4))

# Left: finely binned KDE sample together with the spline fitted to it.
# `density` replaces the `normed` keyword, which matplotlib has removed.
# The returned values are the same as from np.histogram with density=True,
# so the histogram is not computed a second time.
bins = 100
h, b, _ = axl.hist(bg_sin_dec, bins=bins, density=True, alpha=0.5)
kde_spl = create_spatial_bg_spline(bg_sin_dec, bins=bins)

# The spline holds the log of the PDF, so exponentiate for plotting
_sin_dec = np.linspace(-1, 1, 1000)
pdf = np.exp(kde_spl(_sin_dec))
axl.plot(_sin_dec, pdf, lw=2)

# Right: classic approach, spline fitted to the coarsely binned data.
# NOTE(review): the histogram shows the KDE sample while the spline is
# fitted to the data - confirm that this comparison is the intended one.
bins = 20
sin_dec = np.sin(exp["dec"])
h, b, _ = axr.hist(bg_sin_dec, bins=bins, density=True, alpha=0.5)
spl = create_spatial_bg_spline(sin_dec, bins=bins)

pdf = np.exp(spl(_sin_dec))
axr.plot(_sin_dec, pdf)

# Quickly integrate BG pdf to check norm is OK (increased subdivision lim).
# The extra argument is passed as a proper 1-tuple. The factor 2 pi accounts
# for the right-ascension, in which the PDF is uniform.
I = scint.quad(spatial_background, -1, 1, args=(kde_spl,), limit=100)[0]
print("Area under all sky BG PDF is : ", 2. * np.pi * I)

### Signal over Background

In [None]:
# Make srcs across the dec range. SoB should follow the sinDec BG
# distribution. With a single source we couldn't see that, because it drops
# to zero far from the src position
smin, smax, step = -90, +90, 10

src_ra = np.deg2rad(np.arange(smin, smax + step, step))
src_dec = np.deg2rad(np.arange(smin, smax + step, step))

ev_ra = np.deg2rad(np.linspace(smin, smax, 1000))
ev_dec = np.deg2rad(np.linspace(smin, smax, 1000))
ev_sin_dec = np.sin(ev_dec)
ev_sig = np.deg2rad(np.ones_like(ev_ra))

# Increasing weight per source, as column to broadcast against the
# (n_sources, n_events) arrays
weights = np.arange(1, len(src_dec) + 1)[:, np.newaxis]

fig, ((axtl, axtr), (axbl, axbr)) = plt.subplots(2, 2, figsize=(12, 10))

# Signal only (top left)
S = spatial_signal(src_ra, src_dec, ev_ra, ev_dec, ev_sig, kent=True)
_ = plot_dec_vs_signal(S, ev_dec, src_ra, src_dec, weights, ax=axtl)
axtl.set_xlim(-90, 90)

# Background only (bottom left). The histogram is only computed here and
# not drawn. The former draw call went to the axes of the previous figure
# and used the `normed` keyword, which matplotlib has removed.
bins = 100
h, b = np.histogram(bg_sin_dec, bins=bins, density=True)
_sin_dec = np.linspace(-1, 1, 1000)
pdf = np.exp(kde_spl(_sin_dec))
axbl.plot(np.rad2deg(np.arcsin(_sin_dec)), pdf, lw=2, label="pdf")
axbl.set_ylim(0, 1)
# 1 / BG PDF on second axis
axbl2 = axbl.twinx()
axbl2.plot(np.rad2deg(np.arcsin(_sin_dec)), 1. / pdf, c="C1",
           lw=2, label="1/pdf")
axbl2.set_ylim(0, 6)
axbl.set_xlabel("DEC in °")
axbl.set_xlim(-90, 90)
axbl.legend(loc="upper left")
axbl2.legend(loc="upper center")

# SoB on example + BG PDF (top right)
SoB = spatial_SoB(src_ra, src_dec, ev_ra, ev_dec, ev_sig, kde_spl, kent=True)
_ = plot_dec_vs_signal(SoB, ev_dec, src_ra, src_dec, weights, ax=axtr)
axtr.plot(np.rad2deg(np.arcsin(_sin_dec)), pdf, lw=3, label="BG pdf", c=dg)
axtr.set_xlim(-90, 90)
axtr.set_yscale("log")
axtr.set_ylim(0.1, 1e5)
axtr.legend(loc="upper left")

# Now with the real data (bottom right). Sort first in dec to show with nice
# lines + BG PDF
idx = np.argsort(exp["dec"])
ev_ra = exp["ra"][idx]
ev_dec = exp["dec"][idx]
ev_sin_dec = np.sin(ev_dec)
ev_sig = exp["sigma"][idx]
# ev_sig = np.deg2rad(np.ones_like(ev_ra))  # To match the simple example

SoB = spatial_SoB(src_ra, src_dec, ev_ra, ev_dec, ev_sig, kde_spl, kent=True)

_ = plot_dec_vs_signal(SoB, ev_dec, src_ra, src_dec, weights, ax=axbr)
axbr.plot(np.rad2deg(np.arcsin(_sin_dec)), pdf, lw=3, label="BG pdf", c="C0")
axbr.set_yscale("log")
axbr.set_ylim(0.1, 1e5)
axbr.legend(loc="upper left")

plt.show()

## Energy-Space Pdf

This will be the same in skylab and the first time we need a MC set.
Make equally binned 2D histograms in logE and sinDec, then take the ratio.
Because of the equal binning, the normalization is automatically correct.
Then fit a 2D spline to it which gives the signal to background ratio directly.

Here we can again use a KDE fitted to data.
This way we can sample more events in the sparsely populated areas and obtain a broader ratio distribution.
Because we can't use the sklearn KDE for weighted samples, we use a normal histogram for the MC, which has more events anyway, so the problem is not so urgent.

Where data is missing, either use background MC or conservatively assign the highest ratio found where data is available to the positions where no data is present.
This is only relevant for signal injection, because on data the ratio is by definition defined everywhere where data is.

In [None]:
def get_bg_sample_from_kde(nsamples_kde=int(1e7)):
    """
    Draw background events in sin(dec) and logE from a KDE fitted to data.

    A KDE in (logE, dec, sigma) is fitted to the module-level `exp` events
    with sigma <= 5 degree. Samples outside dec in (-pi/2, pi/2) or with
    sigma <= 0 are dropped, so fewer than `nsamples_kde` values may be
    returned.

    Parameters
    ----------
    nsamples_kde : int
        Number of samples drawn from the KDE. (default: int(1e7))

    Returns
    -------
    bg_sindec : array-like
        Sine of the sampled declinations.
    bg_logE : array-like
        Sampled logE values belonging to `bg_sindec`.
    """
    print("Sampling from BG KDE")
    # The cross validation ran on the cluster and pickled the GridSearchCV
    fname = "./data/kde_cv/KDE_model_selector_20_exp_IC86_I_followup_2nd_pass.pickle"
    with open(fname, "rb") as f:
        model_selector = pickle.load(f)

    kde = model_selector.best_estimator_
    bw = model_selector.best_params_["bandwidth"]
    print("Best bandwidth : {:.3f}".format(bw))

    # Optionally swap in a slightly overfitting kernel to stay as close as
    # possible to the data
    OVERFIT = True
    if OVERFIT:
        bw = 0.075
        kde = skn.KernelDensity(bandwidth=bw, kernel="gaussian", rtol=1e-8)
    print("Used bandwidth : {:.3f}".format(bw))

    # Scaling per dimension, applied for the fit and undone after sampling
    scale_logE, scale_dec, scale_sigma = 1.5, 2.5, 2.

    # Cut in sigma before fitting, similar to a range in a histogram
    selected = exp[exp["sigma"] <= np.deg2rad(5)]
    features = np.vstack((scale_logE * selected["logE"],
                          scale_dec * selected["dec"],
                          scale_sigma * np.rad2deg(selected["sigma"]))).T
    kde.fit(features)

    # Use high statistics to get the best estimate of the pdf
    drawn = kde.sample(n_samples=nsamples_kde)

    # Restore the original scaling
    logE = drawn[:, 0] / scale_logE
    dec = drawn[:, 1] / scale_dec
    sigma = drawn[:, 2] / scale_sigma

    # Cut away spillovers caused by the finite kernel width
    keep = (dec > -np.pi / 2.) & (dec < np.pi / 2.)
    keep = keep & (sigma > 0)

    return np.sin(dec[keep]), logE[keep]

In [None]:
# Prepare the MC data, signal weighted to astro unbroken power law
gamma = 2.
# No flux norm, because we normalize anyway
mc_w = mc["ow"] * mc["trueE"]**(-gamma)

# Make 2D hist from data KDE and from MC, use the MC binning.
# First axis is sinDec, second axis is logE.
mc_sindec = np.sin(mc["dec"])
mc_logE = mc["logE"]
bins = [50, 40]
# NOTE(review): the name `range` shadows the builtin from here on. It is
# kept because the plotting cell below reads it.
range = [[-1, 1], [1, 10]]
# `density` replaces the `normed` keyword, which NumPy has removed
mc_h, bx, by = np.histogram2d(mc_sindec, mc_logE, bins=bins, range=range,
                              weights=mc_w, density=True)

b = [bx, by]

MODE = "DATA"  # "DATA", "KDE_SAM", "KDE_INT"
if MODE == "DATA":
    bg_logE = exp["logE"]
    bg_sindec = np.sin(exp["dec"])
    bg_h, _, _ = np.histogram2d(bg_sindec, bg_logE, bins=b,
                                range=range, density=True)
elif MODE == "KDE_SAM":
    bg_sindec, bg_logE = get_bg_sample_from_kde(int(2e7))
    bg_h, _, _ = np.histogram2d(bg_sindec, bg_logE, bins=b,
                                range=range, density=True)
elif MODE == "KDE_INT":
    _bins = np.load("data/1d_integrate_kde/logE_sinDec_bins_50x50.npy")
    vals = np.load("data/1d_integrate_kde/logE_sinDec_int_50x50.npy")
    mids = get_binmids(_bins)
    xx, yy = map(np.ravel, np.meshgrid(mids[0], mids[1]))
    bg_h, _, _ = np.histogram2d(xx, yy, bins=_bins, weights=vals,
                                density=True, range=range)
    # Turn around to have sinDec vs logE like in the other examples
    bg_h = bg_h.T
    # KDE_INT is not so good, because it falls too quickly. Need to clip it
    bg_h = np.clip(bg_h, 1e-10, 1)

# 3 cases:
#   - Data & MC: Calculate the ratio
#   - No data or no MC: Fill the logE borders and interpolate in between
#   - No data and no MC: Assign any value (eg 1), these are never accessed
# Get bin mids per axis of the entire histogram
m = get_binmids(b)

# Fill value: 1) min/max for low/hig edge or 2) nearest in column
FILLVAL = "MINMAX"  # "COL" | "MINMAX"

# This assumes at least one valid point in each sinDec slice
m1 = (bg_h > 0) & (mc_h > 0)
SoB = np.ones_like(bg_h) * -1  # Init with unphysical value
SoB[m1] = mc_h[m1] / bg_h[m1]
# Global extrema of the valid ratios, used as "MINMAX" fill values
SoBmin, SoBmax = np.amin(SoB[m1]), np.amax(SoB[m1])

# In each sinDec slice fill the bins with no data or no MC along logE
for i in np.arange(bins[0]):
    _m = (bg_h[i] <= 0) | (mc_h[i] <= 0)  # All invalid points in the slice
    # Only fill missing logE border values and then proceed to interpolation.
    # The borders are tested directly, so a valid border value is never
    # overwritten.

    # Lower edge (argmin stops at the first False, ie. first valid point)
    if _m[0]:
        if FILLVAL == "COL":
            SoB[i, 0] = SoB[i, np.argmin(_m)]
        elif FILLVAL == "MINMAX":
            SoB[i, 0] = SoBmin

    # Repeat with turned around array for upper edge
    if _m[-1]:
        if FILLVAL == "COL":
            SoB[i, -1] = SoB[i, len(_m) - 1 - np.argmin(_m[::-1])]
        elif FILLVAL == "MINMAX":
            SoB[i, -1] = SoBmax

    # Interpolate in each slice over missing entries, which still hold the
    # unphysical init value
    _m = SoB[i] > 0
    x = m[1][_m]
    y = SoB[i, _m]
    fi = sci.interp1d(x, y, kind="linear")
    SoB[i] = fi(m[1])

# These do never occur, so set them to 1 to be identified quickly in the plot
m4 = (bg_h <= 0) & (mc_h <= 0)
SoB[m4] = 1.

# Now interpolate the log of the ratio on the grid of bin mids. Outside of
# the grid the fill value 0 is returned, which is a ratio of 1 after
# exponentiating.
SoB_spl = sci.RegularGridInterpolator(m, np.log(SoB), method="linear",
                                      bounds_error=False, fill_value=0.)

In [None]:
# Coenders style sindec vs logE
m = get_binmids(b)
xx, yy = map(np.ravel, np.meshgrid(*m))

fig, ax = plt.subplots(2, 2, figsize=(12, 10))

(axtl, axtr), (axbl, axbr) = ax

# Data (top left). The already filled histograms are drawn by using the bin
# mids as points and the bin contents as weights.
_, _, _, img = axtl.hist2d(xx, yy, bins=b, weights=bg_h.T.flatten(),
                           norm=LogNorm())
axtl.set_title("Exp events : {}".format(len(exp)))
caxtl = split_axis(axtl, cbar=True)
plt.colorbar(cax=caxtl, mappable=img)

# MC (top right)
_, _, _, img = axtr.hist2d(xx, yy, bins=b, weights=mc_h.T.flatten(),
                           norm=LogNorm())
axtr.set_title("Signal. gamma = {:.1f}".format(gamma))
caxtr = split_axis(axtr, cbar=True)
plt.colorbar(cax=caxtr, mappable=img)

# Ratio hist (bottom left), color range symmetric in log around 1
# NOTE(review): this was max(amin, amax), which always equals amax. Possibly
# max(1 / amin, amax) was intended to cover both extremes - confirm.
cnorm = np.amax(SoB)  # coenders: 1e-3, 1e3
# The limits are given to the norm itself, because matplotlib rejects
# `vmin`/`vmax` passed next to a `norm`
_, _, _, img = axbl.hist2d(xx, yy, bins=b, weights=SoB.T.flatten(),
                           norm=LogNorm(vmin=1. / cnorm, vmax=cnorm),
                           cmap="coolwarm")
axbl.set_title("Signal over background")
caxbl = split_axis(axbl, cbar=True)
plt.colorbar(cax=caxbl, mappable=img)

# Ratio spline (bottom right), evaluated on a fine grid over the full range
x = np.linspace(*range[0], num=500 + 1)
y = np.linspace(*range[1], num=500 + 1)
XX, YY = np.meshgrid(x, y)
xx, yy = map(np.ravel, [XX, YY])
gpts = np.vstack((xx, yy)).T
# The interpolator holds the log of the ratio
zz = np.exp(SoB_spl(gpts))
ZZ = zz.reshape(XX.shape)
# Plotting with hist creates strange effects... Use pcolormesh instead
img = axbr.pcolormesh(XX, YY, ZZ, cmap="coolwarm",
                      norm=LogNorm(vmin=1. / cnorm, vmax=cnorm))
axbr.set_title("Spline interpolation")
caxbr = split_axis(axbr, cbar=True)
plt.colorbar(cax=caxbr, mappable=img)

# plt.savefig("./data/figs/energy_ratio_spline_minmaxfill.png", dpi=200)
plt.show()