In [None]:
import helper as hlp

import numpy as np

import matplotlib.pyplot as plt
import matplotlib.dates as mpldates
import matplotlib.gridspec as gridspec
from matplotlib.colors import LogNorm
%matplotlib inline

import scipy.interpolate as sci
import scipy.optimize as sco
import scipy.stats as scs

import json
import datetime
import pickle
from astropy.time import Time as astrotime

import sklearn.neighbors as skn
import sklearn.model_selection as skms  # Newer version of grid_search

from corner_hist import corner_hist
from anapymods3.plots.general import split_axis, get_binmids, hist_marginalize

# Load data

Load IC86 data from epinat, which should be the usual IC86-I (2011) PS sample, but pull corrected and OneWeights corrected by number of events generated.

In [None]:
# Load the experimental data, the simulation and the livetime through the
# project helper. Later cells access `exp` by field name ("timeMJD", "logE",
# "sigma", "dec").
# NOTE(review): the exact return types are defined in `helper` — confirm there.
exp, mc, livetime = hlp.load_data()

# Get data livetime

Generate from good run list as stated here:
- http://icecube.wisc.edu/~coenders/html/build/html/ic86-bdt/muonL3.html
- https://wiki.icecube.wisc.edu/index.php/IC86_I_Point_Source_Analysis/Data_and_Simulation

It should be 332.61 days as stated by jefeintzeig and scoenders.
We create one bin per included run, with exactly that width.
Excluded runs are those with too high/low rate and without everything marked "good".

Livetime is a bit higher, because we used a newer runlist from iclive instead of the old non-json v1.4.
See side test for that comparison.

In [None]:
# Build the run information from the run list and select the included runs.
# NOTE(review): the selection criteria are implemented in `hlp.get_good_runs`
# — confirm there.
run_list = hlp.get_run_list()
run_dict = hlp.get_run_dict(run_list)
# Stored as `_livetime`, so the `livetime` returned by `hlp.load_data()` is
# not overwritten.
inc_run_arr, _livetime = hlp.get_good_runs(run_dict)

print("IC86-I livetime from iclive: ", _livetime)

# Bin BG according to runlist

Each run is one bin in the bg rate vs time plot.
The rate is normed to Hertz by dividing through the bin sizes in seconds.

In [None]:
# Store events in bins with run borders
exp_times = exp["timeMJD"]
start_mjd = inc_run_arr["start_mjd"]
stop_mjd = inc_run_arr["stop_mjd"]

# Running total of selected events, used for the cross-check below
tot = 0
# Maps run ID -> events of `exp` inside that run's time interval
evts_in_run = {}
for start, stop , runid in zip(start_mjd, stop_mjd, inc_run_arr["runID"]):
    # Half-open interval [start, stop): an event exactly on the shared border
    # of two adjacent runs is assigned to the later run only.
    mask = (exp_times >= start) & ( exp_times < stop)
    evts_in_run[runid] = exp[mask]
    tot += np.sum(mask)
    
# Crosscheck, if we got all events and counted nothing double
print("Do we have all events? ", tot == len(exp))
print("  Events selected : ", tot)
print("  Events in exp   : ", len(exp))

In [None]:
# Create binmids and histogram values in each bin
binmids = 0.5 * (start_mjd + stop_mjd)

# Look the counts up by run ID in the order of `inc_run_arr`, so entry k of
# `h` always belongs to the run that defines bin k (no reliance on the dict
# iteration order). The builtin `float` replaces the removed `np.float` alias.
h = np.array([len(evts_in_run[runid]) for runid in inc_run_arr["runID"]],
             dtype=float)

# Mask those with zero rate
m = h > 0.
binmids = binmids[m]
h = h[m]

# Create plot arrays: x error is half the run length (in MJD days), y error
# is the Poisson error on the raw counts.
runtimes_mjd = inc_run_arr["stop_mjd"] - inc_run_arr["start_mjd"]
xerr = runtimes_mjd[m] / 2.
yerr = np.sqrt(h)

# Show in Hertz, so go from MJD days to seconds in bin widths
secsinday = 24. * 60. * 60
norm = (stop_mjd[m] - start_mjd[m]) * secsinday
h_norm = h / norm
# Poisson errors just get scaled
yerr_norm = yerr / norm

# Weights only for the weighted average
# NOTE(review): the weights are 1 / yerr (error on the raw counts), not
# 1 / yerr_norm or an inverse variance — confirm this is intended.
weights = np.ones_like(yerr)
weights[yerr_norm == 0] = 0
weights[yerr_norm != 0] = 1 / yerr[yerr_norm != 0]
def f(x, a, b, c):
    """
    Sine model for the rate with the baseline fixed to the weighted average.

    Parameters
    ----------
    x : array-like
        Points at which the model is evaluated (the fit uses the bin mids
        scaled to [0, 1]).
    a, b, c : float
        Amplitude, angular frequency and offset of the sine.

    Returns
    -------
    ndarray
        ``a * sin(b * (x - c)) + baseline``, where the baseline is
        ``np.average(h_norm, weights=weights)`` taken from the notebook
        globals ``h_norm`` and ``weights``.
    """
    # Evaluate at the argument `x`. The previous version silently used the
    # global `normed`, which only worked because callers set it beforehand.
    x = np.asarray(x)
    return a * np.sin(b * (x - c)) + np.average(h_norm, weights=weights)
# Map the bin mids linearly to the range [0, 1]
normed = (binmids - binmids.min()) / (binmids.max() - binmids.min())

# Scaled seed from handcrafted guess in cell below
p0 = [-0.0005, 2 * np.pi, 0.1]

# Fit the sine model `f` to the rate. No weights, because we threw out
# entries with 0.
# Also with weights, the fitted period is only half despite the good seed
# values...
res = sco.curve_fit(f=f, xdata=normed, ydata=h_norm, p0=p0)
# First element of the curve_fit result holds the best fit parameters
pars = res[0]

print("Best fit pars : ", pars)

In [None]:
"""
Plot like mrichman did on p. 113
Note: Date plots are THE MOST DIFFICULT AND LEAST FUN THING TODO...
"""
fig, ax = plt.subplots(1, 1)

# Show dates on x axis. Keep the full datetime (including the time of day):
# truncating to the calendar date would put all runs of one day on the same
# x position and would not match `xerr`, which is half the run length in days.
datetimes = astrotime(binmids, format="mjd").to_datetime()
dates = mpldates.date2num(datetimes)

# Every month, first day
months = mpldates.MonthLocator(bymonth=np.arange(1, 13), bymonthday=1)
monthsFmt = mpldates.DateFormatter("%b %Y")
ax.xaxis.set_major_locator(months)
ax.xaxis.set_major_formatter(monthsFmt)

ax.errorbar(dates, h_norm, fmt=".", xerr=xerr, yerr=yerr_norm)
ax.set_xlabel("Date")
ax.set_ylabel("Rate in Hz")
ax.set_xlim(dates[0], dates[-1])
ax.set_ylim(0., None)

# Plot the fitted sine model on a daily grid between first and last bin mid
delta_days = (datetimes[-1] - datetimes[0]).days
xdatetimes = [datetimes[0] + datetime.timedelta(days=int(x)) for x in
              np.arange(0, delta_days)]
xtimes_mjd = astrotime(xdatetimes).mjd
# Same [0, 1] scaling as used for the fit, so `pars` can be reused
normed = (xtimes_mjd - binmids.min()) / (binmids.max() - binmids.min())
y = f(normed, *pars)

# Handcrafted seed trial & error
# s = [-0.0005, 2 * np.pi, 0.1]
# y = s[0] * np.sin(s[1] * (normed + s[2]))
#     + np.average(h_norm, weights=weights)

# Convert back to mpl dates (again without dropping the time of day)
xdates = mpldates.date2num(xdatetimes)
ax.plot(xdates, y, "r-", zorder=5)
# Dashed line: weighted average rate, i.e. the fixed baseline of the model
ax.axhline(np.average(h_norm, weights=weights), 0, 1, color="k",
           ls="--", zorder=5)

# Autoprettify main xlabels
fig.autofmt_xdate(rotation=60)

# Show mjd on top
def ax2ticker(x):
    """Convert matplotlib date numbers `x` to MJD values via astropy Time."""
    return astrotime(mpldates.num2date(x)).mjd
# Second x axis on top, using the same tick positions and bounds as the
# date axis below
ax2 = ax.twiny()
ax2.set_xticks(ax.get_xticks())
ax2.set_xbound(ax.get_xbound())
# Label the mirrored ticks with the MJD of the bottom axis' tick positions
ax2.set_xticklabels(ax2ticker(ax.get_xticks()),
                    rotation=60, horizontalalignment="left")
ax2.set_xlabel("MJD")

# Let's make the BG pdf

Proceeding to section 6.3.1 Randomized BG Injection, p. 113.
Mrichmann draws events by:

1. Get number of bg events to be injected from a poisson distribution with expectation values drawn from the previously built bg temporal distribution.
   $$
   P_{\langle n_B\rangle}(N_m) = \frac{\langle n_B\rangle^{N_m}}{N_m!}\cdot \exp(-\langle n_B\rangle)
   $$
2. These events are then drawn from a 3D pdf in energy proxy, zenith proxy and sigma proxy.
   He does it by dividing 10x10x10 bins, first selecting energy, then zenith in that energy bin, then sigma in that zenith bin.
   
Here we create a smooth PDF using a kernel density estimator and obtain a sample by running a MCMC chain to create a sample a priori.
The bandwidth is set globally and cross validated to be robust.

**Some note on `numpy.histogramdd`:**

The input must be an array with shape (len(data), nDim), i.e. one row per event and one column per dimension.

Shape of h is the same as the number of bins in each dim: (50, 40, 10)
So the first dimension picks a single logE slice -> h[i].shape = (40, 10)
Second dim picks a dec slice -> h[:, i].shape = (50, 10)
3rd picks a sigma slice -> h[:, :, i].shape = (50, 40)

This is important: meshgrid repeats in second axis on first array xx.
For the second array, the first axis is repeated.
But h iterates over energy in 1st axis. So if we don't transpose, we have the whole histogram flipped! Compare to the plot in mrichman's thesis (cos(zen)).

**Some notes on KDE:**

Sebastian has already made a tool for adaptive and asymmetric KDE.
1. The Kernel is the covariance matrix of the whole data set to regard different scales
    + Note: This may only be a problem if one dim is spread with peaks, while the other is only widely spread. Then we cannot scale the Kernel too small to fit the peaks, because the smooth dimension is preventing that.
2. Use Silvermans or Scotts rule as a first guess.
3. Run a second pass and vary the local bandwidth according to the first guess local density.

We could replace 1 and 2 by scaling the data with the inverse covariance and then using a cross validation to find the first guess bandwidth.
Then using a second pass to vary locally.

## 3D histogram
First we make a 3D histogram to better compare to mrichmann and to get an overview over the distribution.

In [None]:
# HANDTUNED scale parameter to "fit" KDE expectation to data...
# TODO: Use Adaptive kernel width and asymmetric gaus kernels
#       For sigma it might make sense to a take a restricted kernel [0, inf]
fac_logE = 1.5
fac_dec = 2.5
fac_sigma = 2.

# Scaled observables; sigma is converted from radian to degree first
logE = fac_logE * exp["logE"]
sigma = fac_sigma * np.rad2deg(exp["sigma"])
# np.cos(np.pi / 2. + exp["dec"]); dec is for {sin(dec), dec, cos(zen)}
dec = fac_dec * exp["dec"]

# Binning is rather arbitrary because we don't calc stuff with the hist
bins = [50, 50, 50]
# Range for sigma is picked by looking at the 1D distribution and cutting of
# the tail. This will be covered by the KDE tail anyway. Rest is default
r = [[np.amin(logE), np.amax(logE)],
     [np.amin(dec), np.amax(dec)],
     [0., fac_sigma * 5.]]

# One row per event, one column per observable: shape (len(exp), 3)
sample = np.vstack((logE, dec, sigma)).T
# `normed` was removed from the numpy histogram functions; `density=False`
# keeps the plain event counts per bin. `bins` is overwritten with the
# returned list of bin edges per dimension.
h, bins = np.histogramdd(sample=sample, bins=bins, range=r, density=False)

# Make bin mids for later use
mids = []
for b in bins:
    mids.append(0.5 * (b[:-1] + b[1:]))

# Make a nice corner plot
fig, ax = corner_hist(h, bins=bins,
                      label=["logE", "dec", "sigma deg"],
                      hist2D_args={"cmap": "Greys"},
                      hist_args={"color":"#353132"})

## Kernel Density Estimation

We use scikit learn's cross validation with a gaussian kernel to get the most robust bandwidth.
Then we integrate with the same binning as above and compare to the 3D histogram.

This section relies heavily on [Jake van der Plas examples for KDE](https://jakevdp.github.io/blog/2013/12/01/kernel-density-estimation/).
More info on how KDE cross validation works can be found in [Modern Nonparametric Methods](http://www2.stat.duke.edu/~wjang/teaching/S05-293/lecture/ch6.pdf).

```python
# KDE CV is running on cluster and pickles the GridSearchCV
fname = "data/KDE_model_selector_CV20_exp_IC86_I.pickle"
with open(fname, "rb") as f:
    model_selector = pickle.load(f)

kde = model_selector.best_estimator_
bw = model_selector.best_params_["bandwidth"]
print("Best bandwidth : {:.3f}".format(bw))

# Estimate pdf for data sample with best model
kde.fit(sample)

# Generate some BG samples to compare to the original data hist
bg_samples = kde.sample(n_samples=2 * len(exp))

# Make histogram with same binning as original data
bg_h, bg_bins = np.histogramdd(sample=sample, bins=bins, range=r)
```

In [None]:
# Gaussian KDE with a fixed, global bandwidth
kde = skn.KernelDensity(rtol=1e-8, kernel="gaussian", bandwidth=0.1)

# Estimate pdf for data sample with best model
kde.fit(sample)

# Generate some BG samples to compare to the original data hist.
# Use more statistics, histograms get normalized and we want the best estimate
# for the pdf
nsamples_kde = int(1e8)
bg_samples = kde.sample(n_samples=nsamples_kde)

# Make histogram with same binning as original data.
# `density=True` replaces the removed `normed=True` keyword.
bg_h, bg_bins = np.histogramdd(sample=bg_samples, bins=bins, range=r,
                               density=True)

# Second axis is the scaled declination (`fac_dec * exp["dec"]`), so it is
# labelled "dec" like in the corner plot of the data.
fig, ax = corner_hist(bg_h, bins=bg_bins,
                      label=["logE", "dec", "sigma deg"],
                      hist2D_args={"cmap": "Greys"},
                      hist_args={"color":"#353132"})

## Compare KDE to original data

Make a ratio histogram of the KDE sample and the original data sample.

#### Helper functions

In [None]:
def marginalize_kde(sample, rnge, nvals, axis):
    """
    Project a sample onto a single axis and return its normalized 1D
    histogram (a simple MC integration over all other dimensions).

    Parameters
    ----------
    sample : ndarray
        Sample array; ``sample.T[axis]`` selects the values to histogram.
    rnge : sequence
        Lower and upper border of the histogram range.
    nvals : int
        Number of equidistant bin edges, giving ``nvals - 1`` bins.
    axis : int
        Index of the dimension that is kept.

    Returns
    -------
    centers, density : ndarray
        Bin mids and the density-normalized histogram values.
    """
    lower, upper = rnge[0], rnge[1]
    edges = np.linspace(lower, upper, nvals)
    column = sample.T[int(axis)]
    density, edges = np.histogram(column, bins=edges, density=True)
    centers = (edges[:-1] + edges[1:]) * 0.5
    return centers, density

In [None]:
# Try to integrate one dimension out with a real integration over one dim.
# Pretend, that the integral point in the middle is a good approximation of
# the bin.
# TODO: Multiply the value from the integration with dx, dy binwidth to
# approximate the integration over the binwidth.
# Simply sampling is so much easier

import scipy.integrate as scint

# Bin mids must be computed BEFORE building the grid. Previously `m` was only
# assigned after the integration loop, so on a fresh run the grid was built
# from the boolean mask `m` of an earlier cell.
m = get_binmids(bins)
xx, yy = map(np.ravel, np.meshgrid(m[0], m[1]))

# One row per grid point: (coordinate on axis 0, coordinate on axis 1)
grids = np.vstack((xx, yy)).T

def pdf(x, *args):
    """
    KDE pdf value at a point where two coordinates are fixed and the
    remaining one is the integration variable `x`.

    args = (a0, a1, axes): `axes` lists the two fixed axes, `a0` and `a1`
    are the coordinates on those axes.
    """
    a0, a1, axes = args

    point = np.array([x, x, x], dtype=float)
    point[axes] = [a0, a1]

    # `score_samples` returns an array with one log-density per point; quad
    # needs a plain scalar, so pick the single entry.
    return np.exp(kde.score_samples([point]))[0]

margin = []
for gp in grids:
    # Integrate over the third axis across the full histogram range
    integral = scint.quad(pdf, bins[2][0], bins[2][-1],
                          args=(gp[0], gp[1], [0, 1]))
    margin.append(integral)

# quad returns (integral value, error estimate); keep the value only
vals = np.array(margin)[:, 0]

plt.hist2d(xx, yy, bins=[bins[0], bins[1]], weights=vals);

### 2D marginalization

In [None]:
xlabel = ["scaled " + s for s in ["logE", "logE", "dec"]]
ylabel = ["scaled " + s for s in ["dec", "sigma in °", "sigma in °"]]

# One figure per pair of axes: compare the 2D marginalized data histogram to
# the 2D marginalized KDE sample histogram.
for i, axes in enumerate([[0, 1], [0, 2], [1, 2]]):
    # Bin edges of the two axes shown in this iteration
    _b = [bins[axes[0]], bins[axes[1]]]
    # `density=True` replaces the removed `normed=True` keyword
    h_exp, b_exp = np.histogramdd(sample[:, axes], bins=_b, density=True)
    h_kde, b_kde = np.histogramdd(bg_samples[:, axes], bins=_b, density=True)

    # KDE is expectation, but sampled with much more events.
    # Weights would simply scale the total number of KDE events to match the
    # number of original events. That would be the mean for the poisson
    # distribution in each bin. So to get OK KDE expectation sqrt(n) errors
    # in each bin, we divide not by the number of drawn KDE but by the number
    # of original events.
    # The expected count in a bin is density * len(exp) * bin area, so the
    # error on the density is sqrt(density / (len(exp) * bin area)). The bin
    # area must be built from the two axes actually shown; `np.outer` already
    # has the same (n_x, n_y) layout as the histograms.
    bin_area = np.outer(np.diff(_b[0]), np.diff(_b[1]))
    norm_kde = len(exp) * bin_area
    sigma_kde = np.sqrt(h_kde / norm_kde)

    # Make 3 different diff/ratio hists to estimate KDE quality in
    # 2D marginalization. Bins without data / without expectation stay 0.
    m = (h_exp > 0.)
    ratio_h = np.zeros_like(h_exp)
    ratio_h[m] = h_kde[m] / h_exp[m]

    diff_h = h_kde - h_exp

    m = (sigma_kde > 0.)
    sigma_ratio_h = np.zeros_like(h_exp)
    sigma_ratio_h[m] = (h_exp[m] - h_kde[m]) / sigma_kde[m]

    # Bin mids and hist grid
    _b = b_exp
    m = get_binmids(_b)
    xx, yy = map(np.ravel, np.meshgrid(m[0], m[1]))

    # Big plot on the left and three right
    fig = plt.figure(figsize=(10, 6))
    gs = gridspec.GridSpec(3, 3)
    axl = fig.add_subplot(gs[:, :2])
    axrt = fig.add_subplot(gs[0, 2])
    axrc = fig.add_subplot(gs[1, 2])
    axrb = fig.add_subplot(gs[2, 2])

    # Steal space for colorbars
    caxl = split_axis(axl, "right")
    caxrt = split_axis(axrt, "left")
    caxrc = split_axis(axrc, "left")
    caxrb = split_axis(axrb, "left")

    # Unset top and center xticklabels as they are shared with the bottom plot
    axrt.set_xticklabels([])
    axrc.set_xticklabels([])

    # Left: Difference over KDE sigma
    # cbar_extr = max(np.amax(sigma_ratio_h),  # Center colormap to min/max
    #                         abs(np.amin(sigma_ratio_h)))
    # Histograms are transposed before ravel to match the meshgrid layout
    _, _, _, imgl = axl.hist2d(xx, yy, bins=_b, weights=sigma_ratio_h.T.ravel(),
                               cmap="seismic", vmax=5, vmin=-5)
    cbarl = plt.colorbar(cax=caxl, mappable=imgl)
    axl.set_xlabel(xlabel[i])
    axl.set_ylabel(ylabel[i])
    axl.set_title("(exp - kde) / sigma_kde")

    # Right top: Ratio
    _, _, _, imgrt = axrt.hist2d(xx, yy, bins=_b, weights=ratio_h.T.ravel(),
                                 cmap="seismic", vmax=2, vmin=0);
    cbarrt = plt.colorbar(cax=caxrt, mappable=imgrt)
    axrt.set_title("kde / exp")

    # Right center: Data hist
    _, _, _, imgrc = axrc.hist2d(xx, yy, bins=_b, weights=h_exp.T.ravel(),
                                 cmap="Greys", norm=LogNorm());
    cbarrc = plt.colorbar(cax=caxrc, mappable=imgrc)
    axrc.set_title("exp logscale")

    # Right bottom: KDE hist, same colorbar scale as on data
    _, _, _, imgrb = axrb.hist2d(xx, yy, bins=_b, weights=h_kde.T.ravel(),
                                 cmap="Greys", norm=LogNorm());
    # Take the color limits from the data image itself; the colorbar object
    # no longer offers `get_clim` in current matplotlib versions.
    imgrb.set_clim(imgrc.get_clim())
    cbarrb = plt.colorbar(cax=caxrb, mappable=imgrb)
    axrb.set_title("kde logscale")

    # Set tick and label positions
    for ax in [caxrt, caxrc, caxrb]:
        ax.yaxis.set_label_position("right")
        ax.yaxis.tick_left()

    fig.tight_layout()
    plt.show()

### 1D marginalization

In [None]:
# Quick check: 1D density histogram of the first sample column (scaled logE)
# with the first axis' bin edges.
_b = np.array(bins)
# `density=True` replaces the removed `normed=True` keyword
h_exp, b_exp = np.histogramdd(sample[:, [0,]], bins=_b[[0,]], density=True)

m = get_binmids(b_exp)

# Re-draw the already binned values: one entry per bin mid, weighted with
# the bin content
plt.hist(m[0], bins=b_exp[0], weights=h_exp);

In [None]:
xlabel = ["scaled " + s for s in ["logE", "dec", "sigma °"]]

# One figure per dimension: the two axes in `axes` are marginalized, so the
# remaining axis has index `i` (it is also used to pick the column of
# `bg_samples` below).
# NOTE(review): `h` holds plain counts while `bg_h` is density normalized;
# presumably `hist_marginalize` normalizes its output — verify in anapymods3.
for i, axes in enumerate([(1, 2), (0, 2), (0, 1)]):
    h_exp, b_exp = hist_marginalize(h, bins, axes=axes)
    h_kde, b_kde = hist_marginalize(bg_h, bg_bins, axes=axes)
      
    # KDE is expectation, but sampled with much more events.
    # Weights would simply scale the total number of KDE events to match the
    # number of original events. That would be the mean for the poisson
    # distribution in each bin. So to get OK KDE expectation sqrt(n) errors
    # in each bin, we divide not by the number of drawn KDE but by the number
    # of original events.
    norm_kde = len(exp) * np.diff(b_kde)
    sigma_kde = np.sqrt(h_kde / norm_kde)

    # Make 3 different diff/ratio hists to estimate KDE quality in
    # 1D marginalization. Bins without data / without expectation stay 0.
    m = (h_exp > 0.)
    ratio_h = np.zeros_like(h_exp)
    ratio_h[m] = h_kde[m] / h_exp[m]

    diff_h = h_kde - h_exp

    m = (sigma_kde > 0.)
    sigma_ratio_h = np.zeros_like(h_exp)
    sigma_ratio_h[m] = (h_exp[m] - h_kde[m]) / sigma_kde[m]

    # Bin mids (`m` is reused here, it no longer is the mask from above)
    _b = b_exp
    m = get_binmids([_b])[0]
    
    # Plot both and the ratio normed. Big plot on the left and three right
    fig = plt.figure(figsize=(10, 6))
    gs = gridspec.GridSpec(3, 3)
    axl = fig.add_subplot(gs[:, :2])
    axrt = fig.add_subplot(gs[0, 2])
    axrc = fig.add_subplot(gs[1, 2])
    axrb = fig.add_subplot(gs[2, 2])

    # Hide the tick labels of the top and center panel on the right
    axrt.set_xticklabels([])
    axrc.set_xticklabels([])

    # Set ticks and labels right
    for ax in [axrt, axrc, axrb]:
        ax.yaxis.set_label_position("right")
        ax.yaxis.tick_right()

    # Limits
    for ax in [axl, axrt, axrc, axrb]:
        ax.set_xlim(_b[0], _b[-1])
        
    # Main plot:
    # Plot more dense to mimic a smooth curve
    __h, __b = np.histogram(bg_samples[:, i], bins=500,
                            range=[_b[0], _b[-1]], density=True)
    __m = get_binmids([__b])[0]
    axl.plot(__m, __h, lw=3, alpha=0.5)
    
    # Already binned values are drawn as step histograms: one entry per bin
    # mid, weighted with the bin content
    _ = axl.hist(m, bins=_b, weights=h_exp, label="exp", histtype="step",
                 lw=2, color="k")
    _ = axl.errorbar(m, h_kde, yerr=sigma_kde, fmt=",", color="r")
    _ = axl.hist(m, bins=_b, weights=h_kde, label="kde", histtype="step",
                 lw=2, color="r")    
    
    axl.set_xlabel(xlabel[i])
    axl.legend(loc="upper right")

    # Top right: Difference
    _ = axrt.axhline(0, 0, 1, color="k", ls="-")
    _ = axrt.hlines([-.02, -.01, .01, .02], _b[0], _b[-1],
                    colors='#353132', linestyles='dashed')
    _ = axrt.hist(m, bins=_b, weights=diff_h, histtype="step", lw=2, color="r")
    axrt.set_ylim(-.05, +.05)
    axrt.set_ylabel("kde - exp")

    # Center right: Ratio
    _ = axrc.axhline(1, 0, 1, color="k", ls="-")
    _ = axrc.hlines([0.8, 0.9, 1.1, 1.2], _b[0], _b[-1],
                    colors='#353132', linestyles='dashed')
    _ = axrc.hist(m, bins=_b, weights=ratio_h, histtype="step", lw=2, color="r")
    axrc.set_ylim(.5, 1.5)
    axrc.set_ylabel("kde / exp")

    # Bottom right: Ratio of diff to sigma of expectation
    _ = axrb.axhline(0, 0, 1, color="k", ls="-")
    _ = axrb.hlines([-2, -1, 1, 2], _b[0], _b[-1],
                    colors='#353132', linestyles='dashed')
    _ = axrb.hist(m, bins=_b, weights=sigma_ratio_h, histtype="step", lw=2, color="r")
    axrb.set_ylim(-3, +3)
    axrb.set_ylabel("(exp-kde)/sigma_kde")
    plt.show()

# Define the Likelihoods

Here we define our Likelihoods.
We are given a source event occurrence (can be GRB, GW, HESE or anything else) at a given position in space and time.
We want to search for a significant contribution of other events, within a predefined region in time and space around the source events.
For this we need to derive the expected signal and background contributions in that frame.

The Likelihood that describes this scenario can be derived from counting statistics.
If we expect $n_S$ signal and $n_B$ background events in the given frame, then the probability of observing $N$ events is given by a poisson pdf:

$$
    P_\text{Poisson}(N\ |\ n_S + n_B) = \mathcal{L}(N | n_S, n_B) = \frac{(n_S + n_B)^{N}}{N!}\cdot \exp{(-(n_S + n_B))}
$$

We want to fit for the number of signal events $n_S$ in the frame.
But each event doesn't have the same probability of contributing to either signal or background, because we don't have that information on a per event basis.
So we include prior information on a per event basis to account for that.

$$
    \mathcal{L}(N | n_S, n_B) = \frac{(n_S + n_B)^{N}}{N!}\cdot \exp{(-(n_S + n_B))} \cdot \prod_{i=1}^N P_i
$$

Also the simple poisson pdf above only has one parameter, the total number of events, which can be fit for.
So we need to resolve this degeneracy in $n_S$, $n_B$ by giving additional information.
For that we include a weighted combination of the probability for an event to be signal, denoted by the PDF $S_i$, and for it to be background, denoted by $B_i$.
Because the simple counting probabilities are $n_S / (n_S + n_B)$ to count a signal event and likewise $n_B / (n_S + n_B)$ to count a background event we construct the per event prior $P_i$ as:

$$
    P_i = \frac{n_S}{n_S + n_B}\cdot S_i + \frac{n_B}{n_S + n_B}\cdot B_i
        = \frac{n_S \cdot S_i + n_B \cdot B_i}{n_S + n_B}
$$

Note that for equal probabilities $S_i$ and $B_i$, we simply end up with the normal poisson counting statistic.

Plugging that back into the likelihood we get:

$$
    \mathcal{L}(N | n_S, n_B) = \frac{(n_S + n_B)^{N}}{N!}\cdot \exp{(-(n_S + n_B))} \cdot \prod_{i=1}^N \frac{n_S \cdot S_i + n_B \cdot B_i}{n_S + n_B}
$$

Taking the natural logarithm to get the log-likelihood we arrive at:

$$
    \ln\mathcal{L}(N | n_S, n_B) = -(n_S + n_B) -\ln(N!) + \sum_{i=1}^N \ln((n_S + n_B) P_i)
$$

If we weight up $n_S$ then every events signal PDF is contributing a bit more than the background pdf.
So the fitter tries to find the combination of $n_S$ and $n_B$ that maximizes the likelihood.

To further simplify, we can use a measured and fixed background expectation rate $\langle n_B\rangle$ and fit only for the number of signal events.
Then we only fit for the number of signal events $n_S$.
The fixed background rate can be extracted from data by using the pdf of a larger timescale and average over that (or fit a function) to ensure that local fluctuations don't matter.

Then we end up with our full Likelihood (the denominator in $P_i$ cancels with the term from the poisson PDF):

$$
    \ln\mathcal{L}(N | n_S) = -(n_S + \langle n_B\rangle) -\ln(N!) + \sum_{i=1}^N \ln(n_S S_i + \langle n_B\rangle B_i)
$$

For the test statistic we want to test the hypothesis of having no signal $n_S=0$ vs. the alternative with a free parameter $n_S$:

$$
    \Lambda = \ln\frac{\mathcal{L}(\hat{n}_S)}{\mathcal{L}(n_S=0)}
            = \left[-(\hat{n}_S + \langle n_B\rangle) -\ln(N!) + \sum_{i=1}^N \ln(\hat{n}_S S_i + \langle n_B\rangle B_i)\right] - \left[-\langle n_B\rangle -\ln(N!) + \sum_{i=1}^N \ln(\langle n_B\rangle B_i)\right]
            = -\hat{n}_S + \sum_{i=1}^N \ln\left( \frac{\hat{n}_S S_i}{\langle n_B\rangle B_i} + 1 \right)
$$

The per event PDFs $S_i$ and $B_i$ can depend on arbitrary parameters.
The common choice here is to use a time, energy proxy and spatial proxy dependency, which has the most separation power:

$$
    S_i(x_i, t_i, E_i) = S_T(t_i) \cdot S_S(x_i) \cdot S_E(E_i) \\ 
    B_i(x_i, t_i, E_i) = B_T(t_i) \cdot B_S(x_i) \cdot B_E(E_i) 
$$

Because the Likelihood only contains ratios of the PDF, we only have to construct 1D PDFs of the signal to background ratio for each time, spatial and energy distribution.

**Why not using a combined spatial/energy PDF? Like the one used to draw background events. See coenders talk 4 for exactly that**

## Time PDF

The time pdf is starting at the source events time with a given time window.
The edges are falling off like a gaussian with a relative length.
Edge gaussians are truncated to 0 after 4 sigma to avoid more calculation with no effect, as the function is almost zero after 5 sigma.

In [None]:
def time_pdf(t, window_size, trunc=4):
    """
    t in seconds
    
    window_size in seconds
    
    trunc in units of sigma
    """
    # Set edge width to window size
    gaus_sigma = window_size
    sigma_llim, sigma_ulim = 2, 30
    # Limit size of edges in both directions
    if gaus_sigma < sigma_llim:
        gaus_sigma = sigma_llim
    elif gaus_sigma > sigma_ulim:
        gaus_sigma = sigma_ulim
        
    # Get the pdfs
    # Set support (truncation) for gaussian kernel
    gaus = scs.norm(a=-trunc * gaus_sigma, b=+trunc * gaus_sigma)