In [None]:
#hide
from utilities.ipynb_docgen import *
from nbdev import *

# Analyses

> Ongoing analyses


**`light_curve` library functions used here**
- `get_cells`
- `get_lightcurve`
- `get_bb_partition`
- `partition_cells`
- `fit_cells`
- `flux_plot`
- `simulate_cells`

## Introduction

This notebook does not produce a module to add to the library; it is a place to run and report on analyses that use the library. It uses local code, shown below.

This is an exploration of this style of presenting code, and data analysis based on it.

## Geminga analyses

Following Kerr's example, we use the source Geminga to check that this analysis of the data gives results consistent with the source being constant. As you can see below, the BB analysis finds 16 breaks, which merit further checks. That is followed by a simulation using
Geminga's weight distribution and exposure. The run shown here finds one minor break, a change of 0.1%. 

**Local code**

In [None]:
#collapse-hide
from light_curves.config import *
from light_curves.bayesian import *
from light_curves.simulation import *
from light_curves.lightcurve import *
from light_curves.cells import *

def bb_overplot(config, lc, bb_fit, ax=None,  **kwargs):
    """Draw a light curve and its Bayesian-Blocks fit on the same axes.

    Parameters
    ----------
    config : passed through unchanged to `flux_plot`
    lc : the cell-by-cell light curve, drawn first with the
        lightblue / sandybrown / blue color set
    bb_fit : the per-block fit, drawn on top with ``step=True``
    ax : Axes to draw on; when None a new 12x4 figure is created
    **kwargs : forwarded to both `flux_plot` calls (e.g. ``tzero``, ``xlim``)
    """
    # Test against None explicitly: relying on the truthiness of an Axes-like
    # object would silently replace a caller-supplied axes that happens to be falsy.
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(12, 4))
    flux_plot(config, lc, ax=ax, colors=('lightblue', 'sandybrown', 'blue'), **kwargs)
    flux_plot(config, bb_fit, ax=ax, step=True, **kwargs)
    
def simulation(config, source, bb_key=None):
    """Simulate a constant-flux light curve for `source` and analyze it.

    The constant rate is taken from the data: the sum of `n` divided by the
    sum of `tw` over the cells selected by ``e>0.3``.

    bb_key is handed to `get_bb_partition` as its ``key`` argument.

    Returns the pair (simulated light curve, fit to its BB partition).
    """
    # The light curve is not used below; the call is kept because it still
    # runs (and reports) the light-curve step before the cells are loaded.
    get_lightcurve(config, source)
    observed_cells = get_cells(config, source)

    # Average rate of the real data, used as the simulated source flux
    selected = observed_cells.query('e>0.3')
    total_tw = np.sum(selected.tw)
    total_n = np.sum(selected.n)
    constant_flux = lambda t: total_n / total_tw

    # Simulated cells, and the cell-by-cell fit that forms the light curve
    sim_cells = simulate_cells(config, source, source_flux=constant_flux)
    sim_lc = fit_cells(config, sim_cells)

    # Bayesian-Blocks edges for the simulated light curve
    block_edges = get_bb_partition(config, sim_lc, key=bb_key)

    # Regroup the simulated cells by those edges and fit each block
    block_cells = partition_cells(config, sim_cells, block_edges)
    block_fit = fit_cells(config, block_cells)
    return sim_lc, block_fit

def analyze_data(config, source, bb_key='bb-Geminga-test'):
    """Run the Bayesian-Blocks analysis on the data for `source`.

    Parameters
    ----------
    config : passed through to every library call
    source : the source to analyze
    bb_key : value handed to `get_bb_partition` as its ``key`` argument.
        The default is the Geminga-specific key this function always used;
        pass a different key when analyzing another source so its edges are
        not stored or looked up under Geminga's key.

    Returns
    -------
    (lc, bb_lc) : the cell-by-cell light curve and the fit to the cells
        regrouped by the BB edges.
    """
    lc = get_lightcurve(config, source)
    cells = get_cells(config, source)
    edges = get_bb_partition(config, lc, LikelihoodFitness, key=bb_key)
    bb_cells = partition_cells(config, cells, edges)
    bb_lc = fit_cells(config, bb_cells)
    return lc, bb_lc

def fit_table(lc, expect=1.0):
    """Generate a summary table from a light curve.

    Parameters
    ----------
    lc : DataFrame with columns ``t``, ``tw``, ``n``, ``e`` and ``fit``; each
        ``fit`` entry must provide ``flux``, a two-element ``errors`` and
        ``poiss.sigma_dev(value)``.
    expect : value passed to ``poiss.sigma_dev`` for each fit

    Returns
    -------
    A new DataFrame (``lc`` is not modified) with the ``t tw n e`` columns plus
    ``flux`` (rounded to 4 places), ``errors`` (lower and upper limits relative
    to the flux, rounded to 3 places) and ``sigma_dev`` (rounded to 1 place).
    """
    fits = lc.fit
    flux = fits.apply(lambda f: f.flux)
    errors = fits.apply(
        lambda f: (round(f.errors[0] - f.flux, 3), round(f.errors[1] - f.flux, 3)))
    sigma_dev = fits.apply(lambda f: round(f.poiss.sigma_dev(expect), 1))

    # Work on an explicit copy: adding columns to a bare column selection of lc
    # is chained assignment and triggers pandas' SettingWithCopyWarning.
    df = lc['t tw n e'.split()].copy()
    # .values keeps the assignment positional, independent of index alignment
    df['flux'] = flux.values.round(4)
    df['errors'] = errors.values
    df['sigma_dev'] = sigma_dev.values
    return df

In [None]:
#collapse-hide
def analysis_plots(name, expect=0.991, simname=''):
    """
    #### {sim} Geminga light curve
    
    Perform Bayesian Blocks partition, fits to blocks
    
    {output}
    
    This shows the fits to all cells, with the BB fit overlays
    {fig1}
    
    Since this is a constant source, there should be no breaks, that is, only one partition.
    Here is a table of the partition fits:
    
    {df_text}
    The last column represents the compatibility of the flux measurement for each partition
    with the expected value {expect} in equivalent sigma units.
    
    Expand the plot around short, < 100 day partitions.
    {short_check}
    
    {fig2}
    """
    # NOTE: the docstring above is the report template: the {fields} are filled
    # from the locals() returned at the end, so it must keep its exact wording
    # and the local names it refers to must not be renamed.
    #
    # name    : source name, used to build the PointSource and to label figure 1
    # expect  : expected flux, passed to fit_table for the sigma_dev column
    # simname : if non-empty, analyze a simulation instead of the data and use
    #           this string as the simulation's BB key
    config = Config()
    source = PointSource(name)
    simulated = bool(simname)
    sim = 'Simulated' if simulated else ''
    with capture_print('Analysis output') as output:
        if not simulated:
            lc, bb_lc = analyze_data(config, source)
        else:
            lc, bb_lc = simulation(config, source, bb_key=simname)

    pd.set_option('display.precision', 3)
    df = fit_table(bb_lc, expect=expect)
    df_text = monospace(str(df), 'BB fit table', open=True)

    # Overview: all cells with the block fit overlaid
    plt.rc('font', size=16)
    fig1, ax = plt.subplots(1, 1, sharex=True, figsize=(10, 4), num=1)
    bb_overplot(config, lc, bb_lc, ax=ax)
    ax.text(0.05, 0.85, name, transform=ax.transAxes);
    fig1.width = 600

    # Zoomed views, one panel per block shorter than 100 days
    bb_short = bb_lc.query('tw<100')
    ns = len(bb_short)
    if ns > 0:
        short_check = f'There are {ns} such.'
        # Three panels per row, with enough rows for every short block.
        # (ns+1)//3 used as the column count gave no columns for ns=1 and too
        # few panels for e.g. ns=4, so zip() silently dropped blocks.
        rows = (ns + 2) // 3
        fig2, axx = plt.subplots(rows, 3, figsize=(15, 4*rows), sharey=True, sharex=True,
                     gridspec_kw=dict(top=0.85, left=0.08, bottom=0.15, hspace=0.2), num=2)
        fig2.width = 600
        for t, ax in zip(bb_short.t, axx.flatten()):
            bb_overplot(config, lc, bb_lc, ax=ax, tzero=t, xlim=(-50, +50))
    else:
        fig2 = ''
        short_check = 'None found.'
    return locals()

# Build the report for the Geminga data: analysis_plots is run and its docstring
# template is rendered as the cell output shown below. Skipped entirely when
# Config().valid is false (presumably the data are not available - TODO confirm).
if Config().valid:
    nbdoc(analysis_plots, 'Geminga')

####  Geminga light curve

Perform Bayesian Blocks partition, fits to blocks

<details  class="nbdoc-description" >  <summary> Analysis output </summary>  <div style="margin-left: 5%"><pre>Light curve for Geminga: Restoring from cache with key "lightfcurve_Geminga"<br>Cell data for Geminga: Restoring from cache with key "cells_Geminga"<br>BB edges for...: Restoring from cache with key "bb-Geminga-test"<br>Partitioned 3873 cells into 17 blocks, using LikelihoodFitness <br>Loaded 17 / 17 cells with exposure &gt; 0.3 for fitting<br></pre></div> </details>

This shows the fits to all cells, with the BB fit overlays
<div class="nbdoc_image">
<a href="images/analysis_plots_fig_01.png"><figure style="margin-left: 5%">
   <img src="images/analysis_plots_fig_01.png" alt="Figure 1 at images/analysis_plots_fig_01.png" width=600> 
</figure></a></div>


Since this is a constant source, there should be no breaks, that is, only one partition.
Here is a table of the partition fits:

<details open class="nbdoc-description" >  <summary> BB fit table </summary>  <div style="margin-left: 5%"><pre>          t      tw       n      e   flux           errors  sigma_dev<br>0   55027.0   688.0  207832  1.126  0.998  (-0.003, 0.003)        2.5<br>1   55378.0    14.0    3797  0.911  1.122  (-0.024, 0.025)        5.5<br>2   55560.5   351.0   96416  1.028  0.985  (-0.004, 0.004)       -1.5<br>3   55748.0    24.0    6887  0.960  1.123  (-0.018, 0.018)        7.5<br>4   56113.0   706.0  190965  1.011  0.991  (-0.003, 0.003)        0.1<br>5   56486.0    40.0   11267  0.989  1.070  (-0.014, 0.014)        6.0<br>6   56512.5    13.0    3402  1.046  0.901  (-0.021, 0.022)       -4.0<br>7   56673.5   309.0   84043  1.020  0.989  (-0.005, 0.005)       -0.4<br>8   56879.5   103.0   28168  0.996  1.027  (-0.008, 0.008)        4.3<br>9   56972.5    83.0   20961  0.957  0.966  (-0.009, 0.009)       -2.7<br>10  57017.5     7.0    2070  0.973  1.156  (-0.034, 0.034)        5.1<br>11  57109.0   176.0   45616  0.950  1.003  (-0.006, 0.006)        1.9<br>12  57197.5     1.0     149  0.852  0.535  (-0.066, 0.071)       -5.5<br>13  57723.0  1050.0  285243  1.013  0.993  (-0.003, 0.003)        0.6<br>14  58274.0    52.0    9282  0.622  1.071  (-0.015, 0.015)        5.5<br>15  58468.5   337.0   74075  0.814  0.991  (-0.005, 0.005)        0.1<br>16  58667.0    60.0   10261  0.586  1.078  (-0.015, 0.015)        6.1</pre></div> </details>
The last column represents the compatibility of the flux measurement for each partition
with the expected value 0.991 in equivalent sigma units.

Expand the plot around short, < 100 day partitions.
There are 9 such.

<div class="nbdoc_image">
<a href="images/analysis_plots_fig_02.png"><figure style="margin-left: 5%">
   <img src="images/analysis_plots_fig_02.png" alt="Figure 2 at images/analysis_plots_fig_02.png" width=600> 
</figure></a></div>



In [None]:
#
# Same report for a simulated Geminga light curve: the non-empty simname selects
# the simulation branch of analysis_plots and is used as its BB key.
# name='analysis_sim' appears as the image-file prefix in the rendered output
# (images/analysis_sim_fig_01.png), keeping these figures apart from the data run.
if Config().valid:
    nbdoc(analysis_plots, 'Geminga', name='analysis_sim',  simname='analysis_plot_sim')

#### Simulated Geminga light curve

Perform Bayesian Blocks partition, fits to blocks

<details  class="nbdoc-description" >  <summary> Analysis output </summary>  <div style="margin-left: 5%"><pre>Light curve for Geminga: Restoring from cache with key "lightfcurve_Geminga"<br>Cell data for Geminga: Restoring from cache with key "cells_Geminga"<br>binned exposure for source Geminga: Restoring from cache with key "binned_exposure_Geminga"<br>Weight histogram for Geminga: Restoring from cache with key "weight_hist_Geminga"<br>Loaded 3873 / 4015 cells with exposure &gt; 0.3 for fitting<br>BB edges for...: Restoring from cache with key "analysis_plot_sim"<br>Partitioned 3873 cells into 2 blocks, using LikelihoodFitness <br>Loaded 2 / 2 cells with exposure &gt; 0.3 for fitting<br></pre></div> </details>

This shows the fits to all cells, with the BB fit overlays
<div class="nbdoc_image">
<a href="images/analysis_sim_fig_01.png"><figure style="margin-left: 5%">
   <img src="images/analysis_sim_fig_01.png" alt="Figure 1 at images/analysis_sim_fig_01.png" width=600> 
</figure></a></div>


Since this is a constant source, there should be no breaks, that is, only one partition.
Here is a table of the partition fits:

<details open class="nbdoc-description" >  <summary> BB fit table </summary>  <div style="margin-left: 5%"><pre>         t      tw       n      e   flux           errors  sigma_dev<br>0  56183.0  3000.0  869950  1.044  0.989  (-0.001, 0.001)       -1.3<br>1  58190.0  1014.0  244968  0.870  0.989  (-0.003, 0.003)       -0.8</pre></div> </details>
The last column represents the compatibility of the flux measurement for each partition
with the expected value 0.991 in equivalent sigma units.

Expand the plot around short, < 100 day partitions.
None found.




## TO DO
- Find out why, for the simulation, and apparently for the data, there is a 1% discrepancy between the measured and expected ($\alpha=0$) flux. There are two ways that likelihoods for adjacent cells are combined: the Bayesian Blocks algorithm uses tables generated from the Poisson-like representation of each cell, currently with 50 entries, while the fits to the blocks it recognizes use the actual weights. I have not carefully compared these.

- Look at the 9 or so intervals detected for the data, of which 5 have measured flux increases around 10%, more than 4$\sigma$. Possibilities are a problem with the exposure, or a change in the background. The latter can be examined by a 2-D fit with $\beta$ free. Another possibility, fixing $\alpha=0$ and fitting only $\beta$, is not (yet) supported.


In [None]:
# Shell escape: print the current date and time, recording when this notebook was last run.
!date

Tue Dec 22 07:10:40 PST 2020
