In [None]:
# default_exp bayesian
%load_ext autoreload
%autoreload 2

from nbdev import showdoc 
from utilities.ipynb_docgen import *

# Bayesian Blocks

> Partition a light curve with the Bayesian Block algorithm

The algorithm depends on a 'fitness' function of the light curve: an evaluation of the 
likelihood for a set of sequential cells. Two fitness functions are implemented here, one using the number of counts and one using the Kerr likelihood:

- `CountFitness`
- `LikelihoodFitness`

See the [Bayesian Block reference](https://arxiv.org/pdf/1207.5578.pdf)

In [None]:
#export
import os
import numpy as np
import pandas as pd
from astropy.stats.bayesian_blocks import FitnessFunc

from light_curves.config import *
from light_curves.lightcurve import get_lightcurve, fit_cells, flux_plot
from light_curves.cells import get_cells, partition_cells


In [None]:
#collapse_hide


# Module-level dictionary of light curves keyed by source name; filled by data_setup
# and read by the later demonstration cells.
lcs ={}
# NOTE: the docstring below is a template with {placeholders}, and the function returns
# locals(); presumably nbdoc fills the template from that dict (the rendered output is
# consistent with this). Local variable names are therefore part of the output -- do not
# rename them without updating the template.
def data_setup(lcs = lcs, mjd_query='54750<t<54855', names=['Geminga','3C 279']):
    """
    ## Generate data sets for an AGN and a pulsar
    {printout}
    
    Choose the time interval, {mjd_query} ({days} days) to bracket a modest flare of the AGN.
     
    <table>
    <tr> <td>Pulsar</td><td>AGN</td></tr>
    <tr>
    <td>{fig1}</td> <td>{fig2}</td>
    </tr>
    </table>
    """
    
    from light_curves.config import Config,  PointSource
    from light_curves.lightcurve import get_lightcurve, flux_plot
    config = Config()
    figs=[]
    plt.rc('font', size=20)
    # everything printed inside this block is captured into `printout`
    with capture_print('printout') as printout:
        for i,name in enumerate(names):
            lcfull = get_lightcurve(config,  PointSource(name))
            # keep only the cells selected by mjd_query (if any) and store them in the shared dict
            lc = lcs[name] = lcfull.query(mjd_query) if mjd_query else lcfull
            fig= flux_plot(config,lc, fignum=i, title=name)
            figs.append(figure(fig, width=300))
    # the template has exactly two figure slots, so `names` must contain exactly two sources
    fig1, fig2 = figs
    # escape '<' so the query can be shown inside the HTML-bearing template
    # NOTE(review): this raises if mjd_query is None, although the loop above tolerates a falsy query
    mjd_query = mjd_query.replace('<', '&lt;')
    # number of rows in the last light curve processed; shown as "days" in the template
    # (presumably one cell per day -- TODO confirm)
    days = len(lc)
    return locals()

# Run the setup only when the configuration reports itself valid.
if Config().valid:
    nbdoc(data_setup, lcs, mjd_query='54750<t<54855', )

## Generate data sets for an AGN and a pulsar
<details  class="nbdoc-description" >  <summary> printout </summary>  <div style="margin-left: 5%"><pre>Light curve for Geminga: Saving to cache with key "lightfcurve_Geminga"<br>Cell data for Geminga: Saving to cache with key "cells_Geminga"<br>Photon data: Restoring from cache with key "photons_Geminga"<br><br>	Selected 1,313,726 photons within 5 deg of  (195.13,4.27)<br>	Energies: 100.0-1000000 MeV<br>	Dates:    2008-08-04 15:46 - 2019-08-03 01:17<br>	MJD  :    54682.7          - 58698.1         <br>Load weights from file /mnt/c/users/thbur/OneDrive/fermi/weight_files/Geminga_weights.pkl<br>	Found: PSR J0633+1746 at (195.14, 4.27)<br>	Applyng weights: 240 / 1313726 photon pixels are outside weight region<br>	233109 weights set to NaN<br>binned exposure for source Geminga: Saving to cache with key "binned_exposure_Geminga"<br>exposure for Geminga: Saving to cache with key "exposure_Geminga"<br>Processing 12 S/C history (FT2) files<br>  applying cuts cos(theta) &lt; 0.4,  z &lt; 100<br>	file /home/burnett/work/lat-data/ft2/ft2_2008.fits: 362996 entries, 360944 in GTI<br>	file /home/burnett/work/lat-data/ft2/ft2_2009.fits: 874661 entries, 870446 in GTI<br>	file /home/burnett/work/lat-data/ft2/ft2_2010.fits: 889547 entries, 884697 in GTI<br>	file /home/burnett/work/lat-data/ft2/ft2_2011.fits: 882832 entries, 871672 in GTI<br>	file /home/burnett/work/lat-data/ft2/ft2_2012.fits: 881317 entries, 868109 in GTI<br>	file /home/burnett/work/lat-data/ft2/ft2_2013.fits: 885307 entries, 867342 in GTI<br>	file /home/burnett/work/lat-data/ft2/ft2_2014.fits: 894730 entries, 886570 in GTI<br>	file /home/burnett/work/lat-data/ft2/ft2_2015.fits: 890006 entries, 886086 in GTI<br>	file /home/burnett/work/lat-data/ft2/ft2_2016.fits: 890933 entries, 884823 in GTI<br>	file /home/burnett/work/lat-data/ft2/ft2_2017.fits: 888349 entries, 883761 in GTI<br>	file /home/burnett/work/lat-data/ft2/ft2_2018.fits: 842824 entries, 830723 in GTI<br>	file 
/home/burnett/work/lat-data/ft2/ft2_2019.fits: 737029 entries, 514657 in GTI<br>	Found 9,609,830 S/C entries:  2,695,715 remain after zenith and theta cuts<br>Calculate exposure using the energy domain 100.0-1000000.0 4 bins/decade<br>2695715 entries, MJD 54683 - 58698<br>Time bins: 4015 intervals of 1 days, in range (54683.0, 58698.0)<br>Loaded 3873 / 4015 cells with exposure &gt; 0.3 for light curve analysis<br>first cell: t                                                    54683.5<br>tw                                                         1<br>e                                                    1.26456<br>n                                                        330<br>w          [0.95343286, 0.83939403, 0.68349504, 0.0219892...<br>S                                                    225.585<br>B                                                    114.707<br>loglike    light_curves.loglike.LogLike:  time 54683.500,...<br>Name: 0, dtype: object<br>Fitting likelihoods with poisson representation<br>Light curve for 3C 279: Saving to cache with key "lightfcurve_3C 279"<br>Cell data for 3C 279: Saving to cache with key "cells_3C 279"<br>Photon data: Saving to cache with key "photons_3C 279"<br>Loading  132 months from Arrow dataset /home/burnett/data/dataset<br>....................................................................................................................................<br>	Selected 215,158 photons within 5 deg of  (305.10,57.06)<br>	Energies: 100.0-1000000 MeV<br>	Dates:    2008-08-04 15:52 - 2019-08-03 01:50<br>	MJD  :    54682.7          - 58698.1         <br>Load weights from file /mnt/c/users/thbur/OneDrive/fermi/weight_files/3C_279_weights.pkl<br>	Found: P88Y3243 at (305.10, 57.07)<br>	Applyng weights: 2086 / 215158 photon pixels are outside weight region<br>	67655 weights set to NaN<br>binned exposure for source 3C 279: Saving to cache with key "binned_exposure_3C 279"<br>exposure for 3C 279: Saving to cache with key "exposure_3C 
279"<br>Processing 12 S/C history (FT2) files<br>  applying cuts cos(theta) &lt; 0.4,  z &lt; 100<br>	file /home/burnett/work/lat-data/ft2/ft2_2008.fits: 362996 entries, 360944 in GTI<br>	file /home/burnett/work/lat-data/ft2/ft2_2009.fits: 874661 entries, 870446 in GTI<br>	file /home/burnett/work/lat-data/ft2/ft2_2010.fits: 889547 entries, 884697 in GTI<br>	file /home/burnett/work/lat-data/ft2/ft2_2011.fits: 882832 entries, 871672 in GTI<br>	file /home/burnett/work/lat-data/ft2/ft2_2012.fits: 881317 entries, 868109 in GTI<br>	file /home/burnett/work/lat-data/ft2/ft2_2013.fits: 885307 entries, 867342 in GTI<br>	file /home/burnett/work/lat-data/ft2/ft2_2014.fits: 894730 entries, 886570 in GTI<br>	file /home/burnett/work/lat-data/ft2/ft2_2015.fits: 890006 entries, 886086 in GTI<br>	file /home/burnett/work/lat-data/ft2/ft2_2016.fits: 890933 entries, 884823 in GTI<br>	file /home/burnett/work/lat-data/ft2/ft2_2017.fits: 888349 entries, 883761 in GTI<br>	file /home/burnett/work/lat-data/ft2/ft2_2018.fits: 842824 entries, 830723 in GTI<br>	file /home/burnett/work/lat-data/ft2/ft2_2019.fits: 737029 entries, 514657 in GTI<br>	Found 9,609,830 S/C entries:  2,438,906 remain after zenith and theta cuts<br>	cut from 2,438,906 to 105 - 2438858, or 2,438,753 entries after MJD range selection<br>Calculate exposure using the energy domain 100.0-1000000.0 4 bins/decade<br>2438753 entries, MJD 54683 - 58697<br>Time bins: 4014 intervals of 1 days, in range (54683.0, 58697.0)<br>Loaded 3845 / 4014 cells with exposure &gt; 0.3 for light curve analysis<br>first cell: t                                                    54683.5<br>tw                                                         1<br>e                                                   0.858171<br>n                                                         19<br>w          [0.064962015, 0.8456794, 0.4740412, 0.5252704,...<br>S                                                    14.8428<br>B                                             
         16.69<br>loglike    light_curves.loglike.LogLike:  time 54683.500,...<br>Name: 0, dtype: object<br>Fitting likelihoods with poisson representation<br></pre></div> </details>

Choose the time interval, 54750&lt;t&lt;54855 (105 days) to bracket a modest flare of the AGN.
 
<table>
<tr> <td>Pulsar</td><td>AGN</td></tr>
<tr>
<td><div class="nbdoc_image">
<figure style="margin-left: 5%" title="Figure 2">  <a href="images/data_setup_fig_02.png" title="images/data_setup_fig_02.png">    <img src="images/data_setup_fig_02.png" alt="Figure 2 at images/data_setup_fig_02.png" width=300>   </a> </figure>
</div>
</td> <td><div class="nbdoc_image">
<figure style="margin-left: 5%" title="Figure 3">  <a href="images/data_setup_fig_03.png" title="images/data_setup_fig_03.png">    <img src="images/data_setup_fig_03.png" alt="Figure 3 at images/data_setup_fig_03.png" width=300>   </a> </figure>
</div>
</td>
</tr>
</table>


In [None]:
#export
class CountFitness(FitnessFunc):
    """
    Adapted version of a astropy.stats.bayesian_blocks.FitnessFunc
    Considerably modified to give the `fitness function` access to the cell data.

    Implements the Event model using exposure instead of time.

    An instance is callable: `fitness(R)` returns the fitness values needed by the
    Bayesian Blocks recursion, and `fit()` runs that recursion and returns the block edges.
    """

    def __init__(self, lc, p0=0.05,):
        """
        - lc  -- a LightCurve data table with one row per cell. This class reads the columns
            `t` (cell time), `tw` (cell width), `e` (exposure) and `n` (counts);
            subclasses may read other columns in their own `setup`
        - p0 -- false-alarm probability; it is converted by the inherited `p0_prior`
            into `ncp_prior`, the penalty subtracted for every block in `fit`
        """
        self.p0 = p0
        self.df = lc
        N = self.N = len(lc)
        # Invoke empirical function from Scargle 2012
        self.ncp_prior = self.p0_prior(N)

        # actual times for bin edges: the left edge of every cell, plus the right edge of the last
        t = lc.t.values
        dt = lc.tw.values/2
        self.mjd = np.concatenate([t-dt, [t[-1]+dt[-1]] ] ) # put one at the end
        self.name = self.__class__.__name__
        # subclass hook: prepare whatever __call__ needs
        self.setup()

    def setup(self):
        """Extract the per-cell counts and the cumulative exposure used by `__call__`."""
        df = self.df

        # counts per cell
        self.nn = df.n.values
        assert min(self.nn)>0, 'Attempt to Include a cell with no contents'

        # edges and block_length use exposure as "time"
        e = df.e.values
        self.edges = np.concatenate([[0], np.cumsum(e)])
        # block_length[k] is the exposure from the start of cell k to the end of the data
        self.block_length = self.edges[-1] - self.edges

    def __str__(self):
        # NOTE(review): block_length[0] is the total exposure, although the text labels it "days"
        return f'{self.name}: {self.N} cells, spanning {self.block_length[0]:.1f} days, prior={self.ncp_prior:.1f}'

    def __call__(self, R):
        """ The fitness function needed for BB algorithm
        For cells 0..R return array of length R+1 of the maximum log likelihoods for combined cells
        0..R, 1..R, ... R
        """
        # exposures and corresponding counts of the candidate blocks k..R, for k = 0..R
        w_k = self.block_length[:R + 1] - self.block_length[R + 1]
        N_k = np.cumsum(self.nn[:R + 1][::-1])[::-1]

        # Solving eq. 26 from Scargle 2012 for maximum $\lambda$ gives
        return N_k * (np.log(N_k) - np.log(w_k))

    def fit(self):
        """Fit the Bayesian Blocks model given the specified fitness function.
        Refactored version using code from bayesian_blocks.FitnesFunc.fit
        Returns
        -------
        edges : ndarray
            array containing the (M+1) edges, in MJD units, defining the M optimal bins
        """
        # This is the basic Scargle algorithm, copied almost verbatim
        # ---------------------------------------------------------------

        # arrays to store the best configuration
        N = self.N
        best = np.zeros(N, dtype=float)   # best[R]: optimal total fitness for cells 0..R
        last = np.zeros(N, dtype=int)     # last[R]: first cell of the final block in that optimum

        # ----------------------------------------------------------------
        # Start with first data cell; add one cell at each iteration
        # ----------------------------------------------------------------
        for R in range(N):

            # evaluate fitness function
            fit_vec = self(R)

            # every block pays the prior penalty; blocks starting after cell 0
            # add the best fitness of everything before them
            A_R = fit_vec - self.ncp_prior
            A_R[1:] += best[:R]

            i_max = np.argmax(A_R)
            last[R] = i_max
            best[R] = A_R[i_max]

        # ----------------------------------------------------------------
        # Now find changepoints by iteratively peeling off the last block
        # ----------------------------------------------------------------
        # A partition of N cells can have up to N+1 edges (every cell its own block),
        # so collect them in a list instead of a fixed N-element buffer, which would
        # be overrun (via a negative index) in that case.
        change_points = [N]
        ind = N
        while ind > 0:
            # last[ind-1] <= ind-1, so ind strictly decreases and the loop terminates at 0
            ind = last[ind - 1]
            change_points.append(ind)
        change_points = np.array(change_points[::-1], dtype=int)

        return self.mjd[change_points]

In [None]:
#hide
showdoc.show_doc(CountFitness)

<h2 id="CountFitness" class="doc_header"><code>class</code> <code>CountFitness</code><a href="" class="source_link" style="float:right">[source]</a></h2>

> <code>CountFitness</code>(**`lc`**, **`p0`**=*`0.05`*) :: `FitnessFunc`

Adapted version of a astropy.stats.bayesian_blocks.FitnessFunc
Considerably modified to give the `fitness function` access to the cell data.

Implements the Event model using exposure instead of time.

In [None]:
#collapse_hide
# NOTE: the docstring below is a template with {placeholders}, and the function returns
# locals(); presumably nbdoc fills the template from that dict. Local variable names are
# therefore part of the output -- do not rename them without updating the template.
def doc_countfitness( fitness, light_curve_dict, source_name):
    """
    ### {class_name} test with source {source_name}
         
    Create object: `bbfitter = {class_name}(lc)`
    
    Object description:   {bbfitter}
    
    Then `bbfitter({n})` returns the values
        {values}
   
    Finally, the partition algorithm, 'bbfitter.fit()' returns {cffit}
    
    """
    
    # fitness is a class, called with the selected light curve to build the fitter
    lc = light_curve_dict[source_name]
    bbfitter = fitness(lc)
    class_name = bbfitter.name
    # evaluate the fitness for cells 0..n, rounded to one decimal for display
    n = 10
    values  = np.array(bbfitter(n)).round(1)    
    # run the full partition
    cffit = bbfitter.fit()
    
    return locals()

In [None]:
#hide

# Render the CountFitness demonstration for both sources stored in `lcs`,
# but only when the configuration reports itself valid.
if Config().valid:
    nbdoc(doc_countfitness, CountFitness, light_curve_dict = lcs, source_name='Geminga')
    nbdoc(doc_countfitness, CountFitness, light_curve_dict = lcs, source_name='3C 279')
     

### CountFitness test with source Geminga
     
Create object: `bbfitter = CountFitness(lc)`

Object description:   CountFitness: 105 cells, spanning 131.0 days, prior=4.9

Then `bbfitter(10)` returns the values
    [14517.  13066.1 11631.2 10159.8  9106.2  7633.9  6573.   5173.6  3959.6
  2746.6  1451.2]

Finally, the partition algorithm, 'bbfitter.fit()' returns [54750. 54788. 54855.]


### CountFitness test with source 3C 279
     
Create object: `bbfitter = CountFitness(lc)`

Object description:   CountFitness: 105 cells, spanning 138.0 days, prior=4.9

Then `bbfitter(10)` returns the values
    [1657.1 1431.1 1267.9 1145.3  919.4  811.2  648.   510.2  378.7  266.3
  148.2]

Finally, the partition algorithm, 'bbfitter.fit()' returns [54750. 54754. 54785. 54790. 54807. 54827. 54855.]


In [None]:
#export
class LikelihoodFitness(CountFitness):
    """ Fitness function that uses the full likelihood

    Each cell's likelihood is tabulated once in `setup`; `__call__` then sums the
    tables of consecutive cells and takes the maximum of the sum.
    """

    def __init__(self, lc,  p0=0.05, npt=50):
        """
        - lc  -- light curve table; the `fit` column is read by `setup`
        - p0  -- forwarded to `CountFitness`
        - npt -- number of points requested from each cell's `create_table`
        """
        # must be set before the base constructor runs, because that calls setup()
        self.npt = npt
        super().__init__(lc, p0)

    def setup(self):
        """Build the array of per-cell tables, one `create_table(npt)` result per row."""
        # each entry is unpacked in __call__ as (linspace arguments, tabulated values)
        self.tables = self.df.fit.apply(
            lambda cell_fit: cell_fit.create_table(self.npt)
        ).values

    def __str__(self):
        return f'{type(self).__name__}: {self.N} cells,  prior={self.ncp_prior:.1f}'

    def __call__(self, R):
        """Return an array of length R+1: entry i is the maximum, over the grid of
        cell R, of the summed tables of cells i..R.
        """
        # the evaluation grid is the one belonging to the last cell, R
        grid_args, _ = self.tables[R]
        grid = np.linspace(*grid_args)

        running_sum = np.zeros(self.npt)
        fitness = np.empty(R+1)
        # walk backwards from R so each step extends the block by one earlier cell
        for first in reversed(range(R+1)):
            cell_args, cell_values = self.tables[first]
            cell_grid = np.linspace(*cell_args)
            # grid points outside a cell's tabulated range contribute -inf
            running_sum += np.interp(grid, cell_grid, cell_values,
                                     left=-np.inf, right=-np.inf)
            fitness[first] = running_sum[np.argmax(running_sum)]
        return fitness

In [None]:
#hide
showdoc.show_doc(LikelihoodFitness)

<h2 id="LikelihoodFitness" class="doc_header"><code>class</code> <code>LikelihoodFitness</code><a href="" class="source_link" style="float:right">[source]</a></h2>

> <code>LikelihoodFitness</code>(**`lc`**, **`p0`**=*`0.05`*, **`npt`**=*`50`*) :: [`CountFitness`](/light_curves/bayesian.html#CountFitness)

Fitness function that uses the full likelihood
    

In [None]:
# Render the same demonstration as for CountFitness, now with LikelihoodFitness,
# but only when the configuration reports itself valid.
if Config().valid:
    nbdoc(doc_countfitness, LikelihoodFitness, light_curve_dict = lcs, source_name='Geminga')
    nbdoc(doc_countfitness, LikelihoodFitness, light_curve_dict = lcs, source_name='3C 279')
     

### LikelihoodFitness test with source Geminga
     
Create object: `bbfitter = LikelihoodFitness(lc)`

Object description:   LikelihoodFitness: 105 cells,  prior=4.9

Then `bbfitter(10)` returns the values
    [-4.1 -3.8 -1.5 -1.4 -1.3 -1.3 -0.4 -0.4 -0.3 -0.3 -0. ]

Finally, the partition algorithm, 'bbfitter.fit()' returns [54750. 54793. 54855.]


### LikelihoodFitness test with source 3C 279
     
Create object: `bbfitter = LikelihoodFitness(lc)`

Object description:   LikelihoodFitness: 105 cells,  prior=4.9

Then `bbfitter(10)` returns the values
    [-8.3 -8.2 -8.1 -7.1 -5.7 -5.7 -4.7 -3.6 -2.9 -2.6 -0. ]

Finally, the partition algorithm, 'bbfitter.fit()' returns [54750. 54779. 54785. 54790. 54809. 54827. 54843. 54855.]


In [None]:
#export
def get_bb_partition(config, lc, fitness_class=LikelihoodFitness, p0=0.05, key=None):    

    """Perform Bayesian Block partition of the cells found in a light curve
    
    - config : configuration object; its `cache` method and `verbose` attribute are used
    - lc : input light curve, a DataFrame that must contain a `fit` column
    - fitness_class : `CountFitness` or a subclass of it
    - p0 : passed to the fitness class constructor
    - key : key handed to `config.cache`; an empty string selects the default
        key 'BB_edges_', any other value (including None) is passed on unchanged
    
    return edges for partition
    """
    assert issubclass(fitness_class,CountFitness), 'fitness_class wrong'
    assert 'fit' in lc.columns, 'Expect the dataframe ho have the Poisson representation'

    def doit():
        fitness = fitness_class(lc, p0=p0)
        # Now run the Bayesian Blocks code using my version of the 'event' model
        return fitness.fit() 

    # compare by value: `key is ''` relied on string interning and triggers a
    # SyntaxWarning on Python >= 3.8
    if key == '':
        key = 'BB_edges_'
    
    edges = config.cache(key, doit,  description='BB edges for...')
    
    if config.verbose>0:
        print(f'Partitioned {len(lc)} cells into {len(edges)-1} blocks, using {fitness_class.__name__} ' )
    return edges

[autoreload of light_curves.load_gti failed: Traceback (most recent call last):
  File "/home/burnett/miniconda3/lib/python3.7/site-packages/IPython/extensions/autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "/home/burnett/miniconda3/lib/python3.7/site-packages/IPython/extensions/autoreload.py", line 394, in superreload
    module = reload(module)
  File "/home/burnett/miniconda3/lib/python3.7/imp.py", line 314, in reload
    return importlib.reload(module)
  File "/home/burnett/miniconda3/lib/python3.7/importlib/__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 630, in _exec
  File "<frozen importlib._bootstrap_external>", line 728, in exec_module
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
  File "/mnt/c/users/thbur/OneDrive/work/light_curves/light_curves/load_gti.py", line 10, in <module>
    from .config import MJD, Config, Cache
ImportError: can

In [None]:
#hide
showdoc.show_doc(get_bb_partition)

[autoreload of light_curves.cells failed: Traceback (most recent call last):
  File "/home/burnett/miniconda3/lib/python3.7/site-packages/IPython/extensions/autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "/home/burnett/miniconda3/lib/python3.7/site-packages/IPython/extensions/autoreload.py", line 394, in superreload
    module = reload(module)
  File "/home/burnett/miniconda3/lib/python3.7/imp.py", line 314, in reload
    return importlib.reload(module)
  File "/home/burnett/miniconda3/lib/python3.7/importlib/__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 630, in _exec
  File "<frozen importlib._bootstrap_external>", line 728, in exec_module
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
  File "/mnt/c/users/thbur/OneDrive/work/light_curves/light_curves/cells.py", line 10, in <module>
    from .photon_data import get_photon_data
ImportError: cannot 

<h4 id="get_bb_partition" class="doc_header"><code>get_bb_partition</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h4>

> <code>get_bb_partition</code>(**`config`**, **`lc`**, **`fitness_class`**=*`LikelihoodFitness`*, **`p0`**=*`0.05`*, **`key`**=*`None`*)

Perform Bayesian Block partition of the cells found in a light curve

- lc : input light curve
- fitness_class 

return edges for partition

In [None]:
#collapse_hide

# NOTE: the docstring below is a template with {placeholders}, and the function returns
# locals(); presumably nbdoc fills the template from that dict. Local variable names are
# therefore part of the output -- do not rename them without updating the template.
def test_bb(lcs, name, fitness):
    """
    #### BB partition for {name} using {fitness.__name__}
    
    {lc_fig}
    """

    config = Config()
     
    lc = lcs[name]
    # key=None is passed straight through to config.cache by get_bb_partition
    edges = get_bb_partition(config, lc, fitness, key=None) 
    lc_fig = flux_plot(config, lc, title=f'{name} partition with {fitness.__name__}')
    # presumably read by nbdoc when it renders the figure -- TODO confirm
    lc_fig.width=400
    ax = lc_fig.axes[0]
    # duplicate the last edge so that every even-indexed edge has a right-hand partner
    edges = np.concatenate([edges, [edges[-1]] ])
    # shade alternate blocks: (edges[0], edges[1]), (edges[2], edges[3]), ...
    for  i,t in enumerate(edges[::2]):
        if 2*i+1==len(edges): break
        t2 = edges[2*i+1]
        ax.axvspan(t, t2, color='lightcyan')
    # mark every edge with a dotted vertical line
    for t in edges:
        ax.axvline(t, ls=':', color='cyan')
    return locals()

# Render the partition plots only when the configuration reports itself valid.
if Config().valid:
    nbdoc(test_bb, lcs, '3C 279', CountFitness)
    nbdoc(test_bb, lcs, '3C 279', LikelihoodFitness)
#     nbdoc(test_bb, lcs, 'Geminga', CountFitness)
#     nbdoc(test_bb, lcs, 'Geminga', LikelihoodFitness)

[autoreload of light_curves.lightcurve failed: Traceback (most recent call last):
  File "/home/burnett/miniconda3/lib/python3.7/site-packages/IPython/extensions/autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "/home/burnett/miniconda3/lib/python3.7/site-packages/IPython/extensions/autoreload.py", line 394, in superreload
    module = reload(module)
  File "/home/burnett/miniconda3/lib/python3.7/imp.py", line 314, in reload
    return importlib.reload(module)
  File "/home/burnett/miniconda3/lib/python3.7/importlib/__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 630, in _exec
  File "<frozen importlib._bootstrap_external>", line 728, in exec_module
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
  File "/mnt/c/users/thbur/OneDrive/work/light_curves/light_curves/lightcurve.py", line 11, in <module>
    from .cells import get_cells
ImportError: cannot im

Partitioned 105 cells into 6 blocks, using CountFitness 


#### BB partition for 3C 279 using CountFitness

<div class="nbdoc_image">
<figure style="margin-left: 5%" title="Figure 1">  <a href="images/test_bb_fig_01.png" title="images/test_bb_fig_01.png">    <img src="images/test_bb_fig_01.png" alt="Figure 1 at images/test_bb_fig_01.png" width=400>   </a> </figure>
</div>



Function test_bb failed: name 'Poisson' is not defined


NameError: name 'Poisson' is not defined

## Test fitting the new cells from the partition

In [None]:
#export
def bb_overplot(config, lc, bb_fit, ax=None, **kwargs):
    """Draw a light curve and, on the same axes, the fit to its Bayesian Block partition.

    - config : passed to `flux_plot`
    - lc : the light curve, drawn first with a light color scheme
    - bb_fit : the fitted blocks, drawn on top with `step=True`
    - ax : axes to draw on; if None a new 10x4 figure is created
    - kwargs : forwarded to both `flux_plot` calls

    Returns None.
    """
    if ax is None:
        fig, ax = plt.subplots(figsize=(10,4))
    else:
        fig = ax.figure

    cell_colors = ('lightblue', 'sandybrown', 'blue')
    flux_plot(config, lc, ax=ax, colors=cell_colors, **kwargs)
    flux_plot(config, bb_fit, ax=ax, step=True, **kwargs)

In [None]:
#hide
# Run nbdev's notebook-to-script export (presumably writes the cells marked `#export`
# to the module named by `default_exp`), then print the date via a shell escape.
from nbdev.export import notebook2script
notebook2script()
!date