In [None]:
from nbdev import *
%nbdev_default_export bayesian
%load_ext autoreload
%autoreload 2
from utilities.ipynb_docgen import *
!date

Cells will be exported to wtlike.bayesian,
unless a different module is specified after an export flag: `%nbdev_export special.module`
Wed May  5 16:21:31 PDT 2021


# Bayesian Blocks

> Partition a light curve with the Bayesian Block algorithm

The functions implemented here are:

- `get_bb_partition` to perform a BB partition. It requires a "fitness" function that evaluates the 
likelihood for a set of sequential cells. Two are provided: one using the number of counts, and one using the Kerr likelihood. See the [Bayesian Block reference](https://arxiv.org/pdf/1207.5578.pdf).

 - `CountFitness`
 - `LikelihoodFitness`, the default
 

- `bb_overplot` which overplots fits to the partitioned blocks on the original light curve.


In [None]:
%nbdev_export
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from astropy.stats.bayesian_blocks import FitnessFunc

from wtlike.config import *
from wtlike.lightcurve import * #get_lightcurve, fit_cells, flux_plot
from wtlike.cell_data import * #get_cells, partition_cells
from wtlike.loglike import *

plt.rc('font', size=18)

In [None]:
#export
class CountFitness(FitnessFunc):
    """
    Adapted version of a astropy.stats.bayesian_blocks.FitnessFunc
    Considerably modified to give the `fitness function` access to the cell data.

    Implements the Event model using exposure instead of time.

    Attributes set by the constructor: `p0`, `df`, `N`, `ncp_prior`, `mjd`, `name`;
    attributes set by `setup`: `nn`, `edges`, `block_length`.
    """

    def __init__(self, lc, p0=0.05,):
        """
        - lc  -- a LightCurve data table, with  exposure (e) and counts (n),
            as well as a representation of the likelihood for each cell.
            The columns `t` and `tw` are read here to build the cell edges.
        - p0 -- value stored as `self.p0` before `self.p0_prior(N)` is called to
            obtain the per-block prior `ncp_prior` (the empirical function from
            Scargle 2012)
        """
        self.p0=p0
        self.df= df= lc
        N = self.N = len(df)
        # Invoke empirical function from Scargle 2012
        self.ncp_prior = self.p0_prior(N)

        #actual times for bin edges: the start of every cell, plus the end of the last one
        t = df.t.values
        dt = df.tw.values/2
        self.mjd = np.concatenate([t-dt, [t[-1]+dt[-1]] ] ) # put one at the end
        self.name = self.__class__.__name__
        # subclasses override setup() to prepare their own per-cell data
        self.setup()

    def setup(self):
        """Prepare the per-cell counts and the cumulative exposure used by `__call__`."""
        df = self.df

        # counts per cell
        self.nn = df.n.values
        assert min(self.nn)>0, 'Attempt to Include a cell with no contents'

        # edges and block_length use exposure as "time"
        e = df.e.values
        self.edges = np.concatenate([[0], np.cumsum(e)])
        # block_length[i] is the exposure from the start of cell i to the end of the data
        self.block_length = self.edges[-1] - self.edges

    def __str__(self):
        # NOTE(review): block_length[0] is the total exposure, not an elapsed time;
        # the "days" label is kept unchanged so the printed text stays the same.
        return f'{self.name}: {self.N} cells, spanning {self.block_length[0]:.1f} days, prior={self.ncp_prior:.1f}'

    def __call__(self, R):
        """ The fitness function needed for BB algorithm
        For cells 0..R return array of length R+1 of the maximum log likelihoods for combined cells
        0..R, 1..R, ... R
        """
        # exposures and corresponding counts of the candidate blocks i..R, for i=0..R
        w_k = self.block_length[:R + 1] - self.block_length[R + 1]
        N_k = np.cumsum(self.nn[:R + 1][::-1])[::-1]

        # Solving eq. 26 from Scargle 2012 for maximum $\lambda$ gives
        return N_k * (np.log(N_k) - np.log(w_k))

    def fit(self):
        """Fit the Bayesian Blocks model given the specified fitness function.
        Refactored version using code from bayesian_blocks.FitnesFunc.fit
        Returns
        -------
        edges : ndarray
            array containing the (M+1) edges, in MJD units, defining the M optimal bins
        """
        # This is the basic Scargle algorithm, copied almost verbatim
        # ---------------------------------------------------------------

        # arrays to store the best configuration
        N = self.N
        best = np.zeros(N, dtype=float)
        last = np.zeros(N, dtype=int)

        # ----------------------------------------------------------------
        # Start with first data cell; add one cell at each iteration
        # ----------------------------------------------------------------
        for R in range(N):

            # evaluate fitness function
            fit_vec = self(R)

            # total fitness of ending the partition with block i..R, for each i
            A_R = fit_vec - self.ncp_prior
            A_R[1:] += best[:R]

            i_max = np.argmax(A_R)
            last[R] = i_max
            best[R] = A_R[i_max]

        # ----------------------------------------------------------------
        # Now find changepoints by iteratively peeling off the last block
        # ----------------------------------------------------------------
        # Collect into a list: there can be up to N+1 change points (every cell
        # its own block, which is always the case for N=1). A preallocated
        # length-N array would let the write index wrap around to -1 and
        # clobber the final edge.
        change_points = [N]
        ind = N
        while ind > 0:
            ind = last[ind - 1]
            change_points.append(ind)
        # they were gathered from the last block backwards
        change_points = np.array(change_points[::-1], dtype=int)

        return self.mjd[change_points]

#export
class LikelihoodFitness(CountFitness):
    """Fitness function based on the full per-cell likelihood.

    Each cell's `fit` object supplies a tabulated likelihood function; the
    fitness of a block is the maximum of the sum of the tables of its cells.
    """

    def __init__(self, lc,  p0=0.05, npt=50):
        """
        - lc  -- light curve table; must have a `fit` column of objects with a
            `create_table` method
        - p0  -- forwarded to `CountFitness.__init__`
        - npt -- number of points requested from each `create_table` call
        """
        # must be set first: the base constructor calls setup(), which reads it
        self.npt = npt
        super().__init__(lc, p0)

    def setup(self):
        """Build one likelihood table per cell and keep them in `self.tables`."""
        self.tables = self.df.fit.apply(
            lambda prep: prep.create_table(self.npt)
        ).values

    def __str__(self):
        return f'{self.__class__.__name__}: {self.N} cells,  prior={self.ncp_prior:.1f}'

    def __call__(self, R):
        """Return an array of length R+1 with the maximum summed log likelihood
        for the combined cells i..R, for i = 0..R.
        """
        # the grid of cell R is the common grid on which all tables are summed
        grid_args, _ = self.tables[R]
        x = np.linspace(*grid_args)

        running_sum = np.zeros(self.npt)
        rv = np.empty(R+1)
        # walk backwards from cell R, adding one more cell to the block each step
        for i in reversed(range(R + 1)):
            cell_args, cell_y = self.tables[i]
            cell_x = np.linspace(*cell_args)
            # outside a cell's tabulated range its contribution is -inf
            running_sum += np.interp(x, cell_x, cell_y, left=-np.inf, right=-np.inf)
            best_index = np.argmax(running_sum)
            rv[i] = running_sum[best_index]
        return rv

In [None]:
%nbdev_export
def doc_countfitness( fitness, light_curve_dict, source_name):
    """
    #### {class_name} test with source {source_name}
         
    Create object: `bbfitter = {class_name}(lc)`
    
    Object description:   {bbfitter}
    
    Then `bbfitter({n})` returns the values
        {values}
   
    Finally, the partition algorithm, 'bbfitter.fit()' returns {cffit}
    
    """
    # The docstring above is a format template: its {placeholders} are the names of
    # the local variables returned by locals() below, so neither the template text
    # nor the local names may be changed independently.
    # presumably meant to be rendered by `nbdoc` (as is done for `demo`) -- TODO confirm
    #
    # - fitness          -- fitness class, called as fitness(lc) to create the fitter
    # - light_curve_dict -- mapping from source name to light curve table
    # - source_name      -- key selecting the light curve to test with
    
    lc = light_curve_dict[source_name]
    bbfitter = fitness(lc)
    class_name = bbfitter.name
    # evaluate the fitness function for cells 0..n
    n = 10
    values  = np.array(bbfitter(n)).round(1)    
    # run the full partition; result is the array of block edges
    cffit = bbfitter.fit()
    
    # every local, including the arguments, is made available to the template
    return locals()

In [None]:
%nbdev_export
def get_bb_partition(config, lc, fitness_class=LikelihoodFitness, p0=0.05, key=None, clear=False):

    """Perform Bayesian Block partition of the cells found in a light curve

    - config : configuration object; its `cache` method and `verbose` attribute are used
    - lc : input LightCurve object or DataFrame with fit cells
    - fitness_class : the fitness class to use, `CountFitness` or a subclass of it
    - p0 : passed to the fitness class constructor
    - key : cache key, passed to `config.cache`; an empty string selects the
        default key 'BB_edges_'
    - clear : passed to `config.cache` as `overwrite`

    return edges for partition
    """
    assert issubclass(fitness_class,CountFitness), 'fitness_class wrong'
    if not isinstance(lc, pd.DataFrame):
        lc = lc.dataframe
    assert 'fit' in lc.columns, 'Expect the dataframe to have the Poisson fit object'


    def doit():
        fitness = fitness_class(lc, p0=p0)
        # Now run the astropy Bayesian Blocks code using my version of the 'event' model
        return fitness.fit()

    # compare by value: `key is ''` tests object identity, which is
    # implementation-dependent and a SyntaxWarning on Python >= 3.8
    key = 'BB_edges_' if key == '' else key

    edges = config.cache(key, doit,  description=key if config.verbose>0 else '', overwrite=clear)

    if config.verbose>0:
        print(f'Partitioned {len(lc)} cells into {len(edges)-1} blocks, using {fitness_class.__name__} ' )
    return edges

In [None]:
show_doc(get_bb_partition, title_level=2)

<h2 id="get_bb_partition" class="doc_header"><code>get_bb_partition</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h2>

> <code>get_bb_partition</code>(**`config`**, **`lc`**, **`fitness_class`**=*`'LikelihoodFitness'`*, **`p0`**=*`0.05`*, **`key`**=*`None`*, **`clear`**=*`False`*)

Perform Bayesian Block partition of the cells found in a light curve

- lc : input LightCurve object or DataFrame with fit cells
- fitness_class 

return edges for partition

In [None]:
%nbdev_export
def bb_overplot(config, lc, bb_fit, ax=None, source_name=None, **kwargs):
    """Plot light curve: cell fits with BB overplot

    - config : configuration object, passed on to `flux_plot`
    - lc : the cell fits to plot as the underlying light curve
    - bb_fit : the fits to the BB blocks, drawn on top as a step plot
    - ax : Axes object to draw on; if None, a new 12x4 figure is created
    - source_name : passed to `flux_plot` for the light curve
    - kwargs : passed to both `flux_plot` calls, except `colors`, which applies
        to the light curve only (default ('lightblue', 'wheat', 'blue'))
    """
    import matplotlib.pyplot as plt
    colors = kwargs.pop('colors', ('lightblue', 'wheat', 'blue'))
    # test explicitly for None rather than relying on the truthiness of an Axes
    fig, ax = plt.subplots(1,1, figsize=(12,4)) if ax is None else (ax.figure, ax)
    flux_plot(config, lc, ax=ax, colors=colors, source_name=source_name,  **kwargs)
    flux_plot(config, bb_fit, ax=ax, step=True, step_label='BB overlay', zorder=10,**kwargs)
    fig.set_facecolor('white')

In [None]:
show_doc(bb_overplot,title_level=2)

<h2 id="bb_overplot" class="doc_header"><code>bb_overplot</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h2>

> <code>bb_overplot</code>(**`config`**, **`lc`**, **`bb_fit`**, **`ax`**=*`None`*, **`source_name`**=*`None`*, **\*\*`kwargs`**)

Plot light curve: cell fits with BB overplot
    

### Fit blocks and overplot

The following code shows how to refit, and demonstrates the overplot function.

In [None]:
%nbdev_export
class BBanalysis(LightCurve):
    """A `LightCurve` with Bayesian Blocks analysis added.

    `applyBB` partitions the current cells into blocks and fits them;
    `plot_BB` and `bb_table` present the result, running `applyBB` first
    if that has not been done yet.
    """

    def applyBB(self, key=None, clear=False):
        """ Apply the Bayesian Blocks algorithm to partition the current set of cells into blocks,
        then create a new set of cells and fit them

        - key : cache key. None, default to not use the cache
        - clear : if True, clear the cache for this key

        Sets the attributes `bb_edges`, `bb_cells` and `bb_fit`.
        """

        self.bb_edges  = get_bb_partition(self.config, self.lc_df,  key=key, clear=clear)

        self.bb_cells = partition_cells(self.config, self.cells, self.bb_edges)
        self.bb_fit = fit_cells(self.config, self.bb_cells, )

    def plot_BB(self, ax=None, **kwargs):
        """Plot the light curve with BB overplot

        - ax : Axes object to draw on; if None, a new figure is created
        - kwargs : `figsize` (default (12,4)) and `fignum` (default 1) are used
            for the new figure; `ts_min` (default -1), `source_name` (default
            this object's source name) and all the rest are passed to `bb_overplot`

        Returns the figure.
        """
        import matplotlib.pyplot as plt
        # bb_fit is the attribute actually plotted, and the last one applyBB sets
        if not hasattr(self, 'bb_fit'):
            self.applyBB()
        figsize = kwargs.pop('figsize', (12,4))
        fignum = kwargs.pop('fignum', 1)
        ts_min = kwargs.pop('ts_min',-1)
        source_name =kwargs.pop('source_name', self.source_name)
        fig, ax = plt.subplots(figsize=figsize, num=fignum) if ax is None else (ax.figure, ax)
        bb_overplot(self.config, self.lc_df, self.bb_fit, ax=ax, ts_min=ts_min,
                    source_name=source_name, **kwargs)
        fig.set_facecolor('white')
        return fig

    def bb_table(self, **kwargs):
        """ Return a table of fluxes for the BB analysis

        - kwargs : passed to `flux_table`
        """
        # same lazy evaluation as plot_BB, instead of an AttributeError
        if not hasattr(self, 'bb_fit'):
            self.applyBB()
        return self.flux_table(self.bb_fit, **kwargs)

BBA = BBanalysis

In [None]:
show_doc(BBanalysis)

<h2 id="BBanalysis" class="doc_header"><code>class</code> <code>BBanalysis</code><a href="" class="source_link" style="float:right">[source]</a></h2>

> <code>BBanalysis</code>(**\*`pars`**, **\*\*`kwargs`**) :: [`LightCurve`](/wtlikelightcurve#LightCurve)

    

## Test/Demonstration with 3C 279

In [None]:
%nbdev_hide
# bba = BBA('3C 279')

# bba.exposure_fator

# bba2 = bba.view((57186, 57191, 0.25/24))

# bba2.plot_flux(ts_min=5, fmt='o');

# bba3 = bba.view((57186, 57191, -1))

# bba3.plot_flux(fmt='o', tzero=57187);

In [None]:
%nbdev_collapse_input
bba=bba2=None

def demo():
    """
    ### 3C 279 Demonstration
    {out1}    {fig1}
    
    Expand to see position of a flare:
    {fig2}
    {out3}    {fig3}
    {out4}    {fig4}
    
    The bb fits
    {bb_table}
    
    Figure 4 From [Kerr paper](https://arxiv.org/pdf/1910.00140.pdf)
    
    """
    # The docstring above is the template for the generated document: each
    # {placeholder} names a local variable returned by locals() at the end.
    # `capture_print`, `figure` and `image` are not defined in this notebook;
    # presumably they come from utilities.ipynb_docgen -- TODO confirm

    # the two analysis objects are kept as module globals so that they remain
    # available after the demo has run
    global bba, bba2
    
    # full light curve, with the printout of its creation captured for the document
    with capture_print('Create full light curve') as out1:
        bba = BBA('3C 279')
    
    fig1 = figure(
        bba.plot_flux(yscale='log', ylim=(0.2,20), ts_min=4, figsize=(15,5), xlabel='MJD', fignum=1),
        width=600)  
    
    # same light curve, restricted to MJD 57100-57300
    fig2 = figure(
        bba.plot_flux(  figsize=(15,5), xlabel='MJD', fmt='o', fignum=2,
            xlim=(57100, 57300),),
        width=600)  
    
    # view of MJD 57186-57191; the third value, 0.25/24 day, is the 15-min bin width
    with capture_print('Define subset around large flare at MJD 57189') as out3:
        bba2 = bba.view((57186, 57191, 0.25/24))
    fig3 = figure(
        bba2.plot_flux(fmt='o', tzero=57186,  ts_min=4,
                       #   flux_factor=bba2.exposure_factor,
                       fignum=3 ),
            width=600)
    
    # partition the subset; clear=True overwrites any cached result for key 'flare'
    with capture_print('Apply BB to subset and replot') as out4:
        bba2.applyBB(key='flare', clear=True)
    fig4 = figure(
        bba2.plot_BB(fmt='o', tzero=57186,  ts_min=4, 
                     #    flux_factor=bba2.exposure_factor, 
                     fignum=4),
                width=600)
    
    bb_table = bba2.bb_table()
    
    # NOTE(review): the template has no {kerr_fig4} placeholder, so this image is
    # created but never shown in the document -- confirm whether that is intended
    kerr_fig4 = image('kerr_fig4.png', width=200, caption=None)
    return locals()

nbdoc(demo)

### 3C 279 Demonstration
<details  class="nbdoc-description" >  <summary> Create full light curve </summary>  <div style="margin-left: 5%"><pre>photons and exposure for 3C 279: Restoring from cache with key "3C 279__data"<br>BBanalysis: Source 3C 279 with:<br>	 data:       179,695 photons from   2008-08-04 to 2021-04-21<br>	 exposure: 2,817,749 intervals from 2008-08-04 to 2021-04-20<br>Bin photon data into 663 1-week bins from 54683.0 to 59324.0<br>Loaded 656 / 663 cells with at least 1 photons and exposure &gt; 1e-06 for light curve analysis<br></pre></div> </details>    <div class="nbdoc_image">
<figure style="margin-left: 5%" title="Figure 1">  <a href="images/demo_fig_01.png" title="images/demo_fig_01.png">    <img src="images/demo_fig_01.png" alt="Figure 1 at images/demo_fig_01.png" width=600>   </a> </figure>
</div>


Expand to see position of a flare:
<div class="nbdoc_image">
<figure style="margin-left: 5%" title="Figure 2">  <a href="images/demo_fig_02.png" title="images/demo_fig_02.png">    <img src="images/demo_fig_02.png" alt="Figure 2 at images/demo_fig_02.png" width=600>   </a> </figure>
</div>

<details  class="nbdoc-description" >  <summary> Define subset around large flare at MJD 57189 </summary>  <div style="margin-left: 5%"><pre>Bin photon data into 480 15-min bins from 57186.0 to 57191.0<br>Loaded 165 / 480 cells with at least 1 photons and exposure &gt; 1e-06 for light curve analysis<br></pre></div> </details>    <div class="nbdoc_image">
<figure style="margin-left: 5%" title="Figure 3">  <a href="images/demo_fig_03.png" title="images/demo_fig_03.png">    <img src="images/demo_fig_03.png" alt="Figure 3 at images/demo_fig_03.png" width=600>   </a> </figure>
</div>

<details  class="nbdoc-description" >  <summary> Apply BB to subset and replot </summary>  <div style="margin-left: 5%"><pre>flare: Saving to cache<br>Partitioned 165 cells into 9 blocks, using LikelihoodFitness <br>Loaded 9 / 9 cells for fitting<br></pre></div> </details>    <div class="nbdoc_image">
<figure style="margin-left: 5%" title="Figure 4">  <a href="images/demo_fig_04.png" title="images/demo_fig_04.png">    <img src="images/demo_fig_04.png" alt="Figure 4 at images/demo_fig_04.png" width=600>   </a> </figure>
</div>


The bb fits
<div>
<style scoped>
    .dataframe tbody tr th:only-of-type {
        vertical-align: middle;
    }

    .dataframe tbody tr th {
        vertical-align: top;
    }

    .dataframe thead th {
        text-align: right;
    }
</style>
<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th>t</th>
      <th>tw</th>
      <th>n</th>
      <th>flux</th>
      <th>ts</th>
      <th>errors</th>
      <th>limit</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>57186.641</td>
      <td>1.052</td>
      <td>45</td>
      <td>1.863</td>
      <td>77.500</td>
      <td>(-0.367, 0.405)</td>
      <td>2.592</td>
    </tr>
    <tr>
      <td>57187.891</td>
      <td>1.448</td>
      <td>642</td>
      <td>24.765</td>
      <td>4346.900</td>
      <td>(-0.996, 1.023)</td>
      <td>26.489</td>
    </tr>
    <tr>
      <td>57188.812</td>
      <td>0.396</td>
      <td>284</td>
      <td>18.307</td>
      <td>1816.700</td>
      <td>(-1.109, 1.154)</td>
      <td>20.276</td>
    </tr>
    <tr>
      <td>...</td>
      <td>...</td>
      <td>...</td>
      <td>...</td>
      <td>...</td>
      <td>...</td>
      <td>...</td>
    </tr>
    <tr>
      <td>57189.646</td>
      <td>0.083</td>
      <td>146</td>
      <td>23.384</td>
      <td>1058.600</td>
      <td>(-1.922, 2.031)</td>
      <td>26.899</td>
    </tr>
    <tr>
      <td>57189.948</td>
      <td>0.521</td>
      <td>259</td>
      <td>12.211</td>
      <td>1440.700</td>
      <td>(-0.782, 0.815)</td>
      <td>13.605</td>
    </tr>
    <tr>
      <td>57190.589</td>
      <td>0.760</td>
      <td>212</td>
      <td>3.677</td>
      <td>660.900</td>
      <td>(-0.291, 0.304)</td>
      <td>4.199</td>
    </tr>
  </tbody>
</table>
</div>

Figure 4 From [Kerr paper](https://arxiv.org/pdf/1910.00140.pdf)


In [None]:
show_doc(BBanalysis)

<h2 id="BBanalysis" class="doc_header"><code>class</code> <code>BBanalysis</code><a href="" class="source_link" style="float:right">[source]</a></h2>

> <code>BBanalysis</code>(**\*`pars`**, **\*\*`kwargs`**) :: [`LightCurve`](/wtlikelightcurve#LightCurve)

    

In [None]:
# hide
from nbdev.export import notebook2script
notebook2script()
!date

Converted 00_config.ipynb.
Converted 01_effective_area.ipynb.
Converted 02_data_man.ipynb.
Converted 03_source_data.ipynb.
Converted 03_weights.ipynb.
Converted 06_poisson.ipynb.
Converted 07_cell_data.ipynb.
Converted 08_loglike.ipynb.
Converted 09_lightcurve.ipynb.
Converted 10_simulation.ipynb.
Converted 14_bayesian.ipynb.
Converted index.ipynb.
Wed May  5 16:21:42 PDT 2021
