In [None]:
# default_exp cell_data
from nbdev import *

%reload_ext autoreload
%autoreload 2

# Manage cell data
> Create cells from source data

In [None]:
# export
import os
import numpy as np
import pandas as pd
from wtlike.config import *
from wtlike.source_data import *
from wtlike.loglike import LogLike, PoissonRep

In [None]:
# export

class CellData(SourceData):
    r"""Manage a set of cells generated from a data set

        Invoke superclass to load photon data and exposure for the source.

        * time_bins, default config.time_bins
          (the keyword `bins` is accepted as well, and takes precedence)


        Note that the `e`  cell entry is the actual exposure for the cell in units $cm^2\ s$, times $10^{-6}$.
        """

    def __init__(self, *pars, **kwargs):
        """Load the source data, then bin it into cells.

        - pars, kwargs -- forwarded to the superclass, except for the keywords
          `bins` and `time_bins`, which are removed here and select the binning
          (`bins` wins if both are given; the default is `Config().time_bins`).
        """
        # Both keywords are always popped so that neither reaches the superclass.
        bins = kwargs.pop('bins', kwargs.pop('time_bins', Config().time_bins))
        #  load source data
        super().__init__(*pars, **kwargs )

        self.rebin(bins)
        # A plain instance has no parent; `view` sets this on the copies it makes.
        self.parent = None

    def rebin(self, newbins):
        """bin, or rebin

        - newbins -- binning specification handed to `time_bin_edges`; its third
          element is read as the step when reporting (verbose>0).

        Sets `cell_edges` and `binexp`, then regenerates `cells`.
        """
        self.cell_edges = edges = time_bin_edges(self.config, self.exposure, newbins)
        if self.config.verbose>0:
            step = newbins[2]
            # NOTE: step_name is only defined when running verbosely.
            self.step_name = 'orbit-based' if step<=0 else bin_size_name(step)
            print(f'CellData: Bin photon data into {len(edges)//2} {self.step_name}'\
                  f' bins from {edges[0]:.1f} to {edges[-1]:.1f}')

        # cell_edges interleaves (start, stop) pairs (see get_cells), so only every
        # other entry of the binned exposure is kept -- presumably the in-cell
        # intervals, skipping the gaps; confirm against SourceData.binned_exposure.
        self.binexp = self.binned_exposure( edges ) [0::2]

        self.get_cells()

    def get_cells(self, exposure_factor=1e-6):
        r"""
        Generate the cell DataFrame

        - exposure_factor --  recast exposure as cm^2 * Ms if $10^{-6}$

        Thus the `e`  cell entry is the actual exposure for the cell in units $cm^2\ Ms$.

        Returns the DataFrame (also stored as `self.cells`) with one row per cell and
        columns t (center), tw (width), e (scaled exposure), n (number of photons),
        w (array of photon weights), S and B (the source's S and B scaled by the
        cell's share of the total exposure).
        """
        # restrict photons to range of bin times
        photons = self.photons.query(f'{self.cell_edges[0]}<time<{self.cell_edges[-1]}')

        # use photon times to get indices into photon list
        edges = np.searchsorted(photons.time, self.cell_edges)

        wts = photons.weight.values
        start,stop = self.cell_edges[0::2], self.cell_edges[1::2]
        center = (start+stop)/2
        width = (stop-start)
        cells = []
        # Photon index of each cell start, closed off by the index of the final edge:
        # cell k takes the photons from its own start up to the start of cell k+1,
        # so any photon falling between two cells is counted with the earlier one.
        ek = np.append(edges[0::2], edges[-1])
        etot = self.exptot*exposure_factor

        # S and B per unit of (scaled) exposure
        Sk, Bk = self.S/etot, self.B/etot

        for k, (t, tw, e) in enumerate( zip(
                    center, width, self.binexp*exposure_factor) ):
            w = wts[ek[k]:ek[k+1]]
            n = len(w)
            cells.append(dict(t=t, tw=tw,
                              e=e,
                              n=n,
                              w=w,
                              S=e*Sk,
                              B=e*Bk,
                             )
                        )
        self.cells =  pd.DataFrame(cells)
        return self.cells

    def update(self): pass # virtual

    def view(self, newbins=None):
        """Return a "view": a new instance of this class with a perhaps a different set of cells

        - newbins -- a tuple (start, stop, step) to define new binning.
          - start and stop are either MJD values, or offsets from the start or stop.
          - step -- the cell size in days, or if zero, orbit-based binning
        """
        import copy
        if self.config.verbose>1:
            print(f'Making a view of the class {self.__class__}')
        # Shallow copy: the photon and exposure data are shared with this instance;
        # rebin only rebinds attributes on the copy.
        r = copy.copy(self)

        if newbins is not None:
            r.rebin(newbins)
        r.parent = self
        r.update()
        return r

#### needs fixxing
#     def __repr__(self):
#         return f'''{self.__class__}:
#         {len(self.fexposure)} intervals from {self.cell_edges[0]:.1f} to {self.cell_edges[-1]:.1f} for source {self.source_name}
#         S {self.S:.2f}  B {self.B:.2f} '''


    def concatenate( self ):
        """
        Combine this set of cells to one
        Return a dict with summed n, S, B, and concatenated w

        If the cells have a `t` column, the dict also has `t` and `tw`: the center and
        the full width of the span from the left edge of the first cell to the right
        edge of the last one.
        """

        cells = self.cells

        newcell = dict()

        if 't' in cells:
            ca, cb =cells.iloc[0], cells.iloc[-1]
            tleft, tright = ca.t-ca.tw/2, cb.t+cb.tw/2
            # width is edge to edge, consistent with the center computed from the
            # same two edges (a single cell keeps its own width)
            newcell.update(dict(t=0.5*(tleft+tright), tw=tright-tleft))

        for col in ' n S B'.split():
            newcell[col] = cells[col].sum()
        newcell['w'] = np.concatenate(list(cells.w.values))
        return newcell


    def full_likelihood(self ):
        """Concatentate all the cells, return a LogLike object
        """
        return LogLike(self.concatenate())

    def plot_concatenated(self, fignum=1, **kwargs):
        """Likelihood function, with fit for concatenated data

        - fignum -- matplotlib figure number to use
        - kwargs -- passed to the LogLike object's plot method

        Returns the figure.
        """
        import matplotlib.pyplot as plt
        lka = self.full_likelihood()
        fig,ax = plt.subplots(figsize=(4,2), num=fignum)
        lka.plot(ax=ax, **kwargs)
        return fig

In [None]:
# hide

# cd = CellData('Geminga')

# cd.plot_concatenated(xlim=(0.99, 1.01), title=f'{cd.source.name}');
# print('Parmeters from Poisson fit')
# L = cd.full_likelihood()
# pr = PoissonRep(L)
# print(pd.Series(pr.info()))

# (cd.cells.n/cd.cells.e).describe()

In [None]:
# Display the generated API documentation for the class and for its `view` method
show_doc(CellData)
show_doc(CellData.view)

<h2 id="CellData" class="doc_header"><code>class</code> <code>CellData</code><a href="" class="source_link" style="float:right">[source]</a></h2>

> <code>CellData</code>(**\*`pars`**, **\*\*`kwargs`**) :: [`SourceData`](/wtlikesource_data#SourceData)

Manage a set of cells generated from a data set

Invoke superclass to load photon data and exposure for the source.

* time_bins, default config.time_bins


Note that the `e`  cell entry is the actual exposure for the cell in units $cm^2\ s$, times $10^{-6}$.

<h4 id="CellData.view" class="doc_header"><code>CellData.view</code><a href="__main__.py#L88" class="source_link" style="float:right">[source]</a></h4>

> <code>CellData.view</code>(**`newbins`**=*`None`*)

Return a "view": a new instance of this class with a perhaps a different set of cells

- newbins -- a tuple (start, stop, step) to define new binning.
  - start and stop are either MJD values, or offsets from the start or stop.
  - step -- the cell size in days, or if zero, orbit-based binning

In [None]:
# export
def concatenate_cells( cells):
    """
    Combine a group of cells to one
    - cells: dataframe with cells containing  n, w, S, B<br>
            Optionally, if $t$ is present, generate t and tw
    Return a dict with summed n, S, B, and concatenated w

    When t is present, the combined cell spans from the left edge of the first cell
    to the right edge of the last one: t is the center of that span and tw its full
    width, so a single cell keeps its own t and tw.
    """
    newcell = dict()
    if 't' in cells:
        ca, cb = cells.iloc[0], cells.iloc[-1]
        tleft, tright = ca.t - ca.tw/2, cb.t + cb.tw/2
        # width is measured edge to edge, matching the center computed from the same edges
        newcell.update(dict(t=0.5*(tleft + tright), tw=tright - tleft))

    for col in ' n S B'.split():
        newcell[col] = cells[col].sum()
    newcell['w'] = np.concatenate(list(cells.w.values))
    return newcell

In [None]:
# export
def partition_cells(config, cells, edges):
    """ Partition a set of cells
     - config -- not used by this function
     - cells -- A DataFrame of cells
     - edges  -- a list of edge times delimiting boundaries between cells

    Returns a DataFrame of combined cells, with times and widths adjusted to account for missing cells

    Each combined cell has summed n, S, B, the mean of e, and the concatenated w.
    """
    # for each edge time, the index of the first cell whose center is not before it
    first = np.searchsorted(cells.t, edges)

    # Outer time limits of the cells falling in each partition: left edge of its
    # first cell, right edge of its last cell.
    head = cells.iloc[first[:-1]]
    t_lo = (head.t - head.tw/2).values
    tail = cells.iloc[first[1:] - 1]
    t_hi = (tail.t + tail.tw/2).values

    # Where cells are missing, adjacent partitions do not touch; put the shared
    # boundary halfway across the gap, and keep the two outermost limits as they are.
    shared = 0.5*(t_lo[1:] + t_hi[:-1])
    bounds = np.append(np.insert(shared, 0, t_lo[0]), t_hi[-1])

    def combine(k):
        """Merge the cells of partition k into a single cell dict."""
        rows = cells.iloc[first[k]:first[k+1]]
        left, right = bounds[k], bounds[k+1]
        merged = dict(t=0.5*(left+right), tw=right-left)
        for name in ('e', 'n', 'S', 'B'):
            merged[name] = rows[name].sum()
        # e is averaged over the merged cells rather than summed
        merged['e'] /= len(rows)
        merged['w'] = np.concatenate(list(rows.w.values))
        return merged

    return pd.DataFrame([combine(k) for k in range(len(first)-1)])

In [None]:
# Display the generated API documentation for partition_cells
show_doc(partition_cells)

<h4 id="partition_cells" class="doc_header"><code>partition_cells</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h4>

> <code>partition_cells</code>(**`config`**, **`cells`**, **`edges`**)

Partition a set of cells
 - cells -- A DataFrame of cells
 - edges  -- a list of edge times delimiting boundaries between cells
 
Returns a DataFrame of combined cells, with times and widths adjusted to account for missing cells

In [None]:
# hide
# Export the notebooks to the library modules; prints one "Converted ..." line per notebook
from nbdev.export import notebook2script
notebook2script()
!date

Converted 00_config.ipynb.
Converted 01_data_man.ipynb.
Converted 02_effective_area.ipynb.
Converted 03_weights.ipynb.
Converted 04_simulation.ipynb.
Converted 05_source_data.ipynb.
Converted 06_poisson.ipynb.
Converted 07_loglike.ipynb.
Converted 08_cell_data.ipynb.
Converted 09_lightcurve.ipynb.
Converted 14_bayesian.ipynb.
Converted 90-main.ipynb.
Converted 99_tutorial.ipynb.
Converted index.ipynb.
Mon May 10 17:22:19 PDT 2021
