In [None]:
from nbdev import *
# default_exp source_data
%reload_ext autoreload
%autoreload 2

# Source Data analysis
> Analyze the selected data

### Overview

For a given point source and data selection, the class `SourceData` is the user interface to source-oriented data. It relies on the module `select_data` for real data, and on the module `simulation` for simulated data.


In [None]:
# export
import os, sys
import numpy as np
import pandas as pd
from wtlike.config import *
from wtlike.select_data import *
from wtlike.simulation import *

In [None]:
# export


class SourceData(object):
    """ Load the photon data near the source and associated exposure.

    Either from:
      1. `config.wtlike_data/'data_files'`, the Path to folder with list of pickle files
      2. the cache, with key `{source.filename}_data` unless `key` is specified
      3. a `Simulation` object, which is run to generate the photons and exposure

    * `source` : name, PointSource, or Simulation
    * `config` : basic configuration; a default `Config()` is created if not specified
    * `clear` : if set, overwrite the cached results
    * `week_range` : if not None, passed to `load_from_weekly_data`, bypassing the cache
    * `key` : the cache key; the default '' means `{source.filename}_data`

    Calculate the values for

    * S, B : sums of w and 1-w
    * exptot : total associated exposure
    * src_flux, bkg_flux : S and B divided by exptot
    * start, stop : first exposure start and last exposure stop
    """

    def __init__(self, source, config=None,  clear=False,
                 week_range=None, key=''):
        """Set up the photon and exposure tables for `source`; see the class docstring.

        Raises TypeError if `source` is not a name, PointSource or Simulation, and
        Exception if the data is neither cached nor available as data files.
        """

        self.config = config if config else Config()
        assert self.config.valid
        self.verbose = self.config.verbose
        self.simulated = False
        self.runs = None  # only replaced if the loader returns a third element

        ## source is either a name, a PointSource object, or a Simulation
        if isinstance(source, str):

            try:
                self.source = PointSource(source)
            except Exception as e:
                # report the reason on stderr, but let the caller see the exception
                print(f'{e}', file=sys.stderr)
                raise

            self.source_name = self.source.name

        elif isinstance(source, PointSource):
            self.source = source
            self.source_name = source.name

        elif isinstance(source, Simulation):
            self.simulated = True
            self.source = None
            self.source_name = source.name
            # can put this into cache
            source.run()
            self.photons = source.photons
            self.exposure = source.exposure

        else:
            # fail here, rather than with an obscure AttributeError further down
            raise TypeError(
                f'source must be a name, PointSource, or Simulation, not {type(source).__name__}')

        if self.source is not None:
            key = f'{self.source.filename}_data' if key=='' else key
            self.source.data_key = key
        else: # no cache for sim, yet
            key = None

        if not self.simulated:
            # either load from data, or from a cache: at least one must be available
            data_folder = self.config.wtlike_data/'data_files'
            if not os.path.isdir(data_folder) and key not in self.config.cache:
                raise Exception(f'Data for {self.source_name} is not cached, '
                                f'and config.wtlike_data/"data_files" is not a folder')

            if week_range is not None:
                # always load directly if weeks specified
                r = load_from_weekly_data(self.config, self.source, week_range=week_range)
            else:
                r = self.config.cache(key,
                            load_from_weekly_data, self.config, self.source, week_range=None,
                            overwrite=clear,
                            description=f'SourceData: photons and exposure for {self.source_name}')
            photons, self.exposure = r[:2]
            if len(r)==3:
                self.runs = r[2]
            # get the photon data with good weights, not NaN (maybe remove small weights, too)
            good = np.logical_not(np.isnan(photons.weight))
            self.photons = photons.loc[good]

        # make range of MJD or days available; positional access, since the
        # exposure index is not guaranteed to contain the label 0
        self.start = self.exposure.start.values[0]
        self.stop =  self.exposure.stop.values[-1]
        self.exptot = self.exposure.exp.sum()

        # estimates for signal and background counts in total exposure
        w = self.photons.weight
        self.S = np.sum(w)
        self.B = np.sum(1-w)
        # make the average rates available without having to call __repr__ first
        self.src_flux, self.bkg_flux = self.S/self.exptot,  self.B/self.exptot

        if self.verbose>0:
            # explicitly this class's summary, even if a subclass overrides __repr__
            print(SourceData.__repr__(self))

    def rates(self):
        """Print the average signal and background rates."""
        print(f'Average fluxes for {self.source_name}: signal {self.S/self.exptot:.2e}/s, background {self.B/self.exptot:.2e}/s')

    def __repr__(self):
        """Return a multi-line summary of the photon data, exposure, and rates.

        Also refreshes the attributes `src_flux` and `bkg_flux` from the current
        values of `S`, `B` and `exptot`.
        """
        time = self.photons.time.values

        exp = self.exposure
        days  = np.sum(exp.stop-exp.start); secs = days*24*3600
        exp_text = f' average flux {self.exptot/secs:.0f} cm^2 for {secs/1e6:.1f} Ms'

        if self.simulated:
            photon_text = f'simulated photons over {days:.1f} days.'
        elif len(time)>0:
            photon_text = f'photons from {UTC(time[0])[:10]} to {UTC(time[-1])[:10]}'
        else:
            # no photons: there is no date range to report
            photon_text = 'photons'

        r = f'SourceData: Source {self.source_name} with:'\
            f'\n\t data:     {len(self.photons):9,} {photon_text}'\
            f'\n\t exposure: {len(self.exposure):9,} intervals, {exp_text}'

        self.src_flux, self.bkg_flux = self.S/self.exptot,  self.B/self.exptot
        r+= f'\n\t rates:  source {self.src_flux:.2e}/s, background {self.bkg_flux:.2e}/s,'\
            f' S/N ratio {self.src_flux/self.bkg_flux:.2e}'

        return r

    def binned_exposure(self, time_edges):
        """Bin the exposure

        - time_edges: list of edges.
        """
        return binned_exposure(self.config, self.exposure,  time_edges)

    def binned_cos_theta(self, time_bins=None):
        """ Calculate average cosine of angle with respect to bore axis, per time bin

        - time_bins: bin edges; if None, use `get_default_bins(config, exposure)`

        Return a tuple (ct, time_bins), where ct is a Series of the mean `cos_theta`
        indexed by `tbin`, the `np.digitize` bin number of each exposure stop time.
        """
        if time_bins is None:
            time_bins = get_default_bins(self.config, self.exposure)
        # only the two needed columns: avoids copying, and averaging, the full exposure table
        df = pd.DataFrame(dict(
                tbin=np.digitize(self.exposure.stop.values, time_bins),
                cos_theta=self.exposure.cos_theta.values,
                ))
        ct = df.groupby('tbin')['cos_theta'].mean()
        return ct, time_bins

    def weight_histogram(self, nbins=1000, key=''):
        """ return a weight distribution

        - nbins: number of equal-width bins spanning the weight range 0 to 1
        - key: cache key; the default '' means `{source_name}_weight_hist`

        Return the array of counts per bin, obtained via `config.cache`.
        NOTE(review): the default key does not depend on nbins -- confirm that a
        cached histogram is not reused for a different binning.
        """
        def doit(nbins):
            return np.histogram(self.photons.weight.values, np.linspace(0,1,nbins+1))[0]

        key = f'{self.source_name}_weight_hist' if key=='' else key
        description = f'Weight histogram for {self.source_name}' if self.config.verbose>0 else ''
        return self.config.cache(key, doit, nbins, description=description)

    def plot_data(self):
        """Make histograms of the photon data.

        Time and weight for simulated data; time, radius squared, band index and
        weight otherwise.
        """
        import matplotlib.pyplot as plt
        if self.simulated:
            print('Simulated!')
            fig, (ax1, ax4) = plt.subplots(1,2, figsize=(8,4))
            ax1.hist(self.photons.time.values, 500, histtype='step');
            ax1.set(xlabel='Time (MJD)')

            ax4.hist(self.photons.weight, 100, histtype='step')
            ax4.set(xlabel='weight');

        else:
            fig, (ax1,ax2, ax3,ax4) = plt.subplots(1,4, figsize=(15,4))
            ax1.hist(self.photons.time.values, 100, histtype='step');
            ax1.set(xlabel='Time (MJD)')
            ax2.hist(self.photons.radius.values**2, 100, histtype='step', log=True);
            ax2.set(xlabel='Radius**2 (deg**2)', ylim=(100, None));

            ax3.hist(self.photons.band, 32, histtype='step', log=True);
            ax3.set(xlabel='Band index')
            ax4.hist(self.photons.weight, 100, histtype='step')
            ax4.set(xlabel='weight');

    def update_cache(self, **kwargs): #week_range=(-1,None), save=True):
        """Invoke the module-level function `update_cache` on this object, passing `kwargs`."""
        return update_cache(self, **kwargs)

In [None]:
#hide
# Demo: build a SourceData for a real point source; with verbose=1 the
# constructor prints the summary produced by SourceData.__repr__.
config=Config()
config.verbose=1
source = PointSource('PSR B1259-63') #'4FGL J1257.0-6339')
sd = SourceData( source,config)

# update_cache(sd,)

SourceData: photons and exposure for PSR B1259-63: Restoring from cache with key "PSR_B1259-63_data"
SourceData: Source PSR B1259-63 with:
	 data:       881,233 photons from 2008-08-04 to 2021-06-04
	 exposure: 3,368,542 intervals,  average flux 3478 cm^2 for 100.6 Ms
	 rates:  source 8.18e-09/s, background 2.51e-06/s, S/N ratio 3.26e-03


In [None]:
#hide
# Demo of the simulation path: SourceData runs the Simulation itself and
# takes its photons and exposure from it, with no cache involved.
sim = Simulation('test_sim', src_flux=1e-6, tstart=0, tstop=1, )
simsd = SourceData(sim)

generated 522 photons
SourceData: Source test_sim with:
	 data:           522 simulated photons over 1.0 days.
	 exposure:       288 intervals,  average flux 3000 cm^2 for 0.1 Ms
	 rates:  source 9.52e-07/s, background 1.06e-06/s, S/N ratio 8.97e-01


In [None]:
#hide
# config=Config(); config.verbose=2
# source = PointSource('4FGL J1257.0-6339', nickname='J1257')
# sd = SourceData(source, config=config, week_range=None, key=None)
#sd = SourceData('Geminga');


# b8 = sd.photons.query('band==12')
# plt.semilogy(b8.radius, b8.weight, '.');

In [None]:
# Render the documentation for the class and for its binned_exposure method.
show_doc(SourceData)
show_doc(SourceData.binned_exposure)

<h2 id="SourceData" class="doc_header"><code>class</code> <code>SourceData</code><a href="" class="source_link" style="float:right">[source]</a></h2>

> <code>SourceData</code>(**`source`**, **`config`**=*`None`*, **`clear`**=*`False`*, **`week_range`**=*`None`*, **`key`**=*`''`*)

Load the photon data near the source and associated exposure. 

Either from:
  1. `config.wtlike_data/'data_files'`, the Path to folder with list of pickle files
  2. the cache, with key `{source.name}_data`

* source : name, PointSource, or Simulation
* [`config`](/wtlikeconfig) : basic configuration
* `source` : PointSource object if specified
* `clear` : if set, overwrite the cached results

Calculate the values for

* S, B : sums of w and 1-w
* exptot : total associated exposure

<h4 id="SourceData.binned_exposure" class="doc_header"><code>SourceData.binned_exposure</code><a href="__main__.py#L124" class="source_link" style="float:right">[source]</a></h4>

> <code>SourceData.binned_exposure</code>(**`time_edges`**)

Bin the exposure

- time_bins: list of edges.  

In [None]:
#hide
# Convert the notebooks to library modules, then run the shell command
# `date` to record when the export was done.
from nbdev.export import notebook2script
notebook2script()
!date

Converted 00_config.ipynb.
Converted 01_data_man.ipynb.
Converted 02_effective_area.ipynb.
Converted 03_weights.ipynb.
Converted 04_exposure.ipynb.
Converted 04_select_data.ipynb.
Converted 04_simulation.ipynb.
Converted 05_source_data.ipynb.
Converted 06_poisson.ipynb.
Converted 07_loglike.ipynb.
Converted 08_cell_data.ipynb.
Converted 09_lightcurve.ipynb.
Converted 14_bayesian.ipynb.
Converted 90_main.ipynb.
Converted 99_tutorial.ipynb.
Converted index.ipynb.
Sun Jun  6 05:05:39 PDT 2021
