In [None]:
# default_exp photon_data
%load_ext autoreload
%autoreload 2
from nbdev.showdoc import show_doc

# Photon Data

> Load timed photon data

Loads the photon data around a source, subject to a GTI.

The function `get_photon_data` loads a dataset of photons in a cone about a specified source, with time, position and energy info, the latter two binned.

These are retrieved from a Parquet dataset partitioned by month; the start time of each month is read from a pickled dict.

In [None]:
#hide

from nbdev.showdoc import *

In [None]:
#export
import os, sys
import healpy
import pickle
import pandas as pd
import numpy as np
import pyarrow.parquet as pq
from wtlike.config import *
from wtlike.load_gti import get_gti

In [None]:
#export
def _load_photon_data(config, table, tstart,
                      conepix, gti, center, band_limits, radius,
                      nest=True):
    """For a given month table, select photons in cone and GTI, add tstart to times,
    return DataFrame with band, time, pixel, radius

    Parameters:

    - `config` -- configuration object; only `config.nside` is used here
    - `table` -- table for one month, with columns `nest_index`, `time` and `band`
    - `tstart` -- offset added to the `time` column before the conversion with `MJD`
    - `conepix` -- pixel indices covering the cone
    - `gti` -- callable applied to the array of MJD times; its result is used as a
      mask selecting the accepted photons
    - `center` -- 3-vector of the cone center (the distance formula assumes unit length)
    - `band_limits` -- `None`, or a pair `(lo, hi)`: only rows with `lo < band < hi` are kept
    - `radius` -- cone radius in degrees; only rows with `radius` below it are kept
    - `nest` [True] -- pixel ordering passed to healpy; must be True for the shifted
      preselection

    Returns a DataFrame with columns `band time pixel radius`, which is empty
    if nothing passes the selections.
    """
    allpix = np.array(table.column('nest_index'))

    def cone_select(allpix, conepix, shift=None):
        """Fast cone selection using NEST and shift

        With a shift, indices are compared after dropping the `shift` low-order bits,
        i.e. in groups of 2**shift consecutive NEST indices, so the result is a
        superset of the exact selection.
        """
        if shift is None:
            return np.isin(allpix, conepix)
        assert nest, 'Expect pixels to use NEST indexing'
        a = np.right_shift(allpix, shift)
        c = np.unique(np.right_shift(conepix, shift))
        return np.isin(a,c)

    # a selection of all those in an outer cone (coarse: trimmed by the radius cut below)
    incone = cone_select(allpix, conepix, 13)

    # times: convert to double, add to start, convert to MJD
    time = MJD(np.array(table['time'],float)[incone]+tstart)
    in_gti = gti(time)
    if np.sum(in_gti)==0:
        # the month number is not known here, so identify the table by its start time
        print(f'WARNING: no photons in GTI for month with tstart={tstart}!', file=sys.stderr)

    pixincone = allpix[incone][in_gti]

    # distance from center for all accepted photons
    ll,bb = healpy.pix2ang(config.nside, pixincone,  nest=nest, lonlat=True)
    cart = lambda l,b: healpy.dir2vec(l,b, lonlat=True)
    # chord length sqrt(2*(1-cos(theta))) as the small-angle separation, in degrees;
    # clamp at zero so that rounding of the dot product cannot produce NaN
    one_minus_cos = np.clip(1.-np.dot(center, cart(ll,bb)), 0, None)
    t2 = np.degrees(np.array(np.sqrt(one_minus_cos*2), np.float32))

    # assemble the DataFrame, remove those outside the radius
    out_df = pd.DataFrame(np.rec.fromarrays(
        [np.array(table['band'])[incone][in_gti], time[in_gti], pixincone, t2],
        names='band time pixel radius'.split()))

    # apply final selection for radius and energy range

    if band_limits is None: return out_df.query(f'radius<{radius}')

    return out_df.query(f'radius<{radius} & {band_limits[0]} < band < {band_limits[1]}')

In [None]:
#export
def _get_photons(config, source, nest=True):
    """Load the photons in a cone about `source` from the monthly-partitioned dataset.

    Parameters:

    - `config` -- configuration object; uses `radius`, `nside`, `energy_edges`,
      `energy_range`, `files.data` and `verbose`
    - `source` -- object with attributes `l` and `b`, passed to healpy with `lonlat=True`
    - `nest` [True] -- pixel ordering used for the cone pixels

    Returns a DataFrame, the concatenation of the per-month selections.

    Raises `AssertionError` if no month produced any data.
    """
    # check GTI
    gti = get_gti(config)

    # cone geometry stuff: get corresponding pixels and center vector
    l,b,radius = source.l, source.b, config.radius
    center = healpy.dir2vec(l,b, lonlat=True)
    conepix = healpy.query_disc(config.nside, center, np.radians(radius), nest=nest)

    # band limits from the geometric centers of the energy bins
    # NOTE(review): the factor 2 presumably reflects two bands per energy bin -- confirm
    ebins = config.energy_edges
    ecenters = np.sqrt(ebins[:-1]*ebins[1:])
    band_limits = 2*np.searchsorted(ecenters, config.energy_range) if config.energy_range is not None else None

    # get the monthly-partitioned dataset and tstart values
    datapath = config.files.data
    dataset = datapath/'dataset'
    # NOTE: pickle must only be used on trusted files; close the file promptly
    with open(datapath/'tstart.pkl', 'rb') as inp:
        tstart_dict = pickle.load(inp)

    if config.verbose>0:
        print(f'Loading  {len(tstart_dict)} months from Arrow dataset {dataset}\n', end='')

    dflist=[]
    for month, tstart in tstart_dict.items():
        # read only the partition for this month
        table= pq.read_table(dataset, filters=[f'month == {month}'.split()])

        d = _load_photon_data(config, table, tstart,
                              conepix, gti, center, band_limits, radius,
                              nest)
        # progress marks: '.' for a month with a result, 'x' for none
        if d is not None:
            dflist.append(d)
            if config.verbose>1: print('.', end='')
        else:
            if config.verbose>1: print('x', end='')

    assert len(dflist)>0, '\nNo photon data found?'
    df = pd.concat(dflist, ignore_index=True)
    return df

In [None]:
#export
def get_photon_data(config: 'configuration data',
                    source: 'Source data',
                    key='',
                    ):
    """
    Parameters:

    - `config` -- configuration object; uses `verbose`, `cache`, `radius`,
      `energy_range` and `energy_edges`
    - `source` -- `PointSource` object
    - `key` [''] cache key -- default, use "photons_name", set to None to ignore cache

    Steps:
    -  Read photon data from a Parquet dataset,
    -  select cone around the source
    -  return DataFrame with columns `band time pixel radius`

    If `config.verbose>0`, a summary of the selection is printed.
    """

    key = f'photons_{source.name}' if key=='' else key

    if config.verbose>0 and key is not None:
        print(f'Photon data: {"Saving to" if key not in config.cache else "Restoring from"} cache with key "{key}"')

    # the cache either restores the DataFrame or calls _get_photons(config, source)
    df = config.cache(key, _get_photons, config, source)

    if config.verbose>0:
        # same test as used for the selection itself: None means the full range
        if config.energy_range is not None:
            emin,emax = config.energy_range
        else:
            emin,emax = config.energy_edges[0],config.energy_edges[-1]
        print(f'\n\tSelected {len(df):,} photons within {config.radius}'\
              f' deg of  ({source.l:.2f},{source.b:.2f})')
        print(f'\tEnergies: {emin:.1f}-{emax:.0f} MeV')
        # the dates come from the first and last rows, so skip them for an empty table
        if len(df)>0:
            ta,tb = df['time'].iloc[0], df['time'].iloc[-1]
            print(f'\tDates:    {UTC(ta):16} - {UTC(tb)}'
                f'\n\tMJD  :    {ta:<16.1f} - {tb:<16.1f}')

    return df

In [None]:
# render the signature and docstring of get_photon_data
show_doc(get_photon_data)

<h4 id="get_photon_data" class="doc_header"><code>get_photon_data</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h4>

> <code>get_photon_data</code>(**`config`**:`configuration data`, **`source`**:`Source data`, **`key`**=*`''`*)

Parameters:

- `source` -- [`PointSource`](/wtlike/config.html#PointSource) object
- `key` [''] cache key -- default, use "photons_name", set to None to ignore cache 

Steps:
-  Read photon data from a Parquet dataset, 
-  select cone around the source
-  use exposure to add exposures
-  return DataFrame with columns `band time pixel radius`

### Test reading from a dataset

In [None]:


# Smoke test: load the photons around Geminga, bypassing the cache (key=None).
# It only runs when the configuration reports itself as valid.
config = Config(verbose=3)
if not config.valid:
    print('Not testing since no files.')
else:
    source = PointSource('Geminga')
    print(f'Test loading a photon data set, for source {source.name}')
    photon_data = get_photon_data(config, source, key=None)
    # show the size and the first few rows only
    print(f'Head of the table, length {len(photon_data):,}:\n{photon_data.head()}')

Test loading a photon data set, for source Geminga
Loading  132 months from Arrow dataset /home/burnett/data/dataset
....................................................................................................................................
	Selected 1,313,726 photons within 5 deg of  (195.13,4.27)
	Energies: 100.0-1000000 MeV
	Dates:    2008-08-04 15:46 - 2019-08-03 01:17
	MJD  :    54682.7          - 58698.1         
Head of the table, length 1,313,726:
   band          time    pixel    radius
0     6  54682.657022  6738278  0.698381
1     3  54682.657934  6761152  2.498099
2     4  54682.658637  6739138  0.290310
3     1  54682.658760  6714890  3.276757
4    11  54682.659099  6736721  4.899003


#### Parquet conversion code

Copied here, needs integration. 
Don't know how to make more of those "time_info" files, but the data come from the binned FITS files. 
Need to modify this to read from the FITS files

In [None]:
# #hide
# #####################################################################################
# #    Parquet code -- TODO just copied, need to test.
# #           from notebooks/code development/parquet_writer.ipynb

# class ParquetConversion(object):
#     import glob, pickle;
#     import healpy
#     import pyarrow as pa
#     import pyarrow.parquet as pq
    
#     def __init__(self, 
#                  data_file_pattern ='$FERMI/data/P8_P305/time_info/month_*.pkl',
#             dataset = '/nfs/farm/g/glast/u/burnett/analysis/lat_timing/data/photon_dataset'):

#         self.files = sorted(glob.glob(os.path.expandvars(data_file_pattern)));
#         print(f'Found {len(self.files)} monthly files with pattern {data_file_pattern}'\
#              f'\nWill store parquet files here: {dataset}')
#         if os.path.exists(dataset):
#             print(f'Dataset folder {dataset} exists')
#         else:
#             os.makedirs(dataset)
#         self.dataset=dataset
            
#     def convert_all(self):
#         files=self.files
#         dataset=self.dataset
#         nside=1024
    
#         def convert(month):

#             infile = files[month-1]
#             print(month, end=',')
#             #print(f'Reading file {os.path.split(infile)[-1]} size {os.path.getsize(infile):,}' )   

#             with open(infile, 'rb') as inp:
#                 t = pickle.load(inp,encoding='latin1')

#             # convert to DataFrame, add month index as new column for partition, then make a Table
#             df = pd.DataFrame(t['timerec'])
#             tstart = t['tstart']
#             df['month']= np.uint8(month)
#             # add a column with nest indexing -- makes the ring redundant, may remove later
#             df['nest_index'] = healpy.ring2nest(nside, df.hpindex).astype(np.int32)
#             table = pa.Table.from_pandas(df, preserve_index=False)

#             # add to partitioned dataset
#             pq.write_to_dataset(table, root_path=dataset, partition_cols=['month'] )

#         for i in range(len(files)):
#             convert(month=i+1)


In [None]:
#hide
from nbdev.export import notebook2script
notebook2script()
!date

Converted 00_config.ipynb.
Converted 01_effective_area.ipynb.
Converted 02_gti.ipynb.
Converted 03_exposure.ipynb.
Converted 04_photon_data.ipynb.
Converted 05_weights.ipynb.
Converted 06_poisson.ipynb.
Converted 07_cells.ipynb.
Converted 08_loglike.ipynb.
Converted 09_lightcurve.ipynb.
Converted 10_simulation.ipynb.
Sat Dec 26 14:37:51 PST 2020
