Import packages and setting parameters

In [1]:
# packages
import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd
import os, logging, sys, glob
from astropy import units as u
from lstchain.reco.utils import get_effective_time
pd.set_option("display.max_columns", None)

from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

# --- logging --- #
import logging
logger = logging.getLogger(__name__)
logger.addHandler(logging.StreamHandler())
logger.setLevel(logging.INFO)

# location of the scripts
sys.path.insert(0, '/fefs/aswg/workspace/juan.jimenez/stereo_analysis/scripts')
import auxiliar as aux
import geometry as geom
aux.params()

# --- parameters --- #
# name of the studied source
source_name = 'BLLac'


# tablename to open lst hdf files
tablename = '/dl2/event/telescope/parameters/LST_LSTCam'
# columns that are extracted
columns = ['obs_id', 'event_id', 'intensity', 'x', 'y', 'r', 'phi', 'length', 'width', 'psi', 'time_gradient', 'intercept',
           'alt_tel', 'az_tel', 'dragon_time', 'delta_t', 'reco_energy', 'reco_alt', 'reco_az', 'gammaness', 'reco_src_x',
           'reco_src_y', 'reco_disp_norm']
# ------------------ #

# --- file paths --- #
root_path = '/fefs/aswg/workspace/juan.jimenez/data'

common_data_path = f'/fefs/aswg/workspace/juan.jimenez/stereo_analysis/config_files/common_data{source_name}.txt'

stereo_mean_path = f'{root_path}/dl2/stereo_mean/dl2_mean_{source_name}_total.3tel.h5'

lst_dl2_path     = f'/fefs/aswg/workspace/abelardo.moralejo/jjimenez_master/Crab_LST1/DL2/dl2_*.h5'
lst_out_path     = f'{root_path}/dl2/lst/'
# ------------------ #

# --- calibration parameters --- #
nominal_focal = 28       * u.m
focal         = 29.30565 * u.m
aberration_correction = focal / nominal_focal

logger.info(f'Study of the source: {source_name}')
logger.info(f'\nAll data taken from the path: {lst_dl2_path}')
logger.info(f'\nReference stereo data taken from the file: {stereo_mean_path}')

Study of the source: BLLac

All data taken from the path: /fefs/aswg/workspace/abelardo.moralejo/jjimenez_master/Crab_LST1/DL2/dl2_*.h5

Reference stereo data taken from the file: /fefs/aswg/workspace/juan.jimenez/data/dl2/stereo_mean/dl2_mean_BLLac_total.3tel.h5


First of all we find the files and find the streo events to discard the non-stereo events from the LST dataset

In [2]:
# --- reading dataset --- #
dataset = glob.glob(lst_dl2_path)
dataset.sort()
logger.info(f'Found {len(dataset)} files in {lst_dl2_path}')

# --- lst runs we want --- #
# first of all we can read the common data file
logger.info(f'Opening the file...\n{common_data_path}\n')

# saving the same-night runs comparing LST runs with all MAGIC
jobs_list = np.loadtxt(common_data_path, dtype='str')
logger.info(f'Found {len(jobs_list)} runs\n')

# extracting all LST runs
lst_runs = []
for job in jobs_list:
    midindex = job.find('-')
    lst_runs.append(int(job[:midindex]))
    
logger.info(f'Opening the file...\n{stereo_mean_path}')
df_mean = pd.read_hdf(stereo_mean_path)
lst_events = df_mean['total_id'].to_numpy()

Found 68 files in /fefs/aswg/workspace/abelardo.moralejo/jjimenez_master/Crab_LST1/DL2/dl2_*.h5
Opening the file...
/fefs/aswg/workspace/juan.jimenez/stereo_analysis/config_files/common_dataBLLac.txt

Found 27 runs

Opening the file...
/fefs/aswg/workspace/juan.jimenez/data/dl2/stereo_mean/dl2_mean_BLLac_total.3tel.h5


Now we iterate over all files given in the path, and after we join them. Finally the data is stored.

In [3]:
# --- iterating over all files --- #
logger.info(f'\nIterating over all filenames {len(lst_runs)} runs\n')

table, t_eff, t_elapsed = [], 0, 0

for file, i in zip(dataset, range(len(dataset))):

    if int(file[-8:-3]) in lst_runs:
        
        logger.info(f'Reading {file}, ({i/len(dataset)*100:.1f}%)')
        # temporal table with only the requested columns
        tb = pd.read_hdf(file, tablename)[columns]
        
        obs_id_array   = tb['obs_id'].to_numpy().astype(str)
        event_id_array = tb['event_id'].to_numpy().astype(str)
        tb.loc[:,'total_id'] = np.char.add(obs_id_array, np.char.add('.',event_id_array))
        
        tb = tb.query(f'total_id in @lst_events', inplace=False)
    
        if len(tb) > 0:
            # getting the times of the events
            lt, et = get_effective_time(tb)
            t_eff     += lt
            t_elapsed += et

            # appending to main dataframes
            logger.info(f'Coincidences for run {int(file[-8:-3])} are {len(tb)}')
            table.append(tb)
        else:
            logger.info(f'No coincidences for run {int(file[-8:-3])}')

logger.info(f'Completed (100%)\n\n')

# concatenating the dataframe
table  = pd.concat(table)

# --- create .h5 file --- #
logger.info(f'\nCreating .h5 files and storing in {lst_out_path}')
table.to_hdf(os.path.join(lst_out_path, f'dl2_lst_{source_name}.h5'), key='events/parameters')

# displaying an example of few events
display(table.head(5))


Iterating over all filenames 27 runs

Completed (100%)




ValueError: No objects to concatenate

In the case the dataframe already exists and you only want to read it:

In [None]:
# --- reading in the case it is already created --- #
table = pd.read_hdf(os.path.join(lst_out_path, f'dl2_lst_{source_name}.h5'), key='events/parameters')

# and printing an example of both dataframes
logger.info(f'Total dataframe for LST events processed with lstchain: dl2_lst_{source_name}.h5')
logger.info(f'A total of {int(len(table))} events found:')
display(table.head(5))