Importing packages and setting parameters

In [None]:
# packages
import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd
import yaml, os, sys, glob, logging
# do not truncate the column list when a dataframe is displayed
pd.set_option("display.max_columns", None)

from IPython.display import display, HTML
# inject a CSS rule that sets the notebook ".container" width to 100%
display(HTML("<style>.container { width:100% !important; }</style>"))

# location of the scripts: it is put first in sys.path *before* the two
# imports below, so that the project-local modules `auxiliar` and
# `find_files` can be resolved from that folder
sys.path.insert(0, '/fefs/aswg/workspace/juan.jimenez/stereo_analysis/scripts')
import auxiliar as aux
import find_files as find
aux.params() # graphic parameters

# --- logging --- #
import logging
logger = logging.getLogger(__name__)
# Attach the stream handler only once: a notebook cell is usually executed
# several times, and adding a new handler on every run would make each log
# message appear once per execution of this cell.
if not logger.handlers:
    logger.addHandler(logging.StreamHandler())
logger.setLevel(logging.INFO)

# --- other parameters --- #
# name of the source we are studying
source_name = 'Crab'
# ------------------------ #

# input files (glob patterns, resolved when the data is read)
path_merged = f'/fefs/aswg/workspace/juan.jimenez/data/dl2/stereo_merged_{source_name}/dl2_merged_{source_name}_total.3tel.h5'
path_mean   = f'/fefs/aswg/workspace/juan.jimenez/data/dl2/stereo_mean/*{source_name}*total*.h5'
path_lst    = f'/fefs/aswg/workspace/juan.jimenez/data/dl2/lst/dl2_lst_{source_name}.h5'
path_magic  = f'/fefs/aswg/workspace/juan.jimenez/data/dl2/melibea/dl2_melibea_{source_name}.h5'

# folder where the filtered (coincident) files are written
output_path = '/fefs/aswg/workspace/juan.jimenez/data/dl2/coincident'

logger.info(f'Study of the source: {source_name}')

### Reading the data
* **total dl2**
* **mean_3tel**
* **Total lstchain analysis**
* **Total melibea analysis**

In [None]:
# reading the main files
def _resolve_single_file(pattern):
    """Return the path of the file matching the glob *pattern*.

    Raises FileNotFoundError (instead of an opaque IndexError) when nothing
    matches. When several files match, the alphabetically first one is used
    and a warning is logged, so the choice is deterministic and visible.
    """
    matches = sorted(glob.glob(pattern))
    if not matches:
        raise FileNotFoundError(f'No file found matching: {pattern}')
    if len(matches) > 1:
        logger.warning(f'{len(matches)} files match {pattern}, using {matches[0]}')
    return matches[0]


dir_merged = _resolve_single_file(path_merged)
dir_mean   = _resolve_single_file(path_mean)
dir_lst    = _resolve_single_file(path_lst)
dir_magic  = _resolve_single_file(path_magic)

# every file stores its event table under the same HDF5 key
df_merged =  pd.read_hdf(dir_merged, key='events/parameters')
df_mean   =  pd.read_hdf(dir_mean,   key='events/parameters')
df_lst    =  pd.read_hdf(dir_lst,    key='events/parameters')
df_magic  =  pd.read_hdf(dir_magic,  key='events/parameters')

# quick look at each table together with its size in memory
logger.info(f'The merged dl2 ({sys.getsizeof(df_merged)*1e-9:.1f}Gb):')
display(df_merged.head(5))
logger.info(f'\nThe mean-dl2 ({sys.getsizeof(df_mean)*1e-9:.1f}Gb):')
display(df_mean.head(5))
logger.info(f'\nThe lst-dl2 ({sys.getsizeof(df_lst)*1e-9:.1f}Gb):')
display(df_lst.head(5))
logger.info(f'\nThe melibea-dl2 ({sys.getsizeof(df_magic)*1e-9:.1f}Gb):')
display(df_magic.head(5))

## Now we find all the events contained in each dataset

In [None]:
# unique event identifiers present in the merged file
ids_merged_total = np.unique(df_merged['total_id'].to_numpy())
ids_merged_magic = np.unique(df_merged['magic_id'].to_numpy())

ids_mean = df_mean['total_id'].to_numpy()

initial_n = len(ids_mean)
logger.info(f'The total amount of events we have in which we have coincidence between all telescopes are {initial_n}.')

# checking that merged and mean datasets contain the same amount of events
if len(ids_mean) != len(ids_merged_total):
    logger.error('There is different amount of data in merged and mean files:')
    # each label reports the count of its own dataset
    logger.error(f'Mean data   --> {len(ids_mean)} events')
    logger.error(f'Merged data --> {len(ids_merged_total)} events')
    # non-zero status: the notebook stopped because a check failed
    sys.exit(1)

ids_magic = df_magic['magic_id'].to_numpy()
ids_lst   = df_lst['total_id'].to_numpy()

logger.info(f'\n\nThe amount of events in lst-lstchain file are {len(ids_lst)}, a {len(ids_lst)/len(ids_mean)*100:.2f}% of total.')
logger.info(f'\nThe amount of events in magic-melibea file are {len(ids_magic)}, a {len(ids_magic)/len(ids_mean)*100:.2f}% of total.')


# checking that all events in LST and melibea files are contained in the merged dataset
common_lst_ids_check = df_lst.query('total_id in @ids_merged_total')['total_id'].to_numpy()
if len(common_lst_ids_check) != len(ids_lst):
    logger.error('There is different amount of data in lst-lstchain and mean files:')
    logger.error(f'LST data        --> {len(ids_lst)} events')
    logger.error(f'Common LST data --> {len(common_lst_ids_check)} events')
    sys.exit(1)

common_magic_ids_check = df_magic.query('magic_id in @ids_merged_magic')['magic_id'].to_numpy()
if len(common_magic_ids_check) != len(ids_magic):
    # this check compares the melibea file against the merged file
    logger.error('There is different amount of data in magic-melibea and merged files:')
    logger.error(f'MAGIC data        --> {len(ids_magic)} events')
    logger.error(f'Common MAGIC data --> {len(common_magic_ids_check)} events')
    sys.exit(1)

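At this point we know which events are present in the lst-lstchain file, i.e. the total amount of data before requiring melibea. Next we keep only the merged events whose MAGIC id is also found in the melibea file and whose total id is found in the lst-lstchain file, and then apply the same selection to the other datasets.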

In [None]:
# keep only the merged rows whose MAGIC id is in the melibea file
# and whose total id is in the lst-lstchain file
df_merged.query('magic_id in @ids_magic and total_id in @ids_lst', inplace=True)

# identifiers of the events that survive the selection
absolute_ids = np.unique(df_merged['total_id'].to_numpy())
absolute_ids_magic = np.unique(df_merged['magic_id'].to_numpy())
final_n = len(absolute_ids)

surviving_percent = final_n / initial_n * 100
logger.info(f'After the coincidence filtering the total amount of events is {final_n}, i.e. {surviving_percent:.2f}% of initial total')

# apply the same event selection to the remaining dataframes
df_magic.query('magic_id in @absolute_ids_magic', inplace=True)
df_lst.query('total_id in @absolute_ids', inplace=True)
df_mean.query('total_id in @absolute_ids', inplace=True)

# the merged row count is divided by 3 before being reported next to the others
summary_lines = [
    'Checking events:',
    f'merged - {len(df_merged) // 3}',
    f'mean -   {len(df_mean)}',
    f'lst -    {len(df_lst)}',
    f'magic -  {len(df_magic)}',
]
logger.info('\n'.join(summary_lines))

### Overwriting the dataframes

In [None]:
# creating the output file folder
aux.createdir(output_path)

# --- overwriting the dataframes --- #
logger.info('Creating .h5 files...\n')

# file label -> filtered dataframe to store
coincident_tables = {
    'merged':  df_merged,
    'mean':    df_mean,
    'lst':     df_lst,
    'melibea': df_magic,
}

for label, table in coincident_tables.items():
    # mode='w' replaces any file left by a previous run; the default
    # mode='a' would reopen the old file and rewrite the key inside it
    table.to_hdf(
        os.path.join(output_path, f'dl2_{label}_{source_name}.h5'),
        key='events/parameters',
        mode='w',
    )