Importing packages and setting parameters

In [None]:
# --- packages --- #
# numerical / plotting / dataframe stack, plus `uproot` to read the MAGIC .root files
import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd
import os, sys, glob
import uproot
pd.set_option("display.max_columns", None) # to show all columns when displaying dataframes

from IPython.display import display, HTML
# inject a CSS rule setting `width:100%` on elements of class `.container`
# NOTE(review): presumably meant to widen the notebook cells; whether the selector
# matches anything depends on the notebook front-end in use
display(HTML("<style>.container { width:100% !important; }</style>"))

# --- logging --- #
import logging
logger = logging.getLogger(__name__)
# `logging.getLogger` hands back the same logger object on every call, so
# re-running this cell would stack one more StreamHandler each time and every
# message would be printed repeatedly; attach the handler only once
if not logger.handlers:
    logger.addHandler(logging.StreamHandler())
logger.setLevel(logging.INFO)

# --- scripts paths --- #
# directory holding the project helper modules imported below
scripts_dir = '/fefs/aswg/workspace/juan.jimenez/stereo_analysis/scripts'
# only add it once, so re-running this cell does not pile up duplicate entries
if scripts_dir not in sys.path:
    sys.path.insert(0, scripts_dir)
import auxiliar as aux
import find_files
# NOTE(review): `aux.params()` is defined outside this notebook; presumably it
# applies global (plotting) settings -- confirm in `auxiliar.py`
aux.params()

# --- other parameters --- #
# name of the source we are studying
source_name = 'BLLac'
# ------------------------ #

# --- file paths --- #
# common data file
# NOTE(review): unlike the other paths below there is no `_` between
# `common_data` and the source name -- confirm this matches the file on disk
common_data = f'/fefs/aswg/workspace/juan.jimenez/stereo_analysis/config_files/common_data{source_name}.txt'
# output directory for melibea files
output_dir = '/fefs/aswg/workspace/juan.jimenez/data/dl2/melibea'
# merged dl2 file that contains the coincident event ids
path_merged = f'/fefs/aswg/workspace/juan.jimenez/data/dl2/stereo_merged_{source_name}/dl2_merged_{source_name}_total.3tel.h5'
# ------------------ #

logger.info(f'Study of the source: {source_name}')

## Extracting `obs_ids` we are interested in

In [None]:
def _magic_run_from_path(path):
    """Return the 8-digit MAGIC run number embedded in a melibea file path.

    The number is read at a fixed offset from one of two directory markers:
    29 characters after the start of '/Melibea/', or, when that marker is
    absent, 17 characters after the start of '/mars_q/'.
    NOTE(review): the skipped characters are presumably a dated sub-directory
    and/or a date prefix of the file name -- confirm against `find_files`.

    Raises
    ------
    ValueError
        If neither marker is present (previously the path was silently sliced
        from a meaningless offset).
    """
    index = path.find('/Melibea/')
    if index != -1:
        start = index + 29
    else:
        index_alt = path.find('/mars_q/')
        if index_alt == -1:
            raise ValueError(f"Cannot locate the run number: neither '/Melibea/' nor '/mars_q/' found in {path}")
        start = index_alt + 8 + 9
    return int(path[start:start + 8])


# extracting all observation ids
# each entry is read as '<LST run>-<MAGIC run>[,<MAGIC run>...]';
# `ndmin=1` keeps a 1-D array even for a one-line file (otherwise `np.loadtxt`
# returns a 0-d array that cannot be iterated)
run_strings = np.loadtxt(common_data, dtype='str', ndmin=1)

run_id_LST   = [int(entry.split('-')[0]) for entry in run_strings]
# sorted, de-duplicated MAGIC run numbers gathered from all the entries
run_id_MAGIC = np.unique([int(run) for entry in run_strings for run in str(entry.split('-')[1]).split(',')])

# finding .root directories
files_MAGIC = find_files.find_MAGIC_melibea(run_id_MAGIC)

# run number of each file found, in the same order as `files_MAGIC`
runs_MAGIC = [_magic_run_from_path(file) for file in files_MAGIC]

logger.info(f'\nSelected {len(runs_MAGIC)} runs')

We also extract the event ids we want. We are only interested in the coincident ones.

In [None]:
# locate the merged dl2 file; fail with an explicit message instead of an opaque
# IndexError when the pattern matches nothing
matches_merged = glob.glob(path_merged)
if not matches_merged:
    raise FileNotFoundError(f'No merged dl2 file found at: {path_merged}')
dir_merged = matches_merged[0]

# table with the event parameters of the merged file
df_merged = pd.read_hdf(dir_merged, key='events/parameters')

logger.info(f'The merged dl2 ({sys.getsizeof(df_merged)*1e-9:.1f}Gb):')
display(df_merged.head(5))

# unique ids of the events present in the merged file, used later to filter melibea
magic_ids = np.unique(df_merged['magic_id'].to_numpy())
logger.info(f'\nThe amount of events available for stereo analysis is {len(magic_ids)}\n')

# Extracting the data we want from `melibea`

In [None]:
# Names of the melibea branches read for every run, grouped by quantity.

# stereo event number (container `MRawEvtHeader_1`)
event_id = 'MRawEvtHeader_1.fStereoEvtNumber'

# image size (intensity), one branch per telescope
intensity_M1, intensity_M2 = 'MHillas_1.fSize', 'MHillas_2.fSize'

# source position in the camera (X, Y), one pair per telescope
src_pos_M1_X, src_pos_M1_Y = 'MSrcPosCam_1.fX', 'MSrcPosCam_1.fY'
src_pos_M2_X, src_pos_M2_Y = 'MSrcPosCam_2.fX', 'MSrcPosCam_2.fY'

# impact parameter and its azimuth, one pair per telescope
imp_par_1, imp_par_1_Az = 'MStereoParDisp.fM1Impact', 'MStereoParDisp.fM1ImpactAz'
imp_par_2, imp_par_2_Az = 'MStereoParDisp.fM2Impact', 'MStereoParDisp.fM2ImpactAz'

# stereo-reconstructed direction (X, Y) from `MStereoParDisp`
st_dirX, st_dirY = 'MStereoParDisp.fDirectionX', 'MStereoParDisp.fDirectionY'

# pointing position (container `MPointingPos_1`): RA/Dec and Zd/Az
# note that `pointing_alt` names the `fZd` branch
pointing_ra, pointing_dec = 'MPointingPos_1.fRa', 'MPointingPos_1.fDec'
pointing_alt, pointing_az = 'MPointingPos_1.fZd', 'MPointingPos_1.fAz'

# estimated energy and hadronness
reco_energy, hadroness = 'MEnergyEst.fEnergy', 'MHadronness.fHadronness'

# reconstructed Dec and RA coordinates
reco_dec, reco_ra = 'MStereoParDisp.fDirectionDec', 'MStereoParDisp.fDirectionRA'

# clock counter and time difference of the event
# NOTE(review): the original note stated the timestamp is in ns -- unverified here
timestamp, delta_t = 'MRawEvtHeader_1.fClockCounter', 'MRawEvtHeader_1.fTimeDiff'

# maximum height and the `fDelta` Hillas parameter of each telescope
h_max = 'MStereoParDisp.fMaxHeight'
slope_1, slope_2 = 'MHillas_1.fDelta', 'MHillas_2.fDelta'

# creating the dataframe with the needed modifications of variables, standardizing everything

# (melibea branch, dataframe column) pairs; identical for every file, so they are
# built once instead of on every iteration. The hadronness branch is stored under
# the column name 'gammaness' because it is converted right after loading.
branch_to_column = [
    (event_id,     'event_id'),
    (intensity_M1, 'intensity_M1'),
    (intensity_M2, 'intensity_M2'),
    (src_pos_M1_X, 'src_pos_M1_X'),
    (src_pos_M1_Y, 'src_pos_M1_Y'),
    (src_pos_M2_X, 'src_pos_M2_X'),
    (src_pos_M2_Y, 'src_pos_M2_Y'),
    (imp_par_1,    'imp_par_1'),
    (imp_par_1_Az, 'imp_par_1_Az'),
    (imp_par_2,    'imp_par_2'),
    (imp_par_2_Az, 'imp_par_2_Az'),
    (st_dirX,      'st_dirX'),
    (st_dirY,      'st_dirY'),
    (reco_energy,  'reco_energy'),
    (hadroness,    'gammaness'),
    (reco_dec,     'reco_dec'),
    (reco_ra,      'reco_ra'),
    (pointing_alt, 'pointing_alt'),
    (pointing_az,  'pointing_az'),
    (h_max,        'h_max'),
    (slope_1,      'slope_1'),
    (slope_2,      'slope_2'),
    (pointing_ra,  'pointing_ra'),
    (pointing_dec, 'pointing_dec'),
    (timestamp,    'timestamp'),
    (delta_t,      'delta_t'),
]
dataL = [branch for branch, _ in branch_to_column]
names = [column for _, column in branch_to_column]

melibea_runs_df = []
# `runs_MAGIC` was built with one entry per element of `files_MAGIC`, in the same
# order, so the two lists can be walked together
for file, obs_id in zip(files_MAGIC, runs_MAGIC):

    # opening the file with uproot; the context manager closes it as soon as the
    # branches have been copied into memory
    with uproot.open(file) as myFile:

        # the tree is taken as the third key of the file (its name changes in each run)
        # NOTE(review): relies on the key order inside the file -- confirm it is stable
        root_label = myFile.keys()[2]
        tree = myFile[root_label]

        # data matrix, one row per branch
        data = np.array([np.array(tree[d].array()) for d in dataL])

    # create the dataframe, one column per branch
    df = pd.DataFrame(data.T, columns=names)

    # add obs_ids
    df['obs_id'] = obs_id

    # convert to int the event id's
    df['event_id'] = df['event_id'].astype('int')

    # convert hadronness to gammaness
    df['gammaness'] = 1. - df['gammaness']

    # convert the energy to TeV
    # NOTE(review): the factor 1e-3 corresponds to GeV -> TeV, while the original
    # comment said MeV -- confirm the unit stored in `MEnergyEst.fEnergy`
    df['reco_energy'] = df['reco_energy'].to_numpy() * 1e-3

    # converting right ascension from hours to degrees
    df['reco_ra']     = df['reco_ra'] * 15
    df['pointing_ra'] = df['pointing_ra'] * 15
    # the branch read is the zenith distance; altitude = 90 - Zd
    df['pointing_alt'] = 90 - df['pointing_alt']

    # adding a magic total id of the form '<obs_id>.<event_id>'
    df['magic_id'] = np.char.add(df['obs_id'].to_numpy().astype(str), np.char.add('.', df['event_id'].to_numpy().astype(str)))

    # counting the non-reconstructed events: rows whose energy is exactly -0.001
    # after the scaling above
    nonreco = len(df.query('reco_energy == -0.001'))
    # an empty file must not abort the loop with a ZeroDivisionError
    nonreco_percent = 100 * nonreco / len(df) if len(df) else 0.0

    # keep the path from its last '/' on (the whole path when there is none)
    fname_index = max(file.rfind('/'), 0)
    logger.info(f'For {file[fname_index:]} found {nonreco} non-reconstructed events ({nonreco_percent:.2f}%)')

    melibea_runs_df.append(df)

In [None]:
# Merge the per-run dataframes into a single one, keep only the coincident events
# (rows whose `magic_id` is listed in `magic_ids`) and index/sort the result by
# (obs_id, event_id).
# NOTE: `melibea_runs_df` goes from a list of dataframes to a single dataframe
# here, so the previous cell has to be re-run before running this one again.
melibea_runs_df = (
    pd.concat(melibea_runs_df)
      .query('magic_id in @magic_ids')
      .set_index(['obs_id', 'event_id'])
      .sort_index()
)

# make sure the output directory exists and build the output file name
aux.createdir(output_dir)
fname = os.path.join(output_dir, f'dl2_melibea_{source_name}.h5')

# store the table as .h5 and report where it went
melibea_runs_df.to_hdf(fname, key='/events/parameters')
logger.info(f'A total dataframe of {len(melibea_runs_df)} events is created at:\n--> {fname}')

# show the first rows of the final dataframe
display(melibea_runs_df.head(5))

## Check the different keys we have inside `melibea.root` files

In [None]:
# opening the first melibea file with uproot; the context manager closes it again
# once the key names have been read
with uproot.open(files_MAGIC[0]) as myFile:
    # extracting primary key (third key of the file), that changes in each dataset
    primary_key = myFile.keys()[2]

    # collecting all the keys stored under the primary key
    all_keys = myFile[primary_key].keys()

# and showing them
all_keys