Importing packages and setting parameters

In [1]:
# --- packages --- #
# standard library
import glob
import os
import sys

# third-party
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import uproot
from IPython.display import HTML, display

# show every column when a dataframe is displayed
pd.set_option("display.max_columns", None)

# stretch the notebook container to the full width of the page
display(HTML("<style>.container { width:100% !important; }</style>"))

# --- logging --- #
import logging
logger = logging.getLogger(__name__)
# attach the stream handler only once: logging.getLogger returns the same
# logger object on every call, so re-running this cell would otherwise stack
# handlers and print every message several times
if not logger.handlers:
    logger.addHandler(logging.StreamHandler())
logger.setLevel(logging.INFO)

# --- project scripts --- #
# the helper modules are imported from this directory, so it is placed at the
# front of the module search path first
sys.path.insert(0, '/fefs/aswg/workspace/juan.jimenez/stereo_analysis/scripts')
import auxiliar as aux
import find_files
# defined in auxiliar (not shown in this notebook)
aux.params()

# --- analysis parameters --- #
# source under study; also used below to build the merged-file path
source_name = 'BLLac'
# --------------------------- #

# --- input / output locations --- #
# directory where the melibea dl2 output is written
output_dir = '/fefs/aswg/workspace/juan.jimenez/data/dl2/melibea'
# common data file (read later to obtain the run ids)
common_data = '/fefs/aswg/workspace/juan.jimenez/stereo_analysis/config_files/common_data.txt'
# total merged file holding the coincident event ids of this source
path_merged = f'/fefs/aswg/workspace/juan.jimenez/data/dl2/stereo_merged_{source_name}/dl2_merged_{source_name}_total.3tel.h5'
# -------------------------------- #

logger.info(f'Study of the source: {source_name}')

Study of the source: BLLac


## Extracting the `obs_ids` we are interested in

In [2]:
# extracting all observation ids; every entry of the common data file is parsed
# as '<LST run>-<MAGIC run>[,<MAGIC run>...]'.
# ndmin=1 keeps a one-entry file iterable (np.loadtxt would otherwise return a
# 0-d array that cannot be looped over)
run_strings = np.loadtxt(common_data, dtype='str', ndmin=1)

run_id_LST   = [int(entry.split('-')[0]) for entry in run_strings]
run_id_MAGIC = [str(entry.split('-')[1]) for entry in run_strings]
# flatten the comma-separated MAGIC runs; np.unique drops repeats and sorts
run_id_MAGIC = np.unique([int(run) for runs in run_id_MAGIC for run in runs.split(',')])

# finding .root directories
files_MAGIC = find_files.find_MAGIC_melibea(run_id_MAGIC)

# run number of each file, read from the file name itself
# ('<date>_<8-digit run>_...'). This replaces fixed character offsets counted
# from '/Melibea/' or '/mars_q/', which silently read a wrong slice when
# neither folder name is part of the path.
runs_MAGIC = []
for file in files_MAGIC:
    name_fields = os.path.basename(file).split('_')
    run_field = name_fields[1][:8] if len(name_fields) > 1 else ''

    if not run_field.isdigit():
        raise ValueError(f'Cannot extract the MAGIC run number from {file}')

    runs_MAGIC.append(int(run_field))

Main melibea files root folder is /fefs/onsite/common/MAGIC/data/ST/event/Melibea
And other apart analysed runs in:
--> /fefs/aswg/workspace/julian.sitarek/analiza/Crab_2021/an6/data/mars_q/*
--> /fefs/aswg/workspace/julian.sitarek/analiza/Crab_2022/mars_q/*

Found 56 files already analysed

Finding all runs...

Selecting the paths to the input obs_ids...

For the MAGIC run 05098874 no files found
For the MAGIC run 05098875 no files found
For the MAGIC run 05098876 no files found
For the MAGIC run 05098925 no files found
For the MAGIC run 05098926 no files found
For the MAGIC run 05099039 no files found
For the MAGIC run 05099040 no files found
For the MAGIC run 05099041 no files found
For the MAGIC run 05099042 no files found
For the MAGIC run 05099043 no files found
For the MAGIC run 05099044 no files found
For the MAGIC run 05099045 no files found
For the MAGIC run 05099046 no files found
For the MAGIC run 05099047 no files found


We also extract the event ids we want. We are only interested in the coincident ones.

In [3]:
# the merged file is located through glob; fail with an explicit message
# instead of a bare IndexError when nothing matches the path
matches_merged = glob.glob(path_merged)
if not matches_merged:
    raise FileNotFoundError(f'No merged dl2 file found at: {path_merged}')
dir_merged = matches_merged[0]
df_merged =  pd.read_hdf(dir_merged, key='events/parameters')

logger.info(f'The merged dl2 ({sys.getsizeof(df_merged)*1e-9:.1f}Gb):')
display(df_merged.head(5))

# unique identifiers found in the merged file (used later to select events)
magic_ids = np.unique(df_merged['magic_id'].to_numpy())
logger.info(f'\nThe amount of events available for stereo analysis is {len(magic_ids)}\n')

The merged dl2 (0.8Gb):


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,obs_id_magic,event_id_magic,obs_id_lst,event_id_lst,intensity,x,y,r,phi,length,length_uncertainty,width,width_uncertainty,psi,skewness,kurtosis,slope,intercept,intensity_width_1,intensity_width_2,pixels_width_1,pixels_width_2,n_pixels,n_islands,pointing_alt,pointing_az,timestamp,time_diff,multiplicity,combo_type,alt,alt_uncert,az,az_uncert,core_x,core_y,impact,h_max,reco_energy,reco_energy_var,reco_disp,reco_disp_var,reco_alt,reco_az,disp_diff_sum,disp_diff_mean,gammaness,gammaness_var,total_id,magic_id
obs_id,event_id,tel_id,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1
5219,107,1,5098298,9894,5219.0,107.0,2307.416203,-0.643494,0.691849,0.944849,132.926134,0.146443,0.004771,0.110942,0.004136,37.954998,0.103959,3.339539,-1.84479,10.73027,0.115574,0.292651,0.008086,0.017251,105,1,1.098832,0.890933,1625883000.0,0.000459,3,3,62.261268,0.978335,54.894019,0.978335,32.851247,12.570225,68.045347,8128.120684,0.351016,0.035154,0.824763,0.056601,62.230273,55.176567,1.895731,0.63191,0.015,0.014775,5219.107,5098298.9894
5219,107,2,5098298,9894,5219.0,107.0,595.25,-0.209438,0.25565,0.330487,129.325616,0.127092,0.003101,0.087064,0.001758,63.823745,-1.001241,2.417922,13.042229,15.978483,0.048194,0.134686,0.004812,0.01155,62,3,1.098764,0.890969,1625883000.0,0.011432,3,3,62.261268,0.978335,54.894019,0.978335,32.851247,12.570225,69.404742,8128.120684,1.316875,0.273765,1.087578,0.145453,62.721671,54.93561,1.895731,0.63191,0.0075,0.007444,5219.107,5098298.9894
5219,107,3,5098298,9894,5219.0,107.0,442.085938,-0.285171,0.292247,0.408327,134.297924,0.076278,0.002005,0.042299,0.001437,-75.934742,0.545201,2.22238,3.992887,12.253938,0.192924,0.414848,0.0077,0.015399,45,1,1.098876,0.890955,1625883000.0,0.009212,3,3,62.261268,0.978335,54.894019,0.978335,32.851247,12.570225,69.256321,8128.120684,0.45954,0.243987,0.781336,0.0478,61.827632,54.602092,1.895731,0.63191,0.113833,0.100875,5219.107,5098298.9894
5219,113,1,5098298,9895,5219.0,113.0,11417.958494,-0.260791,0.744738,0.789079,109.299137,0.296848,0.003746,0.091128,0.001598,3.820321,0.28972,2.736755,-15.520754,24.374656,0.032386,0.078675,0.012938,0.026954,271,2,1.098832,0.890933,1625883000.0,3.1e-05,3,3,64.691815,0.984917,54.818494,0.984917,132.130718,-96.099117,201.909346,7700.803672,8.564279,0.072953,2.480231,0.262637,64.847291,55.021416,1.13851,0.379503,0.0,0.0099,5219.113,5098298.9895
5219,113,2,5098298,9895,5219.0,113.0,9335.53125,0.054253,0.316131,0.320752,80.261998,0.159086,0.001138,0.055032,0.000539,17.885044,-0.697459,2.909673,-14.064053,20.616672,0.044869,0.111058,0.016362,0.034649,259,1,1.098764,0.890969,1625883000.0,0.000791,3,3,64.691815,0.984917,54.818494,0.984917,132.130718,-96.099117,92.121463,7700.803672,2.99501,0.041675,1.583643,0.116036,64.592551,54.500935,1.13851,0.379503,0.07,0.0651,5219.113,5098298.9895



The amount of events available for stereo analysis is 484722



# Extracting the data we want from `melibea`

In [4]:
# --- names of the melibea branches that are read --- #
# stereo event id or header
event_id = 'MRawEvtHeader_1.fStereoEvtNumber'

# size or intensity recorded by both telescopes
intensity_M1 = 'MHillas_1.fSize'
intensity_M2 = 'MHillas_2.fSize'

# source positions for M1 and M2
src_pos_M1_X = 'MSrcPosCam_1.fX'
src_pos_M1_Y = 'MSrcPosCam_1.fY'
src_pos_M2_X = 'MSrcPosCam_2.fX'
src_pos_M2_Y = 'MSrcPosCam_2.fY'

# impact parameters
imp_par_1    = 'MStereoParDisp.fM1Impact'
imp_par_1_Az = 'MStereoParDisp.fM1ImpactAz'
imp_par_2    = 'MStereoParDisp.fM2Impact'
imp_par_2_Az = 'MStereoParDisp.fM2ImpactAz'

# stereo reconstructed direction
st_dirX = 'MStereoParDisp.fDirectionX'
st_dirY = 'MStereoParDisp.fDirectionY'

# pointing positions (the 'alt' entry is read from the zenith-distance branch
# and turned into an altitude inside the loop)
pointing_ra  = 'MPointingPos_1.fRa'
pointing_dec  = 'MPointingPos_1.fDec'
pointing_alt = 'MPointingPos_1.fZd'
pointing_az  = 'MPointingPos_1.fAz'

# reconstructed energy and hadronness
reco_energy = 'MEnergyEst.fEnergy'
hadroness   = 'MHadronness.fHadronness'

# reconstructed dec and ra coordinates
reco_dec = 'MStereoParDisp.fDirectionDec'
reco_ra  = 'MStereoParDisp.fDirectionRA' 

# timestamp in ns
timestamp = 'MRawEvtHeader_1.fClockCounter'
delta_t   = 'MRawEvtHeader_1.fTimeDiff'

# shower maximum height and image orientation of each telescope
h_max   = 'MStereoParDisp.fMaxHeight'
slope_1 = 'MHillas_1.fDelta'
slope_2 = 'MHillas_2.fDelta'

# branches to read and, in the same order, the dataframe column each one is
# stored under. They do not depend on the file, so they are built once here.
# Note that the hadronness branch is stored under 'gammaness' and converted
# inside the loop.
dataL = [
    event_id, intensity_M1, intensity_M2, src_pos_M1_X, src_pos_M1_Y, src_pos_M2_X,
    src_pos_M2_Y, imp_par_1, imp_par_1_Az, imp_par_2, imp_par_2_Az,
    st_dirX, st_dirY, reco_energy, hadroness, reco_dec, reco_ra, pointing_alt, pointing_az,
    h_max, slope_1, slope_2, pointing_ra, pointing_dec, timestamp, delta_t,
]
names = [
    'event_id', 'intensity_M1', 'intensity_M2', 'src_pos_M1_X', 'src_pos_M1_Y', 'src_pos_M2_X',
    'src_pos_M2_Y', 'imp_par_1', 'imp_par_1_Az', 'imp_par_2', 'imp_par_2_Az',
    'st_dirX', 'st_dirY', 'reco_energy', 'gammaness', 'reco_dec', 'reco_ra', 'pointing_alt', 'pointing_az',
    'h_max', 'slope_1', 'slope_2', 'pointing_ra', 'pointing_dec', 'timestamp', 'delta_t',
]

# creating one dataframe per file with the needed modifications of variables,
# standardizing everything
melibea_runs_df = []
for file, run_MAGIC in zip(files_MAGIC, runs_MAGIC):

    # opening the file with uproot; the context manager closes it again once
    # the branches have been copied into memory
    with uproot.open(file) as myFile:

        # finding the label of the key needed (because it changes in each run)
        root_label = myFile.keys()[2]
        tree = myFile[root_label]

        # data matrix, one row per branch; kept as a single 2-D array so the
        # dataframe columns are built exactly as before
        data = np.array([np.array(tree[d].array()) for d in dataL])

    # create the dataframe (one column per branch)
    df = pd.DataFrame(data.T, columns=names)

    # add obs_ids
    df['obs_id'] = run_MAGIC

    # convert to int the event id's
    df['event_id']  = df['event_id'].astype('int')

    # convert hadronness to gammaness
    df['gammaness'] = 1. - df['gammaness']

    # scale the energy by 1e-3 to get TeV (i.e. GeV -> TeV; the previous
    # comment said MeV -- TODO confirm the unit stored in fEnergy)
    df['reco_energy'] = df['reco_energy'].to_numpy() * 1e-3

    # converting right ascensions from hours to degrees, and the zenith
    # distance to an altitude
    df['reco_ra']      = df['reco_ra'] * 15
    df['pointing_ra']  = df['pointing_ra'] * 15
    df['pointing_alt'] = 90 - df['pointing_alt']

    # adding a magic total id of the form '<obs_id>.<event_id>'
    df['magic_id']    = np.char.add(df['obs_id'].to_numpy().astype(str), np.char.add('.',df['event_id'].to_numpy().astype(str))) 

    # counting the events that carry the value -0.001 in the scaled energy
    # column, which this notebook treats as "not reconstructed"
    nonreco = int((df['reco_energy'] == -0.001).sum())
    # an empty file would otherwise raise a ZeroDivisionError
    frac_nonreco = 100 * nonreco / len(df) if len(df) else 0.0

    logger.info(f'For /{os.path.basename(file)} found {nonreco} non-reconstructed events ({frac_nonreco:.2f}%)')

    melibea_runs_df.append(df)

For /20210710_05098298_Q_BLLac-W0.40+090.root found 101289 non-reconstructed events (80.83%)
For /20210710_05098299_Q_BLLac-W0.40+270.root found 100223 non-reconstructed events (80.72%)
For /20210711_05098326_Q_BLLac-W0.40+000.root found 59173 non-reconstructed events (82.93%)
For /20210711_05098327_Q_BLLac-W0.40+180.root found 62199 non-reconstructed events (83.73%)
For /20210713_05098367_Q_BLLac-W0.40+090.root found 45616 non-reconstructed events (92.70%)
For /20210713_05098368_Q_BLLac-W0.40+270.root found 37249 non-reconstructed events (92.03%)
For /20210717_05098498_Q_BLLac-W0.40+270.root found 70756 non-reconstructed events (82.09%)
For /20210718_05098541_Q_BLLac-W0.40+180.root found 62816 non-reconstructed events (82.53%)
For /20210718_05098542_Q_BLLac-W0.40+090.root found 61958 non-reconstructed events (82.30%)
For /20210803_05098833_Q_BLLac-W0.40+270.root found 117319 non-reconstructed events (80.11%)
For /20210803_05098834_Q_BLLac-W0.40+000.root found 117011 non-reconstructed 

In [5]:
# stack the per-run melibea dataframes, keep only the events whose magic_id is
# among the coincident ones, and index + sort the result by (obs_id, event_id)
melibea_runs_df = (
    pd.concat(melibea_runs_df)
    .query('magic_id in @magic_ids')
    .set_index(['obs_id', 'event_id'])
    .sort_index()
)

# make sure the output directory exists and build the output file name
aux.createdir(output_dir)
fname = os.path.join(output_dir, f'dl2_melibea_{source_name}.h5')

# write the dataframe to .h5 and report where it went
melibea_runs_df.to_hdf(fname, key='/events/parameters')
logger.info(f'A total dataframe of {len(melibea_runs_df)} events is created at:\n--> {fname}')

# show the first rows as well
display(melibea_runs_df.head(5))

A total dataframe of 484682 events is created at:
--> /fefs/aswg/workspace/juan.jimenez/data/dl2/melibea/dl2_melibea_BLLac.h5


Unnamed: 0_level_0,Unnamed: 1_level_0,intensity_M1,intensity_M2,src_pos_M1_X,src_pos_M1_Y,src_pos_M2_X,src_pos_M2_Y,imp_par_1,imp_par_1_Az,imp_par_2,imp_par_2_Az,st_dirX,st_dirY,reco_energy,gammaness,reco_dec,reco_ra,pointing_alt,pointing_az,h_max,slope_1,slope_2,pointing_ra,pointing_dec,timestamp,delta_t,magic_id
obs_id,event_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
5098298,9894,595.25,442.085938,-109.434044,-44.17741,-110.260971,-42.57893,-1.0,,-1.0,,,,-0.001,0.027,,,62.957345,51.046428,-1.0,0.456862,-0.245485,330.679167,42.677778,3290800000.0,0.011432,5098298.9894
5098298,9895,9335.53125,5245.914062,-109.434135,-44.17738,-110.260948,-42.578999,-1.0,,-1.0,,,,-0.001,0.001667,,,62.957348,51.046426,-1.0,1.258643,1.017411,330.679167,42.677778,3290831000.0,0.000791,5098298.9895
5098298,9896,133.4375,83.890625,-109.436279,-44.176556,-110.260338,-42.580627,-1.0,,-1.0,,,,-0.001,0.003333,,,62.957402,51.046378,-1.0,0.789299,0.887595,330.679167,42.677778,3291594000.0,0.018241,5098298.9896
5098298,9900,406.070312,292.015625,-109.439896,-44.175175,-110.259315,-42.58337,10584.287109,-72.574722,11632.484375,-28.421066,-0.049612,-0.417368,0.203304,0.012,42.473122,330.180302,62.957494,51.046297,912879.125,-0.661427,0.219169,330.679167,42.677778,3292880000.0,0.005154,5098298.99
5098298,9901,75.375,50.273438,-109.440697,-44.174866,-110.259087,-42.583977,-1.0,,-1.0,,,,-0.001,0.013333,,,62.957514,51.046279,-1.0,1.046968,-0.891276,330.679167,42.677778,3293165000.0,0.007138,5098298.9901


## Check the different keys we have inside `melibea.root` files

In [None]:
# opening the first melibea file with uproot; the context manager closes it
# again as soon as the key names have been read
with uproot.open(files_MAGIC[0]) as myFile:
    # extracting primary key, that changes in each dataset
    primary_key = myFile.keys()[2]

    # names of all the keys stored under the primary key
    all_keys = myFile[primary_key].keys()

# and showing them
all_keys