In [1]:
import uproot
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm


In [2]:
def make_df(file_path):
    """Read the 'preselection' tree of a ROOT file into a flat DataFrame.

    Parameters
    ----------
    file_path
        Location of the ROOT file; handed unchanged to ``uproot.open``.

    Returns
    -------
    pandas.DataFrame
        One column per quantity, in this order: vertex position
        (``vertex_pos_x/y/z``), Ecal energies (``*_E_Ecal``), Ecal cluster
        hit counts (``*_Ecal_nhits``), Ecal cluster positions
        (``*_Ecal_x/y/z``), track hit counts (``*_trk_nhits``), momenta
        (``*_Px/Py/Pz``) and track parameters (``*_phi0``, ``*_dp``,
        ``*_kappa_o_alpha``, ``*_dz``, ``*_tan_lambda``), where ``*`` is
        ``ele`` then ``pos``.

    Notes
    -----
    The file is opened in a ``with`` block so its handle is released as soon
    as all arrays have been read, and the frame is assembled in a single
    ``pd.DataFrame`` call instead of being grown one column at a time.
    """
    axes = ('x', 'y', 'z')

    # (output column suffix, leaf name under '<p>.track_', flip sign?)
    # 'dp' is filled from d0_, 'kappa_o_alpha' from omega_ and 'dz' from z0_.
    # Every parameter except d0_ is stored with its sign flipped.
    # NOTE(review): presumably a sign/coordinate convention change -- confirm.
    track_parameters = (
        ('phi0', 'phi0_', True),
        ('dp', 'd0_', False),
        ('kappa_o_alpha', 'omega_', True),
        ('dz', 'z0_', True),
        ('tan_lambda', 'tan_lambda_', True),
    )

    # Insertion order of this dict defines the column order of the result
    columns = {}

    with uproot.open(file_path) as root_file:
        # Open ROOT tree
        tree = root_file['preselection']

        # Access branches
        B_vertex = tree['vertex.']
        particles = (('ele', tree['ele.']), ('pos', tree['pos.']))

        # Get vertex positions
        vertex_pos = B_vertex['vertex.pos_'].array()
        for axis in axes:
            columns['vertex_pos_' + axis] = np.array(vertex_pos['f' + axis.upper()])

        # NOTE(review): the leaves below are converted with np.array(branch)
        # rather than np.array(branch.array()); confirm the installed uproot
        # version supports converting a branch object directly.

        # Get arrays of the electron and positron energies as measured with Ecal
        for name, branch in particles:
            columns[name + '_E_Ecal'] = np.array(branch[name + '.energy_'])

        # Get number of hits in Ecal clusters (these live directly on the tree)
        for name, _ in particles:
            columns[name + '_Ecal_nhits'] = np.array(tree[name + '_clu_nhits'])

        # Get Ecal cluster positions
        for name, branch in particles:
            for axis in axes:
                columns[name + '_Ecal_' + axis] = np.array(
                    branch[name + '.cluster_.' + axis + '_']
                )

        # Get number of hits on track
        for name, branch in particles:
            track_prefix = name + '.track_/' + name + '.track_.'
            columns[name + '_trk_nhits'] = np.array(branch[track_prefix + 'n_hits_'])

        # Get electron and positron momenta: p1_ feeds 'ele', p2_ feeds 'pos'
        for name, leaf in (('ele', 'vertex.p1_'), ('pos', 'vertex.p2_')):
            momentum = B_vertex[leaf].array()
            for axis in axes:
                columns[name + '_P' + axis] = np.array(momentum['f' + axis.upper()])

        # Collect track parameters for electrons and positrons
        for name, branch in particles:
            track_prefix = name + '.track_/' + name + '.track_.'
            for suffix, leaf, negate in track_parameters:
                values = np.array(branch[track_prefix + leaf])
                columns[name + '_' + suffix] = -values if negate else values

    return pd.DataFrame(columns)


In [4]:
# Load every input ROOT file into its own DataFrame (one make_df call each).
# The files under HPS_mc/ come in two parts for tritrig and WAB.
df_tritrig_1 = make_df(
    '/Users/mghrear/data/HPS_mc/2019_tritrig_pulser_recon_2/tritrig.root'
)
df_tritrig_2 = make_df(
    '/Users/mghrear/data/HPS_mc/2019_tritrig_pulser_recon_2_pt2/tritrig.root'
)
df_phiKK = make_df(
    '/Users/mghrear/data/HPS_mc/2019_phiKK_pulser_recon/phiKK.root'
)
df_wab_2 = make_df(
    '/Users/mghrear/data/HPS_mc/2019_WAB_pulser_recon_2/wab.root'
)
df_wab_1 = make_df(
    '/Users/mghrear/data/HPS_mc/2019_WAB_pulser_recon_1/wab.root'
)
df_data = make_df(
    '/Users/mghrear/data/HPS_data/2019_flattened2/2019_run010051.root'
)

# Stack the two-part samples into one frame each; rows are renumbered from 0
# and the column order is left as produced by make_df (sort=False).
df_tritrig = pd.concat(
    objs=[df_tritrig_1, df_tritrig_2],
    sort=False,
    ignore_index=True,
)
df_wab = pd.concat(
    objs=[df_wab_1, df_wab_2],
    sort=False,
    ignore_index=True,
)




  df['vertex_pos_x'] = np.array(vertex_pos['fX'])
  df['vertex_pos_y'] = np.array(vertex_pos['fY'])
  df['vertex_pos_z'] = np.array(vertex_pos['fZ'])
  df['ele_Px'] = np.array(arr['fX'])
  df['ele_Py'] = np.array(arr['fY'])
  df['ele_Pz'] = np.array(arr['fZ'])
  df['pos_Px'] = np.array(arr['fX'])
  df['pos_Py'] = np.array(arr['fY'])
  df['pos_Pz'] = np.array(arr['fZ'])
  df['vertex_pos_x'] = np.array(vertex_pos['fX'])
  df['vertex_pos_y'] = np.array(vertex_pos['fY'])
  df['vertex_pos_z'] = np.array(vertex_pos['fZ'])
  df['ele_Px'] = np.array(arr['fX'])
  df['ele_Py'] = np.array(arr['fY'])
  df['ele_Pz'] = np.array(arr['fZ'])
  df['pos_Px'] = np.array(arr['fX'])
  df['pos_Py'] = np.array(arr['fY'])
  df['pos_Pz'] = np.array(arr['fZ'])
  df['vertex_pos_x'] = np.array(vertex_pos['fX'])
  df['vertex_pos_y'] = np.array(vertex_pos['fY'])
  df['vertex_pos_z'] = np.array(vertex_pos['fZ'])
  df['ele_Px'] = np.array(arr['fX'])
  df['ele_Py'] = np.array(arr['fY'])
  df['ele_Pz'] = np.array(arr['f

In [5]:
# Write each combined sample to disk as a pickle file.
# The doubled '/' after ML_data is part of the path strings as written;
# POSIX file systems resolve it the same as a single separator.
df_tritrig.to_pickle(path='/Users/mghrear/data/ML_data//BDT_2019_tritrig.pk')
df_phiKK.to_pickle(path='/Users/mghrear/data/ML_data//BDT_2019_phiKK.pk')
df_wab.to_pickle(path='/Users/mghrear/data/ML_data//BDT_2019_wab.pk')
df_data.to_pickle(path='/Users/mghrear/data/ML_data//BDT_2019_data.pk')