In [1]:
# Transverse limits compared against |TX| + FV_margin and |TY| + FV_margin of the
# primary electron in the fiducial-volume cut of the selection cell below.
# NOTE(review): units presumably match the TX/TY coordinates -- TODO confirm.
X_SIZE = 6.25
Y_SIZE = 4.95

In [2]:
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d.art3d import Line3DCollection
from matplotlib import pylab as plt 
import root_numpy
import pandas as pd
import numpy
#import seaborn
%matplotlib inline
#%matplotlib notebook
import numpy as np



In [3]:
def open_shower_file(filename, start=0, stop=100, step=1):
    """Read the ``cbmsim`` tree of a ROOT file into a numpy structured array.

    The requested branches are the ``MCTrack`` leaves followed by the
    ``TargetPoint`` leaves, in the exact order in which ``extract_showers``
    unpacks each record -- do not reorder them.

    Parameters
    ----------
    filename : str
        Path of the ROOT file, forwarded to ``root_numpy.root2array``.
    start, stop, step : int
        Entry range, forwarded unchanged to ``root_numpy.root2array``.

    Returns
    -------
    Whatever ``root_numpy.root2array`` returns for the requested branches.
    """
    mc_leaves = ('fPx', 'fPy', 'fPz',
                 'fStartX', 'fStartY', 'fStartZ',
                 'fMotherId', 'fM', 'fStartT', 'fPdgCode')
    target_leaves = ('fPx', 'fPy', 'fPz',
                     'fX', 'fY', 'fZ',
                     'fTime', 'fLength', 'fELoss',
                     'fDetectorID', 'fTrackID', 'fPdgCode')

    branch_names = ['MCTrack.' + leaf for leaf in mc_leaves]
    branch_names += ['TargetPoint.' + leaf for leaf in target_leaves]

    return root_numpy.root2array(filename,
                                 treename='cbmsim',
                                 branches=branch_names,
                                 start=start, stop=stop, step=step)

def extract_showers(showers_data_root, LIM=30, E_TRHESHOLD=0.01):
    """Split every event record into a simulated-hit dict and an MC-track dict.

    Parameters
    ----------
    showers_data_root : iterable
        One record per event. Each record must unpack into 22 arrays in the
        branch order requested by ``open_shower_file``: the 10 ``MCTrack``
        branches followed by the 12 ``TargetPoint`` branches.
    LIM, E_TRHESHOLD
        Not used by this function; kept only so existing calls keep working.

    Returns
    -------
    showers_sim : list of dict
        Per event, arrays of the simulated points under the keys ``PX``,
        ``PY``, ``PZ``, ``TX``, ``TY``, ``TZ``, ``Ttime``, ``PdgCode``,
        ``AssociatedMCParticle`` and ``TrackLength``.
    showers_mc : list of dict
        Per event, arrays of the MC tracks under the keys ``PX``, ``PY``,
        ``PZ``, ``TX``, ``TY``, ``TZ``, ``MotherId`` and ``PdgCode``.
    initial_indeces : list of int
        Position of each event in ``showers_data_root``.
    """
    showers_mc = []
    showers_sim = []
    initial_indeces = []

    for index, shower_data_root in enumerate(showers_data_root):
        # Unpack the record; the order mirrors the branch list of open_shower_file.
        (fPx_mc, fPy_mc, fPz_mc, fStartX_mc, fStartY_mc, fStartZ_mc, fMotherId_mc,
         fM_mc, fStartT_mc, fPdgCode_mc,
         fPx_sim, fPy_sim, fPz_sim, fStartX_sim, fStartY_sim, fStartZ_sim,
         fTime_sim, fLength_sim, fELoss_sim, fDetectorID_sim, fTrackID_sim,
         fPdgCode_sim) = shower_data_root

        # All-True mask (no preselection here). The builtin ``bool`` is used
        # because the ``np.bool`` alias no longer exists in current NumPy.
        # Indexing with the mask still hands out copies of the sim arrays.
        mask_sim = np.ones_like(fPx_sim, dtype=bool)

        shower_sim = {
            'PX': fPx_sim[mask_sim],
            'PY': fPy_sim[mask_sim],
            'PZ': fPz_sim[mask_sim],

            'TX': fStartX_sim[mask_sim],
            'TY': fStartY_sim[mask_sim],
            'TZ': fStartZ_sim[mask_sim],
            'Ttime': fTime_sim[mask_sim],
            'PdgCode': fPdgCode_sim[mask_sim],
            'AssociatedMCParticle': fTrackID_sim[mask_sim],
            'TrackLength': fLength_sim[mask_sim]
        }

        # MC arrays are stored as-is (no mask, no copy).
        shower_mc = {
            'PX': fPx_mc,
            'PY': fPy_mc,
            'PZ': fPz_mc,

            'TX': fStartX_mc,
            'TY': fStartY_mc,
            'TZ': fStartZ_mc,
            'MotherId': fMotherId_mc,
            'PdgCode': fPdgCode_mc
        }

        showers_sim.append(shower_sim)
        showers_mc.append(shower_mc)
        initial_indeces.append(index)

    return showers_sim, showers_mc, initial_indeces

In [4]:
def select_shower_tracks(shower_sim, shower_mc, vertex_slice_to_look=5, z_cut_value=-3244.5):
    """Keep only the simulated points that belong to the electron-induced shower.

    The primary electron is searched among the first ``vertex_slice_to_look``
    MC tracks as the track with ``PdgCode == 11`` and ``MotherId == 0``. Its
    descendants are then collected in a single forward pass over ``MotherId``,
    so a track is picked up only if it is listed after its mother. Finally the
    simulated points are restricted to those associated with a collected track
    and with ``TZ < z_cut_value`` (the original note says this keeps the brick
    and throws away the CES).

    Parameters
    ----------
    shower_sim : dict of arrays
        Must contain ``AssociatedMCParticle`` and ``TZ``; every value is
        filtered with the same masks.
    shower_mc : dict of arrays
        Must contain ``PdgCode`` and ``MotherId``.
    vertex_slice_to_look : int
        How many leading MC tracks are inspected for the primary electron.
    z_cut_value : float
        Upper (exclusive) bound on ``TZ`` of the kept points.

    Returns
    -------
    dict
        Filtered copy of ``shower_sim``, or an empty dict when the inspected
        slice does not contain exactly one primary electron.
    """
    is_primary_electron = np.logical_and(
        shower_mc['PdgCode'][:vertex_slice_to_look] == 11,
        shower_mc['MotherId'][:vertex_slice_to_look] == 0)

    # Explicit check instead of try/assert: asserts vanish under ``python -O``,
    # which would turn the "no electron" case into an IndexError below.
    if is_primary_electron.sum() != 1:
        return {}
    initial_electron_id = np.where(is_primary_electron)[0][0]

    # Grow the set of shower members: a track joins when its mother is already in.
    shower_indeces = {initial_electron_id}
    for index, mother_id in enumerate(shower_mc['MotherId']):
        if mother_id in shower_indeces:
            shower_indeces.add(index)

    in_shower = np.isin(shower_sim["AssociatedMCParticle"], list(shower_indeces))
    selected_as_shower = {k: v[in_shower] for k, v in shower_sim.items()}

    passes_z_cut = selected_as_shower["TZ"] < z_cut_value
    return {k: v[passes_z_cut] for k, v in selected_as_shower.items()}

In [5]:
# Load the events from the ROOT file.
# NOTE(review): `stop` is forwarded to root_numpy.root2array; if it follows
# Python-slice semantics, stop=-1 leaves out the last entry -- confirm this is intended.
showers_data_root = open_shower_file(
    filename="./ship.conical.Genie-TGeant4.root",
    stop=-1,
)



In [6]:
# Split every event into its simulated-point dict and its MC-track dict.
showers_sim, showers_mc, initial_indeces = extract_showers(showers_data_root)

# One entry per event: the shower-only points, or {} when select_shower_tracks
# (called with its default arguments) finds no unique primary electron.
new_showers = list(map(select_shower_tracks, showers_sim, showers_mc))

In [7]:
# Number of leading MCTrack entries inspected when looking for the initial electron
vertex_slice_to_look = 10
# Momentum threshold applied to every simulated track
# NOTE(review): same units as the PX/PY/PZ branches -- TODO confirm
E_THRESHOLD = 0.03

# Lower bound on the number of tracks left after all selections
# for a shower to be saved
LIM = 40
FV_margin = 0.5

# Events skipped because no unique primary electron / no shower points are available
strange_events = []
n_cuts = 6
# tracks_in_shower[i, c]: number of tracks of event i surviving the first c cuts
tracks_in_shower = np.zeros((len(showers_mc), n_cuts))
# Momentum magnitude of the primary electron; stays at -1 for skipped events
ele_energy = -1 * np.ones(len(showers_mc))

selected_showers = []
selected_indeces = []

for index, mc_tracks in enumerate(showers_mc):
    mask = np.logical_and(mc_tracks['PdgCode'][:vertex_slice_to_look] == 11,
                          mc_tracks['MotherId'][:vertex_slice_to_look] == 0)

    # Do not consider events without exactly one primary electron.
    # (Explicit check: an assert would be stripped under ``python -O``.)
    if mask.sum() != 1:
        strange_events.append(index)
        continue

    sim_tracks = new_showers[index]
    # select_shower_tracks was run with its own (default) vertex slice and
    # returns {} when it finds no unique electron there; such an event has no
    # shower points to cut on, so skip it instead of failing on a missing key.
    if not sim_tracks:
        strange_events.append(index)
        continue

    initial_electron_id = np.where(mask)[0][0]
    # Magnitude of the momentum vector of every MC track, taken at the electron
    mc_energy_ele = np.linalg.norm(np.array([mc_tracks[P] for P in ["PZ", "PX", "PY"]]), axis=0)[initial_electron_id]
    ele_energy[index] = mc_energy_ele

    cut_number = 0
    # Start from an all-True boolean mask (``np.bool`` no longer exists in current NumPy)
    mask_sim = np.ones_like(sim_tracks['PZ'], dtype=bool)
    tracks_in_shower[index, cut_number] = mask_sim.sum()
    cut_number += 1

    # FV cut (based on true info, but should be the same for basetrack)
    fv_flag = (abs(mc_tracks['TX'][initial_electron_id]) + FV_margin < X_SIZE and
               abs(mc_tracks['TY'][initial_electron_id]) + FV_margin < Y_SIZE)
#              abs(mc_tracks['TZ'][initial_electron_id]) > 3251

    if not fv_flag:
        # Reject every track while keeping the mask boolean
        # (``mask_sim & 0`` would silently turn it into an integer array).
        mask_sim = np.zeros_like(mask_sim)
    tracks_in_shower[index, cut_number] = mask_sim.sum()
    cut_number += 1

    # mask to get rid of tracks with tg(alpha) > 5
    mask_sim = mask_sim & (np.abs(sim_tracks['PX'] / sim_tracks['PZ']) <= 5.) &\
                          (np.abs(sim_tracks['PY'] / sim_tracks['PZ']) <= 5.)
    tracks_in_shower[index, cut_number] = mask_sim.sum()
    cut_number += 1

    # Zero-length tracks look bad
    mask_sim = mask_sim & (sim_tracks["TrackLength"] != 0)
    tracks_in_shower[index, cut_number] = mask_sim.sum()
    cut_number += 1

    # Visibility mask: only tracks with |P| > E_THRESHOLD are seen in the emulsion
    mask_sim = mask_sim & (np.linalg.norm(np.array([sim_tracks[P] for P in ["PZ", "PX", "PY"]]), axis=0) > E_THRESHOLD)
    tracks_in_shower[index, cut_number] = mask_sim.sum()
    cut_number += 1

    # Select showers with at least LIM basetracks
    if mask_sim.sum() < LIM:
        tracks_in_shower[index, cut_number] = 0
    else:
        tracks_in_shower[index, cut_number] = mask_sim.sum()
        selected_shower = {
            k: v[mask_sim] for k, v in sim_tracks.items()
        }
        # Attach the true kinematics and start point of the primary electron
        selected_shower.update({
            'ele_P': mc_energy_ele,
            'ele_PX': mc_tracks['PX'][initial_electron_id],
            'ele_PY': mc_tracks['PY'][initial_electron_id],
            'ele_PZ': mc_tracks['PZ'][initial_electron_id],
            'ele_TX': mc_tracks['TX'][initial_electron_id],
            'ele_TY': mc_tracks['TY'][initial_electron_id],
            'ele_TZ': mc_tracks['TZ'][initial_electron_id],
        })
        selected_showers.append(selected_shower)
        selected_indeces.append(index)
    cut_number += 1

In [8]:
# Number of showers that passed every cut of the selection cell above
len(selected_showers)

2048

In [9]:
import pickle

In [10]:
# Persist the selected showers (a list of dicts) next to the notebook,
# using the newest pickle protocol this interpreter supports.
with open('selected_showers.pickle', 'wb') as pickle_file:
    pickle.dump(
        selected_showers,
        pickle_file,
        protocol=pickle.HIGHEST_PROTOCOL,
    )