# Energy Flow Notebook
### Goal:
The goal of this notebook is simply to set up the energyflow code so that we can convert our data into the same format it uses and run a simple model.
### Implementation: 
For now we only pull the charged pion data; the neutral pion data has yet to be added and shuffled together with it.
#### To Do:
- Add neutral pion data in the same manner as charged pion and shuffle them

- No idea as of yet how to incorporate the tracking information. Speculation is that this will be a separate network, and we will concatenate the results.
  - Update on this: For now, try adding the track as just an extra point in the point cloud

In [38]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.colors import Normalize
import uproot as ur
import awkward as ak
import time as t
import copy
print("Awkward Array Version: "+str(ak.__version__))
print("Uproot Version: "+str(ur.__version__))

import energyflow as ef
from energyflow.archs import EFN
from energyflow.datasets import qg_jets
from energyflow.utils import data_split, to_categorical

from sklearn.metrics import roc_auc_score, roc_curve
import tensorflow as tf

Awkward Array Version: 1.2.2
Uproot Version: 4.0.7


#### TO DO: Make a nested git for utils and download the new version with graph_util

In [None]:
# import sys
# sys.path.append('/home/russbate/UBCPionML/LCStudies')
# from util import graph_util as gu

In [35]:
def find_sampling_events(_ak_cluster_cell_ID, _cellGeoID, _cellGeoSampling, _filter):
    ''' Find the events that have at least one cluster cell in the requested sampling layers.

    Inputs:
    _ak_cluster_cell_ID: awkward array (or any mapping) whose "cluster_cell_ID" field holds,
        for every event, a list of clusters, each cluster being a list of cell geometry IDs
    _cellGeoID: list of the cell IDs to find the index from, expects numpy format
    _cellGeoSampling: same as _cellGeoID but the sampling layer
    _filter: the numbered index of calorimetry layers

    Returns:
    numpy array holding the indices of the events with one or more cells in a sampling
    layer listed in _filter. Cell IDs that do not appear in _cellGeoID are ignored.
    '''
    # Read from the argument that was passed in, not from a notebook-level variable
    _event_cell_IDs = _ak_cluster_cell_ID["cluster_cell_ID"]

    # Resolve the filter against the geometry once: the set of cell IDs that sit in
    # one of the selected layers. Plain Python ints keep the 64-bit IDs exact and
    # make each per-cell lookup O(1) instead of a scan over the whole geometry.
    _in_filter = np.isin(np.asarray(_cellGeoSampling), _filter)
    _selected_IDs = set(np.asarray(_cellGeoID)[_in_filter].tolist())

    # list of event indices with one or more cells in the selected layers
    _barrel_events = []

    for _evt_idx in range(len(_event_cell_IDs)):
        _cluster_cell_ID = _event_cell_IDs[_evt_idx]

        # loop through each cluster of the event and each cell of the cluster;
        # any() stops at the first cell found in a selected layer
        if any(int(_cellGeoTag) in _selected_IDs
               for _cluster in _cluster_cell_ID
               for _cellGeoTag in _cluster):
            _barrel_events.append(_evt_idx)

    # dtype=int keeps an empty result usable as an index array
    return np.array(_barrel_events, dtype=int)

#### Events

In [2]:
cPionEvents = '/fast_scratch/atlas_images/v01-45/pipm/'
nPionEvents = '/fast_scratch/atlas_images/v01-45/pi0/'

In [3]:
cPion_502 = ur.open(cPionEvents+'user.angerami.24559744.OutputStream._000502.root')
nPion_502 = ur.open(nPionEvents+'user.angerami.24559740.OutputStream._000502.root')

In [4]:
# Pull the "EventTree" object out of each opened file; the cells below read branches from these
cPionEvents = cPion_502["EventTree"]
nPionEvents = nPion_502["EventTree"]

#### Cell Geometry

In [5]:
# "CellGeo" tree taken from the charged pion file; the cell_geo_* branches are read from it below
cellGeoTree = cPion_502["CellGeo"]

### Load & Show Trees 
#### Events - Print particle ID for reference

In [12]:
# Show keys
cPionEvents.show(name_width=42,
                interpretation_width=60)

## Event information
## Charged Pions
# Cluster-level branches are kept as awkward record arrays with a single field
# each, so they are indexed later as e.g. ak_cluster_cell_ID["cluster_cell_ID"]
ak_cluster_nCells = cPionEvents.arrays(filter_name="cluster_nCells")
ak_cluster_cell_ID = cPionEvents.arrays(filter_name="cluster_cell_ID")
ak_cluster_cell_E = cPionEvents.arrays(filter_name="cluster_cell_E")

# Read the event-level branches in one pass. Calling arrays(library='np') without
# a filter loads every branch of the tree, and that was being done once per variable.
cPion_np = cPionEvents.arrays(filter_name=["nCluster", "eventNumber", "nTrack",
                                           "nTruthPart", "truthPartPdgId"],
                              library='np')
nCluster = cPion_np["nCluster"]
eventNumber = cPion_np["eventNumber"]
nTrack = cPion_np["nTrack"]
cPionPart = cPion_np["nTruthPart"]
cPionId = cPion_np["truthPartPdgId"]

ak_cluster_Eta = cPionEvents.arrays(filter_name="cluster_Eta")
ak_cluster_Phi = cPionEvents.arrays(filter_name="cluster_Phi")

name                                       | typename                 | interpretation                                              
-------------------------------------------+--------------------------+-------------------------------------------------------------
runNumber                                  | int32_t                  | AsDtype('>i4')
eventNumber                                | int64_t                  | AsDtype('>i8')
lumiBlock                                  | int32_t                  | AsDtype('>i4')
coreFlags                                  | uint32_t                 | AsDtype('>u4')
mcEventNumber                              | int32_t                  | AsDtype('>i4')
mcChannelNumber                            | int32_t                  | AsDtype('>i4')
mcEventWeight                              | float                    | AsDtype('>f4')
nTruthPart                                 | int32_t                  | AsDtype('>i4')
G4PreCalo_n_EM                        

In [39]:
## Neutral Pions
# Read only the two truth branches, in a single pass, rather than loading the
# whole tree once per variable
nPion_np = nPionEvents.arrays(filter_name=["nTruthPart", "truthPartPdgId"],
                              library='np')
nPionPart = nPion_np["nTruthPart"]
nPionId = nPion_np["truthPartPdgId"]

# Number of truth particles per event, first 50 events of each sample
print("If single particle gun, then this should all be ones")
print("Charged Pions: ")
print(cPionPart[0:50])
print("Neutral Pions: ")
print(nPionPart[0:50])
print('.. pew pew ..')
print()

# PDG IDs of the truth particles, first 5 events of each sample
print("Check for particle data group identification of pi plus or pi minus")
print(cPionId[0:5])
print()
print("Check for particle data group identification of pi0")
print(nPionId[0:5])

If single particle gun, then this should all be ones
Charged Pions: 
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1]
Neutral Pions: 
[3 3 3 3 3 3 3 3 3 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3]
.. pew pew ..

Check for particle data group identification of pi plus or pi minus
[array([-211], dtype=int32) array([211], dtype=int32)
 array([211], dtype=int32) array([211], dtype=int32)
 array([-211], dtype=int32)]

Check for particle data group identification of pi0
[array([111,  22,  22], dtype=int32) array([111,  22,  22], dtype=int32)
 array([111,  22,  22], dtype=int32) array([111,  22,  22], dtype=int32)
 array([111,  22,  22], dtype=int32)]


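Notes: The charged pion sample has one truth particle per event, as expected for a single particle gun, but the neutral pion sample reports 3 (occasionally 4). The PDG IDs show the pi0 (111) followed by two photons (22), so the truth record appears to include the decay photons as well. We need to investigate this further.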

#### Cell Geometry

In [13]:
# List the branches of the cell geometry tree with their types and interpretations
cellGeoTree.show(name_width=42,
                interpretation_width=50)

name                                       | typename                 | interpretation                                    
-------------------------------------------+--------------------------+---------------------------------------------------
cell_geo_ID                                | std::vector<uint64_t>    | AsJagged(AsDtype('>u8'), header_bytes=10)
cell_geo_sampling                          | std::vector<uint16_t>    | AsJagged(AsDtype('>u2'), header_bytes=10)
cell_geo_eta                               | std::vector<float>       | AsJagged(AsDtype('>f4'), header_bytes=10)
cell_geo_phi                               | std::vector<float>       | AsJagged(AsDtype('>f4'), header_bytes=10)
cell_geo_rPerp                             | std::vector<float>       | AsJagged(AsDtype('>f4'), header_bytes=10)
cell_geo_deta                              | std::vector<float>       | AsJagged(AsDtype('>f4'), header_bytes=10)
cell_geo_dphi                              | std::vector<float>       

In [23]:
# Geometry information
def _load_cell_geo(_branch):
    ''' Read one branch of cellGeoTree and return its first entry as a numpy array '''
    _records = cellGeoTree.arrays(filter_name=_branch)
    return _records[0][_branch].to_numpy()

cell_geo_ID = _load_cell_geo('cell_geo_ID')
cell_geo_eta = _load_cell_geo('cell_geo_eta')
cell_geo_phi = _load_cell_geo('cell_geo_phi')
cell_geo_rPerp = _load_cell_geo('cell_geo_rPerp')
cell_geo_deta = _load_cell_geo('cell_geo_deta')
cell_geo_dphi = _load_cell_geo('cell_geo_dphi')
cell_geo_sampling = _load_cell_geo('cell_geo_sampling')

### Find Barrel Events

In [36]:
## Find the barrel events in the charged pion sample
## (ak_cluster_cell_ID was read from the charged pion event tree)
# sampling layer numbers handed to find_sampling_events as the layer filter
barrel_filter = np.array([1, 2, 3, 12, 13, 14])

barrel_events = find_sampling_events(
    _ak_cluster_cell_ID=ak_cluster_cell_ID,
    _cellGeoID=cell_geo_ID,
    _cellGeoSampling=cell_geo_sampling,
    _filter=barrel_filter,
)
# show the first ten selected event indices
print(barrel_events[:10])

[ 0  2  3  6  7 11 14 20 22 26]


### Create labels for neutral vs charged pions
#### TO DO: Investigate 'one hot' data representation for classifiers

### Shuffle data points