In [1]:
import csv
import math
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from uproot_io import Events, View

In [2]:
# Interaction type lookup: maps the integer in the 'Idx' column of
# interactions.csv to the text in the 'Interaction' column of the same row.
# newline='' is what the csv module asks for, so that quoted fields containing
# line breaks are parsed correctly on every platform.
with open('interactions.csv', newline='') as f:
    interaction_dictionary = {
        int(row['Idx']): row['Interaction'] for row in csv.DictReader(f)
    }

In [3]:
# The ROOT file location is machine-specific (the hard-coded path raised
# FileNotFoundError when it did not exist). Set the PANDORA_RECO_FILE
# environment variable to point at a local copy; the original path is kept only
# as the fallback so existing setups keep working.
PANDORA_RECO_FILE = os.environ.get(
    "PANDORA_RECO_FILE", "/Users/mohammed/code/PandoraRecoFile_1.root"
)
events = Events(PANDORA_RECO_FILE)

FileNotFoundError: file not found

    '/Users/mohammed/code/PandoraRecoFile_1.root'

Files may be specified as:
   * str/bytes: relative or absolute filesystem path or URL, without any colons
         other than Windows drive letter or URL schema.
         Examples: "rel/file.root", "C:\abs\file.root", "http://where/what.root"
   * str/bytes: same with an object-within-ROOT path, separated by a colon.
         Example: "rel/file.root:tdirectory/ttree"
   * pathlib.Path: always interpreted as a filesystem path or URL only (no
         object-within-ROOT path), regardless of whether there are any colons.
         Examples: Path("rel:/file.root"), Path("/abs/path:stuff.root")

Functions that accept many files (uproot.iterate, etc.) also allow:
   * glob syntax in str/bytes and pathlib.Path.
         Examples: Path("rel/*.root"), "/abs/*.root:tdirectory/ttree"
   * dict: keys are filesystem paths, values are objects-within-ROOT paths.
         Example: {"/data_v1/*.root": "ttree_v1", "/data_v2/*.root": "ttree_v2"}
   * already-open TTree objects.
   * iterables of the above.


In [None]:
def x(event_number, tail_start=0.7, min_hits=30):
    """Fraction of each particle's ADC carried by its highest-x hits.

    For every entry of the global ``events`` whose ``event_number`` equals
    ``event_number``, the hits (``reco_hits_x_w`` / ``reco_adcs_w``, presumably
    the W view) are ordered by ascending x. The ADC summed over the hits from
    index ``int(tail_start * n_hits)`` onwards is then divided by the
    particle's total ADC.

    Parameters
    ----------
    event_number : int
        Event whose particles are processed.
    tail_start : float, optional
        Fraction of the x-ordered hits skipped before the tail begins.
        The default 0.7 means the tail is roughly the last 30 % of the hits.
    min_hits : int, optional
        Particles with this many hits or fewer are discarded (default 30).

    Returns
    -------
    E : numpy.ndarray
        Tail-to-total ADC ratio of each retained particle (float64, 1-D).
    particleType : numpy.ndarray
        ``events.mc_pdg`` value of the same particles, in the same order
        (float64, 1-D, so both arrays can be appended/concatenated alike).

    Notes
    -----
    Particles whose total ADC is exactly zero are skipped: the ratio is
    undefined for them and would otherwise show up as NaN or inf.
    """
    idx = np.where(events.event_number == event_number)[0]

    # Plain lists are grown in the loop; np.append would copy the whole array
    # on every iteration.
    ratios = []
    pdg_codes = []

    for i in idx:
        hit_x = np.asarray(events.reco_hits_x_w[i])
        adc = np.asarray(events.reco_adcs_w[i], dtype=float)

        # Discard particles that do not have more than `min_hits` hits.
        if hit_x.size <= min_hits:
            continue

        # Order the ADC values by ascending hit x. A stable sort keeps hits
        # that share the same x in their stored order, so the result is
        # deterministic.
        sadc = adc[np.argsort(hit_x, kind="stable")]

        total = np.sum(sadc)
        if total == 0:
            # Ratio undefined (would be 0/0 or x/0): leave the particle out.
            continue

        tail_begin = int(tail_start * len(sadc))
        ratios.append(np.sum(sadc[tail_begin:]) / total)

        # Record the particle type alongside its ratio.
        pdg_codes.append(events.mc_pdg[i])

    return np.asarray(ratios, dtype=float), np.asarray(pdg_codes, dtype=float)

In [None]:
# Quick look at the (ratios, particle types) pair returned for event 0.
x(event_number=0)

In [None]:
def energy(event_number, tail_start=0.9, min_hits=30):
    """Fraction of each particle's ADC carried by its last stored hits.

    For every entry of the global ``events`` whose ``event_number`` equals
    ``event_number``, the ADC values in ``reco_adcs_w`` are taken in their
    stored order (no sorting). The sum from index ``int(tail_start * n_hits)``
    onwards is divided by the particle's total ADC.

    Parameters
    ----------
    event_number : int
        Event whose particles are processed.
    tail_start : float, optional
        Fraction of the hits skipped before the tail begins. The default 0.9
        means the tail is roughly the last 10 % of the hits.
    min_hits : int, optional
        Particles with this many hits or fewer are discarded (default 30).

    Returns
    -------
    E : numpy.ndarray
        Tail-to-total ADC ratio of each retained particle (float64, 1-D).
    particleType : numpy.ndarray
        ``events.mc_pdg`` value of the same particles, in the same order
        (float64, 1-D).

    Notes
    -----
    Particles whose total ADC is exactly zero are skipped: the ratio is
    undefined for them and would otherwise show up as NaN or inf.
    """
    idx = np.where(events.event_number == event_number)[0]

    # Plain lists are grown in the loop; np.append would copy the whole array
    # on every iteration.
    ratios = []
    pdg_codes = []

    for i in idx:
        hit_x = np.asarray(events.reco_hits_x_w[i])
        adc = np.asarray(events.reco_adcs_w[i], dtype=float)

        # Discard particles that do not have more than `min_hits` hits.
        if hit_x.size <= min_hits:
            continue

        total = np.sum(adc)
        if total == 0:
            # Ratio undefined (would be 0/0 or x/0): leave the particle out.
            continue

        tail_begin = int(tail_start * len(adc))
        ratios.append(np.sum(adc[tail_begin:]) / total)

        # Record the particle type alongside its ratio.
        pdg_codes.append(events.mc_pdg[i])

    return np.asarray(ratios, dtype=float), np.asarray(pdg_codes, dtype=float)

In [None]:
# Quick look at the (ratios, particle types) pair returned for event 1.
energy(event_number=1)

In [None]:
# Seeded generator so the sampled events, and everything derived from them,
# are the same on every Restart & Run All.
event_rng = np.random.default_rng(42)

# 1000 event numbers drawn uniformly with replacement from 0..max inclusive.
# endpoint=True keeps the largest event number drawable; the exclusive upper
# bound of randint silently left it out.
random_numbers = event_rng.integers(
    0, np.max(events.event_number), size=1000, endpoint=True
)

In [None]:
# Collect the per-event results of x() for every sampled event.
# x() is called once per event (it scans all particles of the event, so a
# second call just doubles the cost), and the pieces are joined once at the
# end instead of re-copying the growing arrays with np.append.
energy_chunks = []
pdg_chunks = []
for i in random_numbers:
    event_e, event_p = x(i)
    energy_chunks.append(event_e)
    pdg_chunks.append(event_p)

# The leading empty float array keeps the result a float64 ndarray even when
# no event contributed anything.
e = np.concatenate([np.array([])] + energy_chunks)
p = np.concatenate([np.array([])] + pdg_chunks)

In [None]:
# Boolean mask of the entries to keep: ratios that are real, finite numbers.
# np.isfinite rejects NaN (from 0/0) as well as +/-inf; NaN would otherwise
# survive and make the automatic bin range of np.histogram fail later on.
keep_indices = np.isfinite(e)

# Indices of the rejected entries, kept under the original name.
inf = np.where(~keep_indices)

# Apply the same mask to both arrays so they stay aligned element by element.
e = e[keep_indices]
p = p[keep_indices]

In [None]:
# The two shapes should match: one ratio per particle type.
tuple(arr.shape for arr in (e, p))

In [None]:
# One row per particle: its ratio and its particle code, as parallel columns.
df = pd.DataFrame.from_dict({"energy": e, "particle": p})

In [None]:
# Class labels: 1 marks a shower, 0 marks a track.
# Codes 11, -11 and 22 (presumably PDG electron / positron / photon) are the
# showers; any code missing from the lookup maps to NaN and is then set to 0.
mapping = dict.fromkeys((11, -11, 22), 1)

df["class"] = (
    df["particle"]
    .map(mapping)
    .fillna(0)
)

In [None]:
# Row masks for the two classes (0.0 = track, 1.0 = shower).
tracksQuery = df['class'].eq(0.0)
showersQuery = df['class'].eq(1.0)

# Per-class sub-frames ...
ftracks = df.loc[tracksQuery]
fshowers = df.loc[showersQuery]

# ... and the 'energy' column of each as a plain NumPy array.
tracks = ftracks['energy'].to_numpy()
showers = fshowers['energy'].to_numpy()

In [None]:
n_bins = 40

# Shared bin edges computed from the pooled sample. Taking the edges from the
# showers alone silently dropped every track outside the shower range, even
# though those tracks were still counted in the normalisation below.
bin_edges = np.histogram_bin_edges(np.concatenate((tracks, showers)), bins=n_bins)
hist1, _ = np.histogram(tracks, bins=bin_edges)
hist2, _ = np.histogram(showers, bins=bin_edges)

bin_widths = np.diff(bin_edges)

# Normalise each class to unit total so the shapes can be compared even when
# the class sizes differ; max(..., 1) avoids dividing by zero for an empty class.
hist1_normalized = hist1 / max(len(tracks), 1)
hist2_normalized = hist2 / max(len(showers), 1)

# NOTE(review): the bars show the fraction of particles per bin rather than a
# density, and the x quantity is the 'energy' column of df (a ratio) - consider
# relabelling the axes.
plt.bar(bin_edges[:-1], hist1_normalized, width=bin_widths, align='edge', alpha=0.75, label='Tracks')
plt.bar(bin_edges[:-1], hist2_normalized, width=bin_widths, align='edge', alpha=0.75, label='Showers')
plt.title("Using energy to distinguish tracks and showers")
plt.ylabel("Normalised frequency density")
plt.xlabel("Energy")

plt.legend()
plt.show()