In [2]:
import numpy as np
import matplotlib.pyplot as plt
import csv
import pyhepmc
import pandas as pd

In [3]:
# Read the two CSV exports used throughout the notebook: a table of
# per-event information and a table with one row per particle.
EVENT_INFO_CSV = "genie_tau_decays_event_info.csv"
PARTICLES_CSV = "genie_tau_decays_particles.csv"

event_info = pd.read_csv(EVENT_INFO_CSV)
particle_info = pd.read_csv(PARTICLES_CSV)

## Analyze particles in the event

### 4-momentum conservation

In [4]:
# Preview the first 13 particle rows to see the available columns.
particle_info.head(n=13)

Unnamed: 0,event_num,pdg,E,px,py,pz
0,0,1000080160,14.895081,0.0,0.0,0.0
1,0,-16,3.730323,0.18526,3.295873,1.737299
2,0,-15,3.251366,-0.023894,2.329548,1.409583
3,0,211,0.72649,-0.435732,-0.01364,0.564145
4,0,-16,2.524877,0.411767,2.343313,0.845184
5,1,1000080160,14.895081,0.0,0.0,0.0
6,1,16,3.725654,-1.656092,-2.943322,1.573121
7,1,15,2.846514,-0.845274,-1.491209,1.416815
8,1,-211,0.395154,-0.365821,-0.047146,0.024889
9,1,-211,2.062102,-0.284192,-1.660413,1.181121


In [6]:
# Rows describing a tau or anti-tau (|pdg| == 15).
is_tau = particle_info["pdg"].abs() == 15

# Sanity check 1: no event may contain more than one tau row.
tau_rows_per_event = particle_info[is_tau].groupby("event_num").count()
assert not tau_rows_per_event.gt(1).any().any()

taus = particle_info[is_tau]

# Sanity check 2: the tau rows, in file order, carry event numbers
# 0, 1, 2, ... — i.e. every event has a tau and the numbering has no gaps.
expected_event_nums = np.arange(len(taus))
assert (taus["event_num"] == expected_event_nums).all()

In [7]:
# Display the selected tau rows (the asserts above require one per event).
taus

Unnamed: 0,event_num,pdg,E,px,py,pz
2,0,-15,3.251366,-0.023894,2.329548,1.409583
7,1,15,2.846514,-0.845274,-1.491209,1.416815
14,2,15,3.332381,-2.127187,-1.737024,-0.636785
19,3,15,3.509529,-0.032933,-0.487587,-2.986784
26,4,15,3.555693,-0.230564,-0.218090,-3.063518
...,...,...,...,...,...,...
447098,76638,-15,1003.199894,-392.660182,-607.363024,-695.222992
447104,76639,15,5031.544231,-4776.946270,-1257.860435,-956.559277
447110,76640,-15,75.985685,56.916040,-50.301859,-0.977036
447116,76641,15,1999.051556,-973.289879,1030.714386,1409.446214


In [21]:
# Per event: summed (E, px, py, pz) of every row after the first three.
# In the preview above the first three rows are the target, the +-16
# particle and the tau, so the remaining rows are the tau's decay products.
decay_product_sums = (
    particle_info
    .drop(columns="pdg")
    .groupby("event_num")
    .nth[3:]
    .groupby("event_num")
    .sum()
)

# The tau's own (E, px, py, pz), indexed by event number.
tau_four_momenta = taus.set_index("event_num").drop(columns="pdg")

# Count, per event, how many of the four components agree within 1 %.
# The two tables are compared row-by-row by position.
n_matching_components = np.isclose(
    decay_product_sums, tau_four_momenta, rtol=0.01
).sum(axis=1)

# Histogram: (number of matching components, number of events).
np.unique(n_matching_components, return_counts=True)

(array([0, 1, 2, 3, 4]), array([18011,   568,    61,   493, 57510]))

The values in the first array are the number of 4-momentum components that agree within the given relative tolerance (rtol), while the values in the second array are the number of events with that many components in agreement.

We have a large collection of 4 (all components close in value) and 0 (none of the components are close in value), with a few events in between.

It is good that most events are 4, since that means that the 4-momentum is conserved.

The ones with 0 correspond to when the electron is not in the event, but the electron neutrino and tau neutrino are.

We now want to check if only the electron is missing, or if there are other events that look strange.

In [13]:
def _products_are_not_nu_e_nu_tau(event):
    """Return True unless the event's decay products are exactly {nu_e, nu_tau}.

    The decay products are the rows after the first three of the event.
    Particle/anti-particle signs are ignored by comparing |pdg| (12 and 16).
    """
    # Select the pdg column by name rather than by position (it used to be
    # `iloc[3:, 1]`), so the check does not depend on the column order.
    product_pdgs = event["pdg"].iloc[3:].abs()
    return set(product_pdgs.tolist()) != {12, 16}


# NOTE: despite its name, `no_e_events` holds the events that are KEPT, i.e.
# every event except those whose only decay products are an electron
# neutrino and a tau neutrino (the events with the missing electron).
no_e_events = (
    particle_info
    .groupby("event_num")
    .filter(_products_are_not_nu_e_nu_tau)
)

In [17]:
# Event numbers that survived the filter, and the tau rows of those events.
no_e_eventnum = no_e_events["event_num"].unique()
tau_in_kept_event = taus["event_num"].isin(no_e_eventnum)
tau_no_e = taus[tau_in_kept_event]

In [18]:
# Display the tau rows belonging to the events kept in `no_e_events`.
tau_no_e

Unnamed: 0,event_num,pdg,E,px,py,pz
2,0,-15,3.251366,-0.023894,2.329548,1.409583
7,1,15,2.846514,-0.845274,-1.491209,1.416815
19,3,15,3.509529,-0.032933,-0.487587,-2.986784
26,4,15,3.555693,-0.230564,-0.218090,-3.063518
31,5,-15,3.758840,0.426761,3.153622,-0.918877
...,...,...,...,...,...,...
447098,76638,-15,1003.199894,-392.660182,-607.363024,-695.222992
447104,76639,15,5031.544231,-4776.946270,-1257.860435,-956.559277
447110,76640,-15,75.985685,56.916040,-50.301859,-0.977036
447116,76641,15,1999.051556,-973.289879,1030.714386,1409.446214


In [24]:
# Same comparison as for the full sample, now restricted to `no_e_events`.
# Note the relative tolerance here is 0.1, looser than the 0.01 used for
# the full sample.
filtered_product_sums = (
    no_e_events
    .drop(columns="pdg")
    .groupby("event_num")
    .nth[3:]
    .groupby("event_num")
    .sum()
)
filtered_tau_momenta = tau_no_e.set_index("event_num").drop(columns="pdg")

n_matching_filtered = np.isclose(
    filtered_product_sums, filtered_tau_momenta, rtol=0.1
).sum(axis=1)

np.unique(n_matching_filtered, return_counts=True)

(array([0, 1, 3, 4]), array([ 3592,     7,    48, 57926]))

There are still quite a lot of events in which the 4-momentum is not conserved. These will have to be investigated further.

In [32]:
# Summed 4-momentum of the decay products (rows after the first three) for
# each kept event, and the matching tau 4-momenta.
product_sums_no_e = (
    no_e_events
    .drop(columns="pdg")
    .groupby("event_num")
    .nth[3:]
    .groupby("event_num")
    .sum()
)
tau_momenta_no_e = tau_no_e.set_index("event_num").drop(columns="pdg")

# Number of 4-momentum components (0..4) that agree within 10 % per event.
n_correct_4m_components = np.isclose(
    product_sums_no_e, tau_momenta_no_e, rtol=0.1
).sum(axis=1)

# Event numbers where NOT A SINGLE component agrees. The boolean mask is
# applied to `tau_no_e` by position, so it relies on both tables listing
# the events in the same order.
no_component_matches = n_correct_4m_components == 0
incorrect_event_num = tau_no_e.loc[no_component_matches, "event_num"]

In [44]:
# All particle rows of the events flagged as having no matching component.
in_flagged_event = no_e_events["event_num"].isin(incorrect_event_num)
remaining_incorrect_events = no_e_events[in_flagged_event]

# Show only their decay products (skip the first three rows of each event).
remaining_incorrect_events.groupby("event_num").nth[3:]

Unnamed: 0,event_num,pdg,E,px,py,pz
85437,14462,16,0.455603,0.227081,0.366221,-0.147955
86869,14705,-16,1.741897,1.335788,-0.653086,0.907389
91144,15425,211,7.821918,4.661051,5.020981,3.771907
91145,15425,211,4.364382,2.741274,2.651072,2.117920
91146,15425,-16,1.072879,0.628553,0.511290,0.703260
...,...,...,...,...,...,...
447095,76637,16,1653.625432,1446.779545,-506.715352,620.117384
447111,76640,211,4.219130,3.109955,-2.847443,-0.042789
447112,76640,211,9.417698,7.102254,-6.182930,-0.053989
447113,76640,-16,29.920689,22.462416,-19.763846,0.279041


There seem to be mostly events where the tau decays to a tau neutrino and nothing else. 

I would guess that this is because the tau decays to two photons and a tau neutrino, and then the photons are not included.
Since there are two particles that are unknown, it is not possible to determine their 4-momenta. They of course share half the missing energy each, but their directions are not known.

In [43]:
# Example: every particle row of event 14462, whose only listed decay
# product is a tau neutrino.
particle_info[particle_info["event_num"] == 14462]

Unnamed: 0,event_num,pdg,E,px,py,pz
85434,14462,1000080160,14.895081,0.0,0.0,0.0
85435,14462,16,32.364764,6.430823,23.039574,-21.801387
85436,14462,15,30.744208,5.603906,22.255583,-20.379756
85437,14462,16,0.455603,0.227081,0.366221,-0.147955


We now want to check the other events:

In [53]:
def _is_not_lone_nu_tau(event):
    """Return True unless the event's only decay product is a tau neutrino.

    The decay products are the rows after the first three of the event; a
    tau neutrino or anti-neutrino is identified by |pdg| == 16.
    """
    # Select the pdg column by name rather than by position (it used to be
    # `iloc[..., 1]`), so the check does not depend on the column order.
    product_pdgs = event["pdg"].iloc[3:]
    if len(product_pdgs) != 1:
        return True
    return bool(abs(product_pdgs.iloc[0]) != 16)


# Drop the "tau -> tau neutrino and nothing else" events, keep the rest.
remaining_incorrect_events2 = (
    remaining_incorrect_events
    .groupby("event_num")
    .filter(_is_not_lone_nu_tau)
)

In [58]:
def _products_are_not_two_pions_and_nu_tau(event):
    """Return True unless the decay products are two charged pions and a tau neutrino.

    The decay products are the rows after the first three of the event;
    signs are ignored by comparing sorted |pdg| values against [16, 211, 211].
    """
    # Select the pdg column by name rather than by position (it used to be
    # `iloc[3:, 1]`), so the check does not depend on the column order.
    product_pdgs = event["pdg"].iloc[3:].abs()
    return sorted(product_pdgs.tolist()) != [16, 211, 211]


# Show the remaining events after also excluding the (211, 211, 16) pattern.
remaining_incorrect_events2.groupby("event_num").filter(_products_are_not_two_pions_and_nu_tau)

Unnamed: 0,event_num,pdg,E,px,py,pz
91904,15555,1000080160,14.895081,0.000000,0.000000,0.000000
91905,15555,16,47.054507,-7.615202,-19.700151,-42.048061
91906,15555,15,43.495663,-6.100936,-18.933855,-38.639401
91907,15555,111,9.426505,-0.809797,-3.802363,-8.586445
91908,15555,16,2.649729,-0.410884,-1.350791,-2.242232
...,...,...,...,...,...,...
447007,76619,2212,0.938270,0.000000,0.000000,0.000000
447008,76619,16,1168.397089,-1108.226844,182.089858,322.223995
447009,76619,15,559.871910,-530.591632,82.873574,158.296837
447010,76619,-211,21.691559,-20.524020,3.361495,6.161916


In [70]:
# PDG codes of the decay products (rows after the first three), grouped by event.
product_pdgs_by_event = (
    remaining_incorrect_events2
    .groupby("event_num")
    .nth[3:]
    .groupby("event_num")["pdg"]
)

# Distinct decay-product combinations, in the order the rows appear in each event.
{tuple(pdgs.tolist()) for _, pdgs in product_pdgs_by_event}

{(-211, -211, 16),
 (-211, -16),
 (-211, 16),
 (-211, 211, 16),
 (-16, 211),
 (16, -211),
 (111, -211, 16),
 (111, -16),
 (111, 16),
 (111, 111, -16),
 (111, 111, 16),
 (111, 211, -16),
 (211, -211, -16),
 (211, -16),
 (211, 16),
 (211, 211, -16)}

Decay modes that seem to be strange:

16

12, 16

211, 16

111, 16

211, 211, 16

111, 211, 16

111, 111, 16

## Branching ratios
Check the branching ratios of the various decay modes to see if they match what we expect from, e.g., the PDG.

In [72]:
from collections import Counter

In [81]:
# PDG codes of each event from the third row onwards. Unlike the
# 4-momentum checks (which skip three rows), this keeps the tau itself, so
# every key also records whether the event had a tau (15) or anti-tau (-15).
pdgs_from_tau_onwards = (
    particle_info
    .groupby("event_num")
    .nth[2:]
    .groupby("event_num")["pdg"]
)

# Count how often each sorted combination of PDG codes occurs.
c = Counter(tuple(sorted(pdgs.tolist())) for _, pdgs in pdgs_from_tau_onwards)

In [83]:
# Total number of events, taken as the last event number + 1 (relies on the
# event numbers starting at 0 and being consecutive).
n_events = particle_info["event_num"].iloc[-1] + 1

# Fraction of ALL events per decay-product combination. Tau and anti-tau
# events share the same denominator, so each value is a fraction of the
# whole sample rather than of the events with that tau charge.
br = {decay_products: count / n_events for decay_products, count in c.items()}
br

{(-16, -15, 211): 0.03516302858708558,
 (-211, -211, 15, 16, 211): 0.06908654410709393,
 (-12, 15, 16): 0.13414140886969456,
 (-211, 15, 16, 111, 111): 0.06821236120715525,
 (-211, 15, 16): 0.08157300731965085,
 (-16, -15, 111, 111, 211): 0.029343840924807223,
 (-16, -15, 12): 0.062484506086661534,
 (-211, 15, 16, 111): 0.18600524509739963,
 (-14, 13, 15, 16): 0.1315840976997247,
 (-16, -15, 111, 211): 0.0812859621883277,
 (-16, -15, -13, 14): 0.05478647756481349,
 (-211, -16, -15, 211, 211): 0.03064859152173062,
 (-12, 11, 15, 16): 0.0006001852745847632,
 (-16, -15, -11, 12): 0.0001957125895385097,
 (15, 16): 0.01298226843938781,
 (-16, -15): 0.007528410944248007,
 (-16, -15, 211, 211): 0.0005219002387693593,
 (15, 16, 111): 0.0058061401563091215,
 (-211, -16, -15, 211): 0.0009263729238156126,
 (-16, -15, 111, 111): 0.0003914251790770194,
 (-211, 15, 16, 211): 0.0017875083177850554,
 (-16, -15, 111): 0.002700833735631434,
 (15, 16, 111, 111): 0.0008219928760617409,
 (-211, -211, 15, 1

### Nucleus

In [9]:
# First entry of every event; in the particle preview this row is the target.
particles_by_event = particle_info.groupby("event_num")
nucleus = particles_by_event.first()

In [11]:
# How many events have each target PDG code.
nucleus["pdg"].value_counts()

pdg
1000080160    69412
2212           7231
Name: count, dtype: int64

In [12]:
# How many events have each target energy value.
nucleus["E"].value_counts()

E
14.895081    69412
0.938270      7231
Name: count, dtype: int64

We mostly have interactions with the oxygen nucleus, but we also have a few with a proton, i.e., a hydrogen nucleus. This looks reasonable.

The energy is also what we expect.

## Analyze event information
Just for sanity checks

In [100]:
# Column-wise totals. For the flag columns (cc, dis, qel, res) the sum is the
# number of flagged events; the sum of event_num itself carries no meaning.
event_info.sum(axis="index")

event_num    2.937036e+09
cc           7.664300e+04
dis          4.067000e+04
qel          1.594000e+04
res          2.001700e+04
xsec         8.392645e+06
dtype: float64

There seems to be the same order of magnitude of DIS, QEL and RES events, with a majority of DIS events.

In [102]:
# True only if every event has a truthy `cc` flag.
event_info.loc[:, "cc"].all()

True

All events are CC, which is good since those are the only ones we are interested in.