In [None]:
import uproot
import awkward as ak

import matplotlib.pylab as plt
import numpy as np

import hist
from hist import Hist

import time

# References

## BaBar

https://babar-wiki.heprc.uvic.ca/bbr_wiki/index.php/Find_Data

https://babar-wiki.heprc.uvic.ca/bbr_wiki/index.php/Physics

https://babar-wiki.heprc.uvic.ca/bbr_wiki/index.php/Physics/skims

https://babar-wiki.heprc.uvic.ca/bbr_wiki/index.php/Available_Data

https://babar-wiki.heprc.uvic.ca/bbr_wiki/index.php/Available_Lists#Composite_Particle_Lists

https://babar-wiki.heprc.uvic.ca/bbr_wiki/index.php/Lambda_Lists

## Physics

https://pdglive.lbl.gov/Viewer.action

https://pdglive.lbl.gov/ParticleGroup.action?init=0&node=BXXX020

https://pdglive.lbl.gov/Particle.action?init=0&node=S033&home=BXXX040

https://pdglive.lbl.gov/ParticleGroup.action?init=0&node=MXXX045



## Code

### Uproot
https://uproot.readthedocs.io/en/latest/basic.html

### Awkward arrays
https://awkward-array.org/doc/main/

https://awkward-array.org/doc/main/getting-started/index.html (see left-hand navigation bar with different tutorials)

### Histogram

https://hist.readthedocs.io/en/latest/




# Open the file and extract the data

Using straight `uproot` and some of the "original" BaBar files

*Note! This is not the approach we will use for the full analysis, but it is useful for you to see how to do this with the ROOT files, in case you need to do some preliminary checks that I missed.*

In [None]:
# ROOT file to read with uproot
filename = 'SP-1005-LambdaVeryVeryLoose-Run3-R24a2-v03_COMBINED.root'

# Open the file, then pull out the 'ntp1' ROOT ntuple (TTree).
# Awkward arrays are extracted from this object in the cells below.
f = uproot.open(filename)
t = f['ntp1']

print(type(t))

In [None]:
# Collect the variable names stored in the TTree, then show them
# as the cell's output
keys = t.keys()
keys

Print out the variables in the TTree/awkward array in a neater way. 

In [None]:
keys = t.keys()

# Print the names in fixed-width (20-character) columns.
# A row is emitted once it has grown to at least max_len characters.
max_len = 80
output = ""
for k in keys:
    if len(output) >= max_len:
        print(output)
        output = ""
    output = f"{output}{k:20s} "

# Flush the final row: without this the last (possibly partial) row of
# names would never be printed.
if output:
    print(output)

# Exploring the data

In [None]:
# Values in a TTree branch are accessed as follows.
# Note that the .array() call at the end is what actually reads the values.

# Get the proton energy.
x = t['penergy'].array()
print(x)

# Get the number of protons in each event using an awkward function
n = ak.num(x)
print(n)

# Interfacing with the files we will use for analysis

These files have already been processed and only the awkward arrays have been stored. 

These files are [parquet files](https://www.databricks.com/glossary/what-is-parquet).

In [None]:
filename = 'Background_SP_Run1_only.parquet'

# Time the read with perf_counter(): it is a monotonic, high-resolution clock
# intended for measuring intervals, unlike the wall-clock time.time(), which
# can jump if the system clock is adjusted.
start = time.perf_counter()

data = ak.from_parquet(filename)

print(f"Took {time.perf_counter() - start} s")

print(type(data))

We can print out the variable names in the same way, but we use the `fields` data member. 

In [None]:
keys = data.fields

# Print the field names in fixed-width (20-character) columns.
# A row is emitted once it has grown to at least max_len characters.
max_len = 80
output = ""
for k in keys:
    if len(output) >= max_len:
        print(output)
        output = ""
    output = f"{output}{k:20s} "

# Flush the final row: without this the last (possibly partial) row of
# names would never be printed.
if output:
    print(output)

We can access the values now without the `.array()` at the end. 

In [None]:
# Index the loaded array by field name; no .array() call is needed here
data['penergy']

In [None]:
# Or like this: the same field, reached through attribute access

data.penergy

## Some other examples

In [None]:
# The field is multidimensional, so flatten it before handing it to
# matplotlib for plotting
x = ak.flatten(data['penergy'])

plt.hist(x, bins=100);

In [None]:
# Keep only the first instance in each event.
# The result is 1-dimensional, so no flattening is needed before plotting.
x = data.BMass[:, 0]

plt.hist(x, bins=100, range=(5, 6));

# Histogram

In [None]:
# Collect the distinct SP modes present in the data
x = data.spmode

# Convert to a plain Python list so that numpy can find the unique values
spmodes = np.unique(ak.to_list(x))

print(spmodes)

In [None]:
# Create a histogram with one regular axis (100 bins between 5.2 and 5.3) and
# two growable string-category axes, using weighted storage
h = (
    Hist.new
    .Reg(100, 5.2, 5.3, name="BpostFitMes", label=r"M$_{ES}$ [GeV/c$^2$]")
    .StrCat([], name="SP", label="SP modes", growth=True)
    .StrCat([], name="cuts", label="Cuts", growth=True)
    .Weight()
)

# Fill the histogram, one SP mode at a time
for spmode in spmodes:
    mask = data.spmode == spmode
    # Select the field first and then apply the mask, so only this one field
    # is filtered rather than every field of the record.
    # [:,0] keeps the first entry of each selected event.
    x = data['BpostFitMes'][mask][:,0]
    h.fill(BpostFitMes=x, SP=spmode, cuts="0", weight=1)

Display it in different ways

In [None]:
# Keep every BpostFitMes bin, pick the "998" category on the SP axis and the
# "0" category on the cuts axis, then draw the resulting 1-D histogram
h[:,"998","0"].plot(histtype="fill", linewidth=1, edgecolor="grey")

In [None]:
# Split the histogram into one component per SP category
sp_stack = h.stack('SP')[:]

# Project each component onto the BpostFitMes axis and draw them stacked
sp_stack.project('BpostFitMes').plot(stack=True, histtype="fill")

plt.legend()