In [None]:
import uproot
import awkward as ak

import matplotlib.pylab as plt
import numpy as np

import hist
from hist import Hist

import time

# References

## BaBar

https://babar-wiki.heprc.uvic.ca/bbr_wiki/index.php/Find_Data

https://babar-wiki.heprc.uvic.ca/bbr_wiki/index.php/Physics

https://babar-wiki.heprc.uvic.ca/bbr_wiki/index.php/Physics/skims

https://babar-wiki.heprc.uvic.ca/bbr_wiki/index.php/Available_Data

https://babar-wiki.heprc.uvic.ca/bbr_wiki/index.php/Available_Lists#Composite_Particle_Lists

https://babar-wiki.heprc.uvic.ca/bbr_wiki/index.php/Lambda_Lists

## Physics

https://pdglive.lbl.gov/Viewer.action

https://pdglive.lbl.gov/ParticleGroup.action?init=0&node=BXXX020

https://pdglive.lbl.gov/Particle.action?init=0&node=S033&home=BXXX040

https://pdglive.lbl.gov/ParticleGroup.action?init=0&node=MXXX045



## Code

### Uproot
https://uproot.readthedocs.io/en/latest/basic.html

### Awkward arrays
https://awkward-array.org/doc/main/

https://awkward-array.org/doc/main/getting-started/index.html (see left-hand navigation bar with different tutorials)

### Histogram

https://hist.readthedocs.io/en/latest/




# Open the file and extract the data

Using straight `uproot` and some of the "original" BaBar files

*Note! This is not the approach we will use for the full analysis, but it is useful for you to see how to do this with the ROOT files, in case you need to do some preliminary checks that I missed.*

In [None]:
# Directory that holds the BaBar files used in this notebook
topdir = '/mnt/qnap/babar_data/bnv_plambda'

# Path of one of the "original" BaBar ROOT files
filename = f'{topdir}/SP-1005-LambdaVeryVeryLoose-Run3-R24a2-v03_COMBINED.root'

# Open the ROOT file with uproot
f = uproot.open(filename)

# Show the names of the objects stored in the file
print(f.keys())

# This extracts the ROOT ntuple (TTree)
# From this we can extract awkward arrays
t = f['ntp1']

# Confirm what kind of object we got back
print(type(t))

In [None]:
# Display the variable names

# keys() gives the names of the variables stored in the TTree
keys = t.keys()

# Bare last expression so the notebook displays the whole list
keys

Print out the variables in the TTree/awkward array in a neater way. 

In [None]:
keys = t.keys()

# Pack the names into rows: each name is padded to 20 characters, and a row is
# printed once it has grown to at least max_len characters.
max_len = 80
output = ""
for k in keys:
    if len(output)<max_len:
        output = f"{output}{k:20s} "
    else:
        print(output)
        output = f"{k:20s} "
# Print the last (possibly partial) row; without this the final names are never shown
print(output)

# Exploring the data

In [None]:
# You can access the values in a TTree branch as follows.
# Note that in each case, we need the .array() call at the end to actually get the values.

# Get the proton energy. 
x = t['penergy'].array()

print(x)

# Get the number of protons in each event using an awkward function
# (ak.num counts the entries in each event's list)

n = ak.num(x)

print(n)

In [None]:
# Read the nTRK branch; as the last expression in the cell, its values are displayed
t["nTRK"].array()

# Interfacing with the files we will use for analysis

These files have already been processed and only the awkward arrays have been stored. 

These files are [parquet files](https://www.databricks.com/glossary/what-is-parquet).

In [None]:
# Background
#filename = f'{topdir}/Background_SP_modes_Only_Run_1.parquet'

# Signal
filename = f'{topdir}/Signal_SP_mode.parquet'

# Time how long it takes to read the parquet file into an awkward array
start = time.time()
data = ak.from_parquet(filename)
elapsed = time.time() - start

print(f"Took {elapsed} s")
print(type(data))

We can print out the variable names in the same way, but we use the `fields` data member. 

In [None]:
keys = data.fields

# Print the field names in rows, each name padded to 20 characters
max_len = 80
output = ""
for k in keys:
    if len(output) >= max_len:
        # The current row is full: emit it and start a fresh one
        print(output)
        output = ""
    output += f"{k:20s} "
# Emit whatever is left in the last row
print(output)

We can access the values now without the `.array()` call at the end. 

In [None]:
# Look up the field by name with square brackets; the result is displayed as the cell output
data['penergy']

In [None]:
# Or like this

# Attribute-style access to the same field
data.penergy

## Some other examples

In [None]:
# Need to flatten it before we plot, if it is multidimensional
x = ak.flatten(data['penergy'])

# Trailing semicolon hides the return value of plt.hist; the bare `x` then
# shows the flattened array as the cell output
plt.hist(x, bins=100);
x

In [None]:
# Plot only the first instance in each event

# [:,0] keeps every event (first index) and picks entry 0 within each event
x = data['BMass'][:,0]

# This is only 1-dimensional so we don't need to flatten it. 
# The trailing semicolon hides the return value of plt.hist in the cell output.
plt.hist(x,bins=100, range=(0,6));

# Histogram

In [None]:
# Get all the SP modes

# Convert the spmode field to a plain Python list and keep the distinct values
spmodes = np.unique(data['spmode'].to_list())

print(spmodes)

In [None]:
# Create a histogram: one regular BpostFitMes axis plus two string-category
# axes ("SP" and "cuts") that grow as new labels are filled
h = (
    Hist.new
    .Reg(100, 5.2, 5.3, name="BpostFitMes", label=r"M$_{ES}$ [GeV/c$^2$]")
    .StrCat([], name="SP", label="SP modes", growth=True)
    .StrCat([], name="cuts", label="Cuts", growth=True)
    .Weight()
)

# Fill the histogram, one SP mode at a time
for spmode in spmodes:
    mask = data.spmode == spmode
    # Keep only the first entry in each selected event
    x = data[mask]['BpostFitMes'][:,0]
    h.fill(BpostFitMes=x, SP=spmode, cuts="0", weight=1)

Display it in different ways

In [None]:
#h[:,"998","0"].plot(histtype="fill", linewidth=1, edgecolor="grey")
# Pick the "-999" SP category and the "0" cuts category, leaving a 1D histogram to plot.
# NOTE(review): assumes "-999" is one of the SP modes in the loaded file — confirm against `spmodes`.
h[:,"-999","0"].plot(histtype="fill", linewidth=1, edgecolor="grey")

In [None]:
#h[:,"1235","0"].plot(histtype="fill", linewidth=1, edgecolor="grey")

In [None]:
#h.stack('SP')[:].project('BpostFitMes').plot(stack=True, histtype="fill")

#plt.legend()

Now do the same for the other variable, `BpostFitDeltaE` ($\Delta E$)!

In [None]:
# Create the histogram, this does not fill it in.
# Same layout as the M_ES histogram: one regular axis for the variable plus
# growing string-category axes for the SP mode and the cut name.
j = (
    Hist.new
    .Reg(100, -1, 1, name="BpostFitDeltaE", label=r"$\Delta E$ [GeV]")
    .StrCat([], name="SP", label="SP modes", growth=True)
    .StrCat([], name="cuts", label="Cuts", growth=True)
    .Weight()
)

# Fill, one SP mode at a time
for spmode in spmodes:
    mask2 = data.spmode == spmode
    # Keep only the first entry in each selected event
    y = data[mask2]["BpostFitDeltaE"][:,0]
    j.fill(BpostFitDeltaE=y, SP=spmode, cuts="0", weight=1)

In [None]:
#j[:,"998","0"].plot(histtype="fill", linewidth=1, edgecolor="grey")
# Pick the "-999" SP category and the "0" cuts category, leaving a 1D histogram to plot.
# NOTE(review): assumes "-999" is one of the SP modes in the loaded file — confirm against `spmodes`.
j[:,"-999","0"].plot(histtype="fill", linewidth=1, edgecolor="grey")

In [None]:
# Split the histogram into one component per SP category, reduce each to the
# BpostFitDeltaE axis, and draw them stacked
stacked_by_sp = j.stack('SP')[:].project('BpostFitDeltaE')
stacked_by_sp.plot(stack=True, histtype="fill")

plt.legend()

## Lambda 


Matplotlib histogram:

In [None]:
# Pull the two Lambda0 fields out of the data by attribute access
Lambda_mass= data.Lambda0_unc_Mass
Flight_length= data.Lambda0FlightLen

# Flatten the mass array so it can be passed straight to plt.hist
flat_Lambda_mass= ak.flatten(Lambda_mass)

# Display the (unflattened) flight-length array as the cell output
Flight_length

In [None]:
# Histogram of the flattened Lambda0_unc_Mass values; the semicolon hides plt.hist's return value
plt.hist(flat_Lambda_mass, bins= 100);

hist class:

In [None]:
# Flight-length histogram: regular axis plus growing SP-mode and cut-name categories
h_lambda_FL = (
    Hist.new
    .Reg(100, -.25, 10, name="Lambda0FlightLen", label=r"flight length [cm]")
    .StrCat([], name="SP", label="sp modes", growth=True)
    .StrCat([], name="cuts", label="Cuts", growth=True)
    .Weight()
)

# Mass histogram with the same category axes
h_lambda_mass = (
    Hist.new
    .Reg(100, 1.105, 1.126, name="Lambda0_unc_Mass", label=r"Mass GeV")
    .StrCat([], name="SP", label="sp modes", growth=True)
    .StrCat([], name="cuts", label="Cuts", growth=True)
    .Weight()
)

for spmode in spmodes:
    modes = data.spmode == spmode
    selected = data[modes]

    # First entry in each selected event
    x0 = selected["Lambda0FlightLen"][:,0]
    x1 = selected["Lambda0_unc_Mass"][:,0]

    # No cut applied
    cut_name = "org"
    h_lambda_FL.fill(Lambda0FlightLen=x0, SP=spmode, cuts=cut_name, weight=1)
    h_lambda_mass.fill(Lambda0_unc_Mass=x1, SP=spmode, cuts=cut_name, weight=1)

    # First cut: keep events whose flight length is above 2.0
    cut_name = "fl_gt_val"
    mask_fl = x0 > 2.0
    h_lambda_FL.fill(Lambda0FlightLen=x0[mask_fl], SP=spmode, cuts=cut_name, weight=1)
    h_lambda_mass.fill(Lambda0_unc_Mass=x1[mask_fl], SP=spmode, cuts=cut_name, weight=1)


In [None]:
# Overlay the flight-length histogram for SP mode "998" before ("org") and
# after ("fl_gt_val") the flight-length cut.
# NOTE(review): assumes "998" is one of the SP modes in the loaded file — confirm against `spmodes`.
h_lambda_FL[:,"998","org"].plot(histtype="fill", linewidth=1, edgecolor="grey")
h_lambda_FL[:,"998","fl_gt_val"].plot(histtype="fill", linewidth=1, edgecolor="grey")

In [None]:
# Take the uncut ("org") selection, split it into one component per SP category,
# and reduce each to the flight-length axis; the result is shown as the cell output
h_lambda_FL[:,:,'org'].stack('SP').project('Lambda0FlightLen')

In [None]:
#h_lambda_FL.show()

In [None]:
plt.figure(figsize=(16,8))

# Left panel: flight length, stacked by SP mode, before and after the cut
plt.subplot(1,2,1)
fl_org = h_lambda_FL[:,:,'org'].stack('SP')[:].project('Lambda0FlightLen')
fl_cut = h_lambda_FL[:,:,'fl_gt_val'].stack('SP')[:].project('Lambda0FlightLen')
fl_org.plot(stack=True, histtype="fill")
fl_cut.plot(stack=True, histtype="fill")

plt.legend()

# Right panel: Lambda0_unc_Mass, stacked by SP mode, before and after the cut
plt.subplot(1,2,2)
mass_org = h_lambda_mass[:,:,'org'].stack('SP')[:].project('Lambda0_unc_Mass')
mass_cut = h_lambda_mass[:,:,'fl_gt_val'].stack('SP')[:].project('Lambda0_unc_Mass')
mass_org.plot(stack=True, histtype="fill")
mass_cut.plot(stack=True, histtype="fill")



In [None]:
plt.figure(figsize=(16,8))

# SP mode to show, and the cut categories to overlay in each panel
sp = '1005'
cuts_to_draw = ('org', 'fl_gt_val')

# Left panel: flight length before and after the cut
plt.subplot(1,2,1)
for cut in cuts_to_draw:
    h_lambda_FL[:,sp,cut].project('Lambda0FlightLen').plot(histtype="fill")

#plt.legend()

# Right panel: Lambda0_unc_Mass before and after the cut
plt.subplot(1,2,2)
for cut in cuts_to_draw:
    h_lambda_mass[:,sp,cut].project('Lambda0_unc_Mass').plot(histtype="fill")

plt.tight_layout()

In [None]:
# Show the uncut flight-length histogram for all SP modes.
# The histogram is called h_lambda_FL and its cut categories are "org" and
# "fl_gt_val"; there is no `lambda_FL` variable and no "0" cut category.
h_lambda_FL[:,:,'org']

In [None]:
# First BMass entry in each event
x= data["BMass"][:,0]

# Histogram restricted to the 5-6 window; the semicolon hides plt.hist's return value
plt.hist(x, bins=50, range=(5,6));

# Next steps

* Plot `BpostFitMes` and `BpostFitDeltaE`
* Do individual histograms
* Do a 2D histogram (try with `Hist`)

Bellis will get you a signal file so you can see how the signal looks different from all of these. 

In [None]:
# nTRK values for the events whose spmode is '1237'
mask = data['spmode'] == '1237'
x = data['nTRK'][mask]

# One bin per integer value from 0 to 25
plt.hist(x, bins=26, range=(0,26));

In [None]:
# Access the spmode field; the trailing semicolon suppresses the cell output
data['spmode'];

In [None]:
# First entry per event for each of the two post-fit variables
BPFM = data["BpostFitMes"][:,0]
BPFDE = data["BpostFitDeltaE"][:,0]

# Two side-by-side panels, addressed through their Axes objects
fig, (ax_mes, ax_de) = plt.subplots(1, 2, figsize=(16,8))

ax_mes.set_title("B post fit MES")
ax_mes.hist(BPFM, bins=100, range=(3.5,5.5));
ax_mes.set_xlabel("Mass [GeV/c^2]")

ax_de.set_title("B post fit Delta E")
ax_de.hist(BPFDE, bins=100, range=(-1,1));
ax_de.set_xlabel("E [GeV]")

In [None]:
plt.figure(figsize=(8, 8))

# 2D histogram of BpostFitMes against BpostFitDeltaE. It gets its own name so
# that it does not overwrite the 1D `h` histogram filled earlier in the notebook.
h_mes_de = Hist(
    hist.axis.Regular(400, 3, 7, name="BPFM", label="mass [GeV/c^2]", flow=True),
    hist.axis.Regular(350, -.75, 1, name="BPFDE", label="energy [GeV]", flow=True),
)

# normal fill (positional: first axis, then second axis)
h_mes_de.fill(BPFM, BPFDE)

# plot2d_full draws a main 2D panel plus top and side 1D projections and
# returns the artists for each of the three panels
main_art, top_art, side_art = h_mes_de.plot2d_full(
    main_cmap="coolwarm",
    top_ls="--",
    top_color="orange",
    top_lw=2,
    side_ls=":",
    side_lw=2,
    side_color="steelblue",
)

# Zoom in on the main 2D panel explicitly. plt.xlim/plt.ylim act on the
# "current" axes, which after plot2d_full is not necessarily the main panel.
main_ax = main_art.pcolormesh.axes
main_ax.set_xlim(5.2, 5.3)
main_ax.set_ylim(-.4, .4)
plt.show()

In [None]:
# First entry per event for each of the two post-fit variables
BPFM = data["BpostFitMes"][:,0]
BPFDE = data["BpostFitDeltaE"][:,0]

# Scatter plot: point markers only, no connecting line; small and faint so
# that dense regions remain readable
plt.plot(BPFM, BPFDE, linestyle='None', marker='.', markersize=0.5, alpha=0.1)
plt.xlim(5.2, 5.3)
plt.ylim(-0.5, 0.5)

Note that the $M_{ES}$ distribution shows a steady drop-off (as is characteristic of using $M_{ES}$) and that the $\Delta E$ distribution is symmetric about zero. 

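The symmetry about zero arises from the definition of $\Delta E$, the difference between the energy available to each B from the beams and the reconstructed energy of the B candidate: $\Delta E = \frac{1}{2}E_{e^+e^-}-E_B$. Since $\frac{1}{2}E_{e^+e^-}$ is an unchanging value and the energy of a correctly reconstructed B candidate should be close to that number, $\Delta E$ is centered on zero. 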

The drop off in the mass is a result of the same idea. For better resolution on the B mass, you can relate the momentum of the B to the energy of the beam, since the main particles produced in electron positron collisions at this energy are a B and an anti B. So, half of the energy of the beam goes into creating one B, and the other half goes into creating the B bar. The mass is calculated by the following: 

$M^2 = (E_p+E_\Lambda)^2-(p_p+p_\Lambda)^2$

where $(E_p+E_\Lambda)$ is the energy of the B (given by the sum of the energies of its primary decay products) and $(p_p+p_\Lambda)$ is the same but with the momentum instead of the energy. $(E_p+E_\Lambda)$ can be approximated as $\frac{1}{2}E_{e^+e^-}$! Substituting this in gives $M_{ES}^2 = \left(\frac{1}{2}E_{e^+e^-}\right)^2-(p_p+p_\Lambda)^2$.