In [None]:
%load_ext autoreload
%autoreload 2

import uproot
import awkward as ak

import matplotlib.pylab as plt
import numpy as np

import hist
from hist import Hist

import time

import seaborn as sns
import pandas as pd

import os

from analysis_variables import *  

# References

## BaBar

https://babar-wiki.heprc.uvic.ca/bbr_wiki/index.php/Find_Data

https://babar-wiki.heprc.uvic.ca/bbr_wiki/index.php/Physics

https://babar-wiki.heprc.uvic.ca/bbr_wiki/index.php/Physics/skims

https://babar-wiki.heprc.uvic.ca/bbr_wiki/index.php/Available_Data

https://babar-wiki.heprc.uvic.ca/bbr_wiki/index.php/Available_Lists#Composite_Particle_Lists

https://babar-wiki.heprc.uvic.ca/bbr_wiki/index.php/Lambda_Lists

## Physics

https://pdglive.lbl.gov/Viewer.action

https://pdglive.lbl.gov/ParticleGroup.action?init=0&node=BXXX020

https://pdglive.lbl.gov/Particle.action?init=0&node=S033&home=BXXX040

https://pdglive.lbl.gov/ParticleGroup.action?init=0&node=MXXX045



## Code

### Uproot
https://uproot.readthedocs.io/en/latest/basic.html

### Awkward arrays
https://awkward-array.org/doc/main/

https://awkward-array.org/doc/main/getting-started/index.html (see left-hand navigation bar with different tutorials)

### Histogram

https://hist.readthedocs.io/en/latest/




# Open the file and extract the data

Using straight `uproot` and some of the "original" BaBar files

*Note! This is not the approach we will use for the full analysis, but it is useful for you to see how to do this with the ROOT files, in case you need to do some preliminary checks that I missed.*

In [None]:
# Directory that holds the BaBar files; later cells build their paths from it.
# At Siena
topdir = '/mnt/qnap/babar_data/bnv_plambda'

# At home (Bellis) -- uncomment to override the Siena path
#topdir = '/home/bellis/babar_data/bnv_plambda'

# One of the original BaBar ROOT files
filename = f'{topdir}/SP-1005-LambdaVeryVeryLoose-Run3-R24a2-v03_COMBINED.root'

# The file is not closed in this cell; later cells read branches through `t`
f = uproot.open(filename)

# List the objects stored in the file
print(f.keys())

# This extracts the ROOT ntuple (TTree)
# From this we can extract awkward arrays
t = f['ntp1']

print(type(t))

In [None]:
# Display the variable names

# Branch names of the ntuple
keys = t.keys()

# The trailing semicolon suppresses the notebook display of the list;
# remove it to see the raw output
keys;

Print out the variables in the TTree/awkward array in a neater way. 

In [None]:
# Print the branch names in fixed-width (20 character) columns, several per row.
keys = t.keys()

max_len = 80  # a row is flushed once it has reached this many characters
output = ""
for k in keys:
    if len(output)<max_len:
        output = f"{output}{k:20s} "
    else:
        print(output)
        output = f"{k:20s} "
# Flush the final, partially filled row; without this the last few names
# were never shown.
if output:
    print(output)

# Exploring the data

In [None]:
# You can access the values in a TTree branch as follows.
# Note that in each case, we need the .array() call at the end to actually get the values.

# Get the proton energy.
x = t['penergy'].array()

print(x)

# Get the number of entries (protons) in each event using an awkward function

n = ak.num(x)

print(n)

In [None]:
# Read the 'nTRK' branch; as the last expression of the cell it is displayed
t["nTRK"].array()

# Interfacing with the files we will use for analysis

These files have already been processed and only the awkward arrays have been stored. 

These files are [parquet files](https://www.databricks.com/glossary/what-is-parquet).

In [None]:
# Background
filename1 = f'{topdir}/Background_SP_modes_Only_Run_1.parquet'
# Alternative input (all runs, background and signal) -- uncomment to use instead
#filename1 = f'{topdir}/Background_and_signal_SP_modes_All_runs.parquet'

# Signal
filename2 = f'{topdir}/Signal_SP_mode.parquet'

# Time how long it takes to read both files
start = time.time()

# `data` (background) and `signal` are used by most of the cells below
data = ak.from_parquet(filename1)
signal= ak.from_parquet(filename2)

print(f"Took {time.time() - start} s")

#print(type(data))

We can print out the variable names in the same way, but we use the `fields` data member. 

In [None]:
# Same column layout as used for the TTree branches, but the names come
# from the `fields` attribute of the array that was read in above.
keys = data.fields

max_len = 80
output = ""
for k in keys:
    # Row is full: emit it and start a fresh one before adding this name
    if len(output) >= max_len:
        print(output)
        output = ""
    output += f"{k:20s} "
# Emit whatever is left in the last row
print(output)

We can access the values now without the `.array()` call at the end. 

In [None]:
# Select a field by its string key; the result is displayed as the cell output
data['penergy']

In [None]:
# Or like this, using attribute access instead of a string key

data.penergy

## Some other examples

In [None]:
# A jagged field (more than one entry per event) has to be flattened
# before it can be histogrammed.
x = ak.flatten(data['penergy'])

plt.hist(x, bins=100);
x

In [None]:
# Plot only the first entry in each event

# [:,0] keeps entry 0 of every event
x = data['BMass'][:,0]

# This is only 1-dimensional so we don't need to flatten it.
plt.hist(x,bins=100, range=(0,6));

## Histogram

In [None]:
# Get all the SP modes

x = data['spmode']

# Unique SP mode values present in `data`.
# NOTE(review): `spmodes` is reassigned further down from the cross-section table;
# cells that loop over it depend on which assignment ran last.
spmodes = np.unique(x.to_list())

print(spmodes)

In [None]:
# Book a BpostFitMes histogram with two growing string-category axes
# (SP mode and cut label) and weighted storage.
h = (
    Hist.new
    .Reg(100, 5.2, 5.3, name="BpostFitMes", label=r"M$_{ES}$ [GeV/c$^2$]")
    .StrCat([], name="SP", label="SP modes", growth=True)
    .StrCat([], name="cuts", label="Cuts", growth=True)
    .Weight()
)

# One fill per SP mode, using the first entry of every event of that mode.
# The cut category "0" labels the uncut sample.
for spmode in spmodes:
    sp_mask = data.spmode == spmode
    x = data[sp_mask]['BpostFitMes'][:, 0]
    h.fill(BpostFitMes=x, SP=spmode, cuts="0", weight=1)

Display it in different ways

In [None]:
#h[:,"998","0"].plot(histtype="fill", linewidth=1, edgecolor="grey")
#h[:,"-999","0"].plot(histtype="fill", linewidth=1, edgecolor="grey")

In [None]:
#h[:,"1235","0"].plot(histtype="fill", linewidth=1, edgecolor="grey")

In [None]:
#h.stack('SP')[:].project('BpostFitMes').plot(stack=True, histtype="fill")

#plt.legend()

Now do the same thing for $\Delta E$ (`BpostFitDeltaE`).

In [None]:
# Create the histogram (this only books it; filling happens in the loop below).
# Axes: BpostFitDeltaE in [-1, 1], plus growing category axes for SP mode and cut label.
j = (
    Hist.new
    .Reg(100, -1, 1, name="BpostFitDeltaE", label=r"$\Delta$E [GeV]")
    .StrCat([], name="SP", label="sp modes", growth=True)
    .StrCat([], name="cuts", label="Cuts", growth=True)
    .Weight()
)

# Fill: one call per SP mode, using the first entry of every event of that mode
for spmode in spmodes:
    sp_mask2 = data.spmode == spmode
    y = data[sp_mask2]["BpostFitDeltaE"][:, 0]
    j.fill(BpostFitDeltaE=y, SP=spmode, cuts="0", weight=1)

In [None]:
# Slice out SP mode "998" with cut label "0" and draw the remaining 1-D histogram
j[:,"998","0"].plot(histtype="fill", linewidth=1, edgecolor="grey")

In [None]:
# Stack the histogram along the SP axis and draw the BpostFitDeltaE projection of each entry
j.stack('SP')[:].project('BpostFitDeltaE').plot(stack=True, histtype="fill")

plt.legend()

## Lambda 


Matplotlib histogram:

In [None]:
# Lambda0 mass and flight-length fields of the background sample
Lambda_mass= data.Lambda0_unc_Mass
Flight_length= data.Lambda0FlightLen

# Flattened copy of the mass, used for the matplotlib histogram in the next cell
flat_Lambda_mass= ak.flatten(Lambda_mass)

# Displayed as the cell output
Flight_length

In [None]:
# Histogram of the flattened Lambda0 mass; the trailing semicolon hides the return value
plt.hist(flat_Lambda_mass, bins= 100);

hist class:

In [None]:
# Book two histograms (Lambda0 flight length and Lambda0 mass), each with
# growing category axes for the SP mode and the cut label.
h_lambda_FL = (
    Hist.new
    .Reg(100, -.25, 10, name="Lambda0FlightLen", label=r"flight length [cm]")
    .StrCat([], name="SP", label="sp modes", growth=True)
    .StrCat([], name="cuts", label="Cuts", growth=True)
    .Weight()
)

h_lambda_mass = (
    Hist.new
    .Reg(100, 1.105, 1.126, name="Lambda0_unc_Mass", label=r"Mass GeV")
    .StrCat([], name="SP", label="sp modes", growth=True)
    .StrCat([], name="cuts", label="Cuts", growth=True)
    .Weight()
)

for spmode in spmodes:
    modes = data.spmode == spmode
    # First entry of every event of this SP mode
    x0 = data[modes]["Lambda0FlightLen"][:, 0]
    x1 = data[modes]["Lambda0_unc_Mass"][:, 0]

    # No cut applied
    cut_name = "org"
    h_lambda_FL.fill(Lambda0FlightLen=x0, SP=spmode, cuts=cut_name, weight=1)
    h_lambda_mass.fill(Lambda0_unc_Mass=x1, SP=spmode, cuts=cut_name, weight=1)

    # First cut: keep events whose flight length exceeds 2.0
    cut_name = "fl_gt_val"
    flight_length_cutter = x0 > 2.0
    h_lambda_FL.fill(Lambda0FlightLen=x0[flight_length_cutter], SP=spmode, cuts=cut_name, weight=1)
    h_lambda_mass.fill(Lambda0_unc_Mass=x1[flight_length_cutter], SP=spmode, cuts=cut_name, weight=1)


In [None]:
#h_lambda_FL[:,"998","org"].plot(histtype="fill", linewidth=1, edgecolor="grey")
#h_lambda_FL[:,"998","fl_gt_val"].plot(histtype="fill", linewidth=1, edgecolor="grey")

In [None]:
# Uncut ('org') flight-length histogram, stacked by SP mode; displayed as the cell output
h_lambda_FL[:,:,'org'].stack('SP').project('Lambda0FlightLen')

In [None]:
#h_lambda_FL.show()

In [None]:
# Side-by-side stacked plots (all SP modes), before ('org') and after ('fl_gt_val') the flight-length cut
plt.figure(figsize=(16,8))

# Left panel: flight length
plt.subplot(1,2,1)
h_lambda_FL[:,:,'org'].stack('SP')[:].project('Lambda0FlightLen').plot(stack=True, histtype="fill")
h_lambda_FL[:,:,'fl_gt_val'].stack('SP')[:].project('Lambda0FlightLen').plot(stack=True, histtype="fill")

plt.legend()

# Right panel: Lambda0 mass (no legend is drawn for this panel)
plt.subplot(1,2,2)
h_lambda_mass[:,:,'org'].stack('SP')[:].project('Lambda0_unc_Mass').plot(stack=True, histtype="fill")
h_lambda_mass[:,:,'fl_gt_val'].stack('SP')[:].project('Lambda0_unc_Mass').plot(stack=True, histtype="fill")



In [None]:
# Same comparison as the stacked plots, restricted to a single SP mode
plt.figure(figsize=(16,8))

# SP mode to show (category label on the 'SP' axis)
sp = '1005'

# Left panel: flight length before and after the cut
plt.subplot(1,2,1)
h_lambda_FL[:,sp,'org'].project('Lambda0FlightLen').plot(histtype="fill")
h_lambda_FL[:,sp,'fl_gt_val'].project('Lambda0FlightLen').plot(histtype="fill")

#plt.legend()

# Right panel: Lambda0 mass before and after the cut
plt.subplot(1,2,2)
h_lambda_mass[:,sp,'org'].project('Lambda0_unc_Mass').plot(histtype="fill")
h_lambda_mass[:,sp,'fl_gt_val'].project('Lambda0_unc_Mass').plot(histtype="fill")

plt.tight_layout()

In [None]:
# 2-D slice (flight length x SP mode) for the uncut sample; displayed as the cell output
h_lambda_FL[:,:,'org']

In [None]:
# First BMass entry of each event, histogrammed over the range 5 to 6
x= data["BMass"][:,0]

plt.hist(x, bins=50, range=(5,6));

# Next steps

* Plot `BpostFitMes` and `BpostFitDeltaE`
* Do individual histograms
* Do a 2D histogram (try with `Hist`)

Bellis will get you a signal file so you can see how the signal looks different from all of these. 

In [None]:
# nTRK distribution for a single SP mode
x = data['nTRK']

# Boolean mask selecting events of SP mode '1237' (spmode values are compared as strings)
SP_1237 = data['spmode'] == '1237'

# 26 bins over 0..26
plt.hist(x[SP_1237],bins=26,range=(0,26));

In [None]:
# The trailing semicolon suppresses the display; remove it to inspect the spmode field
data['spmode'];

In [None]:
# First entry per event of M_ES and Delta E; both names are reused by later cells
BPFM= data["BpostFitMes"][:,0]
BPFDE= data["BpostFitDeltaE"][:,0]

plt.figure(figsize= (16,8))

# Left panel: M_ES
plt.subplot(1,2,1)
plt.title("B post fit MES")
plt.hist(BPFM, bins= 100, range= (3.5,5.5));
plt.xlabel("Mass [GeV/c^2]")

# Right panel: Delta E
plt.subplot(1,2,2)
plt.title("B post fit Delta E")
plt.hist(BPFDE, bins= 100, range=(-1,1));
plt.xlabel("E [GeV]")

In [None]:
# 2-D histogram of M_ES (x) against Delta E (y) with the 1-D projections drawn alongside
plt.figure(figsize=(8, 8))

# NOTE: this rebinds `h`, which earlier held the categorised M_ES histogram
h= Hist(
    hist.axis.Regular(400,3,7,name= "BPFM", label= "mass [GeV/c^2]", flow= True),
    hist.axis.Regular(350,-.75,1,name= "BPFMDE", label= "energy [GeV]", flow= True),
)

# normal fill (positional: first axis gets BPFM, second gets BPFDE)
h.fill(BPFM, BPFDE)

h.plot2d_full(
    main_cmap="coolwarm",
    top_ls="--",
    top_color="orange",
    top_lw=2,
    side_ls=":",
    side_lw=2,
    side_color="steelblue",
)

# Zoom in; these act on whichever axes are current after plot2d_full
plt.xlim(5.1,5.3)
plt.ylim(-.5,.5)
plt.show()

In [None]:
# Fraction of events whose first candidate lies inside the window
# 5.2 < M_ES < 5.3 and -0.2 < Delta E < 0.2.
# Both conditions must hold for the SAME event, so the masks are combined
# element-wise before counting (AND-ing two separate counts bitwise is meaningless).
in_mes_window = (BPFM > 5.2) & (BPFM < 5.3)
in_de_window = (BPFDE > -.2) & (BPFDE < .2)

events_in_range = ak.sum(in_mes_window & in_de_window)

total_frac= events_in_range/len(BPFM)

In [None]:
# Display the per-event M_ES array
BPFM

In [None]:
# Scatter plot of Delta E against M_ES; small, mostly transparent markers keep dense regions readable
plt.plot(BPFM, BPFDE, '.', markersize=0.5, alpha=0.1)
plt.xlim(5.2, 5.3)
plt.ylim(-0.5, 0.5)

Note that the mass shows a steady drop off (as is characteristic of using MES) and that the change in energy is symmetric about zero. 

The symmetry about zero arises from the definition $\Delta E = E_B - \frac{1}{2}E_{e^+e^-}$. Since $\frac{1}{2}E_{e^+e^-}$ is a fixed value and the energy of a correctly reconstructed B candidate should be close to that number, $\Delta E$ is centered on zero. 

The drop off in the mass is a result of the same idea. For better resolution on the B mass, you can relate the momentum of the B to the energy of the beam, since the main particles produced in electron positron collisions at this energy are a B and an anti B. So, half of the energy of the beam goes into creating one B, and the other half goes into creating the B bar. The mass is calculated by the following: 

$M^2 = (E_p+E_\Lambda)^2-(p_p+p_\Lambda)^2$

where $(E_p+E_\Lambda)$ is the energy of the B (given by the sum of the energies of its primary decay products) and $(p_p+p_\Lambda)$ is the same but with the momentum instead of the energy. $(E_p+E_\Lambda)$ can be approximated as $\frac{1}{2}E_{e^+e^-}$, which gives $M_{ES} = \sqrt{\left(\frac{1}{2}E_{e^+e^-}\right)^2-(p_p+p_\Lambda)^2}$!

## Scaling and cross section data

In [None]:
import pandas as pd

In [None]:
# Bookkeeping tables, read from CSV files in the current working directory:
# per-dataset statistics and per-SP-mode cross sections/labels
dataset_information = pd.read_csv("dataset_statistics.csv")
cs_data= pd.read_csv("SP_cross_sections_and_labels.csv")

In [None]:
# Display the dataset statistics table
dataset_information

In [None]:
# Rows that are real data ('Data') and whose skim is not 'LambdaVeryVeryLoose'
Data_and_Skim_mask = (dataset_information['Data or MC'] == 'Data') & (dataset_information['Skim'] != 'LambdaVeryVeryLoose')
dataset_information[Data_and_Skim_mask]

In [None]:
# Total of the '# of events (Data or MC)' column over the selected data rows
nevents = dataset_information[Data_and_Skim_mask]['# of events (Data or MC)'].sum()
nevents

In [None]:
# Same total using the alternative event-count column (the column name itself flags that
# it is unclear which of the two counts should be used)
nevents_other = dataset_information[Data_and_Skim_mask]['# of events (Data or MC) NOT SURE WHICH NUMBER TO USE'].sum()
nevents_other

In [None]:
# Integrated luminosity: sum of the luminosity column (1/pb) over the selected data rows
int_lumi = dataset_information[Data_and_Skim_mask]['Luminosity (Data only) 1/pb'].sum()
int_lumi

In [None]:
# Display the cross-section table
cs_data

In [None]:
# NOTE: this rebinds `spmodes` (previously built from data['spmode']) to the
# values of the 'SP Mode' column of the cross-section table.
spmodes = np.unique(cs_data['SP Mode'].values)

# Drop SP mode 2400
spmodes = spmodes[spmodes != 2400]

spmodes

In [None]:
# Mask that excludes SP mode 2400 from the cross-section table
SP_not_2400 = cs_data['SP Mode'] != 2400

# Expected number of events per mode = cross section x integrated luminosity
expected_events = cs_data[SP_not_2400]['Cross section [nb]'] * int_lumi * 1000 # The 1000 is because cs is in nb and lumi is pb^-1

expected_events

In [None]:
# Sum of the expected events over all remaining SP modes
tot_expected_events = expected_events.sum()
tot_expected_events

In [None]:
# Cross-section table without SP mode 2400
cs_data[SP_not_2400]

In [None]:
# Bar chart of the cross section per mode, labelled with the 'LaTeX output' column
data_wo_SP2400 = cs_data[SP_not_2400]#["Cross section [nb]"]

print(type(data_wo_SP2400))

sns.barplot(data_wo_SP2400, x = 'LaTeX output', y = 'Cross section [nb]')
# Rotate the tick labels so that they do not overlap
plt.xticks(rotation=60);

In [None]:
# Same bar chart, with the numeric SP mode on the x axis
data_wo_SP2400 = cs_data[SP_not_2400]#["Cross section [nb]"]

print(type(data_wo_SP2400))

sns.barplot(data_wo_SP2400, x = 'SP Mode', y = 'Cross section [nb]')

In [None]:
# Column names of the dataset statistics table
dataset_information.columns

In [None]:
# Luminosity per run for the data rows (same mask as defined earlier in the notebook)
Data_and_Skim_mask = (dataset_information['Data or MC'] == 'Data') & (dataset_information['Skim'] != 'LambdaVeryVeryLoose')


sns.barplot(dataset_information[Data_and_Skim_mask], x = 'Run', y = 'Luminosity (Data only) 1/pb')

In [None]:
# Unweighted M_ES distribution (first entry per event) for SP mode '998'
spmask = data['spmode']=='998'

x = data[spmask]['BpostFitMes'][:,0]
x  # no effect here: only the last expression of a cell is displayed

plt.hist(x,bins=100, range=(5., 5.3));

In [None]:
#len(data[data['spmode'] == '1005'])

In [None]:
# Column names of the dataset statistics table (repeated for reference)
dataset_information.columns

In [None]:
# Count the events of one SP mode, summed over all rows whose skim is not
# 'LambdaVeryVeryLoose'.
spmode = 1005

is_this_mode = dataset_information['SP mode'] == spmode
not_lambda_skim = dataset_information['Skim'] != 'LambdaVeryVeryLoose'
SP_and_Skim = is_this_mode & not_lambda_skim

events_column = dataset_information.loc[SP_and_Skim, '# of events (Data or MC)']
nevents_mc = events_column.sum()

# Number of SP events of this mode generated for all runs
nevents_mc

In [None]:
# Look up the cross section of the SP mode chosen in the previous cell
csmask = cs_data['SP Mode'] == spmode
cs = cs_data[csmask]['Cross section [nb]']

# Take the first matching row as a plain number
cs = cs.values[0]

# Cross section for this process
print(cs)

# Expected yield in data: cross section [nb] x luminosity [1/pb] x 1000 (nb -> pb)
number_generated_in_data = cs * int_lumi * 1000
print(number_generated_in_data)

# Weight per MC event so that the MC sample matches the expected yield in data
scaling = number_generated_in_data / nevents_mc

print(scaling)

In [None]:
# spmode values in `data` are compared as strings, hence str(spmode)
spmask = data['spmode']== str(spmode)

x = data[spmask]['BpostFitMes'][:,0]
x  # no effect here: only the last expression of a cell is displayed

# Every entry gets the same weight, `scaling`
plt.hist(x,bins=100, range=(5., 5.3), weights=scaling*np.ones(len(x)));

## Scaling Function 

In [None]:
def scaling_value(spmode,plot= False, verbose= False):
    """Return the per-event weight that scales one SP (MC) mode to the data luminosity.

    The weight is ``cross section * integrated luminosity * 1000 / N_MC`` and is
    computed from the notebook-level tables ``dataset_information`` and ``cs_data``.
    Rows whose 'Skim' is 'LambdaVeryVeryLoose' are excluded from both the MC
    event count and the luminosity sum.

    Parameters
    ----------
    spmode
        SP mode number, compared against the 'SP mode' column of
        ``dataset_information`` and the 'SP Mode' column of ``cs_data``.
    plot : bool
        If true, histogram the first 'BpostFitMes' entry per event of the
        notebook-level ``data`` array for this mode, weighted by the result.
    verbose : bool
        If true, print the inputs and the resulting scaling value.

    Returns
    -------
    The scaling value (expected events in data divided by MC events).

    Raises
    ------
    IndexError
        If ``cs_data`` has no row for this mode.
    """
    mode = spmode

    # All rows of the requested SP mode outside the LambdaVeryVeryLoose skim
    mc_mask = (dataset_information["SP mode"] == mode) & (dataset_information["Skim"] != "LambdaVeryVeryLoose")

    # Number of MC events of the requested SP mode
    nevents_mc = dataset_information[mc_mask]["# of events (Data or MC)"].sum()

    # Cross section of this mode: first matching row, as a plain number
    cs_mask = cs_data["SP Mode"] == mode
    cs = cs_data[cs_mask]["Cross section [nb]"].values[0]

    # Integrated luminosity of the data rows (same skim exclusion as above)
    mask = (dataset_information['Data or MC'] == 'Data') & (dataset_information['Skim'] != 'LambdaVeryVeryLoose')
    int_lumi = dataset_information[mask]['Luminosity (Data only) 1/pb'].sum()

    # Factor of 1000 arises from multiplying nanobarns by 1/picobarns
    n_exp_in_data = cs * int_lumi * 1000

    # MC is intentionally overgenerated, so the weight is the number expected
    # in data divided by the total number of MC events
    scaling = n_exp_in_data / nevents_mc

    if plot:
        spmask = data['spmode'] == str(mode)
        x = data[spmask]['BpostFitMes'][:,0]

        plt.hist(x,bins=100, range=(5., 5.3), weights=scaling*np.ones(len(x)));
        plt.title(f"Scaling value for SP-{mode}: {scaling:.4f}")

    if verbose:
        print(f"- Cross section for this SP mode is    {cs} nb")
        # NOTE(review): ':13d' needs an integer; confirm the event-count column is never float
        print(f"- # of events generated for SP-{mode}: {nevents_mc:13d}")
        print(f"- Number expected in data:             {n_exp_in_data:.1f}")
        print(f"- Integrated Luminosity:               {int_lumi:.1f} 1/pb")
        # Print the value itself so that the message is complete even when the
        # call is not the last expression of a cell
        print(f"The scaling value for this SP mode is: {scaling}")

    return scaling


In [None]:
# test of function: SP mode 998, with the histogram and the printed summary switched on
scaling_value(998, plot= True, verbose= True)

In [None]:
#dataset_information

## Integrated Luminosity Plot 

In [None]:
# Integrated luminosity plot: running total of the luminosity, run by run.

## Only real data rows are used (a luminosity for Monte Carlo makes no sense), and the
## LambdaVeryVeryLoose skim rows are excluded so that the full, unskimmed numbers are used.
mask1 = (dataset_information["Skim"]!= "LambdaVeryVeryLoose") & (dataset_information['Data or MC'] == 'Data')
unskimmed_data= dataset_information[mask1]

lumi_column = unskimmed_data["Luminosity (Data only) 1/pb"]

# Label-based lookup of index labels 0..5.
# NOTE(review): assumes the six data rows keep the index labels 0-5 after masking -- confirm
lumis = [lumi_column[i] for i in np.arange(0,6)]

# Running total: b[k] is the sum of lumis[0..k]
b = []
running_total = 0
for luminosity in lumis:
    running_total = running_total + luminosity
    b.append(running_total)

# x positions 1..6 for the six entries
xvals= np.arange(1,7)

In [None]:
# Draw the running total as a line with a marker at each run
plt.plot(xvals, b,linewidth=3)
plt.plot(xvals, b, "o",color= "steelblue", markersize= 8)
plt.xlabel("Run #")
plt.ylabel("Luminosity [1/pb]")
plt.title("Luminosity over Time")
# Start the y axis at zero
plt.ylim(0)

## LaTeX

In [None]:
### the start and end of the latex tables we'll be needing for the documentation 

def latex_table_start(fmt):
    """Return the opening lines of a LaTeX table.

    `fmt` is the column specification placed inside the braces of
    ``\\begin{tabular}{...}``. The returned text has no trailing newline.
    """
    pieces = ["\\begin{table}\n", "\\begin{tabular}{", f"{fmt}", "}"]
    return "".join(pieces)

def latex_table_end(fmt):
    """Return the closing lines of a LaTeX table.

    `fmt` is accepted for symmetry with `latex_table_start` but is not used.
    """
    closing_lines = ("\\end{tabular}", "\\end{table}")
    return "".join(line + "\n" for line in closing_lines)

# Quick check of both helpers with a four-column format string
x = latex_table_start('c | c l l')
print(x)

# The format argument is not used by latex_table_end
x = latex_table_end('c | c l l')
print(x)


### Real function (including table body) already in BaBar_tools.py 
### And probably in creating_latex_examples.py 
### The real function doesn't need a latex_table_start or end function, since we are using 
### pandas.to_latex()

## Masked Plots

In [None]:
#### This masks delta E and Mes in the range we care about 

# signal and corresponding masks
# NOTE(review): a stray pasted string that looked like an access token was embedded in
# the comment above and has been removed; if it was a real credential, revoke it.
sig_BPFDE= signal["BpostFitDeltaE"]
sig_BPFM= signal["BpostFitMes"]

# -0.5 < Delta E < 0.5 and 5.2 < M_ES < 5.3
mask1_sig= (sig_BPFDE>-.5) & (sig_BPFDE < .5)
mask2_sig= (sig_BPFM>5.2) & (sig_BPFM < 5.3)

# Background and corresponding masks 
bkg_BPFM= data["BpostFitMes"]
bkg_BPFDE= data["BpostFitDeltaE"]

mask1_bkg= (bkg_BPFDE>-.5) & (bkg_BPFDE < .5)
mask2_bkg= (bkg_BPFM>5.2) & (bkg_BPFM < 5.3)

# Even if the mask is applied to a single variable it depends on *both* variables ###### Very important 
mask_sig_plot = mask1_sig & mask2_sig
mask_bkg_plot = mask1_bkg & mask2_bkg

#signal masked (the names are rebound to the masked, flattened values)
sig_BPFM= ak.flatten(signal["BpostFitMes"][mask_sig_plot])
sig_BPFDE= ak.flatten(signal["BpostFitDeltaE"][mask_sig_plot])

# background masked
bkg_BPFM= ak.flatten(data["BpostFitMes"][mask_bkg_plot])
bkg_BPFDE= ak.flatten(data["BpostFitDeltaE"][mask_bkg_plot])


In [None]:
#### 2D histogram of Mes and Delta E-- Background 

plt.figure(figsize=(8, 8))

# Raw string for the Delta E label: "\D" is not a valid escape sequence in a normal string
h= Hist(
    hist.axis.Regular(100,5.2,5.3,name= "bkg_BPFM", label= "M$_{ES}$ [GeV/c$^2$]", flow= True),
    hist.axis.Regular(100,-.5,.5,name= "bkg_BPFMDE", label= r"$\Delta$E [GeV]", flow= True),
)

# normal fill (positional: M_ES on the first axis, Delta E on the second)
h.fill(bkg_BPFM, bkg_BPFDE)

h.plot2d_full(
    main_cmap="coolwarm",
    top_ls="--",
    top_color="orange",
    top_lw=2,
    side_ls=":",
    side_lw=2,
    side_color="steelblue",
)

# Re-apply the existing axis labels of the current axes with a larger font
plt.xlabel(plt.gca().get_xlabel(), fontsize=18)
plt.ylabel(plt.gca().get_ylabel(), fontsize=18)

plt.tight_layout()

plt.savefig('bkg_de_vs_mes.png')

In [None]:
#### 2D histogram of Mes and Delta E-- Signal 

plt.figure(figsize=(8, 8))

# Raw string for the Delta E label: "\D" is not a valid escape sequence in a normal string
h= Hist(
    hist.axis.Regular(100,5.2,5.3,name= "sig_BPFM", label= "M$_{ES}$ [GeV/c$^2$]", flow= True),
    hist.axis.Regular(100,-.5,.5,name= "sig_BPFMDE", label= r"$\Delta$E [GeV]", flow= True),
)

# normal fill (positional: M_ES on the first axis, Delta E on the second)
h.fill(sig_BPFM, sig_BPFDE)

h.plot2d_full(
    main_cmap="coolwarm",
    top_ls="--",
    top_color="orange",
    top_lw=2,
    side_ls=":",
    side_lw=2,
    side_color="steelblue",
)

#plt.xlim(5.1,5.3)
#plt.ylim(-.5,.5)
#plt.show()

# Re-apply the existing axis labels of the current axes with a larger font
plt.xlabel(plt.gca().get_xlabel(), fontsize=18)
plt.ylabel(plt.gca().get_ylabel(), fontsize=18)

plt.tight_layout()

plt.savefig('sig_de_vs_mes.png')

## Exploring Awkward Arrays 

In [None]:
# The simplest case: a flat, one-dimensional array
ak.Array([1,2,3])

In [None]:
## Arrays can be multidimensional, and the inner lists can have different lengths

ak.Array([[1,2,3],
          [4],
          [9,1,3,1,2,5],
         ])

In [None]:
## They can also contain None (missing values), and can mix ints and floats

ak.Array([
    [1,2,None],
    [4],
    [None,3],
    [5,3.2],
])

In [None]:
import datetime
import numpy as np

In [None]:
## They can also hold dates and times, given here as numpy datetime64 values

ak.Array([
    [np.datetime64("1815-12-10"), np.datetime64("2002-11-21")],
    [np.datetime64("2004-06-19")]
])

In [None]:
## and, of course, strings (here: lists of two strings each)

ak.Array([
    ["Josie Swann", "Oliver the Cat"],
    ["Mitchell Swann", "Baloo and Otis the Cats"],
    ["Samantha Swann", "Baloo and Otis the Cats"],
    ["William Swann", "Moose the Dog"],
    ["Jerilyn Swann", "Romina the Cat"],
])

In [None]:
## as we have seen, they can also have dictionary-like structure (records with named fields)
## Note that the ages in this example are stored as strings, not numbers

ak.Array([
    [{"name":"Josie", "age":"21"},{"name":"Oliver", "age":"14"}],
    [{"name":"Earth", "age":"4.5e9"}]
])

In [None]:
## they can represent a bunch of weird, ragged data. They can store data that mixes types and records. 


# One record that mixes a string, a number, a list of strings and a missing value
ak.Array([
    [
        {
            "name":"Josie Swann",
            "age": 21, 
            "Institutions":["Siena College", "Alcoa High School"],
            "Aliases": None,
        }
    ]
])

## the tutorial also mentions unions: a union is what you get when the items of a
## single array have different types, e.g. ak.Array([1, "two", [3.0]])

In [None]:
# A regular 2 x 3 array to demonstrate the `axis` argument of ak.sum
x= ak.Array([
    [1,2,3],
    [4,5,6]
]);

#ak.sum(x) # sums all entries
ak.sum(x, axis= -1) #sums entries by internal array
# axis= 0 sums by column 

In [None]:
# Ragged array of powers of two that contains one missing value
po2= ak.Array([
    [1,2,4],
    [None,8],
    [16]
]);

In [None]:
# Sum each inner list of the array defined in the previous cell
ak.sum(po2, axis= -1)

In [None]:
# Ragged array of records: each inner list holds {"name", "age"} records
nobel_prize_winner = ak.Array(
    [
        [
            {"name": "Benjamin List", "age": 53},
            {"name": "David MacMillan", "age": 53},
        ],
        [
            {"name": "Emmanuelle Charpentier", "age": 52},
            {"name": "Jennifer A. Doudna", "age": 57},
        ],
        [
            {"name": "Akira Yoshino", "age": 73},
            {"name": "M. Stanley Whittingham", "age": 79},
            {"name": "John B. Goodenough", "age": 98},
        ],
    ]
);

In [None]:
## awkward arrays are also useful for pulling apart structures
## Only the last expression of a cell is displayed, so only the ages show up here

nobel_prize_winner.name
nobel_prize_winner.age

In [None]:
# Three million inner lists: the three-list pattern repeated 1,000,000 times
large_array = ak.Array([[1, 2, 3], [], [4, 5]] * 1_000_000)

# Sum over all entries
ak.sum(large_array)

In [None]:
# Collapse the nested structure into a single flat array
flat_array= ak.ravel(large_array)
flat_array

In [None]:
##for the full table you'd call the bat function 

# Re-read the two CSV tables so that this cell does not depend on earlier cells
dataset_information = pd.read_csv("dataset_statistics.csv")
cs_data= pd.read_csv("SP_cross_sections_and_labels.csv")

#cs_data
caption = "Cross section information for the different SP modes. Used to scale the SP accordingly to the integrated luminosity in the data sample."

# LaTeX tabular built from the four columns of interest
# (two decimals for floats, no index column)
j= cs_data[['SP Mode', 'LaTeX output', 'Cross section [nb]', 'Uncertainty']].to_latex( \
    float_format="%0.2f",  index=False)

#print(j)


# Wrap the tabular in a table environment with centering and a caption
full_table = "\\begin{table}\n" # opens the table environment around the tabular
full_table += "\\centering\n" # backslash escaped: "\c" is not a valid escape sequence
full_table += "\\caption{" + f"{caption}" + "}\n" 
full_table += j #includes the converted dataframe in the table
full_table += "\\end{table}" # closes the table environment

print(full_table)

# NOTE: `filename` and `path` are rebound here; `filename` earlier held the ROOT file path
name= "cs_data.tex"
filename= f"table_{name}"
path= f"tables/{filename}"

# Make sure the output directory exists, and let the context manager close the
# file even if the write fails
os.makedirs(os.path.dirname(path), exist_ok=True)
with open(path, 'w') as outfile:
    outfile.write(full_table)

In [None]:
# Show the raw LaTeX string produced by to_latex (tabular only, no table wrapper)
cs_data[['SP Mode', 'LaTeX output', 'Cross section [nb]', 'Uncertainty']].to_latex(float_format="%0.2f", index=False)

In [None]:
# Output path of the LaTeX table file
path

In [None]:
#open(f"{path}","x")

In [None]:
# NOTE(review): this replaces the file written above (table wrapper + caption) with the
# bare tabular `j` -- confirm that this is intended.
# The context manager makes sure the handle is closed and the text is flushed to disk.
with open(f"{path}","w") as outfile:
    outfile.write(f"{j}")

In [None]:
# Table for Definition of blinding regions

## Generate blinding and fitting plots 

In [None]:
# Same wide-range 2-D M_ES vs Delta E plot as in the "Next steps" section,
# using the per-event arrays BPFM and BPFDE defined there
plt.figure(figsize=(8, 8))

h= Hist(
    hist.axis.Regular(400,3,7,name= "BPFM", label= "mass [GeV/c^2]", flow= True),
    hist.axis.Regular(350,-.75,1,name= "BPFMDE", label= "energy [GeV]", flow= True),
)

# normal fill (positional: first axis gets BPFM, second gets BPFDE)
h.fill(BPFM, BPFDE)

h.plot2d_full(
    main_cmap="coolwarm",
    top_ls="--",
    top_color="orange",
    top_lw=2,
    side_ls=":",
    side_lw=2,
    side_color="steelblue",
)

# Zoom in; these act on whichever axes are current after plot2d_full
plt.xlim(5.1,5.3)
plt.ylim(-.5,.5)
plt.show()

In [None]:
#### 2D histogram of Mes and Delta E-- Signal, with the two region boxes overlaid

plt.figure(figsize=(8, 8))

# The lower M_ES edge comes from region_definitions (not defined in this notebook;
# presumably provided by the analysis_variables star import -- confirm).
# Raw string for the Delta E label: "\D" is not a valid escape sequence in a normal string.
h= Hist(
    hist.axis.Regular(100,region_definitions['fitting MES'][0],5.3,name= "sig_BPFM", label= "M$_{ES}$ [GeV/c$^2$]", flow= True),
    hist.axis.Regular(100,-.5,.5,name= "sig_BPFMDE", label= r"$\Delta$E [GeV]", flow= True),
)

# normal fill (positional: M_ES on the first axis, Delta E on the second)
h.fill(sig_BPFM, sig_BPFDE)

h.plot2d_full(
    main_cmap="coolwarm",
    top_ls="--",
    top_color="orange",
    top_lw=2,
    side_ls=":",
    side_lw=2,
    side_color="steelblue",
)

#plt.xlim(5.1,5.3)
#plt.ylim(-.5,.5)
#plt.show()

plt.xlabel(plt.gca().get_xlabel(), fontsize=18)
plt.ylabel(plt.gca().get_ylabel(), fontsize=18)
# Outer box (white, solid): 5.2002 < M_ES < 5.3, |Delta E| < 0.2.
# The path now returns to its starting corner (it used to end at 5.2 instead of 5.2002).
plt.plot([5.2002,5.2002,5.3,5.3,5.2002],[-0.2,0.2,0.2,-0.2,-0.2], "w-", linewidth= 4)
# Inner box (black, dashed): 5.27 < M_ES < 5.3, |Delta E| < 0.07
plt.plot([5.27,5.27,5.3,5.3,5.27],[-0.07,0.07,0.07,-0.07,-0.07], "k--", linewidth= 4)


plt.tight_layout()

# NOTE(review): same file name as the plain signal plot saved earlier, so that file is overwritten
plt.savefig('sig_de_vs_mes.png')

In [None]:
# Rebuild `spmodes` from the values found in the data sample
# (it was rebound to the cross-section table's modes in the scaling section)
x = data['spmode']

spmodes = np.unique(x.to_list())

print(spmodes)

In [None]:
# Disabled draft: the whole cell is a single string literal, so none of the code
# inside it runs. The working per-mode loop is in a later cell.
'''

plt.figure(figsize=(8, 8))

for sp in x: 
    while data.spmode == sp: 
        h= Hist(
            hist.axis.Regular(100,region_definitions['fitting MES'][0],region_definitions['fitting MES'][1],name= "sig_BPFM", label= "M$_{ES}$ [GeV/c$^2$]", flow= True),
            hist.axis.Regular(100,-.5,.5,name= "sig_BPFMDE", label= "$\Delta$E [GeV]", flow= True),
        )
        
        # normal fill
        h.fill(sig_BPFM, sig_BPFDE)
        
        h.plot2d_full(
            main_cmap="coolwarm",
            top_ls="--",
            top_color="orange",
            top_lw=2,
            side_ls=":",
            side_lw=2,
            side_color="steelblue",
        )
        
        #plt.xlim(5.1,5.3)
        #plt.ylim(-.5,.5)
        #plt.show()
        
        plt.xlabel(plt.gca().get_xlabel(), fontsize=18)
        plt.ylabel(plt.gca().get_ylabel(), fontsize=18)
        plt.plot([5.2002,5.2002,5.3,5.3,5.2],[-0.2,0.2,0.2,-0.2,-0.2], "w-", linewidth= 4)
        plt.plot([5.27,5.27,5.3,5.3,5.27],[-0.07,0.07,0.07,-0.07,-0.07], "k--", linewidth= 4)
        
        
        plt.tight_layout()
'''

In [None]:
# Signal and corresponding masks: -0.5 < Delta E < 0.5 and 5.2 < M_ES < 5.3
sig_BPFDE= signal["BpostFitDeltaE"]
sig_BPFM= signal["BpostFitMes"]

mask1_sig= (sig_BPFDE>-.5) & (sig_BPFDE < .5)
mask2_sig= (sig_BPFM>5.2) & (sig_BPFM < 5.3)

# Background and corresponding masks 
bkg_BPFM= data["BpostFitMes"]
bkg_BPFDE= data["BpostFitDeltaE"]

mask1_bkg= (bkg_BPFDE>-.5) & (bkg_BPFDE < .5)
mask2_bkg= (bkg_BPFM>5.2) & (bkg_BPFM < 5.3)

# Even if the mask is applied to a single variable it depends on *both* variables ###### Very important 
mask_sig_plot = mask1_sig & mask2_sig
mask_bkg_plot = mask1_bkg & mask2_bkg

# Additional masks that select SP mode "1237" in each sample
sig_sp_mask= signal["spmode"]== "1237"
bkg_sp_mask= data["spmode"]== "1237"

# Print the lengths of the two signal masks
print(len(mask_sig_plot))
print(len(sig_sp_mask))

#signal masked: window mask first, then the SP-mode mask, then flatten
sig_BPFM= ak.flatten(signal["BpostFitMes"][mask_sig_plot][sig_sp_mask])
sig_BPFDE= ak.flatten(signal["BpostFitDeltaE"][mask_sig_plot][sig_sp_mask])

# background masked in the same way
bkg_BPFM= ak.flatten(data["BpostFitMes"][mask_bkg_plot][bkg_sp_mask])
bkg_BPFDE= ak.flatten(data["BpostFitDeltaE"][mask_bkg_plot][bkg_sp_mask])

In [None]:
# 2D histogram of Mes and Delta E for the masked background arrays, with the
# two region boxes overlaid.
# The M_ES axis range comes from region_definitions (not defined in this notebook;
# presumably provided by the analysis_variables star import -- confirm).
# The first axis is named for the background sample it is filled with, and the
# Delta E label is a raw string because "\D" is not a valid escape sequence.
h= Hist(
    hist.axis.Regular(100,region_definitions['fitting MES'][0],region_definitions['fitting MES'][1],name= "bkg_BPFM", label= "M$_{ES}$ [GeV/c$^2$]", flow= True),
    hist.axis.Regular(100,-.5,.5,name= "bkg_BPFMDE", label= r"$\Delta$E [GeV]", flow= True),
)

# normal fill (positional: M_ES on the first axis, Delta E on the second)
h.fill(bkg_BPFM, bkg_BPFDE)

h.plot2d_full(
    main_cmap="coolwarm",
    top_ls="--",
    top_color="orange",
    top_lw=2,
    side_ls=":",
    side_lw=2,
    side_color="steelblue",
)

#plt.xlim(5.1,5.3)
#plt.ylim(-.5,.5)
#plt.show()

plt.xlabel(plt.gca().get_xlabel(), fontsize=18)
plt.ylabel(plt.gca().get_ylabel(), fontsize=18)
# Outer box (white, solid): 5.2002 < M_ES < 5.3, |Delta E| < 0.2.
# The path now returns to its starting corner (it used to end at 5.2 instead of 5.2002).
plt.plot([5.2002,5.2002,5.3,5.3,5.2002],[-0.2,0.2,0.2,-0.2,-0.2], "w-", linewidth= 4)
# Inner box (black, dashed): 5.27 < M_ES < 5.3, |Delta E| < 0.07
plt.plot([5.27,5.27,5.3,5.3,5.27],[-0.07,0.07,0.07,-0.07,-0.07], "k--", linewidth= 4)


plt.tight_layout()

In [None]:
# Display whatever `x` currently holds (it is rebound by many cells above)
x

In [None]:
# One background M_ES vs Delta E plot per SP mode, each saved to its own PNG.

# Create the output directory up front so that savefig does not fail when it is missing
plot_dir = 'BNV_pLambda_plots'
os.makedirs(plot_dir, exist_ok=True)

for mode in spmodes:
    sig_sp_mask= signal["spmode"]== mode
    bkg_sp_mask= data["spmode"]== mode

    # signal masked (not plotted in this loop, but the names are rebound as before)
    sig_BPFM= ak.flatten(signal["BpostFitMes"][mask_sig_plot][sig_sp_mask])
    sig_BPFDE= ak.flatten(signal["BpostFitDeltaE"][mask_sig_plot][sig_sp_mask])

    # background masked: window mask first, then this SP mode
    bkg_BPFM= ak.flatten(data["BpostFitMes"][mask_bkg_plot][bkg_sp_mask])
    bkg_BPFDE= ak.flatten(data["BpostFitDeltaE"][mask_bkg_plot][bkg_sp_mask])

    plt.figure()
    # The first axis is named for the background sample it is filled with, and the
    # Delta E label is a raw string because "\D" is not a valid escape sequence.
    h= Hist(
        hist.axis.Regular(100,region_definitions['fitting MES'][0],region_definitions['fitting MES'][1],name= "bkg_BPFM", label= "M$_{ES}$ [GeV/c$^2$]", flow= True),
        hist.axis.Regular(100,-.5,.5,name= "bkg_BPFMDE", label= r"$\Delta$E [GeV]", flow= True),
    )

    # normal fill (positional: M_ES on the first axis, Delta E on the second)
    h.fill(bkg_BPFM, bkg_BPFDE)

    h.plot2d_full(
        main_cmap="coolwarm",
        top_ls="--",
        top_color="orange",
        top_lw=2,
        side_ls=":",
        side_lw=2,
        side_color="steelblue",
    )

    #plt.xlim(5.1,5.3)
    #plt.ylim(-.5,.5)
    #plt.show()

    plt.xlabel(plt.gca().get_xlabel(), fontsize=18)
    plt.ylabel(plt.gca().get_ylabel(), fontsize=18)
    # Outer box (white, solid): 5.2002 < M_ES < 5.3, |Delta E| < 0.2.
    # The path now returns to its starting corner (it used to end at 5.2 instead of 5.2002).
    plt.plot([5.2002,5.2002,5.3,5.3,5.2002],[-0.2,0.2,0.2,-0.2,-0.2], "w-", linewidth= 4)
    # Inner box (black, dashed): 5.27 < M_ES < 5.3, |Delta E| < 0.07
    plt.plot([5.27,5.27,5.3,5.3,5.27],[-0.07,0.07,0.07,-0.07,-0.07], "k--", linewidth= 4)

    plt.tight_layout()

    plt.savefig(f'{plot_dir}/plot_{mode}_bkg_de_vs_mes.png')