In [None]:
%load_ext autoreload
%autoreload 2
    
import uproot
import awkward as ak

import matplotlib.pylab as plt

import numpy as np

import hist
from hist import Hist

import time

import myPIDselector

import math

import babar_analysis_tools as bat

from analysis_variables import *


# PID selectors and masks

For reference

https://babar-wiki.heprc.uvic.ca/bbr_wiki/index.php/Physics/PID/PID_selectors_table


## Calculate the bits for the selector map for tracks

There are two functions we'll make use of in `babar_analysis_tools`

### `calculate_bits_for_PID_selector`

This function takes two or three inputs: 
* `trkidx` which is the index of the (charged) track that a particle (e.g. proton) points back to
* `trk_selector_map` which is the bit-packed representation of the selectors for each track for a given particle hypothesis (proton, pion, etc.)
* `verbose` if you want more output set this to 1

It returns an integer that represents the binary representation of the selectors as an `awkward` array. For example, `11001` or `1000001` but there are no leading zeros.


### `mask_PID_selection`

This function takes three inputs
* `bits` the binary representation of the selectors for a given track. This is the output of `calculate_bits_for_PID_selector`.
* `selector` is the particular selector you want to see if the tracks passed (e.g. `TightKMProtonSelection`)
* `pid_map_object` is the `PIDselector` class written by Bellis and located in `myPIDselector.py`. It contains information about what selectors are available for each particle hypothesis and what bit locations they map on to.

## Examples of usage

First grab a parquet file.


In [None]:
# Parquet
# Read one parquet file into the awkward array `data`, which the cells below use,
# and report how long the read took.

# This is what we have at Siena
#topdir = '/mnt/qnap/babar_data/bnv_plambda'

# On Bellis' laptop
# NOTE: `topdir` already ends in '/', and the f-strings below add another one, so
# the resulting path contains '//'.
topdir = '/home/bellis/babar_data/bnv_plambda/'

#Josie Laptop 
#topdir = "/Users/josieswann/Desktop/important documents"

# Background
#filename = f'{topdir}/Background_SP_modes_Only_Run_1.parquet'
filename = f'{topdir}/Background_and_signal_SP_modes_Only_Run_1.parquet'
#filename = f'{topdir}/Background_and_signal_SP_modes_All_runs.parquet'

# Signal
#filename = f'{topdir}/Signal_SP_mode.parquet'

start = time.time()

data = ak.from_parquet(filename)

print(f"Took {time.time() - start} s")

print(type(data))

# NOTE(review): everything from here to the end of the cell is a triple-quoted
# string, not executed code, so `data_collision` is NOT created by this cell.
# The cells under "Collision data" read `data_collision` and will raise
# NameError on a fresh kernel unless the quotes are removed (and the file exists).
# As the last expression of the cell, the string itself is also echoed as output.
'''
# Collision data
#filename = f'{topdir}/Background_SP_modes_Only_Run_1.parquet'
filename = f'{topdir}/Data_Only_Run_1_BLINDED.parquet'
#filename = f'{topdir}/Data_All_runs_BLINDED.parquet'
#filename = f'{topdir}/Data_All_runs_BLINDED.parquet'

start = time.time()

data_collision = ak.from_parquet(filename)

print(f"Took {time.time() - start} s")

print(type(data_collision))
'''

In [None]:
# Show the distinct `spmode` labels present in the loaded sample (cell output).
np.unique(data['spmode'].to_list())

### Using `calculate_bits_for_PID_selector`

Get the bits for the proton hypothesis for all the protons in our event.

Note that the number of protons is probably less than (definitely not more than!) the number of tracks. 

The number of protons just comes from our decay chain hypothesis. 

In [None]:
# Walk through `calculate_bits_for_PID_selector` for the proton hypothesis,
# printing each input and the result so the shapes can be compared by eye.
print("protons ====================")

# Get the track index for the protons 
# For everything we call a proton, this tells you where in the track list you would 
# find that charged track
trkidx = data['pTrkIdx']
print(f"trkidx: \n{trkidx}\n")

# We can get the selector map for all tracks for protons (for example)
trk_selector_map = data['pSelectorsMap']
print(f"trk_selector_map:\n{trk_selector_map}\n")

# Unpack the bits 
print("Unpack the bits...")
# Note that we are running the function with verbose=1 so there is some output from the function
pbits = bat.calculate_bits_for_PID_selector(trkidx, trk_selector_map, verbose=1)
print(f"pbits\n{pbits}\n")

# `len` counts outer entries only, so this compares the outer lengths of the two arrays.
print("Double check that the trk selector map and the returned `pbits` both have the same number of entries")
print(len(trk_selector_map))
print(len(pbits))
print()

## Using `mask_PID_selection`

We'll use this to create a mask for events based on whether or not the protons pass some selector.

In [None]:
# We will use this
pps = myPIDselector.PIDselector("p")

pps.selectors

In [None]:
# Build a boolean PID mask for one proton selector and show its effect on
# `penergy` by printing the same four summaries before and after masking.

# Comment and uncomment the following to try different selectors.
#selector = 'LooseKMProtonSelection'
#selector = 'TightKMProtonSelection'
selector = 'VeryLooseLHProtonSelection'

print(f"What protons pass the {selector} threshold")

# We explained this part in the previous cell
trkidx = data['pTrkIdx']
print(f"trkidx: \n{trkidx}\n")

# We can get the selector map for all tracks for protons (for example)
trk_selector_map = data['pSelectorsMap']
print(f"trk_selector_map:\n{trk_selector_map}\n")

# Make use of the `calculate_bits_for_PID_selector` function, same as previous cell
# but we'll set the verbosity to 0
pbits = bat.calculate_bits_for_PID_selector(trkidx, trk_selector_map, verbose=0)
print(f"pbits\n{pbits}\n")

# Now we will use this to make a mask. First we need to get the selector information
# from our homegrown class.
pps = myPIDselector.PIDselector("p")

# Use this and the bits we calculated 
mask_pid = bat.mask_PID_selection(pbits, selector, pps)
print(f"mask_pid\n{mask_pid}\n")

# Printed in order: the values, the count per outer entry (ak.num, default axis),
# the number of outer entries (axis=0), and the total count over all entries.
print()
print("Before the mask")
print(data['penergy'])
print(ak.num(data['penergy']))
print(ak.num(data['penergy'], axis=0))
print(ak.sum(ak.num(data['penergy'])))
print()

# Same four summaries after applying the mask.
# Assumes `mask_pid` has the same jagged layout as `penergy` — TODO confirm.
print("After the mask")
print(data['penergy'][mask_pid])
print(ak.num(data['penergy'][mask_pid]))
print(ak.num(data['penergy'][mask_pid], axis=0))
print(ak.sum(ak.num(data['penergy'][mask_pid])))
print()

## Masking decay products

Now let's use this to mask decay products of our Lambda.

Each Lambda has two decay products: a proton and a pion. 

We access the index of the decay products using `Lambda0d1Idx` (or `Lambda0d2Idx`).

We access the Lund ID of the decay products using `Lambda0d1Lund` (or `Lambda0d2Lund`). ([reference for Lund ID scheme](https://pdg.lbl.gov/2007/reviews/montecarlorpp.pdf) )

We can then use this as we did in the previous cells. 

In [None]:
# Inspect the Lambda0 daughters, build a proton PID mask, and compare the
# Lambda0 mass and B mES distributions with and without that mask.

# Get the index of the decay products and the Lund IDs to see what we have. 
d1idx = data['Lambda0d1Idx']#[:,0]
d2idx = data['Lambda0d2Idx']#[:,0]
d1lund = data['Lambda0d1Lund']#[:,0]
d2lund = data['Lambda0d2Lund']#[:,0]

print("Daughter 1...")
print("d1idx")
print(d1idx)
print("d1lund")
print(d1lund)
print()

print("Daughter 2...")
print("d2idx")
print(d2idx)
print("d2lund")
print(d2lund)
print()

# It looks like the first daughter is always the proton and the second daughter
# is always the pion. 
print("\nGet the bits for the protons...")

# We explained this part in the previous cell
trkidx = data['pTrkIdx']
print(f"trkidx: \n{trkidx}\n")

# We can get the selector map for all tracks for protons (for example)
trk_selector_map = data['pSelectorsMap']
print(f"trk_selector_map:\n{trk_selector_map}\n")

# Calculate the binary representation
pbits = bat.calculate_bits_for_PID_selector(trkidx, trk_selector_map, verbose=0)
print(pbits)
# Pick out the bits of the entries that the first Lambda0 daughter index points to
print(pbits[d1idx])
print()

selector = 'LooseKMProtonSelection'
#selector = 'VeryTightKMProtonSelection'

print(f"Now trying to create a mask with {selector}")

# Now we will use this to make a mask. First we need to get the selector information
# from our homegrown class.
pps = myPIDselector.PIDselector("p") # This is a helpful toolbox!

# Use this and the bits we calculated 
mask_pid = bat.mask_PID_selection(pbits, selector, pps)
print(f"mask_pid\n{mask_pid}\n")
print()

# Need to use these for the protons that came from the Lambda
# NOTE: `passing_pbits` is assigned here but not read again in this cell; the
# print below recomputes the same expression.
passing_pbits = pbits[d1idx][mask_pid]
print("\nGet the masked bits of the first daughter of the Lambda...")
print(pbits[d1idx][mask_pid])
print()


# Use the mask for the Lambda0 decay products 
# NOTE(review): `mask_pid` was built from the proton bits and is applied directly
# to per-Lambda0 (and, below, per-B) arrays; this presumably relies on the layouts
# lining up — confirm.
mass = data['Lambda0_unc_Mass']

print("Lambda0_unc_Mass")
print(mass)
print()

# Make some plots!

# Left panel: Lambda0 mass for all candidates, overlaid with the masked subset
plt.figure(figsize=(12,5));
plt.subplot(1,2,1)
plt.hist(ak.flatten(mass),bins=100)
plt.hist(ak.flatten(mass[mask_pid]),bins=100)

# Candidate counts before and after the mask
print(len(ak.flatten(mass)))
print(len(ak.flatten(mass[mask_pid])))
print()

# Right panel: B mES, same before/after comparison
plt.subplot(1,2,2)

# Weirdness

print("Try this for the B meson")

Bmes = data['BpostFitMes']#[:,0]
FL = data['Lambda0FlightLen']#[:,0]
print(FL)
print(Bmes)

# For now, we do this because of weird second Lambda
#mask_fl = (data['Lambda0FlightLen']>=0)

#mask = mask_pid

plt.hist(ak.flatten(Bmes),bins=100, range=(5,5.3))
plt.hist(ak.flatten(Bmes[mask_pid]),bins=100, range=(5,5.3))

# Or maybe just this?
#plt.hist(Bmes[:,0],bins=100, range=(5,5.3))
#plt.hist(Bmes[mask_][:,0],bins=100, range=(5,5.3))


print(len(ak.flatten(Bmes)))
print(len(ak.flatten(Bmes[mask_pid])))

;

# Antiproton antimask

In [None]:
# Display the `selectors` attribute of the proton PIDselector created in an earlier cell.
pps.selectors

In [None]:
import babar_analysis_tools as bat

# Restrict the test to a single spmode; swap the active line to try another sample.
#spmask = data['spmode']=='-999'
spmask = data['spmode']=='998'
#spmask = data['spmode']=='1005'
#spmask = data['spmode']=='991'

# Selector helper for the proton hypothesis
pps = myPIDselector.PIDselector("p")

# To test
# Reference list of KM proton selectors (not looped over in this cell).
# Fixed the misspelled 'TightKMProtaonSelection' so the spelling matches the
# identical list used by the selector scan in the following cell.
selectors_to_test = ['SuperLooseKMProtonSelection',
 'VeryLooseKMProtonSelection',
 'LooseKMProtonSelection',
 'TightKMProtonSelection',
 'VeryTightKMProtonSelection',
 'SuperTightKMProtonSelection']

# The one selector actually exercised here
selector_to_test = "TightKMProtonSelection"

# verbose=1 so the helper prints its own diagnostics as well
mask_no_antiprotons, ct = bat.build_antiproton_antimask(data[spmask], pps, selector_to_test, verbose=1)
print('\n',ct,'\n')

#test = ak.any(ct, axis=-1)

print('\n',mask_no_antiprotons,'\n')

# Selector name, number of True entries in the mask, total number of entries
print(selector_to_test,len(mask_no_antiprotons[mask_no_antiprotons]),len(mask_no_antiprotons),'\n')


In [None]:
# Scan all KM proton selectors for one spmode and print, per selector, how the
# antiproton antimask splits the entries.
import babar_analysis_tools as bat

#spmask = data['spmode']=='-999'
spmask = data['spmode']=='998'
#spmask = data['spmode']=='1005'
#spmask = data['spmode']=='991'

# 998 and 1000 to 1002 show that I am not counting things correctly. 
# Maybe works npe? 8/11/2024

pps = myPIDselector.PIDselector("p")

# To test
selectors_to_test = ['SuperLooseKMProtonSelection',
 'VeryLooseKMProtonSelection',
 'LooseKMProtonSelection',
 'TightKMProtonSelection',
 'VeryTightKMProtonSelection',
 'SuperTightKMProtonSelection']

for selector_to_test in selectors_to_test:
    mask_no_antiprotons, ct = bat.build_antiproton_antimask(data[spmask], pps, selector_to_test, verbose=0)
    #print('\n',ct,'\n')
    
    #test = ak.any(ct, axis=-1)
    
    #print('\n',test,'\n')
    # ncut  = entries where the mask is True
    # nkeep = entries where the mask is False
    # norg  = all entries
    # NOTE(review): which of the two is physically "kept" depends on the mask
    # convention inside build_antiproton_antimask — confirm the naming.
    ncut = len(mask_no_antiprotons[mask_no_antiprotons])
    nkeep = len(mask_no_antiprotons[~mask_no_antiprotons])
    norg = len(mask_no_antiprotons)
    # Columns: selector, ncut, nkeep, norg, 100*nkeep/norg.
    # The division raises ZeroDivisionError if the spmode selection is empty.
    print(f'{selector_to_test:36s} {ncut}  {nkeep}   {norg}    {100*nkeep/norg:.2f}')


# Test of Lambda0 selection

Do cuts on both flight length and mass

In [None]:
from analysis_variables import *

In [None]:
# For each listed spmode, compare the Lambda0 mass distribution before and after
# the Lambda0 mask from `bat.get_lambda0_mask`, and print the surviving fraction.
#spmode = '-999'
# NOTE: this assignment is overwritten by the loop variable of the same name below.
spmode = '998'

#spmodes = ['-999', '998', '1005', '1235', '1237', '991']
spmodes = ['998']#, '1005', '1235', '1237']

plt.figure(figsize=(12,12))

# `region_definitions` and `lammass_world_average` are not defined in this
# notebook; presumably they come from `from analysis_variables import *` — confirm.
print(region_definitions['Lambda0 flightlen'])
# Lower/upper edge of the Lambda0 mass window, drawn as dashed lines on each panel
lo = region_definitions['Lambda0 mass'][0]
hi = region_definitions['Lambda0 mass'][1]
#lam_world_average = region_definitions['


for idx,spmode in enumerate(spmodes):
    
    # `mask_sp` stays defined after the loop (last spmode) and is read by the next cell
    mask_sp = data['spmode']==spmode
    
    mask_lam, mask_event_lam = bat.get_lambda0_mask(data[mask_sp], region_definitions, flightlenvar='Lambda0postFitFlight')
    
    # Despite the name, this is simply "post-fit flight length >= 0" per candidate
    mask_duplicates = data[mask_sp]['Lambda0postFitFlight'] >= 0
    
    mass = data[mask_sp]['Lambda0_unc_Mass']

    # norg: candidates with flight >= 0; nremain: those that also pass mask_lam
    norg = len(ak.flatten(mask_duplicates[mask_duplicates]))
    nremain = len(ak.flatten(mask_lam[mask_duplicates & mask_lam]))

    # Raises ZeroDivisionError if no candidate in this spmode has flight >= 0
    print(f"{spmode:6s}  {norg:8d}  {nremain:8d}   {100*nremain/norg:.2f}%")

    #print(mass)

    # One panel per spmode on a 3x2 grid (so at most six spmodes)
    plt.subplot(3, 2, idx+1)
    plt.hist(ak.flatten(mass[mask_duplicates]), bins=100, range=(1.105, 1.125), label=f'{spmode}')
    
    #plt.hist(ak.flatten(mass[mask_event_lam][mask_lam[mask_event_lam]]), bins=100, range=(1.105, 1.125))
    plt.hist(ak.flatten(mass[mask_duplicates & mask_lam]), bins=100, range=(1.105, 1.125))

    # Mass-window edges (black) and the world-average Lambda mass (cyan)
    plt.gca().axvline(x=lo, color='k', linestyle='--')
    plt.gca().axvline(x=hi, color='k', linestyle='--')
    plt.gca().axvline(x=lammass_world_average, color='c', linestyle='--')

    plt.locator_params(nbins=6)


    plt.legend()
    
    ;

plt.tight_layout();


In [None]:
# Before/after comparison of the Lambda0 mask, this time cutting on
# 'Lambda0FlightLen': mass, candidate multiplicity, and flight length.
# NOTE: `mask_sp` is not set in this cell; it is whatever an earlier cell left in
# the kernel (the last iteration of the Lambda0 loop when run top to bottom).
mask_lam, mask_event_lam = bat.get_lambda0_mask(data[mask_sp], region_definitions, flightlenvar='Lambda0FlightLen')

# Lambda0 mass: all candidates (m) and candidates passing the mask (m2)
m = data[mask_sp]['Lambda0_unc_Mass']
m2 = m[mask_lam]

# Number of Lambda0 candidates per outer entry, before and after the mask
nlambda0 = ak.num(m)
nlambda0_2 = ak.num(m2)

# Flight length, before and after the mask
lamfl = data[mask_sp]['Lambda0FlightLen']
lamfl_2 = lamfl[mask_lam]

plt.figure(figsize=(12,8))

# Panel 1: mass
plt.subplot(2,2,1)
plt.hist(ak.flatten(m), bins=100)
plt.hist(ak.flatten(m2), bins=100)

# Panel 2: candidate multiplicity
plt.subplot(2,2,2)
plt.hist(nlambda0, bins=6, range=(0,6))
plt.hist(nlambda0_2, bins=6, range=(0,6))

# Panel 3: flight length
plt.subplot(2,2,3)
plt.hist(ak.flatten(lamfl), bins=100, range=(0,6))
plt.hist(ak.flatten(lamfl_2), bins=100, range=(0,6))

;

In [None]:
# Display the Lambda0 flight-length field of the full sample (cell output).
data['Lambda0FlightLen']

In [None]:
#data[mask_sp]

# Test of nB mask

This function returns both a mask at the event level to select *events* where
there is only 1 B candidate after the B flight-len and mass cut has been made, and it also 
returns that cut for the B's on the flight-len and mass. 

In [None]:
import babar_analysis_tools as bat


# Smoke test of `get_duplicates_mask` on the full sample.
# NOTE(review): the loop cell that follows unpacks this call into two values
# (mask_nB, mask_duplicates), so `mask` here presumably holds a 2-tuple rather
# than a single mask — confirm.
mask = bat.get_duplicates_mask(data)



In [None]:
# For each spmode, apply the masks from `bat.get_duplicates_mask`, print the
# fraction of entries that survive the event-level mask, and overlay
# B_con_postFitMes before and after the cuts.
import babar_analysis_tools as bat

spmodes = ['-999', '998', '1005', '1235', '1237', '991']
#spmodes = ['998', '1005', '1235', '1237']

plt.figure(figsize=(12,12))


for idx,spmode in enumerate(spmodes):
    
    mask_sp = data['spmode']==spmode
    
    # mask_nB indexes outer entries (it is applied to `nB` and to data[mask_sp] below);
    # mask_duplicates is applied to the B candidates inside each entry.
    mask_nB, mask_duplicates= bat.get_duplicates_mask(data[mask_sp])

    #print(mask_nB)
    #data[mask_sp][mask_nB]
    
    nB = data[mask_sp]['nB']

    # This will cut at the event level
    norg = len(nB)
    nremain = len(nB[mask_nB])

    # Raises ZeroDivisionError if this spmode has no entries in the sample
    print(f"{spmode:6s}  {norg:8d}  {nremain:8d}   {100*nremain/norg:.2f}%")
    
    # We can also cut to select only B-candidates that pass the cut
    mes_org = data[mask_sp]['B_con_postFitMes']
    # Event-level cut first, then the candidate-level mask restricted to the same events
    mes = data[mask_sp][mask_nB]['B_con_postFitMes'][mask_duplicates[mask_nB]]

    # One panel per spmode on a 3x2 grid
    plt.subplot(3, 2, idx+1)
    plt.hist(ak.flatten(mes_org), bins=100, range=(5.2, 5.3), label=f'{spmode} - org')
    plt.hist(ak.flatten(mes), bins=100, range=(5.2, 5.3), label=f'{spmode} - after cuts')
    plt.legend()

plt.tight_layout()


In [None]:
# Number of True entries in `mask_nB`; this is the value left by the last
# iteration of the loop in the previous cell.
len(mask_nB[mask_nB])

# Testing out PID masks

In [None]:
# Build the three PID masks once on the full sample, then for each spmode overlay
# the Lambda0 mass with and without the combined PID requirement.
import babar_analysis_tools as bat


mask_bool_proton, mask_bool_pion, mask_bool_protonB = bat.PID_masks(data, \
              lamp_selector='SuperLooseKMProtonSelection', \
              lampi_selector='VeryTightKMPionMicroSelection', \
              Bp_selector='SuperTightKMProtonSelection', \
              verbosity=0)

spmodes = ['-999', '998', '1005', '1235', '1237', '991']
#spmodes = ['998', '1005', '1235', '1237']

plt.figure(figsize=(12,12))


for idx,spmode in enumerate(spmodes):

    # Make some plots
    
    spmask = (data['spmode']==spmode)
    
    # NOTE: lamfl_mask, lammass, mes and mask_pid do not depend on `spmode`; they
    # are recomputed identically on every iteration.
    lamfl_mask = (data['Lambda0FlightLen']>0)
    
    lammass = data['Lambda0_unc_Mass']
    mes = data['BpostFitMes']
    
    # Combined requirement: all three PID masks must pass
    mask_pid =      mask_bool_proton & mask_bool_pion & mask_bool_protonB
    # NOTE: `mask_particle` (and `mes` above) are assigned but not used in this cell
    mask_particle = mask_pid[spmask] & lamfl_mask[spmask] 

    # One panel per spmode on a 3x2 grid
    plt.subplot(3, 2, idx+1)

    # First histogram: spmode + positive flight length; second: the same plus PID
    plt.hist(ak.flatten(lammass[spmask & lamfl_mask]),bins=100, range=(1.105, 1.125),  label=f'{spmode}')
    plt.hist(ak.flatten(lammass[spmask & lamfl_mask & mask_pid]),  bins=100, range=(1.105, 1.125))
    plt.locator_params(nbins=6)
    plt.legend()
    # Candidate counts entering each histogram
    print(len(ak.flatten(lammass[spmask & lamfl_mask])))
    print(len(ak.flatten(lammass[spmask & lamfl_mask & mask_pid])))
    

# Testing the Mes vs DeltaE plot

In [None]:
# Plot mES vs DeltaE for a single spmode, keeping only candidates selected by
# the mask from `bat.get_fit_mask`.
import babar_analysis_tools as bat

#mask_sp = data['spmode']!='-999'
mask_sp = data['spmode']=='991'

mask_fit_region = bat.get_fit_mask(data[mask_sp], region_definitions)

# Flatten to 1-D arrays of the selected candidates for plotting
mes= ak.flatten(data[mask_sp]["BpostFitMes"][mask_fit_region])
DeltaE= ak.flatten(data[mask_sp]["BpostFitDeltaE"][mask_fit_region])

#plt.figure(figsize=(6,4))
bat.plot_mes_vs_DeltaE(mes, DeltaE, draw_signal_region=True, tag=None, region_definitions=region_definitions)#, ax=plt.gca())


In [None]:
# For every spmode: remove the entries flagged by the antiproton antimask, apply
# the PID, Lambda0 flight-length and fit-region masks, then plot mES vs DeltaE
# for whatever survives.
import babar_analysis_tools as bat


spmodes = ['-999', '998', '1005', '1235', '1237', '991']
#spmodes = ['998', '1005', '1235', '1237']

pps = myPIDselector.PIDselector("p")
# The antimask is built once on the full sample so it can be combined with
# each per-mode mask below.
mask_no_antiprotons, ct = bat.build_antiproton_antimask(data, pps, "TightKMProtonSelection", verbose=0)

print("Created mask for no antiprotons")

for idx,spmode in enumerate(spmodes):

    mask_sp = (data['spmode']==spmode)

    # Selection shared by every step below; build it once per mode instead of
    # re-slicing `data` for each use.
    data_selected = data[mask_sp & ~mask_no_antiprotons]

    mask_bool_proton, mask_bool_pion, mask_bool_protonB = bat.PID_masks(data_selected, \
                  lamp_selector='SuperLooseKMProtonSelection', \
                  lampi_selector='VeryTightKMPionMicroSelection', \
                  Bp_selector='SuperTightKMProtonSelection', \
                  verbosity=0)

    lamfl_mask = (data_selected['Lambda0FlightLen']>0)

    # All three PID requirements must pass
    mask_pid = mask_bool_proton & mask_bool_pion & mask_bool_protonB

    mask_fit_region = bat.get_fit_mask(data_selected, region_definitions)

    mask_particle = mask_pid & lamfl_mask & mask_fit_region

    mes= ak.flatten(data_selected["BpostFitMes"][mask_particle])
    DeltaE= ak.flatten(data_selected["BpostFitDeltaE"][mask_particle])

    # `spmode` is a str, so the integer format code `d` raised
    # "ValueError: Unknown format code 'd'"; use a right-aligned string field.
    print(f"{spmode:>7s}  {len(mes):8d}  {len(DeltaE):8d}   {len(mask_sp)}   {len(mask_no_antiprotons)}")

    # Only open a figure when something survived the selection
    if len(mes)>0:
        plt.figure()
        bat.plot_mes_vs_DeltaE(mes, DeltaE, draw_signal_region=True, tag=None, region_definitions=region_definitions, bins=50)#, ax=plt.gca())
        plt.title(spmode)
    

### Collision data

In [None]:
# Peek at a few entries of a collision-data field.
# Requires `data_collision` to exist in the kernel already.
#x = data_collision['nB']
x = data_collision['nLambda0']

#x[x==0]

# Entries 95, 96 and 97 (displayed as the cell output)
x[95:98]

In [None]:
# Peek at the per-entry counts of a collision-data field.
# Requires `data_collision` to exist in the kernel already.
#x = data_collision['Bd2Lund']
#x = data_collision['Bp3']
x = data_collision['Lambda0p3']


# Number of items per outer entry
n = ak.num(x)
# NOTE: this bare expression is not the last line of the cell, so it is
# evaluated but not displayed.
x[90:100]#[:,0]
#x[n>0][:,0]

#x[95:98]
# Only this last expression is shown as the cell output
n[95:98]

In [None]:
# List the field names available in the collision-data array (cell output).
data_collision.fields

In [None]:
def check_and_return_mask_for_miscounting(data):
    """Check whether the stored B count agrees with the number of B entries.

    Compares the `nB` field with the number of items found in the `Bp3` field
    (via `ak.num`) and prints how many entries agree and how many disagree.

    Parameters
    ----------
    data
        Array-like record container indexable by field name; it must provide
        the `nB` and `Bp3` fields.

    Returns
    -------
    mask_match
        Boolean mask with one value per element of `nB`: True where `nB`
        equals the number of items in `Bp3`. (The function previously returned
        None despite its name.)
    """
    nB = data['nB']
    nBp3 = ak.num(data['Bp3'])

    mask_match = nB == nBp3

    # Number of matching entries, then number of mismatching entries
    print(len(nB[mask_match]), len(nB[~mask_match]))

    return mask_match

# Keep the returned masks in variables so the cell does not echo a large array
# as its output (the printed counts are unchanged).
mask_match_data = check_and_return_mask_for_miscounting(data)
mask_match_collision = check_and_return_mask_for_miscounting(data_collision)

In [None]:
# Collision-data version of the mES vs DeltaE study: drop entries whose B count
# is inconsistent, remove the entries flagged by the antiproton antimask, apply
# the PID, Lambda0 flight-length and fit-region masks, then plot what survives.
import babar_analysis_tools as bat


spmodes = ['0']
#spmodes = ['998', '1005', '1235', '1237']

# NOT WORKING FOR COLLISION DATA
# The blinding makes it so that some B fields are empty, which messes up the code. 
# Let's cut these out first

# Keep only entries where the stored `nB` equals the number of items in `Bp3`
nB = data_collision['nB']
Bp3 = data_collision['Bp3']
nBp3 = ak.num(Bp3)
mask_to_account_for_blinding = nB == nBp3
print(len(nB[mask_to_account_for_blinding]), len(nB[~mask_to_account_for_blinding]))

# NOTE: this overwrites `data_collision` in the kernel with the filtered array.
data_collision = data_collision[mask_to_account_for_blinding]

pps = myPIDselector.PIDselector("p")
mask_no_antiprotons, ct = bat.build_antiproton_antimask(data_collision, pps, "TightKMProtonSelection", IS_MC=False, verbose=1)


for idx,spmode in enumerate(spmodes):

    mask_sp = (data_collision['spmode']==spmode)

    # Selection shared by every step below; build it once per mode instead of
    # re-slicing `data_collision` for each use.
    data_collision_selected = data_collision[mask_sp & ~mask_no_antiprotons]

    mask_bool_proton, mask_bool_pion, mask_bool_protonB = bat.PID_masks(data_collision_selected, \
                  lamp_selector='SuperLooseKMProtonSelection', \
                  lampi_selector='VeryTightKMPionMicroSelection', \
                  Bp_selector='SuperTightKMProtonSelection', \
                  verbosity=0)

    lamfl_mask = (data_collision_selected['Lambda0FlightLen']>1.25)

    # All three PID requirements must pass
    mask_pid = mask_bool_proton & mask_bool_pion & mask_bool_protonB

    mask_fit_region = bat.get_fit_mask(data_collision_selected, region_definitions)

    mask_particle = mask_pid & lamfl_mask & mask_fit_region

    mes= ak.flatten(data_collision_selected["BpostFitMes"][mask_particle])
    DeltaE= ak.flatten(data_collision_selected["BpostFitDeltaE"][mask_particle])

    # `spmode` is a str, so the integer format code `d` raised
    # "ValueError: Unknown format code 'd'"; use a right-aligned string field.
    print(f"{spmode:>7s}  {len(mes):8d}  {len(DeltaE):8d}   {len(mask_sp)}   {len(mask_no_antiprotons)}")

    # Only open a figure when something survived the selection
    if len(mes)>0:
        plt.figure()
        bat.plot_mes_vs_DeltaE(mes, DeltaE, draw_signal_region=True, tag=None, region_definitions=region_definitions, bins=50)#, ax=plt.gca())
        plt.title(spmode)
    