In [None]:
%load_ext autoreload
%autoreload 2

import uproot
import awkward as ak

import matplotlib.pylab as plt
import numpy as np

import time

import pandas as pd

from hist import Hist

import babar_analysis_tools as bat
import myPIDselector

import os 

In [None]:
# Directory that holds the parquet files. Pick the line for your site.
# At Siena
topdir = '/mnt/qnap/babar_data/bnv_plambda'

# At home (Bellis)
#topdir = '/home/bellis/babar_data/bnv_plambda'


### Read in SP - includes both signal and background
#filename = f'{topdir}/Background_SP_modes_Only_Run_1.parquet'
filename = f'{topdir}/Background_and_signal_SP_modes_Only_Run_1.parquet'
#filename = f'{topdir}/Background_and_signal_SP_modes_All_runs.parquet'

### Convert the parquet file to an awkward array and report how long it took.
# The timer is started exactly once, right before the read, so the printed
# number measures only the file load.
start = time.time()
ak_sp = ak.from_parquet(filename)
print(f"Took {time.time() - start} s")


# Read in data
#filename = f'{topdir}/Data_Only_Run_1_BLINDED.parquet'
#start = time.time()
#ak_data = ak.from_parquet(filename)
#print(f"Took {time.time() - start} s")


# Selectors

As a reminder, BaBar uses information from multiple detectors to try to determine what type of particle caused a given *track*. This information is fed into one of a variety of *multivariate machine learning* algorithms and the output is used to define different *selectors*

These selectors have different "levels" that give the analyst control over how pure their signal is. Sometimes, you might want to really cut out most of the background at the expense of losing some signal, in which case you would use a **Tight** or **VeryTight** selector. Other times, you might be fine with keeping some background, so long as you keep all or most of your signal, in which case you would use a **Loose** or **VeryLoose** or maybe even **SuperLoose** selector. 

You can see the list of selectors here.

https://babar-wiki.heprc.uvic.ca/bbr_wiki/index.php/Physics/PID/PID_Selector_List

We will be focusing on the Release 24 selectors (R24) which is the software release that processed the data we are working with. For the pions and protons, those are the last six selectors that all have **KM** in their name. 

I have written some code to keep track of all this information. Below I demonstrate how to use it, just to print out the *names* of the selectors. Nothing more. 

Note that we will use the `pps` and `pips` variables we create below, when we apply the selectors. 

In [None]:
# Build the selector lookup objects a single time. Later cells reuse
# `pps` (protons) and `pips` (pions) when the selectors are applied.
pps = myPIDselector.PIDselector("p")
pips = myPIDselector.PIDselector("pi")

# Show the raw selector lists: pions first, then protons, each followed
# by an empty line.
for selector_names in (pips.selectors, pps.selectors):
    print(selector_names)
    print()

# Disabled alternative (kept as an inert string): one selector per line.
'''
# Print them a bit more cleanly
print(f"Proton selectors -----------------------")
for selector in pps.selectors:
    if selector != ' ':
        print(selector)
print()

print(f"Pion selectors -------------------------")
for selector in pips.selectors:
    if selector != ' ':
        print(selector)


''';


For our analysis, we would like to make sure that our three (3) final state particles are really what we think they are. 

$$B^+ \rightarrow p \Lambda^0$$

$$\Lambda^0 \rightarrow p \pi^-$$

So in our final state we have a proton, coming from the $B$ meson and a pion and a second proton, coming from the $\Lambda^0$ decay. 

Below, I've written a function that takes in:

* an awkward array `ak_arr`
* 3 strings representing the selectors that you would want to apply
    * to the proton coming from the $\Lambda^0$ `lamp_selector`
    * to the pion coming from the $\Lambda^0$ `lampi_selector`
    * to the proton coming from the $B$ `Bp_selector`
* The verbosity level

It returns a mask for each particle, saying whether or not the particle passed that selector. You can then choose how you want to apply these individual selectors, or even whether you want to apply them all at once. 

In [None]:
# Here is our function to apply the selectors 
# Let's try this for SP (Monte Carlo)

def get_info_for_PID_masks(ak_arr, verbosity=0):
    """Collect the index and selector-map fields needed to build PID masks.

    Every field is read from ``ak_arr`` itself, so the function works for
    whichever sample is passed in (it does not reach for a notebook-level
    array).

    Parameters
    ----------
    ak_arr : awkward array (or any object supporting ``obj['field']``)
        Must provide the fields ``Lambda0d1Idx``, ``Lambda0d2Idx``,
        ``Lambda0d1Lund``, ``Lambda0d2Lund``, ``Bd2Idx``, ``Bd2Lund``,
        ``pTrkIdx``, ``pSelectorsMap``, ``piTrkIdx`` and ``piSelectorsMap``.
    verbosity : int, optional
        When equal to 1, the Lund and index fields are printed.

    Returns
    -------
    dict
        Keys ``d1idx``, ``d2idx``, ``Bd2idx``, ``trkidx_proton``,
        ``trk_selector_map_proton``, ``trkidx_pion`` and
        ``trk_selector_map_pion``, each holding the corresponding field.
    """

    # Proton and pion information from the Lambda decay
    # These are the index of the proton (d1) and pion (d2) in those lists
    d1idx = ak_arr['Lambda0d1Idx']
    d2idx = ak_arr['Lambda0d2Idx']

    d1lund = ak_arr['Lambda0d1Lund']
    d2lund = ak_arr['Lambda0d2Lund']

    # Second daughter of the B candidate
    Bd2idx = ak_arr['Bd2Idx']
    Bd2lund = ak_arr['Bd2Lund']

    if verbosity == 1:
        print(d1lund)
        print(d2lund)
        print(Bd2lund)
        print()

        print(d1idx)
        print(d2idx)
        print(Bd2idx)
        print()

    # Track index and selector map for the proton and pion lists, taken from
    # the array that was passed in.
    return {
        'd1idx': d1idx,
        'd2idx': d2idx,
        'Bd2idx': Bd2idx,
        'trkidx_proton': ak_arr['pTrkIdx'],
        'trk_selector_map_proton': ak_arr['pSelectorsMap'],
        'trkidx_pion': ak_arr['piTrkIdx'],
        'trk_selector_map_pion': ak_arr['piSelectorsMap'],
    }


def PID_masks(
    indices_and_maps,
    lamp_selector='SuperLooseKMProtonSelection',
    lampi_selector='SuperLooseKMPionMicroSelection',
    Bp_selector='SuperLooseKMProtonSelection',
    verbosity=0,
):
    """Build one PID mask per final-state particle.

    Parameters
    ----------
    indices_and_maps : dict
        Must contain the keys ``d1idx``, ``d2idx``, ``Bd2idx``,
        ``trkidx_proton``, ``trk_selector_map_proton``, ``trkidx_pion``
        and ``trk_selector_map_pion`` (the dictionary produced by
        ``get_info_for_PID_masks``).
    lamp_selector : str
        Selector name applied to the entries picked out by ``d1idx``.
    lampi_selector : str
        Selector name applied to the entries picked out by ``d2idx``.
    Bp_selector : str
        Selector name applied to the entries picked out by ``Bd2idx``.
    verbosity : int
        Forwarded as ``verbose`` to ``bat.calculate_bits_for_PID_selector``.

    Returns
    -------
    tuple
        ``(lambda-proton mask, lambda-pion mask, B-proton mask)`` as returned
        by ``bat.mask_PID_selection``.

    Notes
    -----
    Uses the notebook-level selector objects ``pps`` (proton) and ``pips``
    (pion).
    """
    # Pull every required entry up front, in a fixed order.
    needed = (
        'd1idx', 'd2idx', 'Bd2idx',
        'trkidx_proton', 'trk_selector_map_proton',
        'trkidx_pion', 'trk_selector_map_pion',
    )
    (lam_proton_idx, lam_pion_idx, B_proton_idx,
     proton_trk_idx, proton_sel_map,
     pion_trk_idx, pion_sel_map) = [indices_and_maps[key] for key in needed]

    # Selector bits for the proton list, then for the pion list
    proton_bits = bat.calculate_bits_for_PID_selector(
        proton_trk_idx, proton_sel_map, verbose=verbosity)
    pion_bits = bat.calculate_bits_for_PID_selector(
        pion_trk_idx, pion_sel_map, verbose=verbosity)

    # Protons: the Lambda daughter first, then the B daughter
    lam_proton_mask = bat.mask_PID_selection(
        proton_bits[lam_proton_idx], lamp_selector, pps)
    B_proton_mask = bat.mask_PID_selection(
        proton_bits[B_proton_idx], Bp_selector, pps)

    # Pion from the Lambda
    lam_pion_mask = bat.mask_PID_selection(
        pion_bits[lam_pion_idx], lampi_selector, pips)

    return lam_proton_mask, lam_pion_mask, B_proton_mask




In [None]:
# Run the two functions defined above on the SP awkward array.
# First gather the index / selector-map fields, then ask for one boolean
# mask per final-state particle using the selector names chosen here.

indices_and_maps = get_info_for_PID_masks(ak_sp, verbosity=0)

(mask_bool_proton,
 mask_bool_pion,
 mask_bool_protonB) = PID_masks(
    indices_and_maps,
    lamp_selector='SuperTightKMProtonSelection',
    lampi_selector='SuperTightKMPionMicroSelection',
    Bp_selector='LooseKMProtonSelection',
    verbosity=0,
)


In [None]:
# Now we can use those masks

# Let's look at just a specific SP mode
spmask = (ak_sp['spmode'] == '998')

# We will also make a cut on the Lambda flight length because we
# know we will be doing that
lamfl_mask = (ak_sp['Lambda0FlightLen'] > 1)

# Use all of the masks (comment this out and uncomment the next one to compare)
mask_pid = mask_bool_proton & mask_bool_pion & mask_bool_protonB
# Use two of the masks, uncomment to see
#mask_pid = mask_bool_proton & mask_bool_pion

# Selections before and after the PID requirement
mask_before_PID = spmask & lamfl_mask
mask_after_PID = mask_before_PID & mask_pid

# Variables to plot
lammass = ak_sp['Lambda0_unc_Mass']
mes = ak_sp['BpostFitMes']

lammass_before = ak.flatten(lammass[mask_before_PID])
lammass_after = ak.flatten(lammass[mask_after_PID])

# For each variable we draw the SP mode + flight-length selection, then
# overlay the same selection with the PID mask added.
plt.figure(figsize=(12, 5))

plt.subplot(1, 2, 1)
# Reuse the bin edges of the first histogram for the overlay, so that the two
# distributions are binned identically and can be compared bin by bin.
_, lammass_edges, _ = plt.hist(lammass_before, bins=100,
                               label='SP mode + flight length')
plt.hist(lammass_after, bins=lammass_edges, label='+ PID')
plt.xlabel('Lambda0_unc_Mass')
plt.legend()

plt.subplot(1, 2, 2)
plt.hist(ak.flatten(mes[mask_before_PID]), bins=100, range=(5.2, 5.3),
         label='SP mode + flight length')
plt.hist(ak.flatten(mes[mask_after_PID]), bins=100, range=(5.2, 5.3),
         label='+ PID')
plt.xlabel('BpostFitMes')
plt.legend()

# Let's count the number of candidates surviving the cuts
n_sp_lam_fl = len(lammass_before)
n_sp_lam_fl_PID = len(lammass_after)

print(f"Before PID: {n_sp_lam_fl}")
print(f"After  PID: {n_sp_lam_fl_PID}")

# Your challenge

Here's what I would like you to try to do. 

For the signal SP (-999) and the two dominant background samples (1005 and 998), we want to know how much is cut out/retained by some combination of PID cuts. Ideally, we keep 100% of signal and retain 0% of background, but it won't be that perfect. 

We don't know how good is good enough, but let's just play around and see how good we can do. For example, if we kept 80% of signal and eliminated 99.9% of background, I'd be ecstatic! :)

There are 6 selectors for the pion and 6 selectors for the proton. Since we have 2 protons in the final state, there are 216 possible combinations of selectors to try (6 x 6 x 6). 

Loops are your friend! :)

I'd like you to try all 216 combinations for SP modes -999 (signal), 998 (uds), and 1005 (ccbar) and see what fraction is retained between the SP mode + Lambda0 flight length cut and the SP mode + Lambda0 flight length + PID cuts, as demonstrated above. In the cell above, I cut and count, but you'll have to do a bit more to keep track of the percent retained/eliminated. 

Should it be a table? A plot? I don't know, but just try it out. You can eyeball it and see how they do. 

Good luck and let me know if you have questions!

*Clarification*. In the above example, I make some plots for diagnostic purposes. You don't want to make a plot for every single one of the 216 combinations, but it is helpful to do one or two if you are debugging. 


In [None]:
# Display the selector names held by the pion selector object
pips.selectors

In [None]:
# Your work here!
# Keep only the selector names that contain "KMProton" / "KMPion",
# preserving the order in which the selector objects list them.
proton_selectors_org = [name for name in pps.selectors if "KMProton" in name]
pion_selectors_org = [name for name in pips.selectors if "KMPion" in name]

### Smaller list of selectors (handy for quick tests)

#proton_selectors= ["SuperLooseKMProtonSelection","LooseKMProtonSelection","VeryTightKMProtonSelection",]
#pion_selectors= ["SuperLooseKMPionMicroSelection","LooseKMPionMicroSelection","VeryTightKMPionMicroSelection",]


In [None]:
# Proton selector names, an empty line, then the pion selector names
print(proton_selectors_org, "", pion_selectors_org, sep="\n")

In [None]:
# Let's make sure we grab the mass here in the same cell,
# just in case we ran some other cells.
lammass = ak_sp['Lambda0_unc_Mass']

proton_selectors= ["SuperLooseKMProtonSelection","LooseKMProtonSelection","VeryTightKMProtonSelection",]
pion_selectors= ["SuperLooseKMPionMicroSelection","LooseKMPionMicroSelection","VeryTightKMPionMicroSelection",]

#proton_selectors = proton_selectors_org
#pion_selectors = pion_selectors_org

# Let's keep track of how many combinations we run over
icomb = 1

# Maps "<Lambda proton>, <B proton>, <Lambda pion>   <mode>" to
# {mode: percent of candidates kept by the PID cuts}
bkg_table = {}

# Do this first
indices_and_maps = get_info_for_PID_masks(ak_sp, verbosity=0)

# Everything that does not depend on the PID selectors is computed once, up
# front: the flight-length cut, the per-mode selection and the number of
# candidates in each mode before any PID requirement.
sp_modes = ["-999", "998", "1005"]
lamfl_mask = (ak_sp['Lambda0FlightLen'] > 1)
base_masks = {mode: (ak_sp['spmode'] == mode) & lamfl_mask for mode in sp_modes}
n_before_PID = {mode: len(ak.flatten(lammass[base_masks[mode]])) for mode in sp_modes}

for i in proton_selectors:
    print("new proton A-------")
    for j in proton_selectors:
        print("new proton B")
        for k in pion_selectors:
            # i -> proton from the Lambda, j -> proton from the B,
            # k -> pion from the Lambda
            mask_bool_proton, mask_bool_pion, mask_bool_protonB = PID_masks(
                indices_and_maps,
                lamp_selector=i,
                lampi_selector=k,
                Bp_selector=j,
                verbosity=0)
            mask_pid = mask_bool_proton & mask_bool_pion & mask_bool_protonB

            for mode in sp_modes:
                n_sp_lam_fl = n_before_PID[mode]
                n_sp_lam_fl_PID = len(ak.flatten(lammass[base_masks[mode] & mask_pid]))

                # Check to make sure there are entries so we don't get divide by 0
                frac = 0
                if n_sp_lam_fl != 0:
                    frac = n_sp_lam_fl_PID/n_sp_lam_fl

                bkg_table[f"{i}, {j}, {k}   {mode}"]= {f"{mode}": 100*frac}
            icomb += 1

In [None]:
# One row per key of bkg_table (orient="index"); each inner dict supplies
# that row's column/value pairs. The bare last line displays the frame.
mode_df= pd.DataFrame.from_dict(bkg_table, orient= "index")
mode_df

# Bellis edits

In [None]:
proton_selectors= ["SuperLooseKMProtonSelection","LooseKMProtonSelection", "VeryTightKMProtonSelection",]
pion_selectors= ["SuperLooseKMPionMicroSelection","LooseKMPionMicroSelection", "VeryTightKMPionMicroSelection",]

#proton_selectors = proton_selectors_org
#pion_selectors = pion_selectors_org

# Let's make sure we grab the mass here in the same cell,
# just in case we ran some other cells.
lammass = ak_sp['Lambda0_unc_Mass']

mes = ak_sp['BpostFitMes']
de = ak_sp['BpostFitDeltaE']
print(mes)

sp_modes = ["-999", "998", "1005"]

# Regions of the (mES, DeltaE) plane in which candidates are counted.
# The keys double as the stems of the table column names.
region_masks = {
    'fit area':        (mes>5.2)  & ((de>-.2)  & (de<.2)),
    'signal area':     (mes>5.27) & ((de>-.07) & (de<.07)),
    'sideband 1 area': (mes>5.27) & ((de>-.14) & (de<-.07)),
    'sideband 2 area': (mes>5.27) & ((de< .14) & (de> .07)),
}

# Define the dictionary first, which will be used to make the dataframe.
# Each key becomes a column; every loop iteration appends one value per column.
table_columns = ['icomb', 'SP mode',
                 'Lambda proton selector', 'Lambda pion selector', 'B proton selector',
                 '# org remaining', '# PID remaining', 'pct remaining']
for region in region_masks:
    table_columns += [f'# {region}', f'# {region} PID']
table = {column: [] for column in table_columns}

# Nothing below depends on the PID selectors, so it is computed a single
# time instead of once per selector combination.
lamfl_mask = (ak_sp['Lambda0FlightLen']>1)
base_masks = {}     # mode -> SP mode & flight-length selection
region_base = {}    # mode -> region -> selection restricted to that region
n_org = {}          # mode -> number of candidates before PID
n_region_org = {}   # mode -> region -> number of candidates before PID
for mode in sp_modes:
    base = (ak_sp['spmode']==mode) & lamfl_mask
    base_masks[mode] = base
    n_org[mode] = len(ak.flatten(lammass[base]))
    region_base[mode] = {region: base & rmask for region, rmask in region_masks.items()}
    n_region_org[mode] = {region: len(ak.flatten(mes[sel]))
                          for region, sel in region_base[mode].items()}

# Let's keep track of how many combinations we run over
icomb = 1

# Do this first
indices_and_maps = get_info_for_PID_masks(ak_sp, verbosity=0)

for i in proton_selectors:
    print(f"new proton A-------  {i}")
    for j in proton_selectors:
        print(f"\tnew proton B -------- {j}")
        for k in pion_selectors:
            # i -> proton from the Lambda, j -> proton from the B,
            # k -> pion from the Lambda
            mask_bool_proton, mask_bool_pion, mask_bool_protonB = PID_masks(
                indices_and_maps,
                lamp_selector=i,
                lampi_selector=k,
                Bp_selector=j,
                verbosity=0)
            mask_pid = mask_bool_proton & mask_bool_pion & mask_bool_protonB

            for mode in sp_modes:
                n_sp_lam_fl = n_org[mode]
                n_sp_lam_fl_PID = len(ak.flatten(lammass[base_masks[mode] & mask_pid]))

                # Check to make sure there are entries so we don't get divide by 0
                frac = 0
                if n_sp_lam_fl!=0:
                    frac = n_sp_lam_fl_PID/n_sp_lam_fl

                table['icomb'].append(icomb)
                table['SP mode'].append(mode)

                # Shortened names: keep only the part before 'KM'
                # (append i / k / j unchanged to store the full names instead)
                table['Lambda proton selector'].append(i.split('KM')[0])
                table['Lambda pion selector'].append(k.split('KM')[0])
                table['B proton selector'].append(j.split('KM')[0])

                table['# org remaining'].append(n_sp_lam_fl)
                table['# PID remaining'].append(n_sp_lam_fl_PID)
                table['pct remaining'].append(100*frac)

                # Counts in each region, before and after the PID cuts
                for region in region_masks:
                    table[f'# {region}'].append(n_region_org[mode][region])
                    table[f'# {region} PID'].append(
                        len(ak.flatten(mes[region_base[mode][region] & mask_pid])))

            icomb += 1

In [None]:
proton_selectors= ["SuperLooseKMProtonSelection","LooseKMProtonSelection", "VeryTightKMProtonSelection",]
pion_selectors= ["SuperLooseKMPionMicroSelection","LooseKMPionMicroSelection", "VeryTightKMPionMicroSelection",]

#proton_selectors = proton_selectors_org
#pion_selectors = pion_selectors_org

mes = ak_sp['BpostFitMes']
de = ak_sp['BpostFitDeltaE']

# This cell only draws plots. It deliberately does not touch `table`, so the
# full scan table built in the earlier cell is still intact when the
# dataframe is created from it further down.
mode = "1005"
print("FOR SP MODE 1005")

# Do this first
indices_and_maps = get_info_for_PID_masks(ak_sp, verbosity=0)

# PID-independent part of the selection
spmask = (ak_sp['spmode']==mode)
lamfl_mask = (ak_sp['Lambda0FlightLen']>1)

# Closed outlines of the counting regions. The sideband numbering follows
# the masks used for the table: sideband 1 is the negative-DeltaE band,
# sideband 2 the positive-DeltaE band.
mes_box = [5.27, 5.3, 5.3, 5.27, 5.27]
signal_box = [.07, .07, -.07, -.07, .07]
sideband1_box = [-.07, -.07, -.14, -.14, -.07]
sideband2_box = [.14, .14, .07, .07, .14]

# One panel per selector combination, five panels per row
n_combinations = len(proton_selectors)**2 * len(pion_selectors)
ncols = 5
nrows = -(-n_combinations // ncols)   # ceiling division

plt.figure(figsize=(24,24))

# Let's keep track of how many combinations we run over
icomb = 1
for i in proton_selectors:
    print(f"new proton A-------  {i}")
    for j in proton_selectors:
        print(f"\tnew proton B -------- {j}")
        for k in pion_selectors:
            # i -> proton from the Lambda, j -> proton from the B,
            # k -> pion from the Lambda
            mask_bool_proton, mask_bool_pion, mask_bool_protonB = PID_masks(
                indices_and_maps,
                lamp_selector=i,
                lampi_selector=k,
                Bp_selector=j,
                verbosity=0)
            mask_pid = mask_bool_proton & mask_bool_pion & mask_bool_protonB

            selected = mask_pid & spmask & lamfl_mask
            mes_masked = mes[selected]
            de_masked = de[selected]

            # Shortened selector names (the part before 'KM') for the title
            title1 = i.split('KM')[0]
            title2 = k.split('KM')[0]
            title3 = j.split('KM')[0]

            plt.subplot(nrows, ncols, icomb)
            plt.plot(ak.flatten(mes_masked), ak.flatten(de_masked), "ko", alpha=0.4, markersize=1)
            plt.plot(mes_box, signal_box, "r-", label="Signal area?")
            plt.plot(mes_box, sideband1_box, "c-", label="Sideband 1? ")
            plt.plot(mes_box, sideband2_box, "b-", label="Sideband 2? ")
            plt.ylim(-.2,.2)
            plt.xlim(5.2,5.3)
            plt.title(f"{title1, title2, title3}")
            plt.xlabel("$M_{ES}$")
            # Raw string: "\D" is not a valid escape sequence in a normal string
            plt.ylabel(r"$\Delta E$")
            plt.legend(loc="upper left")

            icomb += 1

plt.tight_layout()

In [None]:
proton_selectors= ["SuperLooseKMProtonSelection","LooseKMProtonSelection", "VeryTightKMProtonSelection",]
pion_selectors= ["SuperLooseKMPionMicroSelection","LooseKMPionMicroSelection", "VeryTightKMPionMicroSelection",]

#proton_selectors = proton_selectors_org
#pion_selectors = pion_selectors_org

mes = ak_sp['BpostFitMes']
de = ak_sp['BpostFitDeltaE']

# This cell only draws plots. It deliberately does not touch `table`, so the
# full scan table built in the earlier cell is still intact when the
# dataframe is created from it further down.
mode = "998"
print("FOR SP MODE 998")

# Do this first
indices_and_maps = get_info_for_PID_masks(ak_sp, verbosity=0)

# PID-independent part of the selection
spmask = (ak_sp['spmode']==mode)
lamfl_mask = (ak_sp['Lambda0FlightLen']>1)

# Closed outlines of the counting regions. The sideband numbering follows
# the masks used for the table: sideband 1 is the negative-DeltaE band,
# sideband 2 the positive-DeltaE band.
mes_box = [5.27, 5.3, 5.3, 5.27, 5.27]
signal_box = [.07, .07, -.07, -.07, .07]
sideband1_box = [-.07, -.07, -.14, -.14, -.07]
sideband2_box = [.14, .14, .07, .07, .14]

# One panel per selector combination, five panels per row
n_combinations = len(proton_selectors)**2 * len(pion_selectors)
ncols = 5
nrows = -(-n_combinations // ncols)   # ceiling division

plt.figure(figsize=(24,24))

# Let's keep track of how many combinations we run over
icomb = 1
for i in proton_selectors:
    print(f"new proton A-------  {i}")
    for j in proton_selectors:
        print(f"\tnew proton B -------- {j}")
        for k in pion_selectors:
            # i -> proton from the Lambda, j -> proton from the B,
            # k -> pion from the Lambda
            mask_bool_proton, mask_bool_pion, mask_bool_protonB = PID_masks(
                indices_and_maps,
                lamp_selector=i,
                lampi_selector=k,
                Bp_selector=j,
                verbosity=0)
            mask_pid = mask_bool_proton & mask_bool_pion & mask_bool_protonB

            selected = mask_pid & spmask & lamfl_mask
            mes_masked = mes[selected]
            de_masked = de[selected]

            # Shortened selector names (the part before 'KM') for the title
            title1 = i.split('KM')[0]
            title2 = k.split('KM')[0]
            title3 = j.split('KM')[0]

            plt.subplot(nrows, ncols, icomb)
            plt.plot(ak.flatten(mes_masked), ak.flatten(de_masked), "ko", alpha=0.4, markersize=1)
            plt.plot(mes_box, signal_box, "r-", label="Signal area?")
            plt.plot(mes_box, sideband1_box, "c-", label="Sideband 1? ")
            plt.plot(mes_box, sideband2_box, "b-", label="Sideband 2? ")
            plt.ylim(-.2,.2)
            plt.xlim(5.2,5.3)
            plt.title(f"{title1, title2, title3}")
            plt.xlabel("$M_{ES}$")
            # Raw string: "\D" is not a valid escape sequence in a normal string
            plt.ylabel(r"$\Delta E$")
            plt.legend(loc="upper left")

            icomb += 1

plt.tight_layout()

In [None]:
proton_selectors= ["SuperLooseKMProtonSelection","LooseKMProtonSelection", "VeryTightKMProtonSelection",]
pion_selectors= ["SuperLooseKMPionMicroSelection","LooseKMPionMicroSelection", "VeryTightKMPionMicroSelection",]

#proton_selectors = proton_selectors_org
#pion_selectors = pion_selectors_org

mes = ak_sp['BpostFitMes']
de = ak_sp['BpostFitDeltaE']

# This cell only draws plots. It deliberately does not touch `table`, so the
# full scan table built in the earlier cell is still intact when the
# dataframe is created from it further down.
mode = "-999"
print("FOR SP MODE -999")

# Do this first
indices_and_maps = get_info_for_PID_masks(ak_sp, verbosity=0)

# PID-independent part of the selection
spmask = (ak_sp['spmode']==mode)
lamfl_mask = (ak_sp['Lambda0FlightLen']>1)

# Closed outlines of the counting regions. The sideband numbering follows
# the masks used for the table: sideband 1 is the negative-DeltaE band,
# sideband 2 the positive-DeltaE band.
mes_box = [5.27, 5.3, 5.3, 5.27, 5.27]
signal_box = [.07, .07, -.07, -.07, .07]
sideband1_box = [-.07, -.07, -.14, -.14, -.07]
sideband2_box = [.14, .14, .07, .07, .14]

# One panel per selector combination, five panels per row
n_combinations = len(proton_selectors)**2 * len(pion_selectors)
ncols = 5
nrows = -(-n_combinations // ncols)   # ceiling division

plt.figure(figsize=(24,24))

# Let's keep track of how many combinations we run over
icomb = 1
for i in proton_selectors:
    print(f"new proton A-------  {i}")
    for j in proton_selectors:
        print(f"\tnew proton B -------- {j}")
        for k in pion_selectors:
            # i -> proton from the Lambda, j -> proton from the B,
            # k -> pion from the Lambda
            mask_bool_proton, mask_bool_pion, mask_bool_protonB = PID_masks(
                indices_and_maps,
                lamp_selector=i,
                lampi_selector=k,
                Bp_selector=j,
                verbosity=0)
            mask_pid = mask_bool_proton & mask_bool_pion & mask_bool_protonB

            selected = mask_pid & spmask & lamfl_mask
            mes_masked = mes[selected]
            de_masked = de[selected]

            # Shortened selector names (the part before 'KM') for the title
            title1 = i.split('KM')[0]
            title2 = k.split('KM')[0]
            title3 = j.split('KM')[0]

            plt.subplot(nrows, ncols, icomb)
            # Fainter points than in the background plots (alpha 0.1)
            plt.plot(ak.flatten(mes_masked), ak.flatten(de_masked), "ko", alpha=0.1, markersize=1)
            plt.plot(mes_box, signal_box, "r-", label="Signal area?")
            plt.plot(mes_box, sideband1_box, "c-", label="Sideband 1? ")
            plt.plot(mes_box, sideband2_box, "b-", label="Sideband 2? ")
            plt.ylim(-.2,.2)
            plt.xlim(5.2,5.3)
            plt.title(f"{title1, title2, title3}")
            plt.xlabel("$M_{ES}$")
            # Raw string: "\D" is not a valid escape sequence in a normal string
            plt.ylabel(r"$\Delta E$")
            plt.legend(loc="upper left")

            icomb += 1

plt.tight_layout()

In [None]:
# Turn the dictionary of equal-length column lists into a dataframe
# (one column per key) and display it.
df_cuts = pd.DataFrame(table)
df_cuts

In [None]:
# Display all the rows
# Convert entire data frame as string and print
#print(df_cuts.to_string())

# This can be messy and not terribly well-formatted though

In [None]:
# Look at just a subset of the dataframe: the rows for SP mode 1005.
# The boolean row selector is called `mode_filter` (not `filter`) so that the
# Python builtin `filter` is not shadowed for the rest of the kernel session.
mode_filter = df_cuts['SP mode']=='1005'
df_cuts[mode_filter]

In [None]:
# Look at just a subset of the dataframe: the rows for SP mode 998.
# The boolean row selector is called `mode_filter` (not `filter`) so that the
# Python builtin `filter` is not shadowed for the rest of the kernel session.
mode_filter = df_cuts['SP mode']=='998'
df_cuts[mode_filter]

In [None]:
# Look at just a subset of the dataframe: the rows for SP mode -999.
# The boolean row selector is called `mode_filter` (not `filter`) so that the
# Python builtin `filter` is not shadowed for the rest of the kernel session.
mode_filter = df_cuts['SP mode']=='-999'
df_cuts[mode_filter]

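### I think this will give us what we want: for every combination of selectors, the number of entries in the fit area, the signal area, and the two sidebands, before and after PID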

In [None]:
# Scan over every (Lambda proton, B proton, Lambda pion) selector combination and,
# for each SP mode, record how many entries survive before and after the PID
# requirement in the fit area, the signal area and the two Delta-E sidebands.
# The result is the dictionary `table` (one list per column), ready to be turned
# into a DataFrame.

# Explicit selector lists (currently disabled):
#proton_selectors= ["SuperLooseKMProtonSelection","LooseKMProtonSelection", "VeryTightKMProtonSelection",]
#pion_selectors= ["SuperLooseKMPionMicroSelection","LooseKMPionMicroSelection", "VeryTightKMPionMicroSelection",]

proton_selectors = proton_selectors_org
pion_selectors = pion_selectors_org

# Let's make sure we grab the mass here in the same cell,
# just in case we ran some other cells.
lammass = ak_sp['Lambda0_unc_Mass']

mes = ak_sp['BpostFitMes']
de = ak_sp['BpostFitDeltaE']
print(mes)

# SP modes that get one table row each per selector combination
SP_MODES = ["-999", "998", "1005"]


def _n_selected(values, mask):
    """Return the number of entries of the jagged array `values` that pass `mask`."""
    return len(ak.flatten(values[mask]))


# Define the dictionary first, which will be used to make the dataframe.
# The insertion order here fixes the column order of the dataframe.
table = {}
table['icomb'] = []
table['SP mode'] = []
table['Lambda proton selector'] = []
table['Lambda pion selector'] = []
table['B proton selector'] = []
table['# org remaining'] = []
table['# PID remaining'] = []
table['pct remaining'] = []
table['# fit area'] = []
table['# fit area PID'] = []
table['# signal area'] = []
table['# signal area PID'] = []
table['# sideband 1 area'] = []
table['# sideband 1 area PID'] = []
table['# sideband 2 area'] = []
table['# sideband 2 area PID'] = []

# ----------------------------------------------------------------------------
# Everything below, up to the selector loops, does not depend on the PID
# selectors, so it is computed once here instead of once per combination.
# ----------------------------------------------------------------------------
lamfl_mask = (ak_sp['Lambda0FlightLen']>1)

# Signal area and fit area masks
signal_area_mask = (mes>5.27) & ((de>-.07) & (de<.07))
fit_area_mask  = (mes>5.2) & ((de>-.2) & (de<.2))

# NOTE(review): an earlier plotting cell labels the +0.07..+0.14 band
# "Sideband 1?" and the -0.14..-0.07 band "Sideband 2?", which is the opposite
# of the numbering used here -- confirm which convention is intended.
sideband1_mask = (mes>5.27) & ((de>-.14) & (de<-.07))
sideband2_mask = (mes>5.27) & ((de< .14) & (de> .07))

# Keys are chosen so that f'# {region}' and f'# {region} PID' are the table columns
region_masks = {
    'fit area': fit_area_mask,
    'signal area': signal_area_mask,
    'sideband 1 area': sideband1_mask,
    'sideband 2 area': sideband2_mask,
}

base_masks = {}         # mode -> (SP mode & flight length) mask
selection_masks = {}    # mode -> region -> (SP mode & flight length & region) mask
counts_before_pid = {}  # mode -> counts without any PID requirement
for mode in SP_MODES:
    spmask = (ak_sp['spmode']==mode)
    base_masks[mode] = spmask & lamfl_mask
    counts_before_pid[mode] = {'org': _n_selected(lammass, base_masks[mode])}
    selection_masks[mode] = {}
    for region, region_mask in region_masks.items():
        selection_masks[mode][region] = base_masks[mode] & region_mask
        counts_before_pid[mode][region] = _n_selected(mes, selection_masks[mode][region])

# Let's keep track of how many combinations we run over
icomb = 1

# Do this first
indices_and_maps = get_info_for_PID_masks(ak_sp, verbosity=0)

for i in proton_selectors:
    print(f"new proton A-------  {i}")
    for j in proton_selectors:
        print(f"\tnew proton B -------- {j}")
        for k in pion_selectors:
            # i -> Lambda proton, k -> Lambda pion, j -> B proton
            mask_bool_proton, mask_bool_pion, mask_bool_protonB = PID_masks(
                indices_and_maps,
                lamp_selector=i,
                lampi_selector=k,
                Bp_selector=j,
                verbosity=0)

            # All three tracks must pass their selector; this is the same for every SP mode
            mask_pid = mask_bool_proton & mask_bool_pion & mask_bool_protonB

            for mode in SP_MODES:
                n_sp_lam_fl = counts_before_pid[mode]['org']
                n_sp_lam_fl_PID = _n_selected(lammass, base_masks[mode] & mask_pid)

                # Check to make sure there are entries so we don't get divide by 0
                frac = 0
                if n_sp_lam_fl!=0:
                    frac = n_sp_lam_fl_PID/n_sp_lam_fl

                table['icomb'].append(icomb)
                table['SP mode'].append(mode)

                # Shortened names (everything before 'KM' in the selector name)
                table['Lambda proton selector'].append(i.split('KM')[0])
                table['Lambda pion selector'].append(k.split('KM')[0])
                table['B proton selector'].append(j.split('KM')[0])

                table['# org remaining'].append(n_sp_lam_fl)
                table['# PID remaining'].append(n_sp_lam_fl_PID)
                table['pct remaining'].append(100*frac)

                # Counts in each region, without and with the PID requirement
                for region in region_masks:
                    table[f'# {region}'].append(counts_before_pid[mode][region])
                    table[f'# {region} PID'].append(
                        _n_selected(mes, selection_masks[mode][region] & mask_pid))

            # One combination = one (i, j, k) triple; its three mode rows share this number
            icomb += 1

In [None]:
# Turn the dictionary of equal-length column lists into a DataFrame and display it.
df_cuts = pd.DataFrame(table)
df_cuts

In [None]:
# Build a one-row-per-combination summary: the region counts come from the
# SP mode 1005 rows, and the 'pct remaining' of each SP mode is attached as
# its own column.
grouped = df_cuts.groupby('SP mode')
pct998, pct1005, pct999 = (
    grouped.get_group(sp_mode)[['pct remaining']] for sp_mode in ('998', '1005', '-999')
)

# Start from one of the groupings (it could be any) and drop the columns that
# are replaced by the per-mode percentages below.
# We attach the raw .values: assigning the dataframes themselves would make
# pandas match up the row indices, which differ between the groups, and the
# new columns would be filled with NaNs.
df_grp = (
    grouped.get_group('1005')
    .drop(columns=['# org remaining', '# PID remaining', 'pct remaining', 'SP mode'])
    .assign(**{
        'pct 998': pct998.values,
        'pct 1005': pct1005.values,
        'pct -999': pct999.values,
    })
)

df_grp

In [None]:
# Percentage of SP mode -999 entries remaining after PID, for each selector
# combination, on a fixed 0-100 vertical scale.
df_grp.plot.scatter(x='icomb', y='pct -999').set_ylim(0, 100)

In [None]:
# Compute a Punzi figure of merit for every selector combination and report
# the combination that maximises it.

# Row selectors for each SP mode. The scan fills one row per mode for every
# selector combination, so the three filtered views line up element by element.
filter998 = df_cuts['SP mode']=='998'
filter1005 = df_cuts['SP mode']=='1005'
filter999 = df_cuts['SP mode']=='-999'

# Combination number and (shortened) selector names, one entry per combination
icomb = df_cuts[filter998]['icomb'].values
lamprot = df_cuts[filter998]['Lambda proton selector'].values
lampion = df_cuts[filter998]['Lambda pion selector'].values
Bprot = df_cuts[filter998]['B proton selector'].values

# Background estimate in the signal area after PID, from SP modes 998 and 1005.
# NOTE(review): the factors 10*(0.3) and 10*(0.25) look like scale factors applied
# to the SP counts; their origin is not shown in this cell -- confirm and document.
bkg998 = df_cuts[filter998]['# signal area PID'].values*10*(0.3)
bkg1005 = df_cuts[filter1005]['# signal area PID'].values*10*(0.25)
bkg_total = bkg998 + bkg1005

# SP mode -999 is used as the signal sample: counts in the signal area
# before and after the PID requirement.
sigorg = df_cuts[filter999]['# signal area'].values
sigPID = df_cuts[filter999]['# signal area PID'].values

# Efficiency of the PID requirement on the signal sample
eff = sigPID/sigorg

# Punzi figure of merit: eff / (a/2 + sqrt(B)), where `a` is the desired
# significance in units of sigma. Note that a/2 is added to sqrt(B); it does
# not go inside the square root.
a = 4
fom = eff/((a/2) + np.sqrt(bkg_total))

plt.figure(figsize=(18,9))
plt.subplot(3,1,1)
plt.plot(icomb, fom, 'o')
plt.ylabel('FOM')

plt.subplot(3,1,2)
plt.plot(icomb, eff, 'o')
plt.ylim(0,1)
plt.ylabel('Signal efficiency')

plt.subplot(3,1,3)
plt.plot(icomb, bkg_total, 'o')
plt.ylim(0)
plt.ylabel('# of bkg in signal region')

# Position (not icomb value) of the first combination with the largest FOM
idx = int(np.argmax(fom))

print(f"Max fom: {max(fom)}")
print(f"Max fom: {fom[idx]}")
print(f"Eff: {eff[idx]}")
print(f"Bkg: {bkg998[idx] + bkg1005[idx]}")
print(f"Lambda proton: {lamprot[idx]}")
print(f"Lambda pion:   {lampion[idx]}")
print(f"B proton:      {Bprot[idx]}")



In [None]:
# Display the per-combination summary table again, for reference.
df_grp

In [None]:
# Yields after PID for each selector combination, read from the summary table.
nsigarea = df_grp["# signal area PID"]
nsideband1, nsideband2 = df_grp["# sideband 1 area PID"], df_grp["# sideband 2 area PID"]

# Combined yield of the two sidebands
sidetot = nsideband1.add(nsideband2)

# What remains in the signal area once the combined sideband yield is subtracted
leftover = nsigarea.sub(sidetot)

In [None]:
# Signal-area yield minus the summed sideband yields (both after PID),
# plotted against the selector-combination number.
plt.plot(df_grp["icomb"], leftover, "o")
plt.xlabel("icomb")
# Raw string: otherwise "\S" in the LaTeX "\Sigma" is an invalid Python escape
# sequence, which triggers a SyntaxWarning on recent Python versions.
plt.ylabel(r"sig area (PID) - $\Sigma$ sidebands (PID)")