In [None]:
%load_ext autoreload
%autoreload 2
    
import uproot
import awkward as ak

import matplotlib.pylab as plt
import numpy as np

import time

from hist import Hist

import babar_analysis_tools as bat

import pandas as pd

from analysis_variables import *

import os

In [None]:
# Time how long it takes to read the parquet file
start = time.time()

# Pick the directory holding the input file by leaving exactly one
# `topdir` assignment uncommented for the machine you are working on.

# At Siena
#topdir = '/mnt/qnap/babar_data/bnv_plambda'
# At Siena on villa-straylight
#topdir = '/home/bellis/'


# At Bellis' home
topdir = '/home/bellis/babar_data/bnv_plambda'

# On Bellis' laptop
#topdir = './'

# Input file: the first (commented-out) option is the background-only sample,
# the active one contains both background and signal SP modes.
#filename = f'{topdir}/Background_SP_modes_Only_Run_1.parquet'
filename = f'{topdir}/Background_and_signal_SP_modes_Only_Run_1.parquet'

# Read the whole file into an awkward array; `data` is used by the cells below
data = ak.from_parquet(filename)

print(f"Took {time.time() - start} s")


In [None]:
#idx = data['Bd2Idx']

#data['pTrkIdx'][0]

#data['TRKp3CM']

# Producing tables for the skim statistics

In [None]:
# Load the dataset statistics table through the babar_analysis_tools helper
# and display it; the cells below index it as a pandas DataFrame.
df = bat.read_in_dataset_statistics()
df

In [None]:
# Load the SP-mode cross sections and labels through the babar_analysis_tools
# helper; the next cell reads its 'SP Mode' and 'Cross section [nb]' columns.
dfspinfo = bat.get_SP_cross_sections_and_labels()

dfspinfo

In [None]:
# The total BBbar cross section is the sum of the cross sections listed
# for SP modes 1235 and 1237.
xsec_per_mode = []
for sp_mode_number in (1235, 1237):
    mask = dfspinfo['SP Mode']==sp_mode_number
    # First (and expected only) matching row for this SP mode
    xsec_per_mode.append(dfspinfo[mask]['Cross section [nb]'].values[0])

bbbar_xsec = xsec_per_mode[0] + xsec_per_mode[1]

print(f"The total BBbar cross section is {bbbar_xsec} nb")

In [None]:
# Display only the rows whose 'Data or MC' column is 'Data'
mask = df['Data or MC']=='Data'
df[mask]

In [None]:
# Distinct values of the 'SP mode' column; iterated over in the next cell
spmodes = df['SP mode'].unique()

spmodes

In [None]:
# For every SP mode, print: the mode, the summed number of events in the rows
# that are NOT the LambdaVeryVeryLoose skim, the summed number of events in the
# LambdaVeryVeryLoose skim rows, and the skim/non-skim ratio in percent.

# The skim selection does not depend on the SP mode, so build it once.
mask_skim = df['Skim'] == 'LambdaVeryVeryLoose'

for spmode in spmodes:
    # `mask` stays a notebook-level name on purpose: the following cell reads it.
    mask = df['SP mode']==spmode

    # Combine both conditions into one full-length mask and index once with
    # .loc, instead of filtering first and then applying a mask whose index no
    # longer matches the filtered Series.
    nevents = df.loc[mask & ~mask_skim, '# of events (Data or MC)'].sum()
    nevents_skim = df.loc[mask & mask_skim, '# of events (Data or MC)'].sum()

    print(f'{spmode:8d}  {nevents}  {nevents_skim}  {100*nevents_skim/nevents}')

In [None]:
# Sum the event counts of the rows selected by `mask` that are not in the
# LambdaVeryVeryLoose skim.
# NOTE(review): `mask` is not defined in this cell. It is whatever an earlier
# cell left behind (the loop over `spmodes` assigns it last, so it selects the
# final SP mode) — confirm that this is the intended selection.
mask_skim = df['Skim'] != 'LambdaVeryVeryLoose'
df[mask]['# of events (Data or MC)'][mask_skim].sum()

In [None]:
# Per-run summary of the real data in the LambdaVeryVeryLoose skim
mask = (df['Data or MC']=='Data') & (df['Skim']=='LambdaVeryVeryLoose')

# Take an explicit copy: a column is added below, and assigning into a
# filtered slice of `df` triggers pandas' SettingWithCopyWarning.
dftmp = df.loc[mask, ['Run', 'Luminosity (Data only) 1/pb','# of events (Data or MC)', '# of events (Data or MC) NOT SURE WHICH NUMBER TO USE']].copy()

# Number of BBbar pairs = luminosity [1/pb] * cross section [nb] * 1000,
# the factor 1000 converting the cross section from nb to pb.
dftmp['# of BBbar pairs'] = dftmp['Luminosity (Data only) 1/pb']*bbbar_xsec*1000

dftmp

In [None]:

#dftmp.style.concat(df.agg(['sum']).style)

# Make the cell safe to re-run: remove a 'Total' row appended by a previous
# execution, otherwise it would break the int conversion below and be summed
# into the new total.
dftmp = dftmp.drop(index='Total', errors='ignore')

# Store the run numbers as strings so they are excluded from the numeric sum
# and are written without a decimal point.
dftmp['Run'] = dftmp['Run'].astype(int).astype(str)

# Append a row, indexed 'Total', holding the sum of every numeric column
dftmp.loc['Total'] = dftmp.sum(numeric_only=True)

# The numeric-only sum has no 'Run' entry, so label the new row explicitly
dftmp.at['Total', 'Run'] = 'Total'

dftmp

In [None]:
# Put the text 'Total' in the 'Run' column of the row indexed 'Total'
# (that row was filled from a numeric-only sum, which does not include 'Run').
dftmp.at['Total','Run'] = 'Total'

In [None]:
# Column headings of the LaTeX table, one per column of dftmp.
# Raw strings keep the LaTeX backslash literal: '\#' is not a valid Python
# escape sequence and newer Python versions warn about it.
header = [
    'Run',
    'Luminosity (1/pb)',
    r'\# skimmed events',
    r'\# org. events',
    r'\# BB pairs',
]

caption = "Details of the numbers of events and luminosity from the {\\tt LambdaVeryVeryLoose} skim used in this analysis."
label = 'tab:dataskims'

output = dftmp.to_latex(index=False, header=header, float_format="%.1f", caption=caption, label=label)

# Add in centering by replacing the first EOL with "EOL + \centering + EOL"
output = output.replace('\n', '\n\\centering\n', 1)

# Add an hline near the bottom, above the total row
output = output.replace('Total', '\\hline\nTotal', 1)

#styler = dftmp.style
#output = styler.to_latex(caption=caption, label=label)

print(output)

# Write it out to <current working directory>/tables/
current_dir = os.getcwd()
print(f"Writing to {current_dir}")
directory = "tables"
path = os.path.join(current_dir, directory)
# Creates the directory when missing and is a no-op when it already exists
os.makedirs(path, exist_ok=True)

print(f"Writing to {directory}")

outfilename = os.path.join(path, "table_data_skim_statistics.tex")
# The context manager closes the file even if the write raises
with open(outfilename, 'w') as outfile:
    outfile.write(output)


# Flight length

## Function

In [None]:
# Directory in which the study function below saves its figures
savedir = './'

def study_flight_length_effect_on_lambda0_purity(data, spmode='-999', tag='DEFAULT'):
    """Scan a cut on the transverse B-Lambda0 vertex separation and summarize the Lambda0 yields.

    Entries of ``data`` whose ``'spmode'`` field equals ``spmode`` are selected.
    Control plots of the Lambda0 mass and of the flight-length variables are
    drawn first.  Then 20 cut values (0.0, 0.1, ..., 1.9) are applied to the
    transverse distance between the B vertex and the Lambda0 vertex.  For each
    cut the candidates inside a +/-0.003 GeV/c^2 window around the Lambda0 mass
    are counted, the background under the peak is estimated from the two
    neighbouring sidebands, and the yields are plotted against the cut value.
    Several figures are saved as PNG files under the module-level ``savedir``.

    Parameters
    ----------
    data : record array usable with awkward
        Must provide the fields read below ('spmode', the B and Lambda0 vertex
        coordinates, the post-fit flight lengths and 'Lambda0_unc_Mass').
    spmode : str
        Value of the 'spmode' field to select; also used in the output file names.
    tag : str
        Currently unused; kept so that existing callers keep working.

    Returns
    -------
    int
        Always 0.
    """

    # Apply the SP-mode selection once and read every field from the subset
    selected = data[data['spmode'] == spmode]

    bvtx = selected['B_con_Vtxx']
    bvty = selected['B_con_Vtxy']

    lamvtx = selected['Lambda0Vtxx']
    lamvty = selected['Lambda0Vtxy']

    lamconfl =    selected['Lambda0postFitFlight']
    lamconflsig = selected['Lambda0postFitFlightSignificance']

    bconfl =    selected['BpostFitFlight']
    bconflsig = selected['BpostFitFlightSignificance']

    # Transverse (x-y) distance between the B vertex and the Lambda0 vertex.
    # Both the x and the y component enter the sum in quadrature.
    blamfldx = bvtx - lamvtx
    blamfldy = bvty - lamvty
    blamfltrans = np.sqrt(blamfldx**2 + blamfldy**2)

    # Transverse (x-y) distance of the Lambda0 vertex from the coordinate origin
    lamfltrans = np.sqrt(lamvtx**2 + lamvty**2)

    lamuncmass = selected['Lambda0_unc_Mass']

    # Draw some plots of the original distributions
    plt.figure()
    plt.hist(ak.flatten(lamuncmass),bins=100)
    plt.xlabel(r'$\Lambda^0$ mass (GeV/c$^2$)', fontsize=18)
    outfile = f'{savedir}/STUDY_flight_len_effect_on_Lambda_purity_Lambda_unc_mass_{spmode}.png'
    plt.savefig(outfile)

    # Flight length variables
    plt.figure()
    plt.hist(ak.flatten(lamconfl),bins=100, range=(0,60))
    plt.xlabel(r'$\Lambda^0$ flight length (cm)')

    outfile = f'{savedir}/STUDY_flight_len_effect_on_Lambda_purity_Lambda_flightlen_{spmode}.png'
    plt.savefig(outfile)

    plt.figure(figsize=(8,8))

    plt.subplot(2,2,1)
    plt.hist(ak.flatten(lamconfl),bins=100, range=(0,60))
    plt.xlabel(r'$\Lambda^0$ flight length (cm)')

    plt.subplot(2,2,2)
    plt.hist(ak.flatten(lamconflsig),bins=100, range=(0,300))
    plt.xlabel(r'$\Lambda^0$ flight length significance')

    plt.subplot(2,2,3)
    plt.hist(ak.flatten(bconfl),bins=100, range=(0,.2))
    plt.xlabel('$B$ flight length (cm)')

    plt.subplot(2,2,4)
    plt.hist(ak.flatten(bconflsig),bins=100, range=(0,4))
    plt.xlabel('$B$ flight length significance')

    # Transverse len
    plt.figure(figsize=(12,4))

    plt.subplot(1,3,1)
    plt.hist(ak.flatten(lamconfl),bins=100, range=(0,30))

    plt.subplot(1,3,2)
    plt.hist(ak.flatten(blamfltrans),bins=100, range=(0,5))

    plt.subplot(1,3,3)
    plt.hist(ak.flatten(lamfltrans),bins=100, range=(0,5))

    ####### Do the study with many cuts
    nrows = 4
    ncols = 5

    # One panel per cut value: fig1 shows the mass, fig2 the cut variable
    fig1,axes1 = plt.subplots(figsize=(12,8),nrows=nrows, ncols=ncols, sharex=True, sharey=True)
    fig2,axes2 = plt.subplots(figsize=(12,8),nrows=nrows, ncols=ncols, sharex=True, sharey=True,)

    peaks = []
    sigs = []
    bkgs = []
    cuts = []

    # Define the mass cuts around the lambda
    lammass_world_average = 1.115683
    width = 0.003 # GeV/c^2

    lo = lammass_world_average - width
    hi = lammass_world_average + width

    # Variable that is cut on; swap in lamconfl to scan the flight length instead
    cutvariable = blamfltrans
    #cutvariable = lamconfl

    # Quantities that do not depend on the cut value are computed once
    m = lamuncmass
    m_flat = ak.flatten(m)
    cutvariable_flat = ak.flatten(cutvariable)
    nall = len(m_flat)

    for i in range(0,20):

        # Cut on our flight-length variable
        cut = i*0.1
        mask_fl = cutvariable>cut

        # Signal window, plus one sideband on each side.  Each sideband is
        # 2*width wide, the same width as the signal window.
        mask = (m>lo) & (m<hi) & mask_fl
        mask_lo_sideband = (m<=lo) & (m>(lo-2*width)) & mask_fl
        mask_hi_sideband = (m>=hi) & (m<(hi+2*width)) & mask_fl

        m_peak = ak.flatten(m[mask])
        m_lo_sideband = ak.flatten(m[mask_lo_sideband])
        m_hi_sideband = ak.flatten(m[mask_hi_sideband])

        npeak = len(m_peak)
        nbkglo = len(m_lo_sideband)
        nbkghi = len(m_hi_sideband)
        # Average of the two sidebands estimates the background under the peak
        nbkg = (nbkglo + nbkghi)/2.0
        nsig = npeak - nbkg

        print(f"nall: {nall}     npeak: {npeak}   nbkg: {nbkg}     nsig: {nsig}   {nbkglo}   {nbkghi}")
        peaks.append(npeak)
        sigs.append(nsig)
        bkgs.append(nbkg)
        cuts.append(cut)

        # For the different subplots
        row, col = divmod(i, ncols)

        plt.sca(axes1[row][col])

        plt.hist(m_flat,       bins=100,range=(1.105, 1.125))
        plt.hist(m_peak,       bins=100,range=(1.105, 1.125), label=f'flcut>{cut:.1f}')
        plt.hist(m_lo_sideband,bins=100,range=(1.105, 1.125), color='yellow')
        plt.hist(m_hi_sideband,bins=100,range=(1.105, 1.125), color='yellow')
        plt.legend()

        plt.sca(axes2[row][col])
        # Only the bottom row carries x labels because the axes are shared
        if row == nrows-1:
            axes1[row][col].set_xlabel(r'$\Lambda^0$ mass [GeV/c$^2$]')
            axes2[row][col].set_xlabel(r'$\Lambda^0$ flight len [cm]')

        plt.hist(cutvariable_flat,                bins=100,range=(0.0, 30))
        plt.hist(ak.flatten(cutvariable[mask_fl]),bins=100,range=(0.0, 30))

    fig1.subplots_adjust(wspace=0, hspace=0)
    fig2.subplots_adjust(wspace=0, hspace=0)

    fig1.tight_layout()
    fig2.tight_layout()

    sigs = np.array(sigs)
    bkgs = np.array(bkgs)
    peaks = np.array(peaks)

    # Yields relative to the first (cut = 0.0) scan point
    pcts = sigs/sigs[0]
    bkg_under_peak = peaks - sigs
    pct_bkg_under_peak = bkg_under_peak / bkg_under_peak[0]

    outfile = f'{savedir}/STUDY_flight_len_effect_on_Lambda_purity_Lambda_masses_with_cuts_{spmode}.png'
    fig1.savefig(outfile)
    outfile = f'{savedir}/STUDY_flight_len_effect_on_Lambda_purity_Lambda_flightlens_with_cuts_{spmode}.png'
    fig2.savefig(outfile)

    print()
    print(f"Cutting around the peak alone reduces the number of signal to {100*peaks[0]/nall:.2f} %")

    ############################################################
    # Summarize the values

    plt.figure(figsize=(12, 8))
    plt.subplot(3,2,1)
    plt.plot(cuts, peaks,'o')
    plt.ylabel('# under peak')

    plt.subplot(3,2,2)
    plt.plot(cuts, sigs,'o')
    plt.ylabel('# of signal in peak (# in peak - # est background')

    plt.subplot(3,2,3)
    plt.plot(cuts, bkgs,'o')
    plt.ylabel('# est background')

    plt.subplot(3,2,4)
    plt.plot(cuts, pcts,'o')
    plt.ylim(0.7,1.1)
    plt.ylabel('% sig remaining')

    plt.subplot(3,2,5)
    plt.plot(cuts, pct_bkg_under_peak,'o')
    #plt.ylim(0.7,1.1)
    plt.ylabel('% bkg under peak')

    # Naive significance
    # Multiply by 10 if we are only doing Run 1
    plt.subplot(3,2,6)
    plt.plot(cuts, 10*sigs/np.sqrt(10*bkg_under_peak),'o')
    plt.ylabel(r'Naive significance ($S/\sqrt{ B }$)')

    outfile = f'{savedir}/STUDY_flight_len_effect_on_Lambda_purity_summaries_{spmode}.png'
    plt.savefig(outfile)

    return 0


In [None]:
# Run the flight-length study on the entries whose 'spmode' field is '998'
retval = study_flight_length_effect_on_lambda0_purity(data, spmode='998')


In [None]:
# Run the flight-length study on the entries whose 'spmode' field is '1005'
retval = study_flight_length_effect_on_lambda0_purity(data, spmode='1005')


In [None]:
# Run the flight-length study on the entries whose 'spmode' field is '-999'
retval = study_flight_length_effect_on_lambda0_purity(data, spmode='-999')

In [None]:
#fig1.tight_layout()

## Testing

In [None]:
# Choose which SP mode to look at by leaving one `mask` line uncommented
mask = data['spmode'] == '-999'
#mask = data['spmode'] == '998'
#mask = data['spmode'] == '1005'
#mask = data['spmode'] == '-1'

# Apply the selection once and read every field from the subset
data_selected = data[mask]

bvtx = data_selected['B_con_Vtxx']
bvty = data_selected['B_con_Vtxy']
bvtz = data_selected['B_con_Vtxz']

lamvtx = data_selected['Lambda0Vtxx']
lamvty = data_selected['Lambda0Vtxy']
lamvtz = data_selected['Lambda0Vtxz']

lamconfl =    data_selected['Lambda0postFitFlight']
lamconflsig = data_selected['Lambda0postFitFlightSignificance']

bconfl =    data_selected['BpostFitFlight']
bconflsig = data_selected['BpostFitFlightSignificance']


# Transverse (x-y) distance between the B vertex and the Lambda0 vertex:
# the x and the y component are added in quadrature.
blamfldx = bvtx - lamvtx
blamfldy = bvty - lamvty

blamfltrans2 = blamfldx**2 + blamfldy**2
blamfltrans = np.sqrt(blamfltrans2)

# Transverse (x-y) distance of the Lambda0 vertex from the coordinate origin
lamfltrans2 = lamvtx**2 + lamvty**2
lamfltrans = np.sqrt(lamfltrans2)

# Check the array types of the inputs and of both derived quantities
print(bvtx.type)
print(lamvtx.type)
print(blamfltrans.type)
print(lamfltrans.type)


lamuncmass = data_selected['Lambda0_unc_Mass']

In [None]:
# Lambda0 mass distribution for the selected SP mode
plt.figure()
plt.hist(ak.flatten(lamuncmass),bins=100)


# Flight lengths and flight-length significances of the Lambda0 and the B
plt.figure(figsize=(8,8))

plt.subplot(2,2,1)
plt.hist(ak.flatten(lamconfl),bins=100, range=(0,60))
plt.xlabel('Lambda constrained flight length (cm)')

plt.subplot(2,2,2)
plt.hist(ak.flatten(lamconflsig),bins=100, range=(0,300))
plt.xlabel('Lambda constrained flight length significance')

plt.subplot(2,2,3)
plt.hist(ak.flatten(bconfl),bins=100, range=(0,.2))
plt.xlabel('B constrained flight length (cm)')

plt.subplot(2,2,4)
plt.hist(ak.flatten(bconflsig),bins=100, range=(0,4))
plt.xlabel('B constrained flight length significance')


# Transverse len
plt.figure(figsize=(12,4))

plt.subplot(1,3,1)
plt.hist(ak.flatten(lamconfl),bins=100, range=(0,30))

plt.subplot(1,3,2)
plt.hist(ak.flatten(blamfltrans),bins=100, range=(0,5))

plt.subplot(1,3,3)
# The trailing semicolon must sit on the last statement itself to suppress the
# textual output of the cell; a ';' alone on its own line is not valid Python.
plt.hist(ak.flatten(lamfltrans),bins=100, range=(0,5));

In [None]:
# Print the array types of the cut variable and of the mass; the two are
# combined in boolean masks by the cells below.
print(blamfltrans.type)

print(lamuncmass.type)

In [None]:
# Try a single cut: keep candidates whose transverse B-Lambda0 separation exceeds 1.0
mask = blamfltrans>1.0
#mask = lamconfl>1.0
#mask = lamconflsig > 1.0

# Overlay the Lambda0 mass before and after the cut
plt.figure(figsize=(12,4))
plt.subplot(1,2,1)
plt.hist(ak.flatten(lamuncmass),      bins=100,range=(1.105, 1.125))
# The trailing semicolon must sit on the last statement itself to suppress the
# textual output of the cell; a ';' alone on its own line is not valid Python.
plt.hist(ak.flatten(lamuncmass[mask]),bins=100,range=(1.105, 1.125));

In [None]:
#lamconfl[:,0]

In [None]:
#fig1,axes1 = plt.subplots(figsize=(12,8),nrows=3, ncols=4)



In [None]:
# Count number under peak and number of estimated background

# Mass of lambda is 1.115683


#mask = blamfltrans>1.0
#mask = lamconfl>1.0
#mask = lamconflsig > 1.0

nrows = 4
ncols=5

# One panel per cut value: fig1 shows the mass, fig2 the cut variable
fig1,axes1 = plt.subplots(figsize=(12,8),nrows=nrows, ncols=ncols)
fig2,axes2 = plt.subplots(figsize=(12,8),nrows=nrows, ncols=ncols)

peaks = []
allentries = []
sigs = []
bkgs = []
cuts = []

# Everything that does not depend on the cut value is defined once, up front
width = 0.003

lo = 1.115683 - width
hi = 1.115683 + width

# Variable that is cut on; swap in lamconfl to scan the flight length instead
cutvariable = blamfltrans
#cutvariable = lamconfl

m = lamuncmass
nall = len(ak.flatten(m))

for i in range(0,20):
    cut = i*0.1
    mask_fl = cutvariable>cut

    # Signal window, plus one sideband on each side.  Each sideband is 2*width
    # wide, the same width as the signal window.
    mask = (m>lo) & (m<hi) & mask_fl

    mask_lo_sideband = (m<=lo) & (m>(lo-2*width)) & mask_fl
    mask_hi_sideband = (m>=hi) & (m<(hi+2*width)) & mask_fl

    npeak = len(ak.flatten(m[mask]))
    nbkglo = len(ak.flatten(m[mask_lo_sideband]))
    nbkghi = len(ak.flatten(m[mask_hi_sideband]))
    # Average of the two sidebands estimates the background under the peak
    nbkg = (nbkglo + nbkghi)/2.0
    nsig = npeak - nbkg

    print(f"nall: {nall}     npeak: {npeak}   nbkg: {nbkg}     nsig: {nsig}   {nbkglo}   {nbkghi}")
    peaks.append(npeak)
    sigs.append(nsig)
    bkgs.append(nbkg)
    cuts.append(cut)
    allentries.append(nall)

    # Panel position for this cut value
    row, col = divmod(i, ncols)

    plt.sca(axes1[row][col])

    plt.hist(ak.flatten(m),      bins=100,range=(1.105, 1.125))
    plt.hist(ak.flatten(m[mask]),bins=100,range=(1.105, 1.125))
    plt.hist(ak.flatten(m[mask_lo_sideband]),bins=100,range=(1.105, 1.125), color='yellow')
    plt.hist(ak.flatten(m[mask_hi_sideband]),bins=100,range=(1.105, 1.125), color='yellow')

    plt.sca(axes2[row][col])

    plt.hist(ak.flatten(cutvariable),         bins=100,range=(0.0, 30))
    plt.hist(ak.flatten(cutvariable[mask_fl]),bins=100,range=(0.0, 30))

sigs = np.array(sigs)
bkgs = np.array(bkgs)
peaks = np.array(peaks)

# Yields relative to the first (cut = 0.0) scan point
pcts = sigs/sigs[0]
bkg_under_peak = peaks - sigs
pct_bkg_under_peak = bkg_under_peak / bkg_under_peak[0]

print()
print(f"Cutting around the peak alone reduces the number of signal to {100*peaks[0]/nall:.2f} %")

In [None]:

# Summary of the cut scan: each panel shows one quantity versus the cut value.
# The y labels mirror those of the equivalent summary figure produced by
# study_flight_length_effect_on_lambda0_purity, so the panels can be read
# without looking at the code.
plt.figure(figsize=(12, 8))
plt.subplot(3,2,1)
plt.plot(cuts, peaks,'o')
plt.ylabel('# under peak')

plt.subplot(3,2,2)
plt.plot(cuts, sigs,'o')
plt.ylabel('# of signal in peak (# in peak - # est background)')

plt.subplot(3,2,3)
plt.plot(cuts, bkgs,'o')
plt.ylabel('# est background')

plt.subplot(3,2,4)
plt.plot(cuts, pcts,'o')
plt.ylim(0.7,1.1)
plt.ylabel('% sig remaining')

plt.subplot(3,2,5)
plt.plot(cuts, pct_bkg_under_peak,'o')
#plt.ylim(0.7,1.1)
plt.ylabel('% bkg under peak')

# Naive significance
# Multiply by 10 if we are only doing Run 1
plt.subplot(3,2,6)
plt.plot(cuts, 10*sigs/np.sqrt(10*bkg_under_peak),'o')
# Trailing semicolon suppresses the textual repr of the last call
plt.ylabel(r'Naive significance ($S/\sqrt{ B }$)');

In [None]:
#data.fields

In [None]:
# Use the 'Lambda0d1Idx' field as an index into the 'pcosth' field and
# histogram the selected values.
idx = data['Lambda0d1Idx']

x = data['pcosth'][idx]

# The trailing semicolon must sit on the last statement itself to suppress the
# textual output of the cell; a ';' alone on its own line is not valid Python.
plt.hist(ak.flatten(x), bins=100);

# Duplicates study

