In [None]:
%load_ext autoreload
%autoreload 2

import uproot
import awkward as ak

import matplotlib.pylab as plt
import numpy as np

import time

from hist import Hist

import babar_analysis_tools as bat

import pandas as pd

from analysis_variables import *

import os

# Open the data file or files

In [None]:
#####################################################################
# Where are we running this?
#####################################################################
# The data directory differs from machine to machine. Rather than editing
# this cell on every computer, set the BABAR_TOPDIR environment variable
# before starting Jupyter. When it is not set, the default below is used.
#
# Locations used so far:
#   Bellis computer: /home/bellis/babar_data_local/bnv_plambda
#   Laptop:          /Users/josieswann/BaBar_analyses/BNV_pLambda/
topdir = os.environ.get("BABAR_TOPDIR", "/home/bellis/babar_data_local/bnv_plambda")
#####################################################################


#####################################################################
# Get the BNV data
#####################################################################
data, data_collision = bat.load_datasets(topdir=topdir, subset='Run1')

#####################################################################
# Get the BNC data
#####################################################################
# To work with the BNC sample instead, point topdir at the BNC directory
# and pass BNC=True:
#topdir= "/home/bellis/babar_data/bnv_plambda_bnc"
#data, data_collision = bat.load_datasets(topdir=topdir, BNC=True, subset='all')
#data, data_collision = bat.load_datasets(topdir=topdir, BNC=True, subset='Run1')

# Plots and tables for signal and blinding area

In [None]:
# Import the one name this cell needs explicitly instead of repeating the
# star import from the first cell, so it is clear where it comes from.
from analysis_variables import region_definitions

# Show the region definitions used for the tables below.
region_definitions

In [None]:
def table_from_df(df, caption="This could be the caption", label=None,
                  float_format="{:.4f}".format):
    """Wrap the LaTeX tabular of a dataframe in a ``table`` environment.

    Parameters
    ----------
    df : object with a ``to_latex`` method (e.g. a pandas DataFrame)
        The data to convert. It is rendered without its index.
    caption : str, optional
        Text placed inside ``\\caption{...}``.
    label : str or None, optional
        When given, ``\\label{<label>}`` is appended inside the caption.
    float_format : callable, optional
        Formatter handed to ``df.to_latex`` for floating-point values.
        Defaults to four decimal places.

    Returns
    -------
    str
        The LaTeX source as a single string. ``print()`` it or write it to
        a file; displaying the bare string in a notebook shows the escaped
        representation (``\\n`` instead of line breaks).
    """
    # Converts the dataframe into the tabular environment (ends with a newline).
    tabular = df.to_latex(index=False, float_format=float_format)

    caption_body = caption
    if label is not None:
        caption_body += "\\label{" + label + "}"

    full_table = "\\begin{table}\n"  # opens the float around the tabular
    full_table += "\\caption{" + caption_body + "}\n"
    full_table += tabular
    full_table += "\\end{table}"  # closes the float
    return full_table

In [None]:
# Put the (low, high) limits of the fitting and blinding windows into a
# two-row dataframe. The blinding row is taken from the 'signal ...' entries.
rd = region_definitions

fit_mes, blind_mes = rd['fitting MES'], rd['signal MES']
fit_de, blind_de = rd['fitting DeltaE'], rd['signal DeltaE']

dict_temp = {
    'Region': ['Fitting', 'Blinding'],
    'MES low': [fit_mes[0], blind_mes[0]],
    'MES high': [fit_mes[1], blind_mes[1]],
    'DeltaE low': [fit_de[0], blind_de[0]],
    'DeltaE high': [fit_de[1], blind_de[1]],
}

dftmp = pd.DataFrame.from_dict(dict_temp)

dftmp

In [None]:
# Column titles for the LaTeX table. Raw strings keep the backslash of
# \Delta literal; in a normal string "\D" is an invalid escape sequence
# that newer Python versions warn about.
header = ['Region', '$MES$ low', '$MES$ high', r'$\Delta E$ low', r'$\Delta E$ high']

# Convert the dataframe into a LaTeX tabular (two decimal places, no index).
output = dftmp.to_latex(index=False, header=header,
              float_format="{:.2f}".format,
)

full_table = "\\begin{table}\n"  # opens the float around the tabular
full_table += "\\centering"
full_table += "\\caption{Definition of the fitting region and blinding region for this analysis.\\label{tab:def_regions}}\n"
full_table += output  # the converted dataframe
full_table += "\\end{table}"  # closes the float

table = full_table

# print() so the line breaks are rendered rather than shown as "\n".
print(table)

outfilename = 'tables/table_def_regions.tex'
# This is the first cell that writes into tables/, so make sure the
# directory exists instead of relying on a later cell to create it.
os.makedirs(os.path.dirname(outfilename), exist_ok=True)
with open(outfilename, 'w') as outfile:
    outfile.write(table)


# Tables of dataset statistics

Make LaTeX tables for the number of entries in each file.

In [None]:
# Build the LaTeX table of data-skim statistics and show it.
output = bat.create_table_of_data_skims_statistics()

print(output)
print()

# Write it out
current_dir = os.getcwd()
print(f"Writing to {current_dir}")
directory = "tables"
path = os.path.join(current_dir, directory)
# exist_ok=True makes this safe to re-run and avoids the separate
# "does it exist?" check before creating the directory.
os.makedirs(path, exist_ok=True)

print(f"Writing to {directory}")

outfilename = os.path.join(path, "table_data_skim_statistics.tex")
# The with-block closes the file even if the write fails.
with open(outfilename, 'w') as outfile:
    outfile.write(output)

In [None]:
### Cross-section information --> used later to calculate the scaling values for histograms

dataset_information = pd.read_csv("dataset_statistics.csv")
cs_data = pd.read_csv("SP_cross_sections_and_labels.csv")

# Columns that are left out of the summary table.
dropped_columns = [
    "Uncertainty",
    "Note: cross sections found at https://babar-wiki.heprc.uvic.ca/bbr_wiki/index.php/Physics/Cross_sections,_luminosities,_and_other_vital_stats",
]
no_notes = cs_data.drop(columns=dropped_columns)

bat.table_from_df(
    no_notes,
    "shortened_stats",
    label='tab:shstat',
    caption='Summary of meaning of SP mode codes and the relevant cross sections used for scaling Monte Carlo and collision data',
)

In [None]:
# Display the full cross-section table read from SP_cross_sections_and_labels.csv.
cs_data

In [None]:
# Weights
bkg_spmodes = ['998', '1005', '1235', '1237', '3981']
sig_spmodes = ['-999']

spmodes = bkg_spmodes + sig_spmodes

weights = {}
for sp in spmodes:
    weights[sp] = bat.scaling_value(int(sp), dataset_information=dataset_information, cs_data=cs_data, plot=False, verbose=False)
    #weights[sp] = 1

for sp,weight in weights.items():
    print(f"{sp:6s}   {weight:.2f}     {1/weight:.2f}")

In [None]:
# SP
# Display the dataset statistics table read from dataset_statistics.csv.

dataset_information

In [None]:
# Load the dataset statistics and the SP cross-section/label table through
# the babar_analysis_tools helpers; both are used in the cells below.
df= bat.read_in_dataset_statistics()
dfspinfo = bat.get_SP_cross_sections_and_labels()


In [None]:
# List the distinct values found in the 'SP mode' column.
df['SP mode'].unique()

In [None]:
# For each SP mode, compare the event count outside the LambdaVeryVeryLoose
# skim with the count inside it, and print them next to the weight.
nevents_col = '# of events (Data or MC)'

is_mc = df['Data or MC'] == 'MC'
in_skim = df['Skim'] == 'LambdaVeryVeryLoose'
not_in_skim = df['Skim'] != 'LambdaVeryVeryLoose'

for spmode in [998, 1005]:
    mode_rows = is_mc & (df['SP mode'] == spmode)

    nevents_org = df.loc[mode_rows & not_in_skim, nevents_col].sum()
    nevents_skim = df.loc[mode_rows & in_skim, nevents_col].sum()

    wt = weights[str(spmode)]

    # Columns: SP mode, original events (raw, millions), skimmed events
    # (raw, millions), weight, 1/weight
    print(f'{spmode:6d}   {nevents_org} {nevents_org/1e6:.1f}   {nevents_skim} {nevents_skim/1e6:.1f}   {wt:.2f}  {1/wt:.2f}')

In [None]:
def create_table_of_SP_skims_statistics():
    """Build a LaTeX table of per-run statistics for the LambdaVeryVeryLoose MC rows.

    Reads the dataset statistics and the SP cross-section table through
    ``bat``, keeps the rows flagged as MC in the ``LambdaVeryVeryLoose``
    skim, adds a column with the number of BBbar pairs (luminosity times the
    summed cross section of SP modes 1235 and 1237) and appends a row of
    column totals.

    Also prints the total BBbar cross section.

    Returns
    -------
    str
        LaTeX source of the table, with ``\\centering`` inserted after the
        first line and an ``\\hline`` placed above the ``Total`` row.
    """
    df = bat.read_in_dataset_statistics()

    dfspinfo = bat.get_SP_cross_sections_and_labels()

    # The total BBbar cross section is the sum of SP modes 1235 and 1237.
    mask = dfspinfo['SP Mode']==1235
    bbbar_xsec = dfspinfo[mask]['Cross section [nb]'].values[0]

    mask = dfspinfo['SP Mode']==1237
    bbbar_xsec += dfspinfo[mask]['Cross section [nb]'].values[0]

    print(f"The total BBbar cross section is {bbbar_xsec} nb")

    # NOTE(review): the rows selected here are MC, but the luminosity column
    # is labelled "Data only" -- confirm it is filled for these rows.
    mask = (df['Data or MC']=='MC') & (df['Skim']=='LambdaVeryVeryLoose')
    columns = ['Run', 'Luminosity (Data only) 1/pb','# of events (Data or MC)', '# of events (Data or MC) NOT SURE WHICH NUMBER TO USE']
    # .copy() gives an independent frame, so the assignments below do not
    # act on a slice of df (which triggers SettingWithCopyWarning).
    dftmp = df.loc[mask, columns].copy()

    # Luminosity is in 1/pb and the cross section in nb; the factor 1000
    # converts nb to pb.
    dftmp['# of BBbar pairs'] = dftmp['Luminosity (Data only) 1/pb']*bbbar_xsec*1000

    # Run is turned into text first so it is skipped by the numeric sum.
    dftmp['Run'] = dftmp['Run'].astype(int).astype(str)
    dftmp.loc['Total'] = dftmp.sum(numeric_only=True)

    dftmp.at['Total','Run'] = 'Total'

    # Backslashes are written as "\\" so the strings do not depend on how
    # Python treats invalid escape sequences such as "\#".
    header = [
        'Run',
        'Luminosity (1/pb)',
        '\\# skimmed events',
        '\\# org. events',
        '\\# BB pairs',
    ]

    # NOTE(review): caption and label look like they were written for the
    # data-skim table -- confirm the label does not clash with that table.
    caption = "Details of the numbers of events and luminosity from the {\\tt LambdaVeryVeryLoose} skim used in this analysis."
    label = 'tab:dataskims'

    output = dftmp.to_latex(index=False, header=header, float_format="%.1f", caption=caption, label=label, position='h')

    # Add in centering by replacing the first EOL with "EOL + \centering + EOL"
    output = output.replace('\n','\n\\centering\n', 1)

    # Add an hline near the bottom, above the total
    output = output.replace('Total','\\hline\nTotal', 1)

    return output


##############################################################
# Build the LaTeX table of SP skim statistics and show it.
output = create_table_of_SP_skims_statistics()

print(output)
print()

# Write it out
current_dir = os.getcwd()
print(f"Writing to {current_dir}")
directory = "tables"
path = os.path.join(current_dir, directory)
# exist_ok=True makes this safe to re-run and avoids the separate
# "does it exist?" check before creating the directory.
os.makedirs(path, exist_ok=True)

print(f"Writing to {directory}")

outfilename = os.path.join(path, "table_SP_skim_statistics.tex")
# The with-block closes the file even if the write fails.
with open(outfilename, 'w') as outfile:
    outfile.write(output)