In [None]:
%load_ext autoreload
%autoreload 2

import uproot
import awkward as ak

import matplotlib.pylab as plt
import numpy as np

import time

import pandas as pd

from hist import Hist

import babar_analysis_tools as bat

import os 

## Reading in files

In [None]:
# Top-level directory holding the parquet files. Switch between the two
# locations by (un)commenting the matching line.

# At Siena
topdir = '/mnt/qnap/babar_data/bnv_plambda'

# At home (Bellis)
#topdir = '/home/bellis/babar_data/bnv_plambda'


### Read in SP- includes both signal and background 
#filename = f'{topdir}/Background_SP_modes_Only_Run_1.parquet'
filename = f'{topdir}/Background_and_signal_SP_modes_Only_Run_1.parquet'
#filename = f'{topdir}/Background_and_signal_SP_modes_All_runs.parquet'

### convert files from a parquet file to an awkward array
# The timer is started right before each read so the printed number is the
# load time of that one file (an earlier, never-read timer start was removed).
start = time.time()
ak_sp = ak.from_parquet(filename)
print(f"Took {time.time() - start} s")


# Read in data
filename = f'{topdir}/Data_Only_Run_1_BLINDED.parquet'
#filename = f'{topdir}/Data_All_runs_BLINDED.parquet'
start = time.time()
ak_data = ak.from_parquet(filename)
print(f"Took {time.time() - start} s")


In [None]:
### READ IN THE BIG SP FILE--> ALL RUNS! Doesn't work on Luminous 
# Disabled on purpose: the snippet lives inside a bare string literal, so nothing
# below is executed. The trailing ';' keeps the notebook from echoing the string.
# NOTE(review): if re-enabled, the SP sample would be stored in a variable called
# `ak_all_runs_blinded`, which reads like a data-sample name -- confirm the intent.
'''
big_file= f"{topdir}/Background_and_signal_SP_modes_All_runs.parquet"

start= time.time()
ak_all_runs_blinded= ak.from_parquet(big_file)
print(f"Yikes! Took {time.time()-start} s")
''';

In [None]:
### READ IN THE BIG DATA FILE--> ALL RUNS! 
# Disabled on purpose: the snippet lives inside a bare string literal, so nothing
# below is executed. The trailing ';' keeps the notebook from echoing the string.
'''
big_file= f"{topdir}/Data_All_runs_BLINDED.parquet"

start= time.time()
ak_all_runs_blinded= ak.from_parquet(big_file)
print(f"Phew! Took {time.time()-start} s")
''';

In [None]:
### information about cross section --> what we'll use to calculate scaling values for histograms 

# Both CSV files are read relative to the notebook's working directory.
# The two tables are later handed to bat.scaling_value(...) as the
# `dataset_information` and `cs_data` keyword arguments.
dataset_information = pd.read_csv("BaBar_analyses_BNV_pLambda_data_for_scaling.csv")
cs_data= pd.read_csv("BaBar_analyses_BNV_pLambda_cross_section_data.csv")

# Trailing ';' suppresses the table display; remove it to inspect cs_data.
cs_data;

## Hist Creation and Filling 

In [None]:
# Binning and axis label for every variable that gets histogrammed.
# Each entry maps a branch name to:
#   nbins : number of regular bins
#   lo/hi : axis range
#   label : x-axis label (matplotlib mathtext allowed)
# Labels containing LaTeX commands are raw strings so that "\D", "\L", ... are
# not parsed as (invalid) Python escape sequences.
hist_defs = {}


### Entries followed by lots of hashtags have a weird range that I want to ask Bellis about 

hist_defs['BSphr'] =                           {"nbins":100, "lo":0,     "hi":0.2,   "label":"Sphericity"}
hist_defs["BpostFitDeltaE"]=                   {"nbins":100, "lo":-1.0,  "hi":1.0,   "label":r"$\Delta$ E"}
hist_defs["BpostFitMes"]=                      {"nbins":100, "lo":5.2,   "hi":5.3,   "label":r"$M_{ES}$ [GeV/c$^2$]"}
hist_defs["BThrust"]=                          {"nbins":100, "lo":0.9,   "hi":1.05,  "label":"B thrust"}
hist_defs["BCosThetaS"]=                       {"nbins":150, "lo":-1.05, "hi":1.05,  "label":"B Cos Theta S"}
hist_defs["Lambda0_unc_Mass"]=                 {"nbins":100, "lo":1.105, "hi":1.125, "label":r"$\Lambda^0$ mass [GeV/c$^2$]"}
hist_defs["nTracks"]=                          {"nbins":100, "lo":0,     "hi":18,    "label":"number of tracks"}
hist_defs["nGoodTrkLoose"]=                    {"nbins":100, "lo":0,     "hi":14,    "label":"number of Good Tracks- Loose"}
# NOTE(review): a 0-0.2 range looks odd for a track count -- confirm the intended range.
hist_defs["nChargedTracks"]=                   {"nbins":100, "lo":0,     "hi":0.2,   "label":"number of Charged Tracks"}
hist_defs["R2"]=                               {"nbins":100, "lo":0,     "hi":1.05,  "label":"R2"}
hist_defs["R2All"]=                            {"nbins":100, "lo":0,     "hi":1,     "label":"R2All"}
hist_defs["thrustMag"]=                        {"nbins":100, "lo":.6,    "hi":1,     "label":"thrustMag"}
hist_defs["thrustMagAll"]=                     {"nbins":100, "lo":.6,    "hi":1,     "label":"thrustMagAll"}
hist_defs["thrustCosTh"]=                      {"nbins":100, "lo":0,     "hi":1,     "label":"thrustCosTh"}
hist_defs["thrustCosThAll"]=                   {"nbins":100, "lo":0,     "hi":1,     "label":"thrustCosThAll"}
hist_defs["sphericityAll"]=                    {"nbins":100, "lo":0,     "hi":0.75,  "label":"Sphericity"}
hist_defs["BCosSphr"]=                         {"nbins":100, "lo":-.8,   "hi":1,     "label":"BCosSphr"}
hist_defs["BCosThetaT"]=                       {"nbins":100, "lo":-1,    "hi":1,     "label":"BCosThetaT"}
hist_defs["BCosThrust"]=                       {"nbins":100, "lo":0,     "hi":1,     "label":"BCosThrust"}
hist_defs["BLegendreP2"]=                      {"nbins":100, "lo":0,     "hi":0.2,   "label":"BLegendreP2"}#################
hist_defs["BR2ROE"]=                           {"nbins":100, "lo":0,     "hi":1,     "label":"BR2ROE"}
hist_defs["BSphrROE"]=                         {"nbins":100, "lo":0,     "hi":1,     "label":"BSphrROE"}
hist_defs["BThrustROE"]=                       {"nbins":100, "lo":0.5,   "hi":1,     "label":"BThrustROE"}
hist_defs["Lambda0postFitFlight"]=             {"nbins":100, "lo":0,     "hi":0.2,   "label":"Lambda0postFitFlight"}####################
hist_defs["Lambda0postFitFlightSignificance"]= {"nbins":100, "lo":0,     "hi":0.2,   "label":"Lambda0postFitFlightSignificance"}###############
hist_defs["nTRK"]=                             {"nbins":20, "lo":0,      "hi":20,    "label":"# of charged tracks"}

In [None]:
def create_empty_histograms(hist_defs):
    """Book one empty, weighted histogram per entry of ``hist_defs``.

    Parameters
    ----------
    hist_defs : dict
        Maps a variable name to a dict with the keys ``"nbins"``, ``"lo"``,
        ``"hi"`` and ``"label"``.

    Returns
    -------
    dict
        A new dict with the same keys, in the same order, whose values are the
        booked histograms. ``hist_defs`` itself is not modified.

    Every histogram has three axes:
      * ``var``  - regular axis built from nbins/lo/hi, carrying the label,
      * ``SP``   - string category axis that grows as new SP modes are filled,
      * ``cuts`` - string category axis that grows as new cut labels are filled,
    and uses weighted storage.
    """
    def _book(spec):
        # The regular axis is always called 'var', whatever the variable is,
        # so the same keyword can be used when filling any histogram.
        return (
            Hist.new.Reg(spec["nbins"], spec["lo"], spec["hi"], name='var', label=f"{spec['label']}")
            .StrCat([], name="SP", label="SP modes", growth=True)
            .StrCat([], name="cuts", label="Cuts", growth=True)
            .Weight()
        )

    return {var: _book(spec) for var, spec in hist_defs.items()}


def _is_candidate_level(key):
    """Return True for branches treated as per-candidate: names starting with 'B' or 'Lambda0'."""
    return key.startswith(('B', 'Lambda0'))


def _build_cut_masks(ak_arr, spmode, mask_ntrk):
    """Build the (cut label, event mask, candidate mask) triple of every cut level for one SP mode."""
    mask_sp = (ak_arr.spmode == spmode)
    mask_event = mask_sp & mask_ntrk

    # Flight length of the candidates surviving each event-level selection.
    flight_sp = ak_arr['Lambda0FlightLen'][mask_sp]
    flight_ev = ak_arr['Lambda0FlightLen'][mask_event]

    mes = ak_arr['BpostFitMes'][mask_event]
    de  = ak_arr['BpostFitDeltaE'][mask_event]

    # Blinded signal box and the wider region used for the fit.
    blinding_mask = (mes>5.27) & ((de>-.07) & (de<.07))
    fitarea_mask  = (mes>5.2) & ((de>-.2) & (de<.2))

    mask_fl = flight_ev >= 1
    if spmode == '-999':   # Don't blind signal
        var_mask = mask_fl & fitarea_mask
    else:
        var_mask = mask_fl & ~blinding_mask & fitarea_mask

    return [
        ("0", mask_sp,    flight_sp >= 0),   # SP mode only, sane flight length
        ("1", mask_event, flight_ev >= 0),   # + nTRK >= 5
        ("2", mask_event, mask_fl),          # + flight length >= 1
        ("3", mask_event, var_mask),         # + fit region, blinded box removed (except signal)
    ]


def fill_histograms(ak_arr, empty_hists, spmodes=None, weights=None):
    """Fill every histogram in ``empty_hists`` once per SP mode and cut level.

    Parameters
    ----------
    ak_arr : array of event records
        Must provide the fields ``spmode``, ``nTRK``, ``Lambda0FlightLen``,
        ``BpostFitMes``, ``BpostFitDeltaE`` and one field per histogram key.
        Assumes the ``B*`` and ``Lambda0*`` fields are per-candidate (jagged)
        arrays that share one structure, since their masks are combined
        with ``&`` -- TODO confirm.
    empty_hists : dict
        Maps a variable name to a histogram whose ``fill`` accepts the keywords
        ``var``, ``SP``, ``cuts`` and ``weight``. The histograms are filled in
        place, so calling this twice adds the entries twice.
    spmodes : list of str, optional
        SP modes to fill; defaults to ``['998']``.
    weights : mapping of str to float, optional
        Per-mode fill weight. When omitted every mode is filled with weight 1.0
        (the previous list default could not be indexed by a mode string and
        raised ``TypeError``). Mode ``'-999'`` always uses the hard-coded
        weight 0.005, whatever ``weights`` contains.

    Returns
    -------
    str
        Always ``"done"``.

    Cut levels (the label on the ``cuts`` axis):
      "0": events of the SP mode; candidates with Lambda0FlightLen >= 0
      "1": as "0" plus nTRK >= 5
      "2": as "1" but candidates with Lambda0FlightLen >= 1
      "3": as "2" plus the fit region (mES > 5.2, |dE| < 0.2), with the
           blinded box (mES > 5.27, |dE| < 0.07) removed for every mode
           except '-999'

    The candidate-level part of a cut is only applied to variables whose name
    starts with 'B' or 'Lambda0'; those are flattened before filling. All other
    variables are filled per event, with the event-level selection only.
    """
    if spmodes is None:
        spmodes = ['998']
    if not empty_hists:
        return "done"

    # Independent of both the variable and the SP mode: compute once.
    mask_ntrk = (ak_arr['nTRK'] >= 5)

    # The masks depend on the SP mode only, so build them once per mode
    # instead of once per (variable, mode) pair.
    per_mode = []
    for spmode in spmodes:
        if spmode == '-999':
            weight = .005
        elif weights is None:
            weight = 1.0
        else:
            weight = weights[spmode]
        per_mode.append((spmode, weight, _build_cut_masks(ak_arr, spmode, mask_ntrk)))

    for key, hist in empty_hists.items():
        print(key)

        per_candidate = _is_candidate_level(key)
        # Select the column first, then mask it: only one branch is copied.
        column = ak_arr[key]

        for spmode, weight, cut_masks in per_mode:
            for cut_label, event_mask, candidate_mask in cut_masks:
                x = column[event_mask]
                if per_candidate:
                    x = ak.flatten(x[candidate_mask])
                hist.fill(var=x, SP=spmode, cuts=cut_label, weight=weight)

    return "done"


In [None]:
# Book a fresh set of empty histograms, one per entry in hist_defs.
# Re-run this cell before re-filling to start again from empty histograms.
all_hists = create_empty_histograms(hist_defs)

In [None]:
#spmodes=['998', '1005', '1235', '1237', '-999']
bkg_spmodes = ['998', '1005', '1235', '1237', '3981']
sig_spmodes = ['-999']

### Very important! If you redefine the SP modes at the bottom and run everything, you might get an error if they're different. Check your SP modes! 

# Background modes first, signal last.
spmodes = [*bkg_spmodes, *sig_spmodes]

# One scaling value per SP mode; scaling_value comes from the
# babar_analysis_tools module (imported as bat) and takes the mode as an int.
weights = {
    sp: bat.scaling_value(int(sp), dataset_information=dataset_information, cs_data=cs_data, plot=False, verbose=False)
    for sp in spmodes
}
# To switch the scaling off, use instead: weights = {sp: 1 for sp in spmodes}

print(weights)
print()
print(spmodes)

In [None]:
## SP HISTOGRAMS 

# Fill every histogram in all_hists from the SP sample, once per mode in
# `spmodes`, using the per-mode `weights`. The trailing ';' hides the "done"
# return value.
# NOTE(review): the histograms are filled in place, so re-running this cell
# without re-creating all_hists presumably adds the same entries again.
fill_histograms(ak_sp, all_hists, spmodes=spmodes, weights=weights);

In [None]:
# Uncomment to display the filled histograms.
#all_hists

In [None]:
## DATA HISTOGRAMS 

# Fill the same histograms from the data sample. Data events are selected
# with spmode '0' and filled with unit weight.
# NOTE(review): without a trailing ';' this cell echoes the "done" return value.
fill_histograms(ak_data, all_hists, spmodes=['0'], weights={'0':1})

In [None]:
# No-op: the trailing ';' suppresses the display. Remove it to inspect all_hists.
all_hists;

## Hist plotting

In [None]:
def plot_histograms(all_hists, vars=[], bkg_spmodes=['998'], datamodes=['0'], sig_spmodes=['-999'], cut='0', save= True, overlay_data=True, only_stacked=False, fixed_grid=None):
    """Plot stacked background, signal and (optionally) data for each variable.

    Parameters
    ----------
    all_hists : dict
        Maps a variable name to a histogram with the axes ``var``, ``SP``
        and ``cuts``.
    vars : list of str
        Variables to plot; an empty list means every key of ``all_hists``.
    bkg_spmodes, sig_spmodes, datamodes : list of str
        Categories on the ``SP`` axis drawn as the filled stack, as the
        'signal' step line and as the 'Data' error bars respectively.
        (The list defaults are only read, never modified.)
    cut : str
        Category on the ``cuts`` axis to plot.
    save : bool
        Write PNG files into ``./BNV_pLambda_plots`` (created on demand).
    overlay_data : bool
        Draw the data points on top of the stack.
    only_stacked : bool
        If True draw only the stacked comparison; otherwise also draw one
        panel per individual mode (stack in panel 1, modes in panels 2...).
    fixed_grid : tuple (nrows, ncols), optional
        Only used together with ``only_stacked``: all variables share one
        figure with this subplot layout.

    Output files
    ------------
    * breakdown mode:     ``plot_hist_cut{cut}_{var}.png`` per variable
    * stacked only:       ``plot_hist_cut{cut}_ONLY_STACKED_{var}.png`` per variable
    * stacked + grid:     a single
      ``plot_hist_cut{cut}_ONLY_STACKED_FIXED_GRID_{var1}_{var2}....png``
      (no per-variable file: the grid is saved once it is complete, so
      half-filled grids no longer overwrite the per-variable breakdown plots)
    """
    grid_mode = bool(only_stacked and fixed_grid)

    # Output directory; it is only created when something is going to be saved.
    path = os.path.join(os.getcwd(), "BNV_pLambda_plots")
    if save:
        os.makedirs(path, exist_ok=True)

    if not vars:
        vars = list(all_hists.keys())

    ### color scheme dictionary (plain colour names, one per mode)
    cd = {
        "998":  "tab:blue",
        "1005": "tab:orange",
        "-999": "tab:brown",
        "1235": "tab:green",
        "1237": "tab:red",
        "3981": "tab:purple",
        "3429": "tab:pink",
        "0":    "tab:cyan",
    }

    print(bkg_spmodes)

    # Breakdown layout: panel 1 is the stack, then one panel per mode.
    # Three columns; three rows unless more are needed to fit every mode.
    all_modes = list(bkg_spmodes) + list(sig_spmodes) + list(datamodes)
    breakdown_cols = 3
    breakdown_rows = max(3, -(-(1 + len(all_modes)) // breakdown_cols))

    if grid_mode:
        # plt.subplot takes (nrows, ncols, index), so the figure width scales
        # with the number of columns and its height with the number of rows.
        nrows, ncols = fixed_grid[0], fixed_grid[1]
        plt.figure(figsize=(ncols * 5, nrows * 3))

    for axes_idx, var in enumerate(vars):

        h = all_hists[var]

        if grid_mode:            # one shared figure, one subplot per variable
            plt.subplot(nrows, ncols, axes_idx+1)
        elif only_stacked:       # one small figure per variable
            plt.figure(figsize=(5,3))
        else:                    # one large figure per variable, stack in panel 1
            plt.figure(figsize=(18, 4 * breakdown_rows))
            plt.subplot(breakdown_rows, breakdown_cols, 1)

        h[:,bkg_spmodes,cut].stack('SP')[:].project('var').plot(stack=True, histtype="fill")
        h[:,sig_spmodes,cut].project('var').plot(histtype="step", color='yellow', label= "signal")

        if overlay_data:
            h[:,datamodes,cut].project('var').plot(histtype="errorbar", color='black', label='Data')

        plt.legend()
        plt.xlabel(plt.gca().get_xlabel(), fontsize=18)

        # If we are only plotting the stacked histograms, then we can skip over plotting them individually
        if not only_stacked:
            for idx, spmode in enumerate(all_modes):
                plt.subplot(breakdown_rows, breakdown_cols, idx+2)

                style = {"histtype": "fill", "label": spmode}
                colour = cd.get(str(spmode))
                if colour is not None:
                    # Modes without an entry in cd fall back to the default colour cycle.
                    style["color"] = colour

                h[:,spmode,cut].project('var').plot(**style)
                plt.legend()
                plt.xlabel(plt.gca().get_xlabel(), fontsize=18)

        plt.tight_layout()

        if save and not grid_mode:
            if only_stacked:
                outfilename=f"plot_hist_cut{cut}_ONLY_STACKED_{var}.png"
            else:
                outfilename=f"plot_hist_cut{cut}_{var}.png"

            plt.savefig(f"{path}/{outfilename}")

    if save and grid_mode:
        # name of .png saved to computer based on fields specified on function call
        varnames = "_".join(vars)
        outfilename=f"plot_hist_cut{cut}_ONLY_STACKED_FIXED_GRID_{varnames}.png"

        plt.savefig(f"{path}/{outfilename}")



In [None]:
# Quick check of the background mode list and of the "_".join(...) idiom
# that plot_histograms uses to build its fixed-grid output file name.
print(bkg_spmodes)

print("_".join(bkg_spmodes))



## Plot Area 

In [None]:
# Every booked variable, for the "plot everything" calls below.
all_vars = list(all_hists.keys())

# Example calls; uncomment the one you need. `cut` picks the cut level
# ('0'..'3') that fill_histograms stored on the 'cuts' axis.
#plot_histograms(all_hists, vars=['Lambda0_unc_Mass'], bkg_spmodes=bkg_spmodes, sig_spmodes=sig_spmodes, datamodes=['0'], cut='2');
#plot_histograms(all_hists, vars=['BpostFitMes'], bkg_spmodes=bkg_spmodes, sig_spmodes=sig_spmodes, datamodes=['0'], cut='3');
#plot_histograms(all_hists, vars=['BpostFitDeltaE'], bkg_spmodes=bkg_spmodes, sig_spmodes=sig_spmodes, datamodes=['0'], cut='0');
#plot_histograms(all_hists, vars=['BpostFitDeltaE'], bkg_spmodes=bkg_spmodes, sig_spmodes=sig_spmodes, datamodes=['0'], cut='0', overlay_data=False);
#plot_histograms(all_hists, vars=all_vars, bkg_spmodes=bkg_spmodes, sig_spmodes=sig_spmodes, datamodes=['0'], cut='2');

#plot_histograms(all_hists, vars=["Lambda0_unc_Mass"], bkg_spmodes=bkg_spmodes, sig_spmodes=sig_spmodes, datamodes=['0'], cut='0', overlay_data=True, only_stacked=True, save= False);

# Four stacked comparisons after the last cut level, on one 2x2 figure, saved to disk.
plot_histograms(all_hists, vars=["BpostFitMes", "BpostFitDeltaE", "Lambda0_unc_Mass", "BCosSphr"], bkg_spmodes=bkg_spmodes, sig_spmodes=sig_spmodes, datamodes=['0'], cut='3', overlay_data=True, only_stacked=True, save= True, fixed_grid=(2,2));


## Blinding and fitting region

In [None]:
# Scatter plots of dE vs mES for signal MC (spmode '-999'), showing what the
# blinding box and the fit region keep or remove.

# Per-candidate quality cut and event-level signal selection.
mask = ak_sp['Lambda0FlightLen']>=0

sigmask = ak_sp['spmode'] == '-999'


mes = ak.flatten(ak_sp['BpostFitMes'][mask & sigmask])
de  = ak.flatten(ak_sp['BpostFitDeltaE'][mask & sigmask])

norg = len(mes)
print(f"# original: {norg}")

# Blinded signal box and the wider region used for the fit
blinding_mask = (mes>5.27) & ((de>-.07) & (de<.07)) 
fitarea_mask   =  (mes>5.2) & ((de>-.2) & (de<.2)) 

# (subplot position in the 3x2 grid, selection); None means "all candidates".
# Position 2 is intentionally left empty.
panels = [
    (1, None),                            # everything
    (3, blinding_mask),                   # inside the blinded box
    (4, ~blinding_mask),                  # outside the blinded box
    (5, fitarea_mask),                    # fit region
    (6, fitarea_mask & ~blinding_mask),   # fit region with the box removed
]

plt.figure(figsize=(12,12))
for position, selection in panels:
    plt.subplot(3,2,position)
    if selection is None:
        plt.plot(mes, de, '.', markersize=0.5, alpha=0.5)
    else:
        plt.plot(mes[selection], de[selection], '.', markersize=0.5, alpha=0.5)
    plt.xlim(5.2, 5.3)
    plt.ylim(-0.5, 0.5)

nblind = len(mes[blinding_mask])
print(f"# blinded: {nblind}")

nnotblind = len(mes[~blinding_mask])
print(f"# notblinded: {nnotblind}")

# Guard against an empty selection (e.g. a file without signal events),
# which used to raise ZeroDivisionError. The counts are reported once;
# a second, identical report was removed.
frac_blinded = nblind/norg if norg else float('nan')
print(f"frac blinded: {frac_blinded}")


In [None]:
# Scatter plots of dE vs mES for everything in the SP sample EXCEPT signal MC
# (spmode != '-999'), showing what the blinding box and the fit region keep
# or remove.

# Per-candidate quality cut and event-level selection.
mask = ak_sp['Lambda0FlightLen']>=0

# NOTE: despite its name, `sigmask` selects the NON-signal events in this cell.
sigmask = ak_sp['spmode'] != '-999'


mes = ak.flatten(ak_sp['BpostFitMes'][mask & sigmask])
de  = ak.flatten(ak_sp['BpostFitDeltaE'][mask & sigmask])

norg = len(mes)
print(f"# original: {norg}")

# Blinded signal box and the wider region used for the fit
blinding_mask = (mes>5.27) & ((de>-.07) & (de<.07)) 
fitarea_mask   =  (mes>5.2) & ((de>-.2) & (de<.2)) 

# (subplot position in the 3x2 grid, selection); None means "all candidates".
# Position 2 is intentionally left empty.
panels = [
    (1, None),                            # everything
    (3, blinding_mask),                   # inside the blinded box
    (4, ~blinding_mask),                  # outside the blinded box
    (5, fitarea_mask),                    # fit region
    (6, fitarea_mask & ~blinding_mask),   # fit region with the box removed
]

plt.figure(figsize=(12,12))
for position, selection in panels:
    plt.subplot(3,2,position)
    if selection is None:
        plt.plot(mes, de, '.', markersize=0.5, alpha=0.5)
    else:
        plt.plot(mes[selection], de[selection], '.', markersize=0.5, alpha=0.5)
    plt.xlim(5.2, 5.3)
    plt.ylim(-0.5, 0.5)

nblind = len(mes[blinding_mask])
print(f"# blinded: {nblind}")

nnotblind = len(mes[~blinding_mask])
print(f"# notblinded: {nnotblind}")

# Guard against an empty selection, which used to raise ZeroDivisionError.
# The counts are reported once; a second, identical report was removed.
frac_blinded = nblind/norg if norg else float('nan')
print(f"frac blinded: {frac_blinded}")


## Sandbox

In [None]:
# Side-by-side nTRK distributions: SP sample (left) and data (right).
plt.figure(figsize=(8,4))

plt.subplot(1,2,1)
x = ak_sp['nTRK']
plt.hist(x, bins=20, range=(0,20))
plt.xlabel("nTRK")
plt.title("SP")


plt.subplot(1,2,2)
x = ak_data['nTRK']
plt.hist(x,bins=20, range=(0,20))
plt.xlabel("nTRK")
# The ';' belongs at the end of the last statement: it suppresses the cell's
# output. On a line of its own it is not valid Python.
plt.title("Data");

In [None]:
# Demo 1: assigning to a name again simply replaces its value.
x = 10

print(x)    # 10

x = 20

print(x)    # 20

In [None]:
# Demo 2: after `y = x`, rebinding x to a new value does not affect y.
x = 10

print(x)    # 10

y = x

print(y)    # 10

x = 20      # x now names a different object; y still names 10

print(x,y)  # 20 10

In [None]:
# Demo 3: `y = x` does NOT copy a list; both names refer to the same list,
# so changing an element through x is visible through y.
x = [10]

print(x)    # [10]

y = x       # y is another name for the same list object

print(y)    # [10]

x[0] = 20   # modifies the shared list in place

print(x,y)  # [20] [20]

In [None]:
# Demo 4: the same aliasing happens with numpy arrays, in both directions.
x = np.array([10])

print(x)    # [10]

y = x       # y is another name for the same array object

print(y)    # [10]

x[0] = 20   # in-place change through x ...

print(x,y)  # [20] [20]



y[0] = 999  # ... and through y: x sees it as well

print(x,y)  # [999] [999]

In [None]:
# Make a copy of the actual values and give it a new name!
# list(x) builds a new list, so x and y are independent afterwards.
x = [10]

print(x)    # [10]

y = list(x)

print(y)    # [10]

x[0] = 20   # only x changes

print(x,y)  # [20] [10]

In [None]:
# Make a new array!
# x.copy() returns a new array, so x and y are independent afterwards.
x = np.array([10])

print(x)    # [10]

y = x.copy()

print(y)    # [10]

x[0] = 20   # only x changes

print(x,y)  # [20] [10]

In [None]:
# Sandbox: BSphr for SP mode '998', keeping every category on the 'cuts' axis
# (the slice ':'), projected onto the variable axis and drawn as error bars.
h = all_hists['BSphr']

h[:,['998'],:].project('var').plot(histtype="errorbar", color='black')

In [None]:
# No-op: the trailing ';' suppresses the display. Remove it to inspect all_hists.
all_hists;

In [None]:
# Sandbox: a stand-alone histogram of Lambda0postFitFlight (100 bins, 0-20)
# with growing 'SP' and 'cuts' category axes and weighted storage.
h = Hist.new.Reg(100, 0, 20, name="Lambda0postFitFlight", label=r"") \
         .StrCat([], name="SP", label="SP modes", growth=True)\
         .StrCat([], name="cuts", label="Cuts", growth=True)\
         .Weight()

#for key,value in ak_sp.items():
#    x = value['BpostFitMes'][:,0]
#    h.fill(BpostFitMes=x, SP=key, cuts=f"0", weight=1)

# NOTE: this overwrites the notebook-level `spmodes` list (background modes
# only here), so cells run afterwards see this value.
spmodes = ['998','1005', '1235', '1237', '3981']
for spmode in spmodes:
    mask = ak_sp.spmode == spmode
    # [:,0] keeps only the first entry of each event.
    # NOTE(review): presumably this fails for events with no entries -- confirm.
    x = ak_sp[mask]['Lambda0postFitFlight'][:,0]
    h.fill(Lambda0postFitFlight=x, SP=spmode, cuts=f"0", weight=1)

In [None]:
# Stack the SP modes of the sandbox histogram `h` and draw them filled.
h.stack('SP')[:].project('Lambda0postFitFlight').plot(stack=True, histtype="fill")

plt.legend()

In [None]:
# One panel per SP mode of the sandbox histogram `h`, on a 2x3 grid.
plt.figure(figsize=(18,12))

# NOTE: this overwrites the notebook-level `spmodes` list again.
spmodes = ['998','1005', '1235', '1237', '3981']
for idx,spmode in enumerate(spmodes):
    plt.subplot(2,3,idx+1)
    h[:,spmode,:].project('Lambda0postFitFlight').plot(histtype="fill", label=spmode)
    plt.legend()

In [None]:


# Sandbox: a stand-alone mES histogram (100 bins, 5.2-5.3) with growing
# 'SP' and 'cuts' category axes and weighted storage.
j = Hist.new.Reg(100, 5.2, 5.3, name="BpostFitMes", label=r"M$_{ES}$ [GeV/c$^2$]") \
         .StrCat([], name="SP", label="SP modes", growth=True)\
         .StrCat([], name="cuts", label="Cuts", growth=True)\
         .Weight()


# NOTE: this overwrites the notebook-level `bkg_spmodes` and `spmodes` lists
# with a different set of modes ('991' instead of '3981'). Cells run afterwards
# see these values; re-run the cell that defines the SP modes before plotting
# from all_hists again.
bkg_spmodes = ['998','1005', '1235', '1237', '991']#, '3429']
spmodes = bkg_spmodes + ['-999']
#spmodes = ['991']

for spmode in spmodes:
    mask = ak_sp.spmode == spmode
    # [:,0] keeps only the first entry of each event.
    x = ak_sp[mask]['BpostFitMes'][:,0]

    # Unit weight for every mode except '-999', which is scaled by 0.02.
    weight = 1
    if spmode == '-999':
        weight = 0.02
    
    j.fill(BpostFitMes=x, SP=spmode, cuts=f"0", weight=weight)

In [None]:
# Stack every SP category of `j` (this includes '-999'), then overlay the
# '-999' category on its own as error bars.
# NOTE(review): the overlay is labelled "data" but it draws SP mode '-999',
# which other cells of this notebook treat as signal MC -- confirm the label.
j[:,:,:].stack('SP')[:].project('BpostFitMes').plot(stack=True, histtype="fill")
j[:,['-999'],:].project('BpostFitMes').plot(histtype="errorbar", color='black', label= "data")

plt.legend()

In [None]:
# Same as the previous plot, but the stack holds only the modes listed in
# `bkg_spmodes`; the '-999' category is overlaid as (unlabelled) error bars.
# Relies on `bkg_spmodes` containing only modes that were filled into `j`.
j[:,bkg_spmodes,:].stack('SP')[:].project('BpostFitMes').plot(stack=True, histtype="fill")
j[:,['-999'],:].project('BpostFitMes').plot(histtype="errorbar", color='black')

plt.legend()

In [None]:
# Sandbox: Lambda0 mass in data for candidates with a non-negative flight length.
mask = ak_data['Lambda0FlightLen'] >=0
# Flatten the per-event lists into one flat array for plt.hist.
x = ak.flatten(ak_data['Lambda0_unc_Mass'][mask])

plt.hist(x,bins=100);

# Print the (unflattened) masked array for a quick look at its structure.
print(ak_data['Lambda0_unc_Mass'][mask])