In [9]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import ipywidgets as w
from ipywidgets import interact, interactive, fixed, interact_manual

In [10]:
# loads all the data files from a specified folder (except total_dataset and acceptance)

folder = "data/" # path to folder with data (keep the trailing separator, paths are concatenated)
extension = ".pkl" # extension of the data to be loaded
def load_file(name, folder = folder, ext = extension):
    """
    Loads a single pandas DF from disk

    name   - file name; the file that is read is `folder + name + ext`, so
             `name` may already carry its extension when ext is ''
    folder - path prefix, joined by plain string concatenation
    ext    - ".pkl" is read as a pickle, ".csv" as CSV; with any other value
             the file is tried as a pickle first and as CSV if that fails

    Returns the loaded DF
    """
    path = folder + name + ext
    # NOTE: unpickling runs code stored in the file - only load trusted data
    if ext == ".pkl":
        return pd.read_pickle(path)
    if ext == ".csv":
        return pd.read_csv(path)
    # Unknown extension: guess the format. Only ordinary errors trigger the
    # CSV fallback, so Ctrl-C / kernel interrupt is no longer swallowed here.
    try:
        return pd.read_pickle(path)
    except Exception:
        return pd.read_csv(path)

def load_dir(dir_path, ext = extension):
    """
    Looks through folder and loads all pandas DFs from it

    dir_path - folder to scan; the files are read from this same folder
    ext      - only files whose name ends with this extension are loaded

    Files whose name starts with "acc" or "tot" are skipped

    Returns dictionary of DFs, keyed by file name without the extension
    """
    # load_file builds its path by plain concatenation, so make sure the
    # folder ends with a separator ("./data" -> "./data/")
    prefix = os.path.join(dir_path, "")
    files = {}
    for file in os.listdir(dir_path):
        if not file.endswith(ext) or file.startswith(("acc", "tot")):
            continue
        # strip exactly the matched extension (correct for any extension length)
        key = file[:len(file) - len(ext)]
        # pass name and extension separately so the reader matching `ext` is used
        files[key] = load_file(key, folder = prefix, ext = ext)
    return files

# Scan the configured data folder instead of repeating its path as a literal,
# so that changing `folder` cannot leave listing and loading out of sync
DFs = load_dir(folder)

keys = DFs.keys() # live view of the dataset names, reused by the cells below
print(keys)


dict_keys(['jpsi', 'jpsi_mu_k_swap', 'jpsi_mu_pi_swap', 'k_pi_swap', 'phimumu', 'pKmumu_piTok_kTop', 'pKmumu_piTop', 'psi2S', 'sig'])


In [11]:
# dictionary that stores the initial number of elements in each datafile
N = {name: len(frame) for name, frame in DFs.items()}
print(N)

# Threshold values that mean "no cut on this side". They have to match the
# limits of the range sliders that feed sensitivity().
_OPEN_MIN = 0
_OPEN_MAX_PROB = 1.0
_OPEN_MAX = 1_000_000

# ProbNN column suffixes, and the suffix that is the particle's own hypothesis
_HYPOTHESES = ["mu", "p", "k", "pi"]
_OWN_HYPOTHESIS = {"mu_plus": "mu", "mu_minus": "mu", "K": "k", "Pi": "pi"}


def _pid_score(df, particle):
    """
    Combined ProbNN score of one particle: the probability column of its own
    hypothesis times (1 - probability) for each of the three other hypotheses

    Returns a Series aligned with df
    """
    prefix = particle + "_MC15TuneV1_ProbNN"
    own = _OWN_HYPOTHESIS[particle]
    score = df[prefix + own]
    # walk the other hypotheses back to front so the factors are multiplied in
    # the same order as before and the floats come out bit-identical
    for other in reversed(_HYPOTHESES):
        if other != own:
            score = score * (1 - df[prefix + other])
    return score


def _apply_threshold(df, widget, var):
    """
    Applies threshold on a df

    widget - a tuple of min and max accepted values for the parameter; a value
             sitting at the open-end constant leaves that side uncut
    var    - on which variable to cut the df; names ending with "prob" are cut
             on the combined ProbNN score of that particle

    Returns the cut df (the very same object when both ends are open)
    """
    low, high = widget[0], widget[1]
    is_prob = var.endswith("prob")
    cut_low = low != _OPEN_MIN
    cut_high = high != (_OPEN_MAX_PROB if is_prob else _OPEN_MAX)
    if not (cut_low or cut_high):
        # nothing to cut: the column is not even looked up
        return df

    # "<particle>_prob" -> "<particle>"
    values = _pid_score(df, var[:-5]) if is_prob else df[var]
    # both comparisons are strict, exactly as when they were applied one by one
    if cut_low and cut_high:
        return df[(values > low) & (values < high)]
    if cut_low:
        return df[values > low]
    return df[values < high]


def sensitivity(t_muP_prob, t_muP_P, t_muP_PT, t_muP_PE, t_muP_PZ, t_muP_CHI,
               t_muM_prob, t_muM_P, t_muM_PT, t_muM_PE, t_muM_PZ, t_muM_CHI,
               t_K_prob, t_K_P, t_K_PT, t_K_PE, t_K_PZ, t_K_CHI,
               t_Pi_prob, t_Pi_P, t_Pi_PT, t_Pi_PE, t_Pi_PZ, t_Pi_CHI,
               t_B0_MM, t_B0_EndV, t_B0_IPChi, t_B0_FDChi, t_B0_FD,
               t_Ks_MM, t_Ks_EndV, t_Ks_FDChi,
               t_Jpsi_MM, t_Jpsi_EndV, t_Jpsi_FDChi):
    """
    Function is given thresholds through interact
    It applies them to every dataset in DFs and plots, per file, the ratio of
    how much data is left (rows after all cuts / initial rows stored in N)

    Every argument is a (min, max) tuple for the variable it is paired with
    in the table below
    """
    # threshold -> variable it cuts on, in the order the cuts are applied
    cuts = [
        (t_muP_prob, "mu_plus_prob"),
        (t_muP_P, "mu_plus_P"),
        (t_muP_PT, "mu_plus_PT"),
        (t_muP_PE, "mu_plus_PE"),
        (t_muP_PZ, "mu_plus_PZ"),
        (t_muP_CHI, "mu_plus_IPCHI2_OWNPV"),

        (t_muM_prob, "mu_minus_prob"),
        (t_muM_P, "mu_minus_P"),
        (t_muM_PT, "mu_minus_PT"),
        (t_muM_PE, "mu_minus_PE"),
        (t_muM_PZ, "mu_minus_PZ"),
        (t_muM_CHI, "mu_minus_IPCHI2_OWNPV"),

        (t_K_prob, "K_prob"),
        (t_K_P, "K_P"),
        (t_K_PT, "K_PT"),
        (t_K_PE, "K_PE"),
        (t_K_PZ, "K_PZ"),
        (t_K_CHI, "K_IPCHI2_OWNPV"),

        (t_Pi_prob, "Pi_prob"),
        (t_Pi_P, "Pi_P"),
        (t_Pi_PT, "Pi_PT"),
        (t_Pi_PE, "Pi_PE"),
        (t_Pi_PZ, "Pi_PZ"),
        (t_Pi_CHI, "Pi_IPCHI2_OWNPV"),

        (t_B0_MM, "B0_MM"),
        (t_B0_EndV, "B0_ENDVERTEX_CHI2"),
        (t_B0_IPChi, "B0_IPCHI2_OWNPV"),
        (t_B0_FDChi, "B0_FDCHI2_OWNPV"),
        (t_B0_FD, "B0_FD_OWNPV"),

        (t_Ks_MM, "Kstar_MM"),
        (t_Ks_EndV, "Kstar_ENDVERTEX_CHI2"),
        (t_Ks_FDChi, "Kstar_FDCHI2_OWNPV"),

        (t_Jpsi_MM, "J_psi_MM"),
        (t_Jpsi_EndV, "J_psi_ENDVERTEX_CHI2"),
        (t_Jpsi_FDChi, "J_psi_FDCHI2_OWNPV"),
    ]

    ratios = {}
    # Applying all thresholds for all datasets
    for key in keys:
        df = DFs[key]
        if len(df) != N[key]:
            print("Was expecting to have ", N[key], " but have ", len(df))

        for bounds, var in cuts:
            df = _apply_threshold(df, bounds, var)

        # a file that was empty to begin with has no defined ratio
        ratios[key] = len(df) / N[key] if N[key] else float("nan")

    # Plotting ratios result
    names = list(keys)
    r = [ratios[name] for name in names]
    plt.title("Sensitivity")
    plt.plot(r, '-o')
    plt.xticks(range(len(names)), labels = names, rotation = 90)
    plt.xlabel("files")
    plt.ylabel("ratio")
    # reference lines span as many ticks as there are files (was a fixed 9)
    plt.hlines([1, 0.5, 0.1], 0, len(names), colors = ['green', 'orange', 'red'], linestyles = '--')
    plt.show()

{'jpsi': 729524, 'jpsi_mu_k_swap': 975, 'jpsi_mu_pi_swap': 2839, 'k_pi_swap': 22106, 'phimumu': 210698, 'pKmumu_piTok_kTop': 67526, 'pKmumu_piTop': 68153, 'psi2S': 511646, 'sig': 8802}


In [12]:
# Values for now are set so there is no cutting initially
# Ranges are set to be bigger than range of any variable
# Ranges and initial values can be changed later to be more appropriate for each parameter -
# !!!but the open-end (min / max) values used when the thresholds are applied
# inside sensitivity() then have to be changed to match
def _range_slider(name, upper, step):
    """
    Builds a range slider over [0, upper] that starts fully open (no cut),
    labelled with the name of the threshold it controls
    """
    return w.FloatRangeSlider(value=[0, upper], min=0, max=upper, step=step, description=name)

# one slider per sensitivity() argument, in signature order
_slider_names = (
    "t_muP_prob", "t_muP_P", "t_muP_PT", "t_muP_PE", "t_muP_PZ", "t_muP_CHI",
    "t_muM_prob", "t_muM_P", "t_muM_PT", "t_muM_PE", "t_muM_PZ", "t_muM_CHI",
    "t_K_prob", "t_K_P", "t_K_PT", "t_K_PE", "t_K_PZ", "t_K_CHI",
    "t_Pi_prob", "t_Pi_P", "t_Pi_PT", "t_Pi_PE", "t_Pi_PZ", "t_Pi_CHI",
    "t_B0_MM", "t_B0_EndV", "t_B0_IPChi", "t_B0_FDChi", "t_B0_FD",
    "t_Ks_MM", "t_Ks_EndV", "t_Ks_FDChi",
    "t_Jpsi_MM", "t_Jpsi_EndV", "t_Jpsi_FDChi",
)

# probabilities live in [0, 1]; every other variable gets the wide generic range
# (the Pi probability slider used to be mislabelled 't_mPi_prob')
interact(sensitivity,
         **{name: _range_slider(name, 1, 0.05) if name.endswith("_prob")
                  else _range_slider(name, 1_000_000, 100_000)
            for name in _slider_names})

interactive(children=(FloatRangeSlider(value=(0.0, 1.0), description='t_muP_prob', max=1.0, step=0.05), FloatR…

<function __main__.sensitivity(t_muP_prob, t_muP_P, t_muP_PT, t_muP_PE, t_muP_PZ, t_muP_CHI, t_muM_prob, t_muM_P, t_muM_PT, t_muM_PE, t_muM_PZ, t_muM_CHI, t_K_prob, t_K_P, t_K_PT, t_K_PE, t_K_PZ, t_K_CHI, t_Pi_prob, t_Pi_P, t_Pi_PT, t_Pi_PE, t_Pi_PZ, t_Pi_CHI, t_B0_MM, t_B0_EndV, t_B0_IPChi, t_B0_FDChi, t_B0_FD, t_Ks_MM, t_Ks_EndV, t_Ks_FDChi, t_Jpsi_MM, t_Jpsi_EndV, t_Jpsi_FDChi)>