In [1]:
import numpy as np
import pandas as pd
import re
import itertools

In [2]:
from glob import glob
def get_file_list(base_dir, file_ext = None, recglob = False):
    '''
    S. D. Butalla
    2022/08/17 - v0
    
    Generic function that retrieves a list of files in 
    a specified directory. An option for recursive
    globbing for all subdirectories given a base 
    directory is also available.
    
    Dependencies:
    
    glob
    
    Positional arguments:
    base_dir          : (string) The base directory to
                        retrieve the files from.
    Optional arguments:
    file_ext          : (string) File extension. If not
                        None, all contents of directory
                        will be returned (including    
                        directories).
    recglob           : (bool) Enable recursive 
                        globbing.
    
    Output:
    total_file_list   : (list; string) List of strings
                        of complete file paths
    file_names        : (list; string) List of strings
                        of file names (including those 
                        in subdirectories!).
    '''
    if file_ext is not None:
        if file_ext[0] != ".":
            file_ext = "." + file_ext
    else:
        file_ext = ""

    if recglob:
        total_file_list = glob(f'{base_dir}/*/*%s' % file_ext)
    else:
        total_file_list = glob(f'{base_dir}/*%s' % file_ext)
    
    file_names = [file.split("/")[-1] for file in total_file_list]
    return total_file_list, file_names

In [3]:
base_dirs = ["2017_CutFlow_tables/", "2018_CutFlow_tables/"]
base_dir  = base_dirs[1]
total_file_list, file_names = get_file_list(base_dir, ".tex")

In [4]:
total_file_list

['2018_CutFlow_tables/MZD_100_MSD_50.tex',
 '2018_CutFlow_tables/MZD_100_SD_10.tex',
 '2018_CutFlow_tables/MZD_100_SD_15.tex',
 '2018_CutFlow_tables/MZD_100_SD_20.tex',
 '2018_CutFlow_tables/MZD_100_SD_25.tex',
 '2018_CutFlow_tables/MZD_100_SD_30.tex',
 '2018_CutFlow_tables/MZD_100_SD_35.tex',
 '2018_CutFlow_tables/MZD_100_SD_40.tex',
 '2018_CutFlow_tables/MZD_100_SD_45.tex',
 '2018_CutFlow_tables/MZD_100_SD_5.tex',
 '2018_CutFlow_tables/MZD_110_MSD_10.tex',
 '2018_CutFlow_tables/MZD_110_MSD_15.tex',
 '2018_CutFlow_tables/MZD_110_MSD_20.tex',
 '2018_CutFlow_tables/MZD_110_MSD_25.tex',
 '2018_CutFlow_tables/MZD_110_MSD_30.tex',
 '2018_CutFlow_tables/MZD_110_MSD_35.tex',
 '2018_CutFlow_tables/MZD_110_MSD_40.tex',
 '2018_CutFlow_tables/MZD_110_MSD_45.tex',
 '2018_CutFlow_tables/MZD_110_MSD_5.tex',
 '2018_CutFlow_tables/MZD_110_MSD_50.tex',
 '2018_CutFlow_tables/MZD_110_MSD_55.tex',
 '2018_CutFlow_tables/MZD_125_MSD_10.tex',
 '2018_CutFlow_tables/MZD_125_MSD_20.tex',
 '2018_CutFlow_tables/

In [5]:
keys = [key.split(".")[0] for key in file_names] # return keys to be used in dictionary later

In [7]:
all_text = {} # to store all text from each file for further processing
for file in range(len(total_file_list)):
    with open(total_file_list[file], 'r') as text:
        all_text[total_file_list[file].split("/")[-1].split(".")[0]] = text.readlines()

# Processing ideas
Files with MZD_YYY.tex have multiple samples with several $m_{S_{D}}$. Need to process these separately, as they are not all uniform and contain more than one cutflow table for different $m_{S_{D}}$.

In [3]:
class colors:
    WHITE   = '\033[97m'
    CYAN    = '\033[96m'
    MAGENTA = '\033[95m'
    BLUE    = '\033[94m'
    YELLOW  = '\033[93m'
    GREEN   = '\033[92m'
    RED     = '\033[91m'
    ORANGE  = '\033[38;5;208m'
    ENDC    = '\033[39m'
    
def print_info(string, color = colors.GREEN, endcolor = colors.ENDC):
    print(color + string + endcolor)   
    
def print_alert(string, color = colors.YELLOW, endcolor = colors.ENDC):
    print(color + string + endcolor)
    
def print_error(string, color = colors.RED, endcolor = colors.ENDC):
    print(color + string + endcolor)

# Final product

In [4]:
base_dirs = ["2017_CutFlow_tables/", "2018_CutFlow_tables/"]
base_dir  = base_dirs[1]
total_file_list, file_names = get_file_list(base_dir, ".tex")
keys = [key.split(".")[0] for key in file_names] # return keys to be used in dictionary later

In [5]:
keys_dict = {total_file_list[ii].split("/")[1].split(".")[0]: total_file_list[ii] for ii in range(len(total_file_list))}

In [6]:
keys_dict

{'MZD_100_MSD_50': '2018_CutFlow_tables/MZD_100_MSD_50.tex',
 'MZD_100_SD_10': '2018_CutFlow_tables/MZD_100_SD_10.tex',
 'MZD_100_SD_15': '2018_CutFlow_tables/MZD_100_SD_15.tex',
 'MZD_100_SD_20': '2018_CutFlow_tables/MZD_100_SD_20.tex',
 'MZD_100_SD_25': '2018_CutFlow_tables/MZD_100_SD_25.tex',
 'MZD_100_SD_30': '2018_CutFlow_tables/MZD_100_SD_30.tex',
 'MZD_100_SD_35': '2018_CutFlow_tables/MZD_100_SD_35.tex',
 'MZD_100_SD_40': '2018_CutFlow_tables/MZD_100_SD_40.tex',
 'MZD_100_SD_45': '2018_CutFlow_tables/MZD_100_SD_45.tex',
 'MZD_100_SD_5': '2018_CutFlow_tables/MZD_100_SD_5.tex',
 'MZD_110_MSD_10': '2018_CutFlow_tables/MZD_110_MSD_10.tex',
 'MZD_110_MSD_15': '2018_CutFlow_tables/MZD_110_MSD_15.tex',
 'MZD_110_MSD_20': '2018_CutFlow_tables/MZD_110_MSD_20.tex',
 'MZD_110_MSD_25': '2018_CutFlow_tables/MZD_110_MSD_25.tex',
 'MZD_110_MSD_30': '2018_CutFlow_tables/MZD_110_MSD_30.tex',
 'MZD_110_MSD_35': '2018_CutFlow_tables/MZD_110_MSD_35.tex',
 'MZD_110_MSD_40': '2018_CutFlow_tables/MZD_

In [7]:
global dataDir 
dataDir = "2018_cutflow_csv/"

In [8]:
def process_cutflow(file_list, to_pandas = True, save_csv = True, ret = True, verbose = False):
    '''
     S. D. Butalla
    2022/08/19 - v0
    
    Function that, given a list absolute file paths (.tex or .txt) 
    for cutflow tables, parses the files and stores the cutflow 
    tables in a dictionary (key = file name without extension).
    By default, the function places the extracted cutflow
    table in a pandas dataframe and saves the table to a 
    .csv.
    
    Dependencies:
    
    pandas
    re
    
    colors (custom class)
    
    Positional arguments:
    file_list          : (list; string) A list of the absolute file
                         paths for the cutflow table files.
                         
    Optional arguments:
    to_pandas          : (bool) If true, will convert each cutflow table
                         stored in a dictionary to a pandas dataframe. 
    save_csv           : (bool) If true, will save the pandas dataframe
                         as a .csv in the specified data directory
                         ('dataDir').
    ret                : (bool) If true, will return the various outputs
                         (see below).
    verbose            : (bool) If true, will print extra information.
    
    Output:
    cutflow_dict       : (dict) Dictionary of all cutflow tables. Keys are
                         the names of the files in the file list (without
                         the path or the file extension).
    epsilon_alpha      : (dict) Dictionary of the epsilon / alpha value
                         (keys are the same as in cutflow_dict).
    epsilon_alpha_err  : (dict) Dictionary of the epsilon / alpha
                         uncertainties (keys are the same as in cutflow_dict).
    Optional output:
    df_dict            : (dict) Dictionary of pandas dataframes of the cutflow
                         tables.
    '''
    
    if save_csv and not to_pandas:
        print_error("To save cutflow table as a csv the result dictionary must be converted to a pandas dataframe first. Set to_pandas = True.")
        
    verbose    = True
    
    keys_dict         = {file_list[ii].split("/")[1].split(".")[0]: file_list[ii] for ii in range(len(file_list))} # dict of key: file path
    cutflow_dict      = {} # stores all cutflow table information
    epsilon_alpha     = {} # stores all epsilon / alpha values
    epsilon_alpha_err = {} # stores all epsilon / alpha errors
    
    if to_pandas:
        df_dict = {}
    
    for key in keys_dict.keys():
        cut_num                = []
        selection              = []
        events                 = []
        tot_eff                = []
        rel_eff                = []
        tot_eff_err            = []
        rel_eff_err            = []
        cutflow_dict[key]      = {} # stores all cutflow table information
        epsilon_alpha[key]     = {} # stores all epsilon / alpha values
        epsilon_alpha_err[key] = {} # stores all epsilon / alpha errors
        
        if verbose:
            print_alert("Opening file %s\n" % keys_dict[key])
            
        file  = open(keys_dict[key])
        
        if verbose:
            print_alert("File opened successfully\n")
        
        lines = file.readlines()
        
        if verbose:
            print_alert("Processing file...")
        
        cnt = 0
        for line in lines:
            if line == "Here is the cut-flow-table:\n":
                if verbose:
                    print_alert("Cutflow table detected, processing...\n")
                cnt_data = 0 # counter to keep track of line number
                for data in lines[(cnt + 6):]: # iterate over the table data
                    if re.split('(\d+)', re.sub('[^A-Za-z0-9]+', '', data.split("&")[0]))[0] == "epsilonrecalphagen": # find end of table and extract epsilon/alpha and error
                        epsilonalpha    = data.split("&")[1].split("$")[0].strip()                   # clean text and formatting characters, strip spaces
                        epsilonalphaerr = data.split("&")[1].split("$")[2].split("hline")[0].strip() # clean text and formatting characters, strip spaces
                        
                        # Check for results (some tables don't have epsilon/alpha error vals)
                        if len(epsilonalpha) != 0:
                            epsilon_alpha[key] = float(epsilonalpha)
                        else:
                            epsilon_alpha[key] = None
                        
                        if len(epsilonalphaerr) != 0:
                            epsilon_alpha_err[key] = float(epsilonalphaerr)
                        else:
                            epsilon_alpha_err[key] = None    
                                                    
                        if verbose:
                            print_alert("Epsilon/alpha values:")
                            print_alert(21 * "*")
                            if type(epsilon_alpha[key]) is float:
                                print_alert("eps/alp = %f" % epsilon_alpha[key])
                            elif type(epsilon_alpha[key]) is None:
                                print_alert("eps/alp not in cutflow table")
                            
                            if type(epsilon_alpha_err[key]) is float:
                                print_alert("eps/alp err = %f" % epsilon_alpha_err[key])
                            elif type(epsilon_alpha_err[key]) is None:
                                 print_alert("eps/alp error not in cutflow table")
                            
                            print_alert(21 * "*" + "\n")
                                    
                        continue
                    else:
                        temp_str = data.split("&") # split data into list based on LaTeX alignment character '&'
                        if len(temp_str) == 1:     # Skip blank rows of the table (between gen and reco cuts, and reco cuts and epsilon/alpha) 
                            pass
                        else:
                            if cnt_data == 0:
                                header_lst = []
                                cnt_hdr    = 0
                                for text in temp_str:
                                    if cnt_hdr == 5:
                                        header_lst.append(re.split('(\d+)', re.sub('[^A-Za-z0-9]+', '', text.split("hline")[0]))) # remove LaTeX formatting command "hline" and newline characters
                                    else:
                                        header_lst.append(re.split('(\d+)', re.sub('[^A-Za-z0-9]+', '', text))) # remove spaces and special characters

                                    cnt_hdr += 1

                                header_titles = list(itertools.chain.from_iterable(header_lst)) # flatten list 
                                header_titles.insert(0, "CutNum") # Only '#' is present for the cut number, this is removing during the sub./split process, so add a string title
                                cnt_data += 1
                            elif cnt_data > 0: 
                                cut_number  = re.sub('[^A-Za-z0-9]+', '', temp_str[0].split(" ")[1])
                                cut_type = temp_str[0].split(" ")[3]

                                if len(temp_str) == 1:
                                    if re.sub('[^A-Za-z0-9]+', '', temp_str[0]) == "endtabular":
                                        if verbose:
                                            print_alert("End of table reached!\n")
                                        continue
                                else:
                                    cut_num.append(int(cut_number))
                                    selection.append(cut_type)
                                    #cut_num.append(first_two_cols[0])                     # COLUMN 0: store cut num as str
                                    #selection.append(first_two_cols[1])                        # COLUMN 0: store selection/cut type as str
                                    events.append(int(temp_str[1]))                            # COLUMN 2: store num events as int
                                    tot_eff.append(float(temp_str[2]))                         # COLUMN 3: store tot. eff. as float
                                    rel_eff.append(float(temp_str[3]))                         # COLUMN 4: store rel. eff. as float
                                    tot_eff_err.append(float(temp_str[4]))                     # COLUMN 5: store tot. eff. err. as float
                                    rel_eff_err.append(float(temp_str[5].split("hline")[0]))   # COLUMN 5: clean LaTeX formatting and store rel. eff. err. as float
                                    cnt_data += 1

                    data_lists = [cut_num, selection, events, tot_eff, rel_eff, tot_eff_err, rel_eff_err] # store all lists to make filling dictionary easier

                    dict_cnt = 0
                    for datum in data_lists: # fill dictionary with lists
                        cutflow_dict[key][header_titles[dict_cnt]] = datum
                        dict_cnt += 1

                    cnt += 1
            cnt += 1
            
        if to_pandas:
            if verbose:
                print_alert("Transferring dictionary to pandas dataframe\n")
            
            df_dict[key] = pd.DataFrame.from_dict(cutflow_dict[key])
            
            if save_csv:
                file_name = dataDir + "dataframe_%s.csv" % key
                if verbose:
                    print_alert("Saving pandas dataframe to %s\n" % file_name)
                
                df_dict[key].to_csv(file_name)
        
        
        file.close()
        
        if verbose:
            print_alert("File closed successfully\n\n\n\n")
    
    if ret:
        if to_pandas:
            return cutflow_dict, df_dict, epsilon_alpha, epsilon_alpha_err
        else:
            return cutflow_dict, epsilon_alpha, epsilon_alpha_err
            

In [9]:
cutflow_dict_2018, df_dict_2018, epsilon_alpha_2018, epsilon_alpha_err_2018 = process_cutflow(total_file_list, verbose = True)

[93mOpening file 2018_CutFlow_tables/MZD_100_MSD_50.tex
[39m
[93mFile opened successfully
[39m
[93mProcessing file...[39m
[93mCutflow table detected, processing...
[39m
[93mEpsilon/alpha values:[39m
[93m*********************[39m
[93meps/alp = 0.454000[39m
[93m*********************
[39m
[93mTransferring dictionary to pandas dataframe
[39m
[93mSaving pandas dataframe to 2018_cutflow_csv/dataframe_MZD_100_MSD_50.csv
[39m
[93mFile closed successfully



[39m
[93mOpening file 2018_CutFlow_tables/MZD_100_SD_10.tex
[39m
[93mFile opened successfully
[39m
[93mProcessing file...[39m
[93mCutflow table detected, processing...
[39m
[93mEpsilon/alpha values:[39m
[93m*********************[39m
[93meps/alp = 0.000000[39m
[93meps/alp err = nan[39m
[93m*********************
[39m
[93mTransferring dictionary to pandas dataframe
[39m
[93mSaving pandas dataframe to 2018_cutflow_csv/dataframe_MZD_100_SD_10.csv
[39m
[93mFile closed successfully



[39m
[93mOpening

[93mProcessing file...[39m
[93mCutflow table detected, processing...
[39m
[93mEpsilon/alpha values:[39m
[93m*********************[39m
[93meps/alp = 0.412000[39m
[93meps/alp err = 0.016500[39m
[93m*********************
[39m
[93mTransferring dictionary to pandas dataframe
[39m
[93mSaving pandas dataframe to 2018_cutflow_csv/dataframe_MZD_110_MSD_50.csv
[39m
[93mFile closed successfully



[39m
[93mOpening file 2018_CutFlow_tables/MZD_110_MSD_55.tex
[39m
[93mFile opened successfully
[39m
[93mProcessing file...[39m
[93mCutflow table detected, processing...
[39m
[93mEpsilon/alpha values:[39m
[93m*********************[39m
[93meps/alp = 0.429000[39m
[93meps/alp err = 0.015400[39m
[93m*********************
[39m
[93mTransferring dictionary to pandas dataframe
[39m
[93mSaving pandas dataframe to 2018_cutflow_csv/dataframe_MZD_110_MSD_55.csv
[39m
[93mFile closed successfully



[39m
[93mOpening file 2018_CutFlow_tables/MZD_125_MSD_10.tex
[39m
[93mFil

[93mProcessing file...[39m
[93mCutflow table detected, processing...
[39m
[93mEpsilon/alpha values:[39m
[93m*********************[39m
[93meps/alp = 0.382000[39m
[93meps/alp err = 0.014200[39m
[93m*********************
[39m
[93mTransferring dictionary to pandas dataframe
[39m
[93mSaving pandas dataframe to 2018_cutflow_csv/dataframe_MZD_160_MSD_30.csv
[39m
[93mFile closed successfully



[39m
[93mOpening file 2018_CutFlow_tables/MZD_160_MSD_35.tex
[39m
[93mFile opened successfully
[39m
[93mProcessing file...[39m
[93mCutflow table detected, processing...
[39m
[93mEpsilon/alpha values:[39m
[93m*********************[39m
[93meps/alp = 0.389000[39m
[93meps/alp err = 0.013900[39m
[93m*********************
[39m
[93mTransferring dictionary to pandas dataframe
[39m
[93mSaving pandas dataframe to 2018_cutflow_csv/dataframe_MZD_160_MSD_35.csv
[39m
[93mFile closed successfully



[39m
[93mOpening file 2018_CutFlow_tables/MZD_160_MSD_40.tex
[39m
[93mFil

[39m
[93mFile closed successfully



[39m
[93mOpening file 2018_CutFlow_tables/MZD_80_SD_35.tex
[39m
[93mFile opened successfully
[39m
[93mProcessing file...[39m
[93mCutflow table detected, processing...
[39m
[93mEpsilon/alpha values:[39m
[93m*********************[39m
[93meps/alp = 0.429000[39m
[93meps/alp err = 0.041700[39m
[93m*********************
[39m
[93mTransferring dictionary to pandas dataframe
[39m
[93mSaving pandas dataframe to 2018_cutflow_csv/dataframe_MZD_80_SD_35.csv
[39m
[93mFile closed successfully



[39m
[93mOpening file 2018_CutFlow_tables/MZD_80_SD_5.tex
[39m
[93mFile opened successfully
[39m
[93mProcessing file...[39m
[93mCutflow table detected, processing...
[39m
[93mEpsilon/alpha values:[39m
[93m*********************[39m
[93meps/alp = 0.406000[39m
[93meps/alp err = 0.044900[39m
[93m*********************
[39m
[93mTransferring dictionary to pandas dataframe
[39m
[93mSaving pandas dataframe to 2018_cutflow_csv/datafram

[93mFile opened successfully
[39m
[93mProcessing file...[39m
[93mCutflow table detected, processing...
[39m
[93mEpsilon/alpha values:[39m
[93m*********************[39m
[93meps/alp = 0.467000[39m
[93meps/alp err = 0.034500[39m
[93m*********************
[39m
[93mTransferring dictionary to pandas dataframe
[39m
[93mSaving pandas dataframe to 2018_cutflow_csv/dataframe_MZD_91_SD_45.csv
[39m
[93mFile closed successfully



[39m
[93mOpening file 2018_CutFlow_tables/MZD_91_SD_5.tex
[39m
[93mFile opened successfully
[39m
[93mProcessing file...[39m
[93mCutflow table detected, processing...
[39m
[93mEpsilon/alpha values:[39m
[93m*********************[39m
[93meps/alp = 0.454000[39m
[93meps/alp err = 0.027900[39m
[93m*********************
[39m
[93mTransferring dictionary to pandas dataframe
[39m
[93mSaving pandas dataframe to 2018_cutflow_csv/dataframe_MZD_91_SD_5.csv
[39m
[93mFile closed successfully



[39m
[93mOpening file 2018_CutFlow_tables/MZD_9

[39m
[93mFile closed successfully



[39m
[93mOpening file 2018_CutFlow_tables/MZD_140_MSD_55.tex
[39m
[93mFile opened successfully
[39m
[93mProcessing file...[39m
[93mCutflow table detected, processing...
[39m
[93mEpsilon/alpha values:[39m
[93m*********************[39m
[93meps/alp = 0.413000[39m
[93meps/alp err = 0.013900[39m
[93m*********************
[39m
[93mTransferring dictionary to pandas dataframe
[39m
[93mSaving pandas dataframe to 2018_cutflow_csv/dataframe_MZD_140_MSD_55.csv
[39m
[93mFile closed successfully



[39m
[93mOpening file 2018_CutFlow_tables/MZD_140_MSD_60.tex
[39m
[93mFile opened successfully
[39m
[93mProcessing file...[39m
[93mCutflow table detected, processing...
[39m
[93mEpsilon/alpha values:[39m
[93m*********************[39m
[93meps/alp = 0.146000[39m
[93meps/alp err = 0.007390[39m
[93m*********************
[39m
[93mTransferring dictionary to pandas dataframe
[39m
[93mSaving pandas dataframe to 2018_cutflow_csv/d

[93mFile closed successfully



[39m
[93mOpening file 2018_CutFlow_tables/MZD_170_MSD_5.tex
[39m
[93mFile opened successfully
[39m
[93mProcessing file...[39m
[93mCutflow table detected, processing...
[39m
[93mEpsilon/alpha values:[39m
[93m*********************[39m
[93meps/alp = 0.385000[39m
[93meps/alp err = 0.013100[39m
[93m*********************
[39m
[93mTransferring dictionary to pandas dataframe
[39m
[93mSaving pandas dataframe to 2018_cutflow_csv/dataframe_MZD_170_MSD_5.csv
[39m
[93mFile closed successfully



[39m
[93mOpening file 2018_CutFlow_tables/MZD_170_MSD_50.tex
[39m
[93mFile opened successfully
[39m
[93mProcessing file...[39m
[93mCutflow table detected, processing...
[39m
[93mEpsilon/alpha values:[39m
[93m*********************[39m
[93meps/alp = 0.414000[39m
[93meps/alp err = 0.013600[39m
[93m*********************
[39m
[93mTransferring dictionary to pandas dataframe
[39m
[93mSaving pandas dataframe to 2018_cutflow_csv/dataframe

[93mFile closed successfully



[39m
[93mOpening file 2018_CutFlow_tables/MZD_88_MSD_15.tex
[39m
[93mFile opened successfully
[39m
[93mProcessing file...[39m
[93mCutflow table detected, processing...
[39m
[93mEpsilon/alpha values:[39m
[93m*********************[39m
[93meps/alp = 0.394000[39m
[93meps/alp err = 0.031100[39m
[93m*********************
[39m
[93mTransferring dictionary to pandas dataframe
[39m
[93mSaving pandas dataframe to 2018_cutflow_csv/dataframe_MZD_88_MSD_15.csv
[39m
[93mFile closed successfully



[39m
[93mOpening file 2018_CutFlow_tables/MZD_88_MSD_20.tex
[39m
[93mFile opened successfully
[39m
[93mProcessing file...[39m
[93mCutflow table detected, processing...
[39m
[93mEpsilon/alpha values:[39m
[93m*********************[39m
[93meps/alp = 0.436000[39m
[93meps/alp err = 0.037100[39m
[93m*********************
[39m
[93mTransferring dictionary to pandas dataframe
[39m
[93mSaving pandas dataframe to 2018_cutflow_csv/dataframe_

In [None]:
def save_eps_alph(epsilon_alpha, epsilon_alpha_err):
    pd.dataframe()
    for key in epsilon_alpha.keys():
        
    

In [20]:
eps_alph_dict_2018 = {"dataset": epsilon_alpha_2018.keys(), "epsalp": epsilon_alpha_2018.values(), "err": epsilon_alpha_err_2018.values()}
eps_alph_df_2018 = pd.DataFrame(eps_alph_dict_2018)
eps_alph_df_2018.to_csv("2018_cutflow_csv/eps_alph_2018.csv")

In [21]:
base_dir  = base_dirs[0]
total_file_list, file_names = get_file_list(base_dir, ".tex")
keys = [key.split(".")[0] for key in file_names] # return keys to be used in dictionary later

keys_dict = {total_file_list[ii].split("/")[1].split(".")[0]: total_file_list[ii] for ii in range(len(total_file_list))}

In [22]:
keys_dict

{'MZD_110_MSD_10': '2017_CutFlow_tables/MZD_110_MSD_10.tex',
 'MZD_110_MSD_15': '2017_CutFlow_tables/MZD_110_MSD_15.tex',
 'MZD_110_MSD_20': '2017_CutFlow_tables/MZD_110_MSD_20.tex',
 'MZD_110_MSD_25': '2017_CutFlow_tables/MZD_110_MSD_25.tex',
 'MZD_110_MSD_30': '2017_CutFlow_tables/MZD_110_MSD_30.tex',
 'MZD_110_MSD_35': '2017_CutFlow_tables/MZD_110_MSD_35.tex',
 'MZD_110_MSD_40': '2017_CutFlow_tables/MZD_110_MSD_40.tex',
 'MZD_110_MSD_45': '2017_CutFlow_tables/MZD_110_MSD_45.tex',
 'MZD_110_MSD_5': '2017_CutFlow_tables/MZD_110_MSD_5.tex',
 'MZD_110_MSD_50': '2017_CutFlow_tables/MZD_110_MSD_50.tex',
 'MZD_125_MSD_15': '2017_CutFlow_tables/MZD_125_MSD_15.tex',
 'MZD_125_MSD_25': '2017_CutFlow_tables/MZD_125_MSD_25.tex',
 'MZD_125_MSD_30': '2017_CutFlow_tables/MZD_125_MSD_30.tex',
 'MZD_125_MSD_35': '2017_CutFlow_tables/MZD_125_MSD_35.tex',
 'MZD_125_MSD_45': '2017_CutFlow_tables/MZD_125_MSD_45.tex',
 'MZD_125_MSD_5': '2017_CutFlow_tables/MZD_125_MSD_5.tex',
 'MZD_125_MSD_50': '2017_Cut

In [23]:
dataDir = "2017_cutflow_csv/"
cutflow_dict_2017, df_dict_2017, epsilon_alpha_2017, epsilon_alpha_err_2017 = process_cutflow(total_file_list, verbose = True)

[93mOpening file 2017_CutFlow_tables/MZD_110_MSD_10.tex
[39m
[93mFile opened successfully
[39m
[93mProcessing file...[39m
[93mCutflow table detected, processing...
[39m
[93mEpsilon/alpha values:[39m
[93m*********************[39m
[93meps/alp = 0.001450[39m
[93meps/alp err = 0.000841[39m
[93m*********************
[39m
[93mTransferring dictionary to pandas dataframe
[39m
[93mSaving pandas dataframe to 2017_cutflow_csv/dataframe_MZD_110_MSD_10.csv
[39m
[93mFile closed successfully



[39m
[93mOpening file 2017_CutFlow_tables/MZD_110_MSD_15.tex
[39m
[93mFile opened successfully
[39m
[93mProcessing file...[39m
[93mCutflow table detected, processing...
[39m
[93mEpsilon/alpha values:[39m
[93m*********************[39m
[93meps/alp = 0.393000[39m
[93meps/alp err = 0.016300[39m
[93m*********************
[39m
[93mTransferring dictionary to pandas dataframe
[39m
[93mSaving pandas dataframe to 2017_cutflow_csv/dataframe_MZD_110_MSD_15.csv
[39m
[93mFile c

[93mFile closed successfully



[39m
[93mOpening file 2017_CutFlow_tables/MZD_130_MSD_45.tex
[39m
[93mFile opened successfully
[39m
[93mProcessing file...[39m
[93mCutflow table detected, processing...
[39m
[93mEpsilon/alpha values:[39m
[93m*********************[39m
[93meps/alp = 0.408000[39m
[93meps/alp err = 0.014100[39m
[93m*********************
[39m
[93mTransferring dictionary to pandas dataframe
[39m
[93mSaving pandas dataframe to 2017_cutflow_csv/dataframe_MZD_130_MSD_45.csv
[39m
[93mFile closed successfully



[39m
[93mOpening file 2017_CutFlow_tables/MZD_130_MSD_5.tex
[39m
[93mFile opened successfully
[39m
[93mProcessing file...[39m
[93mCutflow table detected, processing...
[39m
[93mEpsilon/alpha values:[39m
[93m*********************[39m
[93meps/alp = 0.399000[39m
[93meps/alp err = 0.014900[39m
[93m*********************
[39m
[93mTransferring dictionary to pandas dataframe
[39m
[93mSaving pandas dataframe to 2017_cutflow_csv/datafram

[39m
[93mFile closed successfully



[39m
[93mOpening file 2017_CutFlow_tables/MZD_160_MSD_20.tex
[39m
[93mFile opened successfully
[39m
[93mProcessing file...[39m
[93mCutflow table detected, processing...
[39m
[93mEpsilon/alpha values:[39m
[93m*********************[39m
[93meps/alp = 0.396000[39m
[93meps/alp err = 0.014000[39m
[93m*********************
[39m
[93mTransferring dictionary to pandas dataframe
[39m
[93mSaving pandas dataframe to 2017_cutflow_csv/dataframe_MZD_160_MSD_20.csv
[39m
[93mFile closed successfully



[39m
[93mOpening file 2017_CutFlow_tables/MZD_160_MSD_25.tex
[39m
[93mFile opened successfully
[39m
[93mProcessing file...[39m
[93mCutflow table detected, processing...
[39m
[93mEpsilon/alpha values:[39m
[93m*********************[39m
[93meps/alp = 0.379000[39m
[93meps/alp err = 0.014200[39m
[93m*********************
[39m
[93mTransferring dictionary to pandas dataframe
[39m
[93mSaving pandas dataframe to 2017_cutflow_csv/d

[93mFile closed successfully



[39m
[93mOpening file 2017_CutFlow_tables/MZD_190_MSD_45.tex
[39m
[93mFile opened successfully
[39m
[93mProcessing file...[39m
[93mCutflow table detected, processing...
[39m
[93mEpsilon/alpha values:[39m
[93m*********************[39m
[93meps/alp = 0.401000[39m
[93meps/alp err = 0.012800[39m
[93m*********************
[39m
[93mTransferring dictionary to pandas dataframe
[39m
[93mSaving pandas dataframe to 2017_cutflow_csv/dataframe_MZD_190_MSD_45.csv
[39m
[93mFile closed successfully



[39m
[93mOpening file 2017_CutFlow_tables/MZD_190_MSD_5.tex
[39m
[93mFile opened successfully
[39m
[93mProcessing file...[39m
[93mCutflow table detected, processing...
[39m
[93mEpsilon/alpha values:[39m
[93m*********************[39m
[93meps/alp = 0.380000[39m
[93meps/alp err = 0.012300[39m
[93m*********************
[39m
[93mTransferring dictionary to pandas dataframe
[39m
[93mSaving pandas dataframe to 2017_cutflow_csv/datafram

[93mFile closed successfully



[39m
[93mOpening file 2017_CutFlow_tables/MZD_91_MSD_25.tex
[39m
[93mFile opened successfully
[39m
[93mProcessing file...[39m
[93mCutflow table detected, processing...
[39m
[93mEpsilon/alpha values:[39m
[93m*********************[39m
[93meps/alp = 0.416000[39m
[93meps/alp err = 0.019000[39m
[93m*********************
[39m
[93mTransferring dictionary to pandas dataframe
[39m
[93mSaving pandas dataframe to 2017_cutflow_csv/dataframe_MZD_91_MSD_25.csv
[39m
[93mFile closed successfully



[39m
[93mOpening file 2017_CutFlow_tables/MZD_91_MSD_30.tex
[39m
[93mFile opened successfully
[39m
[93mProcessing file...[39m
[93mCutflow table detected, processing...
[39m
[93mEpsilon/alpha values:[39m
[93m*********************[39m
[93meps/alp = 0.425000[39m
[93meps/alp err = 0.019300[39m
[93m*********************
[39m
[93mTransferring dictionary to pandas dataframe
[39m
[93mSaving pandas dataframe to 2017_cutflow_csv/dataframe_

In [24]:
eps_alph_dict_2017 = {"dataset": epsilon_alpha_2017.keys(), "epsalp": epsilon_alpha_2017.values(), "err": epsilon_alpha_err_2017.values()}
eps_alph_df_2017 = pd.DataFrame(eps_alph_dict_2017)
eps_alph_df_2017.to_csv("2017_cutflow_csv/eps_alph_2017.csv")