This notebook performs per-fill calculations with BCM data.

In [None]:
import pandas as pd, numpy as np, matplotlib.dates as dates, seaborn as sns, paramiko, os, re, sys, tables, glob, sys, shutil
from matplotlib import pyplot as plt
from matplotlib.colors import LogNorm
from ipywidgets import interact, IntSlider
if "./src" not in sys.path:
    sys.path.insert(0, "./src")
from src import bcm_utils

# Define parameters

In [None]:
# Data-taking year to analyse; it is substituted into the input/output paths
# and the plot labels in the cells below.
year = 2018

In [None]:
# Zero-padded month tags ("01" ... "12"), used to mask the hd5 file list for
# one month of data when several months share the same folder
months = ["{:02d}".format(m) for m in range(1, 13)]

# Integrated luminosity per year in inverse barns (LHC delivered, nominal,
# taken from the twiki); kept for reference only
nominal_integrated_lumi = {
    2015: 4.21e15,
    2016: 40.99e15,
    2017: 49.79e15,
    2018: 67.86e15,
}

# Longest running sum used
rs = 12

# Column names of the channel-ratio dataframe ("numerator/denominator")
charge_sum_ratio_df_columns = [
    "CH5/CH1", "CH6/CH2", "CH7/CH3", "CH8/CH4",
    "CH17/CH41", "CH18/CH42", "CH19/CH43", "CH20/CH44",
    "CH21/CH45", "CH22/CH46", "CH23/CH47", "CH24/CH48",
    "CH25/CH33", "CH26/CH34", "CH27/CH35", "CH28/CH36",
    "CH29/CH37", "CH30/CH38", "CH31/CH39", "CH32/CH40",
    "CH24/CH5", "CH48/CH1", "CH24/CH6", "CH48/CH2",
    "CH24/CH7", "CH48/CH3", "CH24/CH8", "CH48/CH4",
]

# Input locations (note: brilcalc_dir is the path of a CSV file, not a directory)
excel_json_path = "../excel_json/{}".format(year)
brilcalc_dir = "../brilcalc_data/{}_offline.csv".format(year)

# Files found in the excel/json directory, grouped by the tag in their name
excel_json_files = os.listdir(excel_json_path)
excel_json_charge_sums = [name for name in excel_json_files if "sums_20" in name]
excel_json_charge_sum_errors = [name for name in excel_json_files if "sums_stat_uncertainties" in name]
excel_json_charge_sum_ratios = [name for name in excel_json_files if "sum_ratio" in name]
excel_json_sim_data_ratios = [name for name in excel_json_files if "sim_data" in name]

## Read brilcalc lumi data 
Read the CSV exported from brilcalc (one file contains one year of luminosity information) and build a dataframe with the delivered luminosity summed per fill.

In [None]:
# Read the brilcalc CSV export: the column names are taken from the second
# line of the file (header=1) and the last three rows are discarded.
brilcalc_table = pd.read_csv(brilcalc_dir, header=1).iloc[:-3]

# Delivered luminosity converted from [/fb] to [/b] (factor 1e15);
# entries that cannot be parsed as numbers become NaN.
delivered_per_fb = pd.to_numeric(brilcalc_table["delivered(/fb)"], errors="coerce")
brilcalc_table["delivered_[/b]"] = delivered_per_fb*1e15

# Split the combined "run:fill" key into two separate (string) columns
run_and_fill = brilcalc_table['#run:fill'].str.split(':', expand=True)
brilcalc_table[['run', 'fill']] = run_and_fill
brilcalc_table = brilcalc_table.sort_index()

In [None]:
# Sum up delivered lumi per fill; used to filter hd5 files for fills that are
# in this table.
#
# All three columns are derived from the same numeric fill key, in order of
# first appearance in the brilcalc table, so that "fill", "start_time" and
# "cms_delivered" stay row-aligned even if the CSV is not sorted by fill.
fill_numbers = pd.to_numeric(brilcalc_table["fill"])
first_row_of_fill = ~fill_numbers.duplicated()
fills_in_order = fill_numbers[first_row_of_fill]

# One pass over the table instead of one boolean scan per fill
delivered_per_fill = brilcalc_table["delivered_[/b]"].groupby(fill_numbers).sum()

brilcalc_lumi_table = pd.DataFrame({
    "fill": fills_in_order.tolist(),
    # time stamp of the first row recorded for each fill
    "start_time": [pd.Timestamp(t) for t in brilcalc_table.loc[first_row_of_fill, "time"]],
    "cms_delivered": delivered_per_fill.reindex(fills_in_order.values).tolist(),
})

$1\,\mu\mathrm{b}^{-1}\,\mathrm{s}^{-1} = 10^{30}\,\mathrm{cm}^{-2}\,\mathrm{s}^{-1}$

## 1) Merge fractionated excels
The data were read in arbitrary chunks, so some months are split across several Excel files.

In [None]:
data_type = ["charge_sums", "fill_dates"]

# Fractionated files carry a "<year>_<month>_<part>" tag in their name
fraction_tag = re.compile("{}_[0-9]+_[0-9]+".format(year))

for dt in data_type:
    # NOTE(review): the names are listed from ../excel_json but the files are
    # read from and moved within ../excel_data -- confirm both directories hold
    # the same file names.
    # Sorted, because os.listdir returns names in arbitrary order and the
    # month bookkeeping below needs all parts of one month to be consecutive.
    charge_sum_files = sorted(filename for filename in os.listdir("../excel_json/{}".format(year))
                              if dt in filename)

    # auxiliary variables: month currently being merged and its accumulated rows
    running_month = "00"
    concatted_month = pd.DataFrame()

    # make a dir to place fractionated files (parents are created if missing)
    move_dir = "../excel_data/{}/month_fractions".format(year)
    os.makedirs(move_dir, exist_ok=True)

    # loop over files
    for filename in charge_sum_files:
        date_of_file = fraction_tag.findall(filename)
        if not date_of_file:
            # not a fractionated file, nothing to merge
            continue
        print("Found fractionated month: {}".format(date_of_file))

        # check if new month: start a fresh accumulator when the month changes
        month_of_file = re.findall("_[0-9]+_", date_of_file[0])[0][1:-1]
        assert len(month_of_file) == 2
        if month_of_file != running_month:
            print("Starting new month.")
            concatted_month = pd.DataFrame()
            running_month = month_of_file

        # read file
        excel_path = "../excel_data/{}/{}".format(year, filename)
        excel_table = pd.read_excel(excel_path)

        # move fractionated file to fractionated folder
        shutil.move(excel_path, move_dir+"/{}".format(filename))

        # concat data to whole month df
        concatted_month = pd.concat([concatted_month, excel_table])
        print("Length of dataframe after reading {}: {}\n".format(filename, len(concatted_month)))
        if len(concatted_month) > 0:
            try:
                concatted_month = concatted_month.sort_values(by="fill").reset_index(drop=True)
            except KeyError:
                # no "fill" column in this table: order by the index instead
                concatted_month = concatted_month.sort_index()

            # output name = file name up to (not including) its last underscore
            out_name = "../excel_data/{}/{}.xlsx".format(year,
                                                        re.findall("^.*_", filename)[0][:-1])

            # sum duplicate rows (when a fill was fractionated in two files)
            if concatted_month.index.duplicated().any():
                concatted_month = concatted_month.groupby(level=0).sum()

            # the merged month is rewritten after every part that is added
            concatted_month.to_excel(out_name)

## 2) Lumi/Q tables
Calculate the ratio of delivered luminosity to summed charge per fill and per channel.

In [None]:
# Delivered luminosity of every fill above the 1e14 /b threshold, keyed by
# fill number. Built once here instead of being re-derived for each fill.
delivered_by_fill = (brilcalc_lumi_table[brilcalc_lumi_table["cms_delivered"] > 1e14]
                     .set_index("fill")["cms_delivered"])

# One ratio table per monthly file; concatenated once after the loop
lumi_ch_ratios_per_month = []
for filename in sorted(excel_json_charge_sums):
    charge_sums = pd.read_excel("../excel_json/{}/{}".format(year, filename))
    charge_sums = charge_sums.rename(columns={'Unnamed: 0': 'fill'})
    # tag of the form "_20..." cut from the file name (only needed if the
    # per-month table is exported)
    month_tag = re.findall("_20.*", filename)[0]

    # keep the rows whose fill has a valid delivered luminosity
    selected = charge_sums[charge_sums["fill"].isin(delivered_by_fill.index)]
    delivered = selected["fill"].map(delivered_by_fill)

    # delivered luminosity / summed charge, row by row, for every column after
    # the first one (the fill column)
    lumi_ch_ratios = selected[charge_sums.columns[1:]].rdiv(delivered, axis=0)
    lumi_ch_ratios["fill"] = selected["fill"]
    lumi_ch_ratios_per_month.append(lumi_ch_ratios)
    print("Read {}".format(filename))

if lumi_ch_ratios_per_month:
    lumi_ch_ratios_all_fills = pd.concat(lumi_ch_ratios_per_month).reset_index(drop=True)
else:
    # no input files found: keep an empty table so later cells can still run
    lumi_ch_ratios_all_fills = pd.DataFrame()

In [None]:
"""
L/Q = ϵ
L/Q_frac > ϵ
ϵ*Q_frac/L = ϵ/(L/Q_frac) = Q_frac/Q < 1

-Z BLM measure higher currents
delivered lumi same for both sides
"""
# Delivered luminosity / measured charge per fill for the two BLM channels.
# The legend entries are attached to the artists directly, so this cell does
# not rely on a `labels` list defined in a later cell.
plt.scatter(lumi_ch_ratios_all_fills["fill"], lumi_ch_ratios_all_fills["CH24RS10"], label="BLM -Z")
plt.scatter(lumi_ch_ratios_all_fills["fill"], lumi_ch_ratios_all_fills["CH48RS10"], label="BLM +Z")
plt.grid()
plt.legend()
plt.xlabel("Fill")
plt.ylabel("Delivered luminosity/measured charge")

## 3) Channel ratios -Z/+Z
Columns are channel ratios, rows are runs. Ratio of summed charge between -Z and +Z for symmetrical detectors.

In [None]:
# Channel ratio shown in the plot (one of charge_sum_ratio_df_columns)
col = "CH25/CH33"

# make a dir for plots (parents are created if missing)
plot_dir = "../charge_sum_ratio_plots/{}".format(year)
os.makedirs(plot_dir, exist_ok=True)

fig, ax = plt.subplots(figsize=(9, 6))
plt.grid()
plt.yticks(fontsize=13)
plt.xticks(fontsize=13)

# One scatter series per monthly charge-sum file. The files are the ones listed
# in the parameter cell (excel_json_charge_sums inside excel_json_path), and
# the path is built from the file name.
for filename in sorted(excel_json_charge_sums):
    charge_sums = pd.read_excel(excel_json_path+"/{}".format(filename))
    month_tag = re.findall("_20.*", filename)[0]

    # ratio of the RS12 charge sums for every "CHa/CHb" column name
    ch_ratios = pd.DataFrame(columns=charge_sum_ratio_df_columns)
    for c in charge_sum_ratio_df_columns:
        current_columns = re.findall("CH[0-9]*", c)
        ch_ratios[c] = charge_sums[current_columns[0]+"RS12"]/charge_sums[current_columns[1]+"RS12"]
    print("Read {}".format(filename))

    # legend entry = second number in the file tag (the month)
    ax.scatter(ch_ratios.index, ch_ratios[col], label=re.findall("[0-9]+", month_tag)[1])

# axis labels and legend only need to be set once, after all series are drawn
ax.set_ylabel('Charge ratio {}'.format(col), fontsize=19)
ax.set_xlabel('Fills in {}'.format(year), fontsize=19)
plt.legend()
#plt.savefig(plot_dir+"/charge_sum_ratios_{}_{}-{}".format(year, *re.findall("CH[0-9]*", col)))

## 4) Comparison with FLUKA
FLUKA benchmark results need to be given below. Compares simulated charge (in Coulombs) to the BLM detector measurements.

In [None]:
# Inelastic cross section; multiplied with the delivered luminosity of a fill
# to obtain "Rpp" in the ratio calculation below.
# NOTE(review): presumably given in barns (79.5 mb) to match the /b luminosity -- confirm.
σ_inel = 79.5e-3 #TOTEM result, old: 71.3e-3
# Normalisation volume dividing the scaled simulated charge (Rpp/2/V).
# NOTE(review): has the form of a cylinder volume pi*r^2*h with r = 4.25 and
# h = 38.7; the units are not stated here -- confirm.
V = 4.25**2*np.pi*38.7
# FLUKA benchmark charge for the -Z and +Z side; scaled per fill by Rpp/2/V.
Q_FLUKA = {"-Z":2.3991828053e-12, "+Z":0.9963477016e-12}

# Months (zero-padded strings) per year that are left out of the sim/data ratio plot
heavy_ion_months = {2015: ["11", "12"], 2016: ["11", "12"], 2017: [], 2018:["11", "12"]}
# Months per year whose sim/data ratios are divided by 1.456 ("nb4" sampling)
nb4_months = {2015: ["09", "10", "11", "12"], 2016: [], 2017: [], 2018:[]}
# Column names used to select the BLM channels from the charge-sum tables.
# NOTE(review): presumably ["CH24RS10", "CH48RS10"] (channels 24 and 48, running sum 10) -- verify in bcm_utils.
colnames = bcm_utils.get_column_names([24, 48],[10])

In [None]:
# Files of the BLM measurement: per-fill charge sums and their statistical
# uncertainties, picked from one listing of the excel/json directory
excel_json_listing = os.listdir(excel_json_path)
charge_sum_files = [name for name in excel_json_listing if "charge_sums_2" in name]
charge_sum_error_files = [name for name in excel_json_listing if "charge_sums_sta" in name]

### Simulation uncertainties

In [None]:
"""
order= negative positive
stat uncertainties come from FLUKA sum.lis
syst uncertainties come from the quadratic sum of yield from additional simulations
these values are the same when working with each fill

per month stat errors:
sum((stat_data [%] * data)**2)

sim/data ratios error:
sim: stat_sim [%] + syst_sim [%]
data: stat_data [%] (no syst_data considered here)
error of fill ratio [%]: 100*sqrt((stat_sim/sim)**2 + (stat_data/data)**2) + 100*sqrt((syst_sim/sim)**2) = 
sqrt((stat_sim [%])**2 + (stat_data [%])**2) + syst_sim [%]
"""

# Syst uncertainty of the simulation as (upper, lower) pairs; first entry is
# the -Z side, second entry the +Z side
syst = pd.Series([(9.87169, -8.19754), (8.53797, -7.58091)])

# Stat uncertainty of the simulation, in percent and as absolute charge
stat_percent = pd.Series([0.24094, 0.24787])
stat = pd.Series([stat_percent[0]/100*Q_FLUKA["-Z"], stat_percent[1]/100*Q_FLUKA["+Z"]])

sim_uncertainties = pd.DataFrame({ 'sim_stat_%': stat_percent, 'sim_stat': stat, 'sim_syst': syst })
# Flat index: a nested list here would build a one-level MultiIndex, and the
# per-side lookups such as .loc["sim_stat"]["-Z"] would then no longer be
# plain scalar lookups.
sim_uncertainties.index = ["-Z", "+Z"]
# Transpose so that rows are the uncertainty kinds and columns the two sides
sim_uncertainties = sim_uncertainties.T
sim_uncertainties

### Calculate ratios

In [None]:
# Set to True to write the per-month ratio tables to excel_json_path
save_excel = False

# auxiliary lists: one entry (a list) per month, flattened in the plots below
blm_fills = []
blm_neg_charge = []
blm_pos_charge = []
fluka_fills = []
fluka_neg_charge = []
fluka_pos_charge = []

# prepare plot
plot_dir = "../sim_data_ratio_plots"
os.makedirs(plot_dir, exist_ok=True)
fig, ax = plt.subplots(figsize=(9, 6))
plt.grid()
plt.yticks(fontsize=13)
plt.xticks(fontsize=13)
labels = ["BLM -Z", "BLM +Z"]

# relative stat uncertainty of the simulation; the same for every month
sim_rel_error = sim_uncertainties.loc["sim_stat_%"]/100
sim_term = sim_rel_error

# loop over measured charge sum and charge error files in each month;
# both lists are sorted so that the files of one month are paired
for (sum_file, error_file) in zip(sorted(charge_sum_files), sorted(charge_sum_error_files)):
    # two-digit month cut from the "_<digits>." part of the file name
    month_tag = re.findall(r"_[0-9]+\.", sum_file)[0][1:3]

    # read sum charge and sum stat uncertainty per fill
    print("Read {}".format(sum_file))
    sum_df = pd.read_excel(excel_json_path+"/{}".format(sum_file)).rename(columns={"Unnamed: 0":"fill"})
    error_df = pd.read_excel(excel_json_path+"/{}".format(error_file)).rename(columns={"Unnamed: 0":"fill"})
    # both tables must describe the same fills in the same order
    assert list(sum_df["fill"]) == list(error_df["fill"])

    print("No. of fills: {}".format(len(sum_df["fill"].unique())))
    print(sum_df["fill"].unique())

    # select subset of brilcalc normtag data based on charge sum df indices;
    # copied so that the columns added below never write into a view
    lumi_df = bcm_utils.select_lumi_data(brilcalc_lumi_table, sum_df, lumi_threshold=1e14).copy() # lower threshold for lumi

    print("No. of fills with valid lumi: {}".format(len(lumi_df["fill"].unique())))
    print(lumi_df["fill"].unique())

    # get the normalization factor to each fill's charge
    lumi_df["Rpp"] = lumi_df["cms_delivered"]*σ_inel
    lumi_df["sim_charge_-Z"] = lumi_df["Rpp"]/2/V*Q_FLUKA["-Z"]
    lumi_df["sim_charge_+Z"] = lumi_df["Rpp"]/2/V*Q_FLUKA["+Z"]
    lumi_df["sim_charge_-Z_stat_error"] = lumi_df["Rpp"]/2/V*sim_uncertainties.loc["sim_stat"]["-Z"]
    lumi_df["sim_charge_+Z_stat_error"] = lumi_df["Rpp"]/2/V*sim_uncertainties.loc["sim_stat"]["+Z"]

    # select BLM channels of measurement data, indexed by fill
    blm_sums_df = sum_df.set_index("fill")[colnames]
    blm_stat_df = error_df.set_index("fill")[colnames]

    # fill up auxiliary lists to plot sim-data absolute values over time
    blm_fills.append(list(sum_df["fill"]))
    blm_neg_charge.append(list(blm_sums_df["CH24RS10"]))
    blm_pos_charge.append(list(blm_sums_df["CH48RS10"]))
    fluka_fills.append(list(lumi_df["fill"]))
    fluka_neg_charge.append(list(lumi_df["sim_charge_-Z"]))
    fluka_pos_charge.append(list(lumi_df["sim_charge_+Z"]))

    # calculate simulation/data ratios
    sim_data_ratios = bcm_utils.calculate_sim_data_ratios(lumi_df, blm_sums_df, colnames)
    print("Ratios length: {}".format(len(sim_data_ratios)))

    # if sampling was nb4: correct the ratios before the uncertainty is
    # derived, so that the absolute stat uncertainty below belongs to the
    # corrected ratios
    if month_tag in nb4_months[year]:
        print("nb4 frequency in {}".format(month_tag))
        sim_data_ratios = sim_data_ratios/1.456

    # stat uncertainty of the ratios: relative data and simulation terms added
    # in quadrature (syst. uncertainty comes from simulation only)
    data_term = (blm_stat_df/blm_sums_df)**2
    data_term["CH24RS10"] = data_term["CH24RS10"] + sim_term["-Z"]**2
    data_term["CH48RS10"] = data_term["CH48RS10"] + sim_term["+Z"]**2
    ratios_stat = sim_data_ratios*np.sqrt(data_term)
    ratios_stat = ratios_stat.dropna()

    # save ratios and their stat uncertainty to excel
    if save_excel:
        sim_data_ratios.to_excel(excel_json_path+"/sim_data_ratios_{}_{}.xlsx".format(year, month_tag))
        ratios_stat.to_excel(excel_json_path+"/sim_data_ratios_stat_{}_{}.xlsx".format(year, month_tag))

    # make plot of sim/data ratios for proton months only; the outlier filter
    # (mainly for missing fill parts in the data) is currently switched off:
    # [(sim_data_ratios["CH24RS10"] < 1) & (sim_data_ratios["CH48RS10"] > .5)]
    if month_tag not in heavy_ion_months[year]:
        ratios_to_plot = sim_data_ratios
        ax.errorbar(ratios_to_plot.index,
                    ratios_to_plot["CH24RS10"], c="b", fmt = 'o')
        ax.errorbar(ratios_to_plot.index,
                    ratios_to_plot["CH48RS10"], c="r", fmt = 'o')

# axis labels, legend and range are set once, after all months are drawn
ax.set_ylabel('FLUKA/measurement ratio', fontsize=19)
ax.set_xlabel('Proton fills in {}'.format(year), fontsize=19)
plt.legend(labels)
plt.ylim(0.6, 1.5)

In [None]:

# Flatten the per-month lists once (one inner list was appended per month)
blm_valid_fills = [fill for month in blm_fills for fill in month]
blm_neg_charge_flat = [charge for month in blm_neg_charge for charge in month]
fluka_fills_flat = [fill for month in fluka_fills for fill in month]
fluka_neg_charge_flat = [charge for month in fluka_neg_charge for charge in month]

# Measured vs. scaled simulated charge on the -Z side, fill by fill
plt.figure(figsize=(9, 6))
plt.scatter(blm_valid_fills, blm_neg_charge_flat, label="BLM -Z measurement charge")
plt.scatter(fluka_fills_flat, fluka_neg_charge_flat, label="BLM -Z scaled simulated charge")
# simulated charge that corresponds to the 1e14 /b luminosity threshold
plt.axhline(σ_inel*1e14/2/V*Q_FLUKA["-Z"], c="r", label="BLM -Z luminosity threshold")
plt.legend()
plt.ylabel("Charge [C]")
# the label follows the `year` parameter instead of a hard-coded year
plt.xlabel("Proton fills in {}".format(year))
plt.grid()

In [None]:
# Flatten the per-month lists once (one inner list was appended per month)
blm_fills_flat = [fill for month in blm_fills for fill in month]
blm_pos_charge_flat = [charge for month in blm_pos_charge for charge in month]
fluka_fills_flat = [fill for month in fluka_fills for fill in month]
fluka_pos_charge_flat = [charge for month in fluka_pos_charge for charge in month]

# Measured vs. scaled simulated charge on the +Z side, fill by fill
plt.figure(figsize=(16, 6))
plt.plot(blm_fills_flat, blm_pos_charge_flat, label="BLM +Z measurement charge")
plt.plot(fluka_fills_flat, fluka_pos_charge_flat, label="BLM +Z scaled simulated charge")
# simulated charge that corresponds to the 1e14 /b luminosity threshold
plt.axhline(σ_inel*1e14/2/V*Q_FLUKA["+Z"], c="r", label="BLM +Z luminosity threshold")
plt.legend()
plt.ylabel("Charge [C]")
# the label follows the `year` parameter instead of a hard-coded year
plt.xlabel("Proton fills in {}".format(year))
plt.grid()