In [1]:
import os, sys
import csv
import json
import pandas as pd

In [2]:
csv_file = 'CSVfromSpreadsheet/Run3Summer22EE.csv'
campaign = 'Run3Summer22EE'

In [3]:
def dict_to_dataframe(data_dict):
    data_list = []
    for sample, subsamples in data_dict.items():
        for subsample, lumi in subsamples.items():
            data_list.append([sample, subsample, lumi])    
    df = pd.DataFrame(data_list, columns=["sample", "subsample", "lumi"])
    return df

def write_lumidata_into_json(df, campaign):
    outdir='extracted_jsons'
    os.makedirs(outdir, exist_ok=True)
    outfile = os.path.join(outdir, f'lumidata_{campaign}.json')
    with open(outfile, 'w') as json_file: json.dump(df, json_file, indent=4)
    print('File written: ' + outfile)
    
print('Functions loaded.')

Functions loaded.


In [4]:
df_lumi = pd.DataFrame()
with open(csv_file, newline='') as csvfile:
    reader = csv.reader(csvfile)
    lumilist = list(reader)

df_lumi = pd.DataFrame(lumilist)
df_lumi=df_lumi[[1, 8]]
df_lumi = df_lumi[~df_lumi.apply(lambda row: all(cell == '' for cell in row), axis=1)].reset_index(drop=True) # Dropping empty rows.

# Dropping rows where the first column has a value (e.g., process name) but all other columns are empty
df_lumi = df_lumi[~df_lumi.apply(lambda row: row.iloc[0] != '' and all(cell == '' for cell in row.iloc[1:]), axis=1)].reset_index(drop=True)

# Remove rows containing 'sample_subsample'
df_lumi = df_lumi[df_lumi.iloc[:, 0] != 'sample_subsample'].reset_index(drop=True)

df_lumi.columns = ['sample_subsample', 'lumi']
df_lumi['lumi'] = df_lumi['lumi'].astype(float)

split_values = df_lumi['sample_subsample'].str.split('_', n=1)
df_lumi['sample'] = split_values.str[0]
df_lumi['subsample'] = split_values.str[1]

df_lumi = df_lumi[['sample', 'subsample', 'lumi']]
display(df_lumi)

Unnamed: 0,sample,subsample,lumi
0,WtoLNu,Inclusive,4.227545e+03
1,WGtoLNuG,10to100,2.601859e+05
2,WGtoLNuG,100to200,1.478004e+06
3,WGtoLNuG,200to400,5.263982e+06
4,WGtoLNuG,400to600,7.228023e+07
...,...,...,...
91,QCDMu,300to470,1.139849e+05
92,QCDMu,470to600,9.673206e+05
93,QCDMu,600to800,3.437494e+06
94,QCDMu,800to1000,3.211534e+07


In [5]:
lumi_era_info = {
    "Run3Summer22":     {"lumi":  7980.4, "samples": {"Muon": ["C", "D"], "EGamma": ["C", "D"]}},
    "Run3Summer22EE":   {"lumi": 26671.7, "samples": {"Muon": ["E", "F", "G"],"EGamma": ["E", "F", "G"]}},
    "Run3Summer23":     {"lumi": 17794.0, "samples": {"Muon0": ["C1", "C2", "C3", "C4"],   "Muon1":   ["C1", "C2", "C3", "C4"],
                                                    "EGamma0": ["C1", "C2", "C3", "C4"], "EGamma1": ["C1", "C2", "C3", "C4"]}},
    "Run3Summer23BPix": {"lumi":  9451.0, "samples": {"Muon0":   ["D1", "D2"], "Muon1": ["D1", "D2"],
                                                      "EGamma0": ["D1", "D2"], "EGamma1": ["D1", "D2"]}}
}

campaign_data = lumi_era_info.get(campaign)
datadict = {}
lumi = campaign_data["lumi"]
datadict = {sample: {era: lumi for era in eras}  for sample, eras in campaign_data["samples"].items()}

signaldict = {
    "VLLS_ele":{
        "M100":     512598.29,
        "M125":    1091218.35,
        "M150":    2064227.59,
        "M200":    5696971.43,
        "M250":   10804152.17,
        "M300":   21769257.64,
        "M350":   15935520.00,
        "M400":   27645555.56,
        "M450":   44700229.36,
        "M500":   72181021.90,
        "M750":  254269230.77,
        "M1000":1268884615.38
    },
    "VLLS_mu":{
        "M100":     508329.91,
        "M125":    1100348.62,
        "M150":    2064344.83,
        "M200":    5710047.62,
        "M250":   10794347.83,
        "M300":   21809519.65,
        "M400":   27595277.78,
        "M450":   45337844.04,
        "M500":   72847810.22,
        "M750":  255569230.77,
        "M1000":1275589743.59
    },
    "VLLD_ele":{
        "M100":     6560.41,
        "M200":    54213.24,
        "M300":    85061.86,
        "M400":   270022.05,
        "M600":  1651744.97,
        "M800":  6824207.49,
        "M1000":25011328.53
    },
    "VLLD_mu":{
        "M100":   6622.84,
        "M200":  54344.12,
        "M300":  85986.25,
        "M400": 267905.18,
        "M600":1670469.80,
        "M800":7136311.24
    }
}

df_lumi     = df_lumi.sort_values(by=["sample", "subsample"]).reset_index(drop=True)
df_signal   = dict_to_dataframe(signaldict)
df_data     = dict_to_dataframe(datadict)
df_campaign = pd.concat([df_lumi, df_signal, df_data])

In [6]:
df_campaign

Unnamed: 0,sample,subsample,lumi
0,DYGToLLG,M4to50-PT100to200,2.603175e+07
1,DYGToLLG,M4to50-PT10to100,1.788726e+06
2,DYGToLLG,M4to50-PT200toInf,1.841599e+08
3,DYGToLLG,M50-PT100to200,3.366834e+06
4,DYGToLLG,M50-PT10to50,6.250729e+05
...,...,...,...
1,Muon,F,2.667170e+04
2,Muon,G,2.667170e+04
3,EGamma,E,2.667170e+04
4,EGamma,F,2.667170e+04


In [7]:
lumidata = {}
for sample, subsample, lumi in df_campaign.itertuples(index=False):
    if lumi == 0: continue
    if sample not in lumidata: lumidata[sample] = {}
    lumidata[sample][subsample] = lumi

write_lumidata_into_json(lumidata, campaign)

File written: extracted_jsons/lumidata_Run3Summer22EE.json
