In [1]:
import json
import pandas as pd

In [2]:
# Collect the unique dataset names listed in datasets_all.txt. Each non-blank
# line is split on "/" and the second field is kept (for a line of the form
# "/A/B/C" that is "A").
with open('datasets_all.txt','r') as f:
    # A set removes duplicates; sorted() then fixes the order so that every
    # kernel session produces the same list (plain set iteration order for
    # strings changes between interpreter runs). Blank lines are skipped
    # because they have no second "/"-separated field.
    datasets = sorted({l.split("/")[1] for l in f.read().splitlines() if l.strip()})

In [3]:
# Write the dataset names as double-quoted entries, one per line, separated by
# ",\n" with no trailing separator after the final entry.
# str.join places the separator purely by position, so the result stays correct
# even if a name equal to the last one appears earlier in the list (a value
# comparison against datasets[-1] would drop that separator).
output = ",\n".join(f'"{d}"' for d in datasets)
with open("datasets_unique.txt",'w') as f:
    f.write(output)

## Read the XSDB JSON dump

In [4]:
# Load the XSDB dump into a DataFrame; its columns are listed by the next cell.
df = pd.read_json("xsdb_dump.json")

In [5]:
# Show the available fields. The cells below use process_name, MCM,
# cross_section and total_uncertainty.
df.columns

Index(['id', 'process_name', 'status', 'cross_section', 'total_uncertainty',
       'accuracy', 'DAS', 'MCM', 'equivalent_lumi', 'fraction_negative_weight',
       'shower', 'energy', 'comments', 'modifiedOn', 'createdOn', 'modifiedBy',
       'createdBy', 'isValid', 'other_uncertainty', 'contact', 'reweighting',
       'cuts', 'kFactor', 'matrix_generator', 'refs', 'discussion',
       'approvedBy'],
      dtype='object')

In [6]:
# For every dataset, list the campaign tags available in XSDB.
# The campaign tag is the second "-"-separated token of the MCM string; it is
# computed once for the whole frame instead of once per dataset.
mcm_campaign = df.MCM.str.split("-").str[1]

dataset_opts = {}
for d in datasets:
    tags = mcm_campaign[df.process_name == d].to_list()
    # Rows whose MCM is missing or contains no "-" yield NaN/None here, and
    # `'UL' in <float>` raises TypeError, so only real strings are tested.
    dataset_opts[d] = [s for s in tags if isinstance(s, str) and 'UL' in s]
    if len(dataset_opts[d]) == 0:
        # No UL campaign entry was found for this dataset.
        print("bad:",d)

In [7]:
# Flatten the per-dataset campaign lists into one list, then display the
# distinct campaign tags that occur anywhere.
all_opts = [tag for tags in dataset_opts.values() for tag in tags]
list(set(all_opts))

['RunIISummer20UL17MiniAODv2', 'RunIISummer20UL16MiniAOD']

In [8]:
# Build {dataset: {'xs': cross_section, 'uncert': total_uncertainty}} using one
# XSDB row per dataset.
final_xsdb = {}
# Second "-"-separated token of the MCM string, computed once for all rows.
campaign_tag = df.MCM.str.split("-").str[1]
for d,avail in dataset_opts.items():
    # Use the UL17 MiniAODv2 entry when the dataset has one, otherwise fall
    # back to the UL16 MiniAOD entry.
    campaign = 'RunIISummer20UL17MiniAODv2' if 'RunIISummer20UL17MiniAODv2' in avail else 'RunIISummer20UL16MiniAOD'
    dfsub = df[(df.process_name==d)&(campaign_tag == campaign)]
    if len(dfsub) == 0:
        # Nothing matches (e.g. the dataset has no UL entry at all): report it
        # and move on instead of failing with IndexError on the lookups below.
        print("bad (no matching xsdb entry):",d)
        continue
    if len(dfsub) > 1:
        # Ambiguous match; the first row is used.
        print("bad:",d)
    xs_dict = {
        'xs': dfsub['cross_section'].to_list()[0],
        'uncert': dfsub['total_uncertainty'].to_list()[0],
    }
    final_xsdb[d] = xs_dict

In [9]:
# Save the dataset -> {'xs', 'uncert'} mapping as JSON indented by 4 spaces.
with open('bkg_mc_xsecDB.json','w') as json_file:
    json.dump(obj=final_xsdb, fp=json_file, indent=4)

## Make table for LaTeX

In [28]:
import os
def get_table(era, list_dir="..", xsdb=None):
    """Build LaTeX table rows (dataset name and cross section) for one era.

    Every file in ``list_dir`` whose name ends with ``f"{era}.txt"`` is read,
    in sorted filename order. Each non-blank line is split on "/" and its
    second field is taken as the dataset name. For each file a dashed
    separator line is emitted, followed by one row per dataset (sorted by
    name) of the form ``& <name with "_" escaped as "\\_"> & <xs> \\\\``.

    Parameters
    ----------
    era : str
        Era tag used to select the list files, e.g. ``"UL17"``.
    list_dir : str, optional
        Directory holding the list files. Defaults to ``".."``.
    xsdb : dict, optional
        Mapping ``dataset -> {'xs': value, ...}``. When omitted, the
        module-level ``final_xsdb`` is used.

    Returns
    -------
    str
        The concatenated separator and row lines, each ending in a newline.

    Raises
    ------
    KeyError
        If a dataset found in a list file has no entry in ``xsdb``.
    """
    if xsdb is None:
        # Resolved at call time so the function follows later rebuilds of the
        # notebook-level mapping.
        xsdb = final_xsdb
    separator = "----------------------------------------------------\n"
    # endswith (rather than a substring test) keeps files such as
    # "<name>UL17.txt.bak" from being picked up.
    files = sorted(f for f in os.listdir(list_dir) if f.endswith(f"{era}.txt"))
    pieces = []
    for f in files:
        pieces.append(separator)
        with open(os.path.join(list_dir, f),'r') as fin:
            # Blank lines have no second "/"-separated field, so skip them.
            dsets = sorted(k.split("/")[1] for k in fin.read().splitlines() if k.strip())
        for d in dsets:
            a = d.replace("_","\\_")  # escape underscores for LaTeX
            pieces.append(f"& {a} & {xsdb[d]['xs']} \\\\"+"\n")
    return "".join(pieces)

In [34]:
# Build the table text for each of the three eras.
table16, table16APV, table17 = (
    get_table(era_tag) for era_tag in ("UL16", "UL16APV", "UL17")
)

In [35]:
# Print the table text built by get_table for era "UL17".
print(table17)

----------------------------------------------------
& DYJetsToLL\_M-50\_TuneCP5\_13TeV-madgraphMLM-pythia8 & 5379.0 \\
----------------------------------------------------
& WWW\_4F\_TuneCP5\_13TeV-amcatnlo-pythia8 & 0.2158 \\
& WWZ\_4F\_TuneCP5\_13TeV-amcatnlo-pythia8 & 0.1707 \\
& WW\_TuneCP5\_13TeV-pythia8 & 76.25 \\
& WZZ\_TuneCP5\_13TeV-amcatnlo-pythia8 & 0.05709 \\
& WZ\_TuneCP5\_13TeV-pythia8 & 27.55 \\
& ZZZ\_TuneCP5\_13TeV-amcatnlo-pythia8 & 0.01476 \\
& ZZ\_TuneCP5\_13TeV-pythia8 & 12.23 \\
----------------------------------------------------
& QCD\_HT1000to1500\_TuneCP5\_13TeV-madgraphMLM-pythia8 & 1122.0 \\
& QCD\_HT100to200\_TuneCP5\_13TeV-madgraphMLM-pythia8 & 23500000.0 \\
& QCD\_HT1500to2000\_TuneCP5\_13TeV-madgraphMLM-pythia8 & 109.4 \\
& QCD\_HT2000toInf\_TuneCP5\_13TeV-madgraphMLM-pythia8 & 21.74 \\
& QCD\_HT200to300\_TuneCP5\_13TeV-madgraphMLM-pythia8 & 1552000.0 \\
& QCD\_HT300to500\_TuneCP5\_13TeV-madgraphMLM-pythia8 & 321100.0 \\
& QCD\_HT500to700\_TuneCP5\_13Te

In [31]:
# Print the table text built by get_table for era "UL16APV".
# NOTE(review): this cell's execution count (31) is lower than that of the cell
# assigning table16APV (34), so the saved output may predate the current
# definition. Re-run to confirm.
print(table16APV)

----------------------------------------------------
& DYJetsToLL\_M-50\_TuneCP5\_13TeV-madgraphMLM-pythia8 & 5379.0 \\
----------------------------------------------------
& WWW\_4F\_TuneCP5\_13TeV-amcatnlo-pythia8 & 0.2158 \\
& WWZ\_4F\_TuneCP5\_13TeV-amcatnlo-pythia8 & 0.1707 \\
& WW\_TuneCP5\_13TeV-pythia8 & 76.25 \\
& WZZ\_TuneCP5\_13TeV-amcatnlo-pythia8 & 0.05709 \\
& WZ\_TuneCP5\_13TeV-pythia8 & 27.55 \\
& ZZZ\_TuneCP5\_13TeV-amcatnlo-pythia8 & 0.01476 \\
& ZZ\_TuneCP5\_13TeV-pythia8 & 12.23 \\
----------------------------------------------------
& QCD\_HT1000to1500\_TuneCP5\_PSWeights\_13TeV-madgraphMLM-pythia8 & 1118.0 \\
& QCD\_HT100to200\_TuneCP5\_PSWeights\_13TeV-madgraphMLM-pythia8 & 23630000.0 \\
& QCD\_HT1500to2000\_TuneCP5\_PSWeights\_13TeV-madgraphMLM-pythia8 & 108.9 \\
& QCD\_HT2000toInf\_TuneCP5\_PSWeights\_13TeV-madgraphMLM-pythia8 & 21.93 \\
& QCD\_HT200to300\_TuneCP5\_PSWeights\_13TeV-madgraphMLM-pythia8 & 1554000.0 \\
& QCD\_HT300to500\_TuneCP5\_PSWeights\_13TeV-