# Opening MergeCategorization output and replacing buggy trigSF values (> 100) with 1
Needs to be run with ROOT 6.26, as using ROOT 6.30 makes buggy files that cannot be read back with 6.26
Also: RDataFrame.Redefine is not available in this old version, so an ugly Filter hack is needed to change the variable
(maybe due to this : https://github.com/root-project/root/issues/14793)

In [1]:
import ROOT
import JupyROOT.helpers.utils
JupyROOT.helpers.utils.GetGeometryDrawer = lambda:None # magic fix to make ROOT faster
import uproot
from pathlib import Path
import concurrent.futures
# import cppyy

Welcome to JupyROOT 6.24/07


In [2]:
def getSelection(file:str):
    """Build a C++ boolean expression selecting the events with a buggy trigSF.

    Opens the ``Events`` tree of ``file`` with uproot, reads the ``event``,
    ``luminosityBlock`` and ``trigSF`` branches and keeps the entries whose
    ``trigSF`` is above 100.

    Args:
        file: path of the ROOT file (converted with ``str()`` before use).

    Returns:
        A ``(selection, count)`` tuple. ``selection`` is an ``||``-joined chain
        of ``(event == E && luminosityBlock == L)`` terms, or an empty string
        when no entry has to be fixed. ``count`` is the number of terms.

    NOTE(review): doAll catches AttributeError and uproot.KeyInFileError;
    presumably these come from ``ar.trigSF`` on a tree without that branch and
    from a file without an ``Events`` tree respectively - confirm.
    """
    # The context manager closes the file as soon as the arrays are in memory,
    # instead of leaving one open handle per processed file.
    with uproot.open(str(file) + ":Events") as t:
        ar = t.arrays(filter_name=["event", "luminosityBlock", "trigSF"])
    filter_list = [
        f"(event == {evt.event} && luminosityBlock == {evt.luminosityBlock})"
        for evt in ar[ar.trigSF > 100]
    ]
    return "||".join(filter_list), len(filter_list)

def fixFile_v1(file:str):
    """First version of the fix, going through cppyy-declared filter functions.

    For every trigSF branch a C++ function is declared that takes the branch
    by non-const reference, sets it to 1 for the events returned by
    getSelection and always returns true (so no event is dropped). The
    functions are chained as RDataFrame filters and the result is written to
    ``file + ".fixed"``. doAll does not call this function; it uses fixFile.

    Args:
        file: path of the ROOT file to fix.

    Returns:
        False when no event needs fixing (nothing is written), True once the
        ``.fixed`` file has been snapshotted.
    """
    # The module-level "import cppyy" is commented out, so import it locally;
    # PyROOT itself is built on cppyy, hence no extra dependency.
    import cppyy
    import re

    sel, n_fix = getSelection(file)
    if not sel:
        return False
    df = ROOT.RDataFrame("Events", file)
    print(f"Fixing {n_fix} in {file}")
    # Only [A-Za-z0-9_] is legal in a C++ identifier: replace anything else
    # (not just "/") so paths with e.g. "-" still give a valid function name.
    file_cpp_name = re.sub(r"[^0-9A-Za-z_]", "_", file.replace(".root", ""))
    branches = [
        'trigSF',
        'trigSF_single',
        'trigSF_cross',
        'trigSF_muUp',
        'trigSF_muDown',
        'trigSF_eleUp',
        'trigSF_eleDown',
        'trigSF_DM0Up',
        'trigSF_DM1Up',
        'trigSF_DM10Up',
        'trigSF_DM11Up',
        'trigSF_DM0Down',
        'trigSF_DM1Down',
        'trigSF_DM10Down',
        'trigSF_DM11Down',
        'trigSF_vbfjetUp',
        'trigSF_vbfjetDown',
    ]
    for branch in branches:
        # recent ROOT with redefine would simply do:
        #   df = df.Redefine(branch, f"({sel}) ? 1. : {branch}")
        # ugly hack for old ROOT : https://root-forum.cern.ch/t/rdataframe-define-columns-with-same-name/42776/3
        fct_name = f"fakeFilter_{branch}_{file_cpp_name}"
        # "unsigned long long" matches the signature used by fixFile.
        # NOTE(review): presumably needed because the event branch is ULong64_t,
        # which is not the same C++ type as uint64_t on Linux - confirm.
        definition_fct = (
            f"\nbool {fct_name}(unsigned long long event, uint32_t luminosityBlock, double& sf) "
            "{"
            f"if ({sel}) sf = 1.;"
            "return true;}"
        )
        print(definition_fct)
        cppyy.cppdef(definition_fct)
        df = df.Filter(getattr(cppyy.gbl, fct_name), ("event","luminosityBlock", branch))
    df.Snapshot("Events", file + ".fixed")
    print(f"Done {file}")
    return True


def fixFile(file:str):
    """Write a copy of ``file`` in which buggy trigSF values are set to 1.

    The whole RDataFrame chain is generated as C++ source and handed to the
    ROOT interpreter. For each trigSF branch a ``Filter`` lambda receives the
    branch by non-const reference, overwrites it with 1 for the events
    returned by getSelection, and always returns true so that no event is
    dropped. The result is snapshotted to ``file + ".fixed"``.

    Args:
        file: path of the ROOT file to fix. It is pasted verbatim inside a C++
            string literal, so it must not contain ``"`` or backslashes.

    Returns:
        False when no event needs fixing (nothing is written), True after the
        generated code has been passed to ``ROOT.gROOT.ProcessLine``.

    NOTE(review): the outcome of ProcessLine is not checked here; a failure
    only shows up later, when the ``.fixed`` file turns out to be missing.
    """
    sel, n_fix = getSelection(file)
    if not sel:
        return False
    print(f"Fixing {n_fix} in {file}")

    branches = [
        'trigSF',
        'trigSF_single',
        'trigSF_cross',
        'trigSF_muUp',
        'trigSF_muDown',
        'trigSF_eleUp',
        'trigSF_eleDown',
        'trigSF_DM0Up',
        'trigSF_DM1Up',
        'trigSF_DM10Up',
        'trigSF_DM11Up',
        'trigSF_DM0Down',
        'trigSF_DM1Down',
        'trigSF_DM10Down',
        'trigSF_DM11Down',
        'trigSF_vbfjetUp',
        'trigSF_vbfjetDown',
    ]

    # Same lambda source for every branch; only the third column name changes.
    fix_lambda = (
        "[](unsigned long long event, uint32_t luminosityBlock, double& sf) {"
        f"if ({sel}) sf = 1.;"
        "return true;"
        "}"
    )
    filter_chain = "".join(
        f'.Filter({fix_lambda}, {{"event", "luminosityBlock", "{branch}"}})'
        for branch in branches
    )

    cpp_str = (
        f'''
    auto df = ROOT::RDataFrame("Events", "{file}");
    auto df_filtered = df'''
        + filter_chain
        + f';df_filtered.Snapshot("Events", "{file}.fixed");'
    )
    print(cpp_str)
    ROOT.gROOT.ProcessLine(cpp_str)
    print(f"Done {file}")
    return True

def renameFile(file:str):
    """Replace ``file`` with its ``.fixed`` sibling after a size sanity check.

    Args:
        file: path of the original ROOT file; the fixed copy is expected at
            ``file + ".fixed"``.

    Raises:
        RuntimeError: if the fixed copy is smaller than 10% of the original,
            in which case both files are left untouched.
        FileNotFoundError: if either file is missing (raised by ``stat``).
    """
    original = Path(file)
    fixed = Path(file + ".fixed")
    old_size = original.stat().st_size
    new_size = fixed.stat().st_size
    # Integer form of "new / old < 0.1": same threshold, but a 0-byte original
    # no longer triggers a ZeroDivisionError.
    if new_size * 10 < old_size:
        raise RuntimeError(
            f"{file}: fixed copy is suspiciously small "
            f"({new_size} bytes vs {old_size} bytes), original kept"
        )
    # Path.replace overwrites the target in a single step, so the original is
    # never deleted before the fixed copy has taken its place.
    fixed.replace(original)

def doAll(file:str):
    """Fix one file and, if something was fixed, swap the fixed copy in place.

    Args:
        file: path of the ROOT file; it is converted with ``str()`` before
            being handed to fixFile and renameFile.

    Raises:
        RuntimeError: wrapping any exception other than AttributeError and
            uproot.KeyInFileError, with the original exception as its cause.
    """
    print(f"Starting {file}")
    try:
        path = str(file)
        was_fixed = fixFile(path)
        if was_fixed:
            renameFile(path)
    except AttributeError:
        # Silently skipped; the original code labels this case "data".
        # NOTE(review): presumably data files have no trigSF branch - confirm.
        return None
    except uproot.KeyInFileError:
        # empty file
        print(f"WARNING : file {file} is empty")
    except Exception as exc:
        raise RuntimeError(f"Error whilst reading {file}") from exc

<cppyy.CPPOverload at 0x7ff5065f6b50>

In [3]:
# File used for the manual checks in the following cells
test_file = "/scratch/cuisset/cmt/MergeCategorization/ul_2016_ZbbHtt_v12/ZprimeToZH_ZToBB_HToTauTau_M1000/cat_ZbbHtt_elliptical_cut_90/prod_240430/data_0.root"
#getSelection(test_file)

In [29]:
# Show name, type and interpretation of the event, luminosityBlock and trigSF* branches of the test file
uproot.open(test_file+":Events").show(filter_name=["event", "luminosityBlock", "trigSF*"], name_width=40)

name                                     | typename                 | interpretation                
-----------------------------------------+--------------------------+-------------------------------
event                                    | uint64_t                 | AsDtype('>u8')
luminosityBlock                          | uint32_t                 | AsDtype('>u4')
trigSF                                   | double                   | AsDtype('>f8')
trigSF_single                            | double                   | AsDtype('>f8')
trigSF_cross                             | double                   | AsDtype('>f8')
trigSF_muUp                              | double                   | AsDtype('>f8')
trigSF_muDown                            | double                   | AsDtype('>f8')
trigSF_eleUp                             | double                   | AsDtype('>f8')
trigSF_eleDown                           | double                   | AsDtype('>f8')
trigSF_DM0Up                     

In [4]:
# Run the fix on the test file; the bare `s` makes the notebook display the return value
s = fixFile(test_file)
s

Fixing 10 in /scratch/cuisset/cmt/MergeCategorization/ul_2016_ZbbHtt_v12/ZprimeToZH_ZToBB_HToTauTau_M1000/cat_ZbbHtt_elliptical_cut_90/prod_240430/data_0.root

    auto df = ROOT::RDataFrame("Events", "/scratch/cuisset/cmt/MergeCategorization/ul_2016_ZbbHtt_v12/ZprimeToZH_ZToBB_HToTauTau_M1000/cat_ZbbHtt_elliptical_cut_90/prod_240430/data_0.root");
    auto df_filtered = df.Filter([](unsigned long long event, uint32_t luminosityBlock, double& sf) {if ((event == 87566 && luminosityBlock == 95)||(event == 63110 && luminosityBlock == 69)||(event == 67655 && luminosityBlock == 73)||(event == 137991 && luminosityBlock == 149)||(event == 159962 && luminosityBlock == 173)||(event == 177813 && luminosityBlock == 192)||(event == 2420 && luminosityBlock == 3)||(event == 72548 && luminosityBlock == 79)||(event == 108418 && luminosityBlock == 117)||(event == 82073 && luminosityBlock == 89)) sf = 1.;return true;}, {"event", "luminosityBlock", "trigSF"}).Filter([](unsigned long long event, uint32_t 

True

In [9]:
# Print `s`.
# NOTE(review): the recorded output is generated C++ code, so this cell was
# presumably run while fixFile still returned cpp_str - confirm.
print(s)


    auto df = ROOT::RDataFrame("Events", "/scratch/cuisset/cmt/MergeCategorization/ul_2016_ZbbHtt_v12/ZprimeToZH_ZToBB_HToTauTau_M1000/cat_ZbbHtt_elliptical_cut_90/prod_240430/data_0.root");
    auto df_filtered = df.Filter([](uint64_t event, uint32_t luminosityBlock, double& sf) {if ((event == 87566 && luminosityBlock == 95)||(event == 63110 && luminosityBlock == 69)||(event == 67655 && luminosityBlock == 73)||(event == 137991 && luminosityBlock == 149)||(event == 159962 && luminosityBlock == 173)||(event == 177813 && luminosityBlock == 192)||(event == 2420 && luminosityBlock == 3)||(event == 72548 && luminosityBlock == 79)||(event == 108418 && luminosityBlock == 117)||(event == 82073 && luminosityBlock == 89)) sf = 1.;return true;}, {"event", "luminosityBlock", "trigSF"}).Filter([](uint64_t event, uint32_t luminosityBlock, double& sf) {if ((event == 87566 && luminosityBlock == 95)||(event == 63110 && luminosityBlock == 69)||(event == 67655 && luminosityBlock == 73)||(event == 137991

In [18]:
# Put the ".fixed" copy in place of the original test file
renameFile(test_file)

In [11]:
# List the data_*.root files matching the cat_ZbbHtt_elliptical_cut_90 / prod_240430 glob
list(Path("/scratch/cuisset/cmt/MergeCategorization").glob("ul_*_v12/*/cat_ZbbHtt_elliptical_cut_90/prod_240430/data_*.root"))

[PosixPath('/scratch/cuisset/cmt/MergeCategorization/ul_2018_ZbbHtt_v12/st_antitop/cat_ZbbHtt_elliptical_cut_90/prod_240430/data_0.root'),
 PosixPath('/scratch/cuisset/cmt/MergeCategorization/ul_2018_ZbbHtt_v12/zz_dl/cat_ZbbHtt_elliptical_cut_90/prod_240430/data_0.root'),
 PosixPath('/scratch/cuisset/cmt/MergeCategorization/ul_2018_ZbbHtt_v12/tt_fh/cat_ZbbHtt_elliptical_cut_90/prod_240430/data_0.root'),
 PosixPath('/scratch/cuisset/cmt/MergeCategorization/ul_2018_ZbbHtt_v12/st_top/cat_ZbbHtt_elliptical_cut_90/prod_240430/data_0.root'),
 PosixPath('/scratch/cuisset/cmt/MergeCategorization/ul_2018_ZbbHtt_v12/ttz_llnunu/cat_ZbbHtt_elliptical_cut_90/prod_240430/data_0.root'),
 PosixPath('/scratch/cuisset/cmt/MergeCategorization/ul_2018_ZbbHtt_v12/tt_sl/cat_ZbbHtt_elliptical_cut_90/prod_240430/data_7.root'),
 PosixPath('/scratch/cuisset/cmt/MergeCategorization/ul_2018_ZbbHtt_v12/tt_sl/cat_ZbbHtt_elliptical_cut_90/prod_240430/data_9.root'),
 PosixPath('/scratch/cuisset/cmt/MergeCategorizatio

In [29]:
# Run the full fix-and-rename chain on a single file
doAll("/scratch/cuisset/cmt/MergeCategorization/ul_2016_ZbbHtt_v12/ZprimeToZH_ZToBB_HToTauTau_M1000/cat_ZbbHtt_elliptical_cut_90/prod_240430/data_1.root")

Starting /scratch/cuisset/cmt/MergeCategorization/ul_2016_ZbbHtt_v12/ZprimeToZH_ZToBB_HToTauTau_M1000/cat_ZbbHtt_elliptical_cut_90/prod_240430/data_1.root


FileNotFoundError: file not found

    '/scratch/cuisset/cmt/MergeCategorization/ul_2016_ZbbHtt_v12/ZprimeToZH_ZToBB_HToTauTau_M1000/cat_ZbbHtt_elliptical_cut_90/prod_240430/data_1.root'

Files may be specified as:
   * str/bytes: relative or absolute filesystem path or URL, without any colons
         other than Windows drive letter or URL schema.
         Examples: "rel/file.root", "C:\abs\file.root", "http://where/what.root"
   * str/bytes: same with an object-within-ROOT path, separated by a colon.
         Example: "rel/file.root:tdirectory/ttree"
   * pathlib.Path: always interpreted as a filesystem path or URL only (no
         object-within-ROOT path), regardless of whether there are any colons.
         Examples: Path("rel:/file.root"), Path("/abs/path:stuff.root")

Functions that accept many files (uproot.iterate, etc.) also allow:
   * glob syntax in str/bytes and pathlib.Path.
         Examples: Path("rel/*.root"), "/abs/*.root:tdirectory/ttree"
   * dict: keys are filesystem paths, values are objects-within-ROOT paths.
         Example: {"/data_v1/*.root": "ttree_v1", "/data_v2/*.root": "ttree_v2"}
   * already-open TTree objects.
   * iterables of the above.


In [10]:
# Sequential (single-process) pass over every file matching the 2016 elliptical-cut glob
merge_dir = Path("/scratch/cuisset/cmt/MergeCategorization")
glob_2016 = "ul_2016_*_v12/*/cat_*_elliptical_cut_90/prod_240522/data_*.root"
for i, file in enumerate(merge_dir.glob(glob_2016)):
    # Debugging aid: use getSelection(file)[1] > 0 here to only list the files needing a fix
    doAll(file)

Starting /scratch/cuisset/cmt/MergeCategorization/ul_2016_ZbbHtt_v12/ggH_ZZ/cat_ZbbHtt_elliptical_cut_90/prod_240430/data_0.root
Starting /scratch/cuisset/cmt/MergeCategorization/ul_2016_ZbbHtt_v12/ggf_sm/cat_ZbbHtt_elliptical_cut_90/prod_240430/data_0.root
Starting /scratch/cuisset/cmt/MergeCategorization/ul_2016_ZbbHtt_v12/ttzz/cat_ZbbHtt_elliptical_cut_90/prod_240430/data_0.root
Starting /scratch/cuisset/cmt/MergeCategorization/ul_2016_ZbbHtt_v12/ttzh/cat_ZbbHtt_elliptical_cut_90/prod_240430/data_0.root
Starting /scratch/cuisset/cmt/MergeCategorization/ul_2016_ZbbHtt_v12/zh_zbb_htt_signal/cat_ZbbHtt_elliptical_cut_90/prod_240430/data_0.root
Starting /scratch/cuisset/cmt/MergeCategorization/ul_2016_ZbbHtt_v12/zz_sl/cat_ZbbHtt_elliptical_cut_90/prod_240430/data_0.root
Starting /scratch/cuisset/cmt/MergeCategorization/ul_2016_ZbbHtt_v12/zh_zbb_htt_background/cat_ZbbHtt_elliptical_cut_90/prod_240430/data_0.root
Starting /scratch/cuisset/cmt/MergeCategorization/ul_2016_ZbbHtt_v12/wminush

In [5]:
# Fix every file matching the ZttHbb orthogonal-cut glob, using 20 worker processes
zttHbb_files = list(
    Path("/scratch/cuisset/cmt/MergeCategorization").glob(
        "ul_*_*_v12/*/cat_ZttHbb_orthogonal_cut_90_*/prod_240522/data_*.root"
    )
)
with concurrent.futures.ProcessPoolExecutor(max_workers=20) as exc:
    # Consuming the result iterator re-raises in this process any exception thrown by a worker
    list(exc.map(doAll, zttHbb_files))

Starting /scratch/cuisset/cmt/MergeCategorization/ul_2017_ZttHbb_v12/ggH_ZZ/cat_ZttHbb_orthogonal_cut_90_boosted_noPNet/prod_240522/data_0.root
Starting /scratch/cuisset/cmt/MergeCategorization/ul_2017_ZttHbb_v12/ggH_ZZ/cat_ZttHbb_orthogonal_cut_90_resolved_1b/prod_240522/data_0.root
Starting /scratch/cuisset/cmt/MergeCategorization/ul_2017_ZttHbb_v12/ggH_ZZ/cat_ZttHbb_orthogonal_cut_90_resolved_2b/prod_240522/data_0.root
Starting /scratch/cuisset/cmt/MergeCategorization/ul_2017_ZttHbb_v12/ggH_ZZ/cat_ZttHbb_orthogonal_cut_90_CR_boosted_noPNet/prod_240522/data_0.root
Starting /scratch/cuisset/cmt/MergeCategorization/ul_2017_ZttHbb_v12/ggH_ZZ/cat_ZttHbb_orthogonal_cut_90_CR_resolved_2b/prod_240522/data_0.root
Starting /scratch/cuisset/cmt/MergeCategorization/ul_2017_ZttHbb_v12/ggH_ZZ/cat_ZttHbb_orthogonal_cut_90_CR_resolved_1b/prod_240522/data_0.root
Starting /scratch/cuisset/cmt/MergeCategorization/ul_2017_ZttHbb_v12/ggH_ZZ/cat_ZttHbb_orthogonal_cut_90_CR/prod_240522/data_0.root
Startin