In [1]:
import zdb
import glob
import os
import oyaml as yaml

In [2]:
# Print the help text (signature and defaults) of zdb.modules.skim for reference.
help(zdb.modules.skim)

Help on function skim in module zdb.modules.skim:

skim(config, mode='multiprocessing', ncores=0, nfiles=-1, batch_opts='', output=None, chunksize=250000)



## Functions

In [3]:
def generate_config(outpath, selection, tables, filepaths):
    """Write a skim configuration file in YAML format.

    Parameters
    ----------
    outpath : str
        Destination of the YAML file. Missing parent directories are created.
    selection : iterable of str
        Individual cut expressions. Each one is wrapped in parentheses and
        all of them are combined with ``&`` into a single selection string.
    tables : list of str
        Stored unchanged under the ``tables`` key.
    filepaths : str
        Glob pattern; the matching paths are stored, sorted, under ``files``.
    """
    cfg = {
        "selection": "(" + ") & (".join(selection) + ")",
        "tables": tables,
        "files": sorted(glob.glob(filepaths)),
    }

    # Create the target directory if needed so that writing the file does not
    # depend on the directory having been made beforehand.
    outdir = os.path.dirname(outpath)
    if outdir:
        os.makedirs(outdir, exist_ok=True)

    with open(outpath, "w") as f:
        yaml.dump(cfg, f, indent=4)

## Configs

In [4]:
# Directory holding the generated skim configs; pure-Python equivalent of
# `mkdir -p`, so the cell does not depend on a POSIX shell being available.
os.makedirs("skims", exist_ok=True)

In [5]:
# Base directory of the input databases; each sample type has its own sub-directory.
DB_DIR = "/vols/cms/sdb15/Analysis/ZinvWidth/databases/2019/08_Aug/28_Legacy"

# Cuts shared by the data and MC selections. They are split in two lists
# because the data selection inserts an extra flag between them; the order of
# the entries fixes the order of the terms in the generated selection string.
COMMON_FLAGS = [
    "Flag_goodVertices",
    "Flag_globalSuperTightHalo2016Filter",
    "Flag_HBHENoiseFilter",
    "Flag_HBHENoiseIsoFilter",
    "Flag_EcalDeadCellTriggerPrimitiveFilter",
    "Flag_BadPFMuonFilter",
]
COMMON_CUTS = [
    "MET_dCaloMET<0.6",
    "nJetSelection>0",
    "nJetSelection==nJetVeto",
    "LeadJetSelection_chHEF>0.1",
    "LeadJetSelection_neHEF<0.8",
    "LeadJetSelection_pt>200.",
]

DATA_SELECTION = (
    ["IsCertified"]
    + COMMON_FLAGS
    + ["Flag_eeBadScFilter"]
    + COMMON_CUTS
    + ["nPhotonVeto==0", "nBJetVeto==0"]
)

# The nominal MC config and every MC variation use exactly the same selection.
MC_SELECTION = (
    ["(parent!='EWKV2Jets' | nGenBosonSelection==1)"]
    + COMMON_FLAGS
    + COMMON_CUTS
)

# Suffixes 10, 20, ..., 90 of the jes / jer / unclust variation tables.
VARIATION_STEPS = range(10, 100, 10)

# Up/down table pair for each varied lepton/photon scale.
LEPSCALE_TABLES = [
    "Events_{}{}".format(scale, direction)
    for scale in ("eleEnergyScale", "muonPtScale", "photonEnergyScale", "tauPtScale")
    for direction in ("up", "down")
]

# (config name, database sub-directory, selection, tables)
SKIM_CONFIGS = [
    ("data", "Data", DATA_SELECTION, ["Events"]),
    ("mc", "MC", MC_SELECTION, ["Events"]),
    ("mc_jes", "MC_jes", MC_SELECTION,
     ["Events_jesTotal{}".format(step) for step in VARIATION_STEPS]),
    ("mc_jer", "MC_jer", MC_SELECTION,
     ["Events_jerSF{}".format(step) for step in VARIATION_STEPS]),
    ("mc_unclust", "MC_unclust", MC_SELECTION,
     ["Events_unclust{}".format(step) for step in VARIATION_STEPS]),
    ("mc_lepscales", "MC_lepscales", MC_SELECTION, LEPSCALE_TABLES),
]

# Write skims/<config name>.yaml for every entry above.
for name, subdir, selection, tables in SKIM_CONFIGS:
    generate_config(
        "skims/{}.yaml".format(name),
        selection,
        tables,
        "{}/{}/result_*.h5".format(DB_DIR, subdir),
    )

In [6]:
# Output directory for the skimmed databases. exist_ok=True makes the cell
# safe to re-run and avoids the race between checking for and creating the
# directory.
skim_dir = "/vols/cms/sdb15/Analysis/ZinvWidth/databases/skims/2019/09_Sep/14_skims/"
os.makedirs(skim_dir, exist_ok=True)

In [12]:
# Disabled cell: the zdb.modules.multi_skim call over the six generated
# configs (mode='sge', outputs under skim_dir) is commented out, so running
# this cell only executes `pass` and does nothing.
# NOTE(review): presumably left disabled to avoid resubmitting the jobs on a
# re-run - confirm; uncomment the call below to actually run the skims.
#zdb.modules.multi_skim(
#    ["skims/data.yaml", "skims/mc.yaml", "skims/mc_jes.yaml", "skims/mc_jer.yaml", "skims/mc_unclust.yaml", "skims/mc_lepscales.yaml"],
#    outputs=[
#        os.path.join(skim_dir, "data/result_{:05d}.h5"),
#        os.path.join(skim_dir, "mc/result_{:05d}.h5"),
#        os.path.join(skim_dir, "mc_jes/result_{:05d}.h5"),
#        os.path.join(skim_dir, "mc_jer/result_{:05d}.h5"),
#        os.path.join(skim_dir, "mc_unclust/result_{:05d}.h5"),
#        os.path.join(skim_dir, "mc_lepscales/result_{:05d}.h5"),
#    ],
#    mode='sge',
#    ncores=100,
#    batch_opts="-q hep.q -l h_rt=3:0:0 -l h_vmem=12G",
#    chunksize=250_000,
#)
pass