In [2]:
# Run-mode switch read by the cells below: True takes the local ROOT RDataFrame
# path, False takes the PyRDF path with the "spark" backend.
debug = True

import ROOT as R

if debug:
    # Implicit multithreading is only enabled for the local ROOT RDataFrame path.
    R.ROOT.EnableImplicitMT()

import os
from pprint import pprint
import pandas as pd 


In [3]:
import PyRDF
# Outside debug mode, switch PyRDF to its "spark" backend; in debug mode the
# backend is left at whatever PyRDF uses by default.
if not debug:
    PyRDF.use("spark")
# Register the helper header with PyRDF (done in both modes).
PyRDF.include_headers("headers.hh")

# Show which RDataFrame operations the currently selected backend supports.
print(PyRDF.current_backend.supported_operations)

['Define', 'Filter', 'Histo1D', 'Histo2D', 'Histo3D', 'Profile1D', 'Profile2D', 'Profile3D', 'Count', 'Min', 'Max', 'Mean', 'Sum', 'Fill', 'Report', 'Graph']


In [4]:
# Local ("interactive") ROOT RDataFrame only: load headers.hh directly into the
# ROOT interpreter with the `.L` command. The PyRDF path registers the same
# header through PyRDF.include_headers instead.
if debug:
    R.gInterpreter.ProcessLine(".L headers.hh")

In [4]:
# sample version
version = "v900"

# input: plot config tag
config_dir = "Full2018v6s5"

# Cut whose selected events are exported; its name is also the prefix of the
# exported variable columns and a component of the output path.
cut = "res_sig_mjjincl"
#cut = "boos_sig_mjjincl"

# selection
#samples = ["DATA","VBS", "VBF-V", "Wjets", "singleTop", "ttbar",  "VV","DY","FakeQCD", "VVV"]
samples = ["VBS", "top", "DY", "Wjets", "VV", "VBF-V"]
# Weight columns placed in front of the exported MC columns.
weights = ["weight_", "XSWeight"]

# output: samples directory -> <basedir>/<config_dir>/<cut>/samples/<version>
output_basedir = "/eos/user/d/dmapelli/public/latino/"
output_dir = os.path.join(output_basedir, config_dir, cut, "samples", version)

# `os` is already imported in the notebook's first cell, so it is not
# re-imported here. exist_ok=True keeps this cell safe to re-run.
os.makedirs(output_dir, exist_ok=True)

In [5]:
# Load IPython's autoreload extension, which provides the %autoreload magic.
%load_ext autoreload

In [6]:
# Mode 2: reload all modules before each execution, so edits to imported
# modules are picked up without restarting the kernel.
%autoreload 2

In [7]:
import latinos_rdf as lrdf

In [8]:
config = lrdf.ConfigReader(config_dir, version)

# MC output columns: the weight columns first, followed by every configured
# variable prefixed with the cut name ("<cut>_<variable>").
# NOTE(review): no columns_DATA list is built here (its old assignment was
# commented out) -- one must be defined before DATA samples are exported.
columns_MC = weights + ["_".join((cut, variable)) for variable in config.variables]

print(columns_MC)

['weight_', 'XSWeight', 'boos_sig_mjjincl_events', 'boos_sig_mjjincl_Lepton_eta', 'boos_sig_mjjincl_Lepton_pt', 'boos_sig_mjjincl_Lepton_flavour', 'boos_sig_mjjincl_PuppiMET', 'boos_sig_mjjincl_PuppiMET_phi', 'boos_sig_mjjincl_MET_pt', 'boos_sig_mjjincl_mjj_vbs', 'boos_sig_mjjincl_deltaeta_vbs', 'boos_sig_mjjincl_mjj_vjet', 'boos_sig_mjjincl_vjet_0_pt', 'boos_sig_mjjincl_vjet_1_pt', 'boos_sig_mjjincl_vbs_etaprod', 'boos_sig_mjjincl_vbs_0_pt', 'boos_sig_mjjincl_vbs_1_pt', 'boos_sig_mjjincl_vbs_0_eta', 'boos_sig_mjjincl_vbs_1_eta', 'boos_sig_mjjincl_vjet_0_eta', 'boos_sig_mjjincl_vjet_1_eta', 'boos_sig_mjjincl_deltaphi_vbs', 'boos_sig_mjjincl_vbs_index_0', 'boos_sig_mjjincl_vbs_index_1', 'boos_sig_mjjincl_vjet_index_0', 'boos_sig_mjjincl_vjet_index_1', 'boos_sig_mjjincl_Zvjets_0', 'boos_sig_mjjincl_Zvjets_1', 'boos_sig_mjjincl_Zlep', 'boos_sig_mjjincl_Asym_vbs', 'boos_sig_mjjincl_Asym_vjet', 'boos_sig_mjjincl_Mw_lep_reco', 'boos_sig_mjjincl_Mtw_lep', 'boos_sig_mjjincl_w_lep_pt', 'boos_si

In [9]:
# Collect one (tree, number_of_files) job per tree returned for each sample.
joblist = []

for sample in samples:
    print(sample)
    # debug -> ROOT RDataFrame "interactive"; otherwise PyRDF (Spark)
    backend_args = (R, "root") if debug else (PyRDF, "pyrdf")
    trees, nfiles = lrdf.build_dataframe(config_dir, version, sample, *backend_args)
    joblist.extend(zip(trees, nfiles))

# Order the jobs by file count, largest first (ties keep their insertion order).
jobslist = sorted(joblist, key=lambda job: job[1], reverse=True)


VBS
top
DY
Wjets
VV
VBF-V


In [10]:
# Export loop: materialise the events passing `cut` for every job as a pandas
# DataFrame and pickle it to `output_dir` as "<tree.name>_part<N>.pkl".
# `dfs` maps a tree name to the number of parts written for it so far, so that
# several trees sharing a name get consecutive part numbers.
dfs = {}
for tree, _ in jobslist:
    print(tree)
    # Choose the output columns from the tree being processed. The previous
    # check used `sample`, a variable leaked from the job-building loop that
    # always holds the last sample name, so it never described this tree.
    # NOTE(review): assumes data trees carry the name "DATA" (tree.name prints
    # as the sample label for MC) and that `columns_DATA` is defined before
    # DATA is added to `samples` -- confirm both.
    outputcols = columns_DATA if tree.name == "DATA" else columns_MC

    df = pd.DataFrame(tree[cut].rdf_node.AsNumpy(columns=outputcols))
    print(tree.name, df.shape)

    # Next part number for this tree name: 1 on first sight, then 2, 3, ...
    part = dfs.get(tree.name, 0) + 1
    df.to_pickle(os.path.join(output_dir, tree.name + f"_part{part}.pkl"))
    dfs[tree.name] = part
            
        

Tree: top
name: supercut
parent: None
cut: (                   ( ( (abs(Lepton_pdgId[0])==11) && (Lepton_pt[0]>40) ) ||                     ( (abs(Lepton_pdgId[0])==13) && (Lepton_pt[0]>30) ) )                       && vbs_0_pt > 30 && vbs_1_pt > 30                     && deltaeta_vbs >= 2                      && PuppiMET_pt > 30                    )
vars: 
aliases: bVeto,bReq,bVetoSF,bReqSF,btagSF,isTTbar,isSingleTop,topGenPtOTF,antitopGenPtOTF,Top_pTrw
weight: (XSWeight*puWeight*              TriggerEffWeight_1l*              Lepton_RecoSF[0]*              EMTFbug_veto*Lepton_tightElectron_mvaFall17V1Iso_WP90_IdIsoSF[0]*                Lepton_tightMuon_cut_Tight_HWWW_IdIsoSF[0]*(Lepton_isTightElectron_mvaFall17V1Iso_WP90[0]>0.5 || Lepton_isTightMuon_cut_Tight_HWWW[0]>0.5)* btagSF*METFilter_MC*Lepton_genmatched[0]* Top_pTrw)
--------------------------------------------------------------------------------
name: res_sig_mjjincl
parent: supercut
cut: VBS_category==1             && vjet_0

DY (2, 43)
Tree: DY
name: supercut
parent: None
cut: (                   ( ( (abs(Lepton_pdgId[0])==11) && (Lepton_pt[0]>40) ) ||                     ( (abs(Lepton_pdgId[0])==13) && (Lepton_pt[0]>30) ) )                       && vbs_0_pt > 30 && vbs_1_pt > 30                     && deltaeta_vbs >= 2                      && PuppiMET_pt > 30                    )
vars: 
aliases: bVeto,bReq,bVetoSF,bReqSF,btagSF
weight: (XSWeight*puWeight*              TriggerEffWeight_1l*              Lepton_RecoSF[0]*              EMTFbug_veto*Lepton_tightElectron_mvaFall17V1Iso_WP90_IdIsoSF[0]*                Lepton_tightMuon_cut_Tight_HWWW_IdIsoSF[0]*(Lepton_isTightElectron_mvaFall17V1Iso_WP90[0]>0.5 || Lepton_isTightMuon_cut_Tight_HWWW[0]>0.5)* btagSF*METFilter_MC*Lepton_genmatched[0]*(Sum(GenPart_pdgId == 22 && OddVec(GenPart_statusFlags) && GenPart_pt > 20.) == 0))*((1.)*(((0.632927+0.0456956*gen_ptll-0.00154485*gen_ptll*gen_ptll+2.64397e-05*gen_ptll*gen_ptll*gen_ptll-2.19374e-07*gen_ptll*gen_ptll*g

Using 50 partition(s)
  filelist, self.friend_info)


Wjets (0, 43)
Tree: VBS
name: supercut
parent: None
cut: (                   ( ( (abs(Lepton_pdgId[0])==11) && (Lepton_pt[0]>40) ) ||                     ( (abs(Lepton_pdgId[0])==13) && (Lepton_pt[0]>30) ) )                       && vbs_0_pt > 30 && vbs_1_pt > 30                     && deltaeta_vbs >= 2                      && PuppiMET_pt > 30                    )
vars: 
aliases: bVeto,bReq,bVetoSF,bReqSF,btagSF
weight: (XSWeight*puWeight*              TriggerEffWeight_1l*              Lepton_RecoSF[0]*              EMTFbug_veto*Lepton_tightElectron_mvaFall17V1Iso_WP90_IdIsoSF[0]*                Lepton_tightMuon_cut_Tight_HWWW_IdIsoSF[0]*(Lepton_isTightElectron_mvaFall17V1Iso_WP90[0]>0.5 || Lepton_isTightMuon_cut_Tight_HWWW[0]>0.5)* btagSF*METFilter_MC*Lepton_genmatched[0])
--------------------------------------------------------------------------------
name: res_sig_mjjincl
parent: supercut
cut: VBS_category==1             && vjet_0_pt > 30 && vjet_1_pt > 30             && mjj_vjet > 

Using 26 partition(s)
  filelist, self.friend_info)


DY (0, 43)
Tree: VBF-V
name: supercut
parent: None
cut: (                   ( ( (abs(Lepton_pdgId[0])==11) && (Lepton_pt[0]>40) ) ||                     ( (abs(Lepton_pdgId[0])==13) && (Lepton_pt[0]>30) ) )                       && vbs_0_pt > 30 && vbs_1_pt > 30                     && deltaeta_vbs >= 2                      && PuppiMET_pt > 30                    )
vars: 
aliases: bVeto,bReq,bVetoSF,bReqSF,btagSF
weight: (XSWeight*puWeight*              TriggerEffWeight_1l*              Lepton_RecoSF[0]*              EMTFbug_veto*Lepton_tightElectron_mvaFall17V1Iso_WP90_IdIsoSF[0]*                Lepton_tightMuon_cut_Tight_HWWW_IdIsoSF[0]*(Lepton_isTightElectron_mvaFall17V1Iso_WP90[0]>0.5 || Lepton_isTightMuon_cut_Tight_HWWW[0]>0.5)* btagSF*METFilter_MC*Lepton_genmatched[0])
--------------------------------------------------------------------------------
name: res_sig_mjjincl
parent: supercut
cut: VBS_category==1             && vjet_0_pt > 30 && vjet_1_pt > 30             && mjj_vjet > 6

