In [1]:
import os
import time
import glob
import re
from functools import reduce
import numpy as np
import uproot
import uproot_methods
import awkward
import pandas as pd
from klepto.archives import dir_archive


import coffea.processor as processor
from coffea.processor.accumulator import AccumulatorABC
from coffea import hist

%matplotlib inline
import matplotlib.pyplot as plt

In [3]:
#define processor

class WHhadProcessor(processor.ProcessorABC):
    """Coffea processor that books and fills the WH-hadronic histograms.

    Every histogram has a ``dataset`` category axis plus one variable axis.
    The preselection requires MET_pt > 250, exactly four good jets, exactly
    two b-tagged jets and every ``bb_mass`` entry of the event strictly
    between 90 and 150.  Histograms whose name ends in ``_n1`` ("N-1") use
    the same preselection with the cut on the plotted quantity left out.
    """

    def __init__(self):
        # Variable axes shared between the histograms booked below.
        dataset_axis         = hist.Cat("dataset", "Primary dataset")
        pt_axis              = hist.Bin("pt", r"$p_{T}$ (GeV)", 500, 0, 2000)
        mass_axis            = hist.Bin("mass", r"$mass$ (GeV)", 500, 0, 2000)
        phi_axis             = hist.Bin("phi", r"$\phi$", 80, 0, 4)
        multiplicity_axis    = hist.Bin("multiplicity", r"N", 30, -0.5, 29.5)

        # (output key, variable axis) for every booked histogram.
        # NOTE(review): "nFatJet" and "nFatJet_n1" are booked but nothing in
        # process() fills them.
        # TODO: MT2_WH, MT2_bbjj and MT2_bjjb (mass axis) are not booked yet.
        booked = [
            ("MET_pt",       pt_axis),
            ("MET_pt_n1",    pt_axis),
            ("LeadJet_pt",   pt_axis),
            ("LeadBJet_pt",  pt_axis),
            ("nB",           multiplicity_axis),
            ("nB_n1",        multiplicity_axis),
            ("nGoodJet",     multiplicity_axis),
            ("nGoodJet_n1",  multiplicity_axis),
            ("nFatJet",      multiplicity_axis),
            ("nFatJet_n1",   multiplicity_axis),
            ("HT",           pt_axis),
            ("Mbb",          mass_axis),
            ("Mbb_n1",       mass_axis),
            ("MCT_bb",       mass_axis),
            ("MCT_jj",       mass_axis),
            ("Mjj",          mass_axis),
            ("mindphijMET",  phi_axis),
            ("bb_pt",        pt_axis),
            ("dphibbMET",    phi_axis),
            ("dphibbjj",     phi_axis),
            ("nHtag",        multiplicity_axis),
            ("nWtag",        multiplicity_axis),
            ("H_pt",         pt_axis),
            ("dphiWH",       phi_axis),
            # NOTE(review): WHptMET is booked on the multiplicity axis and the
            # plotting cell rebins it by that axis name, so it is kept as is.
            ("WHptMET",      multiplicity_axis),
            ("nH_W1",        multiplicity_axis),
            ("nW_H1",        multiplicity_axis),
        ]
        self._accumulator = processor.dict_accumulator({
            key: hist.Hist("Counts", dataset_axis, variable_axis)
            for key, variable_axis in booked
        })

    @property
    def accumulator(self):
        """The dict_accumulator holding one empty histogram per output key."""
        return self._accumulator

    def process(self, df):
        """Fill the histograms for one chunk of events.

        Parameters
        ----------
        df : chunk dataframe indexed by branch name; besides ``dataset`` and
            ``weight`` it must provide the ``Jet_*``, ``MET_pt``, ``bb_*``,
            ``jj_*``, ``nFatJetfromH/W``, ``FatJetfromH_pt``, ``WHptMET`` and
            delta-phi branches read below.

        Returns
        -------
        A fresh accumulator (``self.accumulator.identity()``) with this
        chunk's entries filled in.
        """
        output = self.accumulator.identity()
        dataset = df['dataset']

        # An empty chunk has no first weight to read; return empty histograms
        # instead of raising IndexError.
        event_weights = df['weight']
        if len(event_weights) == 0:
            return output
        # One weight for the whole chunk, taken from its first event.
        # presumably 137 is the luminosity and 1000 a unit conversion -- TODO confirm
        wght = event_weights[0] * 137 * 1000

        # ---- objects -------------------------------------------------------
        jets = awkward.JaggedArray.zip(pt=df['Jet_pt'], eta=df['Jet_eta'], phi=df['Jet_phi'],
                                       btag=df['Jet_btagDeepB'], jetid=df['Jet_jetId'],
                                       mass=df['Jet_mass'], n=df['nJet'])
        is_good = (jets['pt'] > 30) & (abs(jets['eta']) < 2.4) & (jets['jetid'] > 0)
        goodjets = jets[is_good]
        # presumably 0.4184 is the medium b-tag working point -- TODO confirm
        bjets = jets[is_good & (jets['btag'] > 0.4184)]

        # ---- preselection --------------------------------------------------
        met_ps = (df['MET_pt'] > 250)
        njet_ps = (goodjets.counts == 4)
        bjet_ps = (bjets.counts == 2)
        # Every bb_mass entry of the event must lie inside (90, 150).
        bb_mass = df['bb_mass']
        mbb_ps = (bb_mass.min() > 90) & (bb_mass.max() < 150)
        # TODO: tau and isolated-track vetoes (nVetoTau == 0, nVetoIsoTrack == 0)
        # are not applied yet.

        presel = met_ps & njet_ps & bjet_ps & mbb_ps
        # N-1 selections: the full preselection minus the named cut.
        metn1_presel = njet_ps & bjet_ps & mbb_ps
        njn1_presel = met_ps & bjet_ps & mbb_ps
        nbn1_presel = met_ps & njet_ps & mbb_ps
        mbbn1_presel = met_ps & njet_ps & bjet_ps

        # Preselection plus exactly one H-tagged / W-tagged fat jet.
        hsell = presel & (df["nFatJetfromH"] == 1)
        wsell = presel & (df["nFatJetfromW"] == 1)

        # ---- fills ---------------------------------------------------------
        def fill(key, axis, values):
            # Every histogram takes the same dataset label and chunk weight.
            output[key].fill(dataset=dataset, weight=wght, **{axis: values})

        # Branches plotted as-is after the full preselection.
        # WHptMET used to be filled from every event in the chunk; it now
        # gets the preselection like everything else.
        branch_fills = [
            ('MET_pt',      'pt',           'MET_pt'),
            ('Mbb',         'mass',         'bb_mass'),
            ('Mjj',         'mass',         'jj_mass'),
            ('MCT_bb',      'mass',         'bb_mct'),
            ('MCT_jj',      'mass',         'jj_mct'),
            ('bb_pt',       'pt',           'bb_pt'),
            ('H_pt',        'pt',           'FatJetfromH_pt'),
            ('WHptMET',     'multiplicity', 'WHptMET'),
            ('mindphijMET', 'phi',          'mindphijmet'),
            ('dphibbMET',   'phi',          'Dphibbmet'),
            ('dphibbjj',    'phi',          'Dphibbjj'),
            ('dphiWH',      'phi',          'DphiWH'),
        ]
        for key, axis, branch in branch_fills:
            fill(key, axis, df[branch][presel].flatten())

        # N-1 versions of MET and m(bb).
        fill('MET_pt_n1', 'pt', df["MET_pt"][metn1_presel].flatten())
        fill('Mbb_n1', 'mass', df["bb_mass"][mbbn1_presel].flatten())

        # Quantities derived from the jet collections.
        fill('HT', 'pt', goodjets['pt'][presel].sum().flatten())
        fill('LeadJet_pt', 'pt', goodjets['pt'][presel].max().flatten())
        fill('LeadBJet_pt', 'pt', bjets['pt'][presel].max().flatten())
        fill('nB', 'multiplicity', bjets[presel].counts)
        fill('nB_n1', 'multiplicity', bjets[nbn1_presel].counts)
        fill('nGoodJet', 'multiplicity', goodjets[presel].counts)
        fill('nGoodJet_n1', 'multiplicity', goodjets[njn1_presel].counts)

        # Tag multiplicities after preselection only.  These used the
        # one-W-tag mask before, which pinned nWtag to 1 and made nHtag a
        # copy of nH_W1.
        fill('nHtag', 'multiplicity', df["nFatJetfromH"][presel])
        fill('nWtag', 'multiplicity', df["nFatJetfromW"][presel])
        # H-tag multiplicity given exactly one W tag, and vice versa.
        fill('nH_W1', 'multiplicity', df["nFatJetfromH"][wsell])
        fill('nW_H1', 'multiplicity', df["nFatJetfromW"][hsell])

        return output

    def postprocess(self, accumulator):
        """No post-processing: hand the merged accumulator back unchanged."""
        return accumulator
        

In [None]:
# All input files follow one layout:
#   <root>/<tag>/<sample>/merged/<sample>_1.root
# so the file lists are generated from the sample names rather than pasted.
_NTUPLE_ROOT = '/hadoop/cms/store/user/mbryson/WH_hadronic/WH_had_750_1/nanoAOD'
_GENERATOR = 'TuneCUETP8M1_13TeV-madgraphMLM-pythia8'
_CAMPAIGN = 'RunIISummer16NanoAODv7-PUMoriond17_Nano02Apr2020_102X_mcRun2_asymptotic_v8'


def _merged_file(sample, tag='MBv69'):
    """Path of the first merged ROOT file of ``sample`` under production ``tag``."""
    return '{0}/{1}/{2}/merged/{2}_1.root'.format(_NTUPLE_ROOT, tag, sample)


def _background_file(process, ext=False, tag='MBv69'):
    """Merged file of a background ``process``; ``ext`` selects the ``_ext1`` sample."""
    sample = 'background__{}_{}__{}{}-v1'.format(
        process, _GENERATOR, _CAMPAIGN, '_ext1' if ext else '')
    return _merged_file(sample, tag)


def _nominal_and_ext(process):
    """Both the nominal and the ``_ext1`` file of ``process``, in that order."""
    return [_background_file(process), _background_file(process, ext=True)]


fileset_sig = {'mC750_l1': [_merged_file('WH_had_750_1__nanoAOD')]}

fileset_back = {
    # Seven HT-binned samples (nominal + ext1 each) followed by the
    # inclusive sample, which is only listed without an extension.
    'W+Jets': [
        path
        for ht_range in ('100To200', '200To400', '400To600', '600To800',
                         '800To1200', '1200To2500', '2500ToInf')
        for path in _nominal_and_ext('WJetsToLNu_HT-' + ht_range)
    ] + [_background_file('WJetsToLNu')],
    # HT700to1000 is listed with its ext1 sample only.
    'QCD': (
        _nominal_and_ext('QCD_HT500to700')
        + [_background_file('QCD_HT700to1000', ext=True)]
        + [
            path
            for ht_range in ('1000to1500', '1500to2000', '2000toInf')
            for path in _nominal_and_ext('QCD_HT' + ht_range)
        ]
    ),
    # NOTE(review): the SingleLeptFromTbar ext1 file is the only one read
    # from the MBv88 tag instead of MBv69 -- confirm this is intentional.
    'TTJets': (
        _nominal_and_ext('TTJets_SingleLeptFromT')
        + [_background_file('TTJets_SingleLeptFromTbar'),
           _background_file('TTJets_SingleLeptFromTbar', ext=True, tag='MBv88')]
        + _nominal_and_ext('TTJets_DiLept')
    ),
}

def _run_wh_job(fileset):
    """Run a fresh WHhadProcessor over ``fileset`` and return its accumulator.

    The arguments are rebuilt on every call so the signal and background
    jobs never share a processor instance or an ``executor_args`` dict.
    """
    return processor.run_uproot_job(
        fileset,
        treename='Events',
        processor_instance=WHhadProcessor(),
        executor=processor.futures_executor,
        executor_args={'workers': 24, 'function_args': {'flatten': False}},
        chunksize=500000,
    )


# Signal and background are processed with identical settings.
output_sig = _run_wh_job(fileset_sig)

output_back = _run_wh_job(fileset_back)
     



HBox(children=(FloatProgress(value=0.0, description='Preprocessing', max=1.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Processing', max=1.0, style=ProgressStyle(description_wid…




HBox(children=(FloatProgress(value=0.0, description='Preprocessing', max=30.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Processing', max=61.0, style=ProgressStyle(description_wi…

In [None]:
# Keys of the processor output that get plotted, in plotting order.
# "nW_H1" was missing because "nH_W1" was listed twice; "Mjj" is filled by
# the processor and has a rebinning rule in the plotting loop, so it is
# plotted as well.
histograms = ["MET_pt", "MET_pt_n1", "nB", "nB_n1", "Mbb", "Mbb_n1", "Mjj",
              "nGoodJet", "nGoodJet_n1",
              "nHtag", "H_pt", "nWtag", "LeadJet_pt", "HT", "LeadBJet_pt",
              "MCT_bb", "MCT_jj", "mindphijMET", "bb_pt", "dphibbMET", "dphibbjj",
              "dphiWH", "WHptMET", "nH_W1", "nW_H1"]

# Directory the PNG files are written to.
outdir = "/home/users/mbryson/public_html/dump/WH/"

In [None]:
# Keyword dictionaries handed to hist.plot1d by the plotting helpers.

# Three colour tuples shared by the line and fill styles
# (presumably one per background dataset -- TODO confirm).
_background_colors = [(0.1,0.3,0.9), (0.2,0.6,0.9), (0.3,0.9,0.9)]

# Plain red line.
lineopts = dict(color='r')

# Red 'x' markers with thin error bars and no connecting line.
data_err_opts = dict(
    linestyle='none',
    marker='x',
    markersize=10.,
    color='r',
    elinewidth=1,
)

# Line style using the shared colours.
lineopts2 = dict(color=list(_background_colors))

# Fill style using the shared colours, with a faint black edge.
fillopts1 = dict(
    edgecolor=(0,0,0,0.3),
    facecolor=list(_background_colors),
    alpha=0.8,
)


def savefig(signal, background, outdir, name):
    """Save a log-scale plot of stacked backgrounds with the signal overlaid.

    Parameters
    ----------
    signal, background : histograms with a "dataset" axis and one variable
        axis; each dataset is drawn as its own series.
    outdir : directory the image is written to.
    name : file stem; the plot is saved as ``<name>_log.png``.
    """
    ax = hist.plot1d(background, overlay="dataset", density=False, stack=True,
                     fill_opts=fillopts1, overflow='over')
    # Draw the signal on the SAME axes.  Without ax=ax, plot1d opens a new
    # figure and the saved image holds only the background; clear=False
    # keeps the stack that was just drawn.
    hist.plot1d(signal, ax=ax, clear=False, overlay="dataset", density=False, stack=False,
                error_opts=data_err_opts, overflow='over')
    ax.set_yscale('log')
    ax.set_ylim(0.001, 1000000)
    ax.figure.savefig(os.path.join(outdir, "{}_log.png".format(name)))
    # Close the figure, not just clear the axes, so a loop over many
    # histograms does not pile up open figures.
    plt.close(ax.figure)


    
def savefigshape(signal, background, outdir, name):
    """Save a log-scale shape comparison (density-normalised) of signal and backgrounds.

    Parameters
    ----------
    signal, background : histograms with a "dataset" axis and one variable
        axis; each dataset is drawn as its own, unstacked series.
    outdir : directory the image is written to.
    name : file stem; the plot is saved as ``<name>_shape_log.png``.
    """
    ax = hist.plot1d(background, overlay="dataset", density=True, stack=False,
                     line_opts=lineopts2, overflow='over')
    # Draw the signal on the SAME axes.  Without ax=ax, plot1d opens a new
    # figure and the saved image holds only the background; clear=False
    # keeps the background lines that were just drawn.
    hist.plot1d(signal, ax=ax, clear=False, overlay="dataset", density=True, stack=False,
                error_opts=data_err_opts, overflow='over')
    ax.set_yscale('log')
    ax.set_ylim(0.00001, 10)
    ax.figure.savefig(os.path.join(outdir, "{}_shape_log.png".format(name)))
    # Close the figure, not just clear the axes, so a loop over many
    # histograms does not pile up open figures.
    plt.close(ax.figure)

In [None]:
# Plot binning per histogram: (axis name, axis label, bins, low edge, high edge).
# A histogram that is not listed here is drawn with the binning it was filled with.
REBINNING = {
    'MET_pt':       ('pt', r'$E_T^{miss} \ (GeV)$', 20, 200, 1000),
    'MET_pt_n1':    ('pt', r'$N-1: \ E_T^{miss} \ (GeV)$', 20, 0, 2000),
    'HT':           ('pt', r'$H_T \ (GeV)$', 20, 0, 2000),
    'LeadJet_pt':   ('pt', r'$Lead \ Jet \ p_T  \ (GeV)$', 20, 200, 1000),
    'LeadBJet_pt':  ('pt', r'$Lead \ BJet \ p_T  \ (GeV)$', 20, 200, 1000),
    'nB':           ('multiplicity', r'$N_{Bjets}$', 6, -0.5, 5.5),
    'nB_n1':        ('multiplicity', r'$N-1: \ N_{Bjets}$', 7, -0.5, 6.5),
    'nGoodJet':     ('multiplicity', r'$N_{jets}$', 11, -0.5, 10.5),
    'nGoodJet_n1':  ('multiplicity', r'$N-1: \ N_{jets}$', 11, -0.5, 10.5),
    'Mbb':          ('mass', r'$M_{bb}  \ GeV$', 20, 80, 160),
    'Mbb_n1':       ('mass', r'$N-1: \ M_{bb}  \ GeV$', 25, 0, 2000),
    'Mjj':          ('mass', r'$M_{jj}  \ GeV$', 250, 0, 1000),
    'MCT_bb':       ('mass', r'$MCT_{bb}  \ GeV$', 25, 0, 1000),
    'MCT_jj':       ('mass', r'$MCT_{jj}  \ GeV$', 25, 0, 1000),
    'nHtag':        ('multiplicity', r'$N_{Higgs \ Tag}$', 4, -0.5, 3.5),
    # The W-tag label was a copy of the Higgs-tag one.
    'nWtag':        ('multiplicity', r'$N_{W \ Tag}$', 4, -0.5, 3.5),
    'H_pt':         ('pt', r'$Higgs \ p_T \ (GeV)$', 20, 0, 2000),
    'bb_pt':        ('pt', r'$bb \ p_T \ (GeV)$', 50, 0, 2000),
    # NOTE(review): the label carries "(GeV)" although it reads as a ratio -- confirm.
    'WHptMET':      ('multiplicity', r'$ \ p_T(W,H)/E_T^{miss} \ (GeV)$', 11, -0.5, 10.5),
    'mindphijMET':  ('phi', r'$ min(\Delta\phi(j_i,MET))$', 40, 0, 4),
    'dphibbMET':    ('phi', r'$ \Delta\phi(bb,MET)$', 40, 0, 4),
    'dphibbjj':     ('phi', r'$ \Delta\phi(bb,jj)$', 40, 0, 4),
    'dphiWH':       ('phi', r'$ \Delta\phi(W,H)$', 40, 0, 4),
}

for name in histograms:
    print(name)
    histo_sig = output_sig[name]
    histo_back = output_back[name]
    # Signal and background get the same new axis so they stay comparable.
    if name in REBINNING:
        axis_name, axis_label, n_bins, low, high = REBINNING[name]
        new_bins = hist.Bin(axis_name, axis_label, n_bins, low, high)
        histo_sig = histo_sig.rebin(axis_name, new_bins)
        histo_back = histo_back.rebin(axis_name, new_bins)
    # One yield plot and one shape plot per histogram.
    savefig(histo_sig, histo_back, outdir, name)
    savefigshape(histo_sig, histo_back, outdir, name)

