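# Correctionlib to text

This notebook reads the correctionlib JSON files stored under `POG/<group>/<campaign>/` and, for each configured correction, writes the extracted values for all campaigns to a fixed-width text file under `corrections/`.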

In [1]:
import json
import os,sys
import pandas as pd
import numpy as np
import correctionlib
sys.path.append('modules')
from pileup import parse_pileup_wt
from egamma import parse_electron_sf
from muon import parse_muon_sf
from jets import parse_jet_jec_sf, parse_jet_jer_sf, parse_jet_ptres_sf
from bjets import parse_bjet_eff
print('Modules loaded.')

Modules loaded.


### Setting up some global parameters and functions

In [2]:
# Registry of the corrections that can be exported.
#
# Each entry is keyed by a short correction label and holds:
#   basedir         - directory that contains one sub-directory per campaign
#   jsonfile        - file name looked up inside <basedir>/<campaign>/
#   correction_name - name of the correction; either a single string, or (for
#                     the 'jet_*' entries) a dict keyed by campaign name
#   outfile         - text file the extracted table is written to
#
# For campaigns whose name contains 'Run3', the processing loop substitutes the
# electron correction name, the muon JSON file name and the muon correction names.
correctiondict = {
    # ---------------- electrons ----------------
    'electron_id_sf': {
        'basedir': 'POG/EGM/',
        'jsonfile': 'electron.json',
        'correction_name': 'UL-Electron-ID-SF',
        'outfile': 'corrections/electron_id_sf.txt',
    },

    # ---------------- muons ----------------
    'muon_id_sf': {
        'basedir': 'POG/MUO/',
        'jsonfile': 'muon_Z_v2.json',
        'correction_name': 'NUM_MediumID_DEN_genTracks',
        'outfile': 'corrections/muon_id_sf.txt',
    },
    'muon_iso_sf': {
        'basedir': 'POG/MUO/',
        'jsonfile': 'muon_Z_v2.json',
        'correction_name': 'NUM_TightRelIso_DEN_MediumID',
        'outfile': 'corrections/muon_iso_sf.txt',
    },

    # ---------------- jets (correction name depends on the campaign) ----------------
    'jet_jec_sf': {
        'basedir': 'POG/JME/',
        'jsonfile': 'jet_jerc.json',
        'correction_name': {
            '2016postVFP_UL': 'Summer19UL16_V7_MC_Total_AK4PFchs',
            '2016preVFP_UL': 'Summer19UL16APV_V7_MC_Total_AK4PFchs',
            '2017_UL': 'Summer19UL17_V5_MC_Total_AK4PFchs',
            '2018_UL': 'Summer19UL18_V5_MC_Total_AK4PFchs',
            'Run3Summer22': 'Summer22_22Sep2023_V2_MC_Total_AK4PFPuppi',
            'Run3Summer22EE': 'Summer22EE_22Sep2023_V2_MC_Total_AK4PFPuppi',
            'Run3Summer23': 'Summer23Prompt23_V1_MC_Total_AK4PFPuppi',
            'Run3Summer23BPix': 'Summer23BPixPrompt23_V1_MC_Total_AK4PFPuppi',
        },
        'outfile': 'corrections/jet_jec_sf.txt',
    },
    'jet_jer_sf': {
        'basedir': 'POG/JME/',
        'jsonfile': 'jet_jerc.json',
        'correction_name': {
            '2016postVFP_UL': 'Summer20UL16_JRV3_MC_ScaleFactor_AK4PFchs',
            '2016preVFP_UL': 'Summer20UL16APV_JRV3_MC_ScaleFactor_AK4PFchs',
            '2017_UL': 'Summer19UL17_JRV2_MC_ScaleFactor_AK4PFchs',
            '2018_UL': 'Summer19UL18_JRV2_MC_ScaleFactor_AK4PFchs',
            'Run3Summer22': 'Summer22_22Sep2023_JRV1_MC_ScaleFactor_AK4PFPuppi',
            'Run3Summer22EE': 'Summer22EE_22Sep2023_JRV1_MC_ScaleFactor_AK4PFPuppi',
            'Run3Summer23': 'Summer23Prompt23_RunCv1234_JRV1_MC_ScaleFactor_AK4PFPuppi',
            'Run3Summer23BPix': 'Summer23BPixPrompt23_RunD_JRV1_MC_ScaleFactor_AK4PFPuppi',
        },
        'outfile': 'corrections/jet_jer_sf.txt',
    },
    'jet_ptres_sf': {
        'basedir': 'POG/JME/',
        'jsonfile': 'jet_jerc.json',
        'correction_name': {
            '2016postVFP_UL': 'Summer20UL16_JRV3_MC_PtResolution_AK4PFchs',
            '2016preVFP_UL': 'Summer20UL16APV_JRV3_MC_PtResolution_AK4PFchs',
            '2017_UL': 'Summer19UL17_JRV2_MC_PtResolution_AK4PFchs',
            '2018_UL': 'Summer19UL18_JRV2_MC_PtResolution_AK4PFchs',
            'Run3Summer22': 'Summer22_22Sep2023_JRV1_MC_PtResolution_AK4PFPuppi',
            'Run3Summer22EE': 'Summer22EE_22Sep2023_JRV1_MC_PtResolution_AK4PFPuppi',
            'Run3Summer23': 'Summer23Prompt23_RunCv1234_JRV1_MC_PtResolution_AK4PFPuppi',
            'Run3Summer23BPix': 'Summer23BPixPrompt23_RunD_JRV1_MC_PtResolution_AK4PFPuppi',
        },
        'outfile': 'corrections/jet_ptres_sf.txt',
    },

    # ---------------- b-jets ----------------
    'bjet_mujets_and_incl_eff': {
        'basedir': 'POG/BTV/',
        'jsonfile': 'btagging.json',
        'correction_name': 'deepJet_mujets',
        'outfile': 'corrections/bjet_mujets_and_incl_eff.txt',
    },
    'bjet_comb_and_incl_eff': {
        'basedir': 'POG/BTV/',
        'jsonfile': 'btagging.json',
        'correction_name': 'deepJet_comb',
        'outfile': 'corrections/bjet_comb_and_incl_eff.txt',
    },

    # ---------------- pileup ----------------
    'pileup_wt': {
        'basedir': 'POG/LUM/',
        'jsonfile': 'puWeights.json',
        # NOTE(review): this value is identical to the b-tag 'mujets' name and is
        # only printed by the processing loop; parse_pileup_wt is called without
        # it. It looks like a copy-paste placeholder -- confirm and replace.
        'correction_name': 'deepJet_mujets',
        'outfile': 'corrections/pileup_wt.txt',
    },
}

# Maps the campaign sub-directory name (used to build the input path) to the
# campaign name that is handed to the parsers and used to look up the
# per-campaign jet correction names. Iteration order is the processing order.
campaigndict = dict([
    ('2016postVFP_UL', '2016postVFP_UL'),
    ('2016preVFP_UL', '2016preVFP_UL'),
    ('2017_UL', '2017_UL'),
    ('2018_UL', '2018_UL'),
    ('2022_Summer22', 'Run3Summer22'),
    ('2022_Summer22EE', 'Run3Summer22EE'),
    ('2023_Summer23', 'Run3Summer23'),
    ('2023_Summer23BPix', 'Run3Summer23BPix'),
])

def warning(text, color_=31, type_=0):
    """Print ``text`` wrapped in an ANSI colour escape sequence.

    Parameters
    ----------
    text : object
        Message to print; it is interpolated into an f-string.
    color_ : int, optional
        Second field of the escape sequence (the colour code). Default 31.
    type_ : int, optional
        First field of the escape sequence (the text attribute). Default 0.
    """
    sgr_on = f'\033[{type_};{color_}m'
    # '\033[0m' resets the terminal attributes after the message.
    print(f'{sgr_on}{text}\033[0m')

print('Correction names loaded.')

Correction names loaded.


In [3]:
%%time
# Horizontal rule framing the banner printed for each correction.
line = '\n'+'-'*50

# Substrings selecting which entries of correctiondict are processed in this run.
# A correction is processed when its key contains at least one of them.
# Set to None to process every correction.
selected_corrections = ['pileup']

# Columns that are clipped/rounded and that get the wider slot in the text file.
columns_to_round = ['sfdown', 'sf', 'sfup']
# Values above this ceiling are clipped before rounding (ignoring too large numbers).
SF_UPPER_LIMIT = 99
# Number of decimals kept in the rounded columns.
SF_DECIMALS = 6


def _resolve_campaign_inputs(correction, val, campaign_name):
    """Return ``(jsonfile, correction_name)`` for one correction/campaign pair.

    Both values are derived from ``val`` on every call, so the Run3
    substitutions applied for one campaign can never leak into another one,
    whatever the iteration order of ``campaigndict`` is.
    """
    jsonfile = val['jsonfile']
    correction_name = val['correction_name']

    # Jet corrections store one correction name per campaign.
    if correction.startswith('jet'):
        correction_name = correction_name[campaign_name]

    ### Corrections for Run2 and Run3 campaign names:
    if 'Run3' in campaign_name:
        ### Electrons:
        correction_name = correction_name.replace('UL-Electron-ID-SF', 'Electron-ID-SF')
        ### Muons:
        jsonfile = jsonfile.replace('muon_Z_v2.json', 'muon_Z.json')
        if 'muon_id'  in correction: correction_name = 'NUM_MediumID_DEN_TrackerMuons'
        if 'muon_iso' in correction: correction_name = 'NUM_TightPFIso_DEN_MediumID'

    return jsonfile, correction_name


def _extract(correction, infile, correction_name, campaign_name):
    """Call the parser(s) whose keyword occurs in ``correction``; None if none matches."""
    extracted_data = None
    if 'pileup' in correction:   extracted_data = parse_pileup_wt(infile, campaign_name)
    if 'electron' in correction: extracted_data = parse_electron_sf(infile, correction_name, campaign_name)
    if 'muon' in correction:     extracted_data = parse_muon_sf(infile, correction_name, campaign_name)
    if 'jec' in correction:      extracted_data = parse_jet_jec_sf(infile, correction_name, campaign_name)
    if 'jer' in correction:      extracted_data = parse_jet_jer_sf(infile, correction_name, campaign_name)
    if 'ptres' in correction:    extracted_data = parse_jet_ptres_sf(infile, correction_name, campaign_name)
    if 'bjet' in correction:     extracted_data = parse_bjet_eff(infile, correction_name, campaign_name)
    return extracted_data


def _format_row(row, columns):
    """Render one table row as left-aligned fixed-width text without outer whitespace."""
    cells = []
    for i, column in enumerate(columns):
        if i == 0:                       width = 20  # first column
        elif column in columns_to_round: width = 12  # clipped/rounded value columns
        else:                            width = 8
        cells.append(f"{str(row[column]):<{width}}")
    return "".join(cells).strip()


for correction, val in correctiondict.items():
    if selected_corrections is not None and not any(key in correction for key in selected_corrections):
        continue

    warning(line+f'\nProcessing corrections for: {correction}'+line, 32, 1)

    outfile = val['outfile']
    basedir = val['basedir']

    outdir = os.path.dirname(outfile)
    if outdir: os.makedirs(outdir, exist_ok=True)

    # One extracted table per campaign; concatenated once after the loop.
    data = []

    for campaign, campaign_name in campaigndict.items():
        jsonfile, correction_name = _resolve_campaign_inputs(correction, val, campaign_name)

        infile = os.path.join(basedir, campaign, jsonfile)
        if not os.path.exists(infile):
            warning(f'{campaign_name:<18}: Path does not exist: {infile} (skipping)')
            continue

        print(f'{campaign_name:<18}: {correction_name}')

        ### Extract the scale factors here.
        extracted_data = _extract(correction, infile, correction_name, campaign_name)
        if extracted_data is not None: data.append(extracted_data)

    if len(data) == 0:
        warning(f'Dataframe empty. Skipping correction: {correction}')
        continue

    df = pd.concat(data, ignore_index=True)
    existing_columns = [col for col in columns_to_round if col in df.columns]
    # Nothing to clip or round when the table has none of the sf columns.
    if existing_columns:
        df[existing_columns] = df[existing_columns].clip(upper=SF_UPPER_LIMIT).round(SF_DECIMALS)
    display(df)

    with open(outfile, 'w') as f:
        # iterrows is kept on purpose: the text output is built from str() of each row value.
        for index, row in df.iterrows():
            f.write(_format_row(row, df.columns) + "\n")

    warning(f"Data written to {outfile}", 32)

[1;32m
--------------------------------------------------
Processing corrections for: pileup_wt
--------------------------------------------------[0m
2016postVFP_UL    : deepJet_mujets
2016preVFP_UL     : deepJet_mujets
2017_UL           : deepJet_mujets
2018_UL           : deepJet_mujets
Run3Summer22      : deepJet_mujets
Run3Summer22EE    : deepJet_mujets
Run3Summer23      : deepJet_mujets
Run3Summer23BPix  : deepJet_mujets


Unnamed: 0,campaign,nint,sfdown,sf,sfup
0,2016postVFP_UL,0,0.318110,0.277740,0.244090
1,2016postVFP_UL,1,0.425847,0.341608,0.289523
2,2016postVFP_UL,2,0.971638,0.914788,0.850717
3,2016postVFP_UL,3,0.774113,0.696977,0.628712
4,2016postVFP_UL,4,0.696696,0.633194,0.580053
...,...,...,...,...,...
795,Run3Summer23BPix,95,0.002370,0.032316,0.287189
796,Run3Summer23BPix,96,0.003755,0.055453,0.526316
797,Run3Summer23BPix,97,1.000000,1.000000,1.000000
798,Run3Summer23BPix,98,1.000000,1.000000,1.000000


[0;32mData written to corrections/pileup_wt.txt[0m
CPU times: user 96.7 ms, sys: 2.22 ms, total: 98.9 ms
Wall time: 735 ms


In [4]:
print('Done!')

Done!
