# Correctionlib to text

In [1]:
import json
import os,sys
import pandas as pd
import numpy as np
import correctionlib

### Setting up some global parameters and functions

In [2]:
# Per-object configuration consumed by the main loop further down:
#   basedir     - directory whose sub-folders are named after the campaigns
#   jsonfile    - name of the correctionlib JSON file inside each campaign folder
#   corrections - mapping {correction name in the JSON: output text file name}
#   outdir      - directory the output text files are written to
objectdict = {
    "Electron": {
        "basedir": "POG/EGM/",
        "jsonfile": "electron.json",
        "corrections": {
            "UL-Electron-ID-SF": "electron_id_sf.txt",
        },
        "outdir": "electronsf",
    },
    "Muon": {
        "basedir": "POG/MUO/",
        "jsonfile": "muon_Z_v2.json",
        "corrections": {
            "NUM_MediumID_DEN_genTracks": "muon_id_sf.txt",
            "NUM_TightRelIso_DEN_MediumID": "muon_iso_sf.txt",
        },
        "outdir": "muonsf",
    },
}

#### Extracting electron scale-factors in pT-eta bins

In [10]:
def _finite_edge(edge, lower, upper):
    """Return a finite stand-in for a bin edge.

    String edges (e.g. "inf", "+inf", "-inf") are converted with float()
    first, because np.isfinite raises TypeError on strings. Finite edges are
    returned unchanged; -inf maps to `lower`, anything else non-finite maps
    to `upper`.
    """
    if isinstance(edge, str):
        edge = float(edge)
    if np.isfinite(edge):
        return edge
    return lower if edge == -np.inf else upper


def _find_electron_edges(json_data, correction_name, campaign, wp):
    """Walk the nested JSON content and return (eta_edges, pt_edges).

    The nesting followed is: correction name -> campaign key -> 'sf' -> `wp`.
    Returns (None, None) when no matching node exists. If several nodes
    match, the last one wins.
    """
    eta_edges = None
    pt_edges = None

    for item in json_data['corrections']:
        #Each item is a dict
        if item['name'] != correction_name: continue

        for obj in item['data']['content']:
            ### campaign name (substring match, e.g. '2018' in '2018_UL')
            if obj['key'] not in campaign: continue
            print('Extracting data for: '+obj['key'])

            for subobj in obj['value']['content']:
                ### sf type
                if subobj['key'] != 'sf': continue
                print('Extracting data for:'+subobj['key'])

                for subsubobj in subobj['value']['content']:
                    ### Working point
                    if subsubobj['key'] != wp: continue
                    print('Extracting data for '+subsubobj['key']+' WP')

                    edges = subsubobj['value']['edges']
                    eta_edges = edges[0]
                    pt_edges  = edges[1]

                    print('Edges extracted!')

    return eta_edges, pt_edges


def parse_electron_sf(filename, correction_name, campaign):
    """Tabulate the 'Medium' electron scale factors of one campaign per (eta, pt) bin.

    The bin edges are read from the raw JSON; the correction is then evaluated
    with correctionlib at the centre of every (eta, pt) bin for the 'sfdown',
    'sf' and 'sfup' value types.

    Parameters
    ----------
    filename : str
        Path to the correctionlib JSON file.
    correction_name : str
        Name of the correction inside the file.
    campaign : str
        Campaign label such as '2018_UL'; the '_UL' suffix is stripped to
        build the value passed to the correction.

    Returns
    -------
    pandas.DataFrame
        One row per bin with columns campaign, eta_low, eta_high, pt_low,
        pt_high, sfdown, sf, sfup. Empty (no columns) when no binning for the
        requested correction/campaign/working point is found in the file.
    """
    # Finite stand-ins for open-ended bins, so that a bin centre can be computed.
    MAX_PT = 1500
    MIN_ETA = -2.5
    MAX_ETA = 2.5
    wp = "Medium"

    scale_factors = []
    with open(filename, "r") as f: json_data = json.load(f)

    eta_edges, pt_edges = _find_electron_edges(json_data, correction_name, campaign, wp)
    if eta_edges is None or pt_edges is None:
        # Previously this fell through to eta_edges[:-1] and died with a TypeError.
        print(f'No {wp} binning found for {correction_name} ({campaign}) in {filename}.')
        return pd.DataFrame(scale_factors)

    # Now that the binning is known, evaluate the correction itself.
    correction_set = correctionlib.CorrectionSet.from_file(filename)
    correction = correction_set[correction_name]
    era = campaign.replace('_UL', '')

    #Given the pt and eta edges, loop over their midvalues.
    for eta_low, eta_high in zip(eta_edges[:-1], eta_edges[1:]):
        eta_low  = _finite_edge(eta_low,  MIN_ETA, MAX_ETA)
        eta_high = _finite_edge(eta_high, MIN_ETA, MAX_ETA)
        eta = (eta_low + eta_high) / 2

        for pt_low, pt_high in zip(pt_edges[:-1], pt_edges[1:]):
            pt_low  = _finite_edge(pt_low,  0, MAX_PT)
            pt_high = _finite_edge(pt_high, 0, MAX_PT)
            pt = (pt_low + pt_high) / 2

            sfdown = correction.evaluate(era, 'sfdown', wp, eta, pt)
            sf     = correction.evaluate(era, 'sf',     wp, eta, pt)
            sfup   = correction.evaluate(era, 'sfup',   wp, eta, pt)

            scale_factors.append({
                'campaign': campaign,
                'eta_low' : eta_low,
                'eta_high': eta_high,
                'pt_low'  : pt_low,
                'pt_high' : pt_high,
                'sfdown'  : sfdown,
                'sf'      : sf,
                'sfup'    : sfup
            })

    df = pd.DataFrame(scale_factors)
    print('Correctionlib evaluated and dataframe created.\n')
    return df

print('Function for electrons loaded.')

Function for electrons loaded.


#### Extracting muon scale-factors in pT-eta bins

In [16]:
def parse_muon_sf(filename, correction_name, campaign):
    """Placeholder for the muon scale-factor extraction.

    Loads the JSON file and, for every correction whose name equals
    `correction_name`, prints the keys of that entry. No scale factors are
    extracted yet, so the returned DataFrame is always empty; `campaign` is
    accepted but not used.

    Parameters
    ----------
    filename : str
        Path to the JSON file.
    correction_name : str
        Name of the correction to look for.
    campaign : str
        Campaign label (currently unused).

    Returns
    -------
    pandas.DataFrame
        An empty DataFrame.
    """
    with open(filename, "r") as handle:
        payload = json.load(handle)

    for entry in payload['corrections']:
        # Each entry is a dict describing one correction.
        if entry['name'] == correction_name:
            print(entry.keys())

    rows = []
    return pd.DataFrame(rows)

print('Function for muons loaded.')

Function for muons loaded.


### Main: Iterating over the object dictionary to extract the scale-factors for each object

In [17]:
# Driver: for every configured object, collect the per-campaign JSON files,
# run the matching parser for each correction, and write the combined table
# to a fixed-width text file in the object's output directory.

campaigns_to_process = ['2018_UL', '2017_UL', '2016preVFP_UL', '2016postVFP_UL']
columns_to_round = ['sfdown', 'sf', 'sfup']
# Dispatch table: one parser per object type.
parsers = {'Electron': parse_electron_sf, 'Muon': parse_muon_sf}

for obj, val in objectdict.items():

    if obj != 'Muon': continue ### For testing purposes (remove to process every object)

    # The f-prefix must sit on the literal that contains the placeholder.
    print('\n'+'-'*50+f'\n\033[032mProcessing corrections for: {obj}\033[0m\n'+'-'*50)

    parser = parsers.get(obj)
    if parser is None:
        print(f'\033[31mNo parser available for {obj}. Skipping this.\033[0m')
        continue

    basedir = val['basedir']
    outdir = val['outdir']
    os.makedirs(outdir, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sort so that the row
    # order of the output files is reproducible.
    files = []
    for camp in sorted(os.listdir(basedir)):
        if camp not in campaigns_to_process: continue
        filename = os.path.join(basedir, camp, val['jsonfile'])
        if os.path.exists(filename) and filename.endswith('.json'):
            files.append((filename, camp))

    for correction, output_filename in val['corrections'].items():
        print(f"\n\033[033mProcessing correction: {correction}\033[0m\n")

        frames = []
        for filename, campaign in files:
            print(f'Opening file: {filename}')
            frames.append(parser(filename, correction, campaign))

        # pd.concat raises ValueError on an empty list (no campaign files found).
        data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
        if data.empty:
            print('\033[31mDataframe empty. Skipping this.\033[0m')
            continue

        data[columns_to_round] = data[columns_to_round].round(6)
        display(data)

        outfile = os.path.join(outdir, output_filename)
        with open(outfile, 'w') as f:
            for _, row in data.iterrows():
                ### Text formatting: first column 20 wide, scale factors 12, the rest 8.
                cells = []
                for i, column in enumerate(data.columns):
                    if i == 0:                       width = 20
                    elif column in columns_to_round: width = 12
                    else:                            width = 8
                    cells.append(f"{str(row[column]):<{width}}")
                f.write("".join(cells).strip() + "\n")

        print(f"Data written to {outfile}")


--------------------------------------------------
[032mProcessing corrections for: {object}[0m
--------------------------------------------------

[033mProcessing correction: NUM_MediumID_DEN_genTracks[0m

Opening file: POG/MUO/2016postVFP_UL/muon_Z_v2.json
dict_keys(['name', 'description', 'version', 'inputs', 'output', 'data'])
Opening file: POG/MUO/2016preVFP_UL/muon_Z_v2.json
dict_keys(['name', 'description', 'version', 'inputs', 'output', 'data'])
Opening file: POG/MUO/2017_UL/muon_Z_v2.json
dict_keys(['name', 'description', 'version', 'inputs', 'output', 'data'])
Opening file: POG/MUO/2018_UL/muon_Z_v2.json
dict_keys(['name', 'description', 'version', 'inputs', 'output', 'data'])
[31mDataframe empty. Skipping this.[0m

[033mProcessing correction: NUM_TightRelIso_DEN_MediumID[0m

Opening file: POG/MUO/2016postVFP_UL/muon_Z_v2.json
dict_keys(['name', 'description', 'version', 'inputs', 'output', 'data'])
Opening file: POG/MUO/2016preVFP_UL/muon_Z_v2.json
dict_keys(['name'