In [11]:
from trainlib.FileCollection import FileCollection
from trainlib.config import Config
import trainlib.cuts as cuts
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import copy
import re

In [2]:
def get_sorted_list_element_df(inframe, list_index, sorted_column, aux_columns, pt_limit):
    """
    Apply `get_sorted_list_element` to every row of `inframe` and stack the results.

    Parameters
    ----------
    inframe : pandas.DataFrame
        Input frame; it is traversed row by row.
    list_index, sorted_column, aux_columns, pt_limit
        Forwarded unchanged to `get_sorted_list_element` for each row.

    Returns
    -------
    pandas.DataFrame
        The per-row results concatenated in row order, with a fresh 0..n-1 index.
        An empty frame is returned when `inframe` has no rows.
    """
    # NOTE(review): `get_sorted_list_element` is not defined in the visible part of this
    # notebook; it must exist in the kernel namespace before this function is called.
    processed_frames = [
        pd.DataFrame(get_sorted_list_element(row, list_index, sorted_column, aux_columns, pt_limit))
        for _, row in inframe.iterrows()
    ]

    # pd.concat refuses an empty list, so keep the "no rows -> empty frame" behaviour explicit.
    if not processed_frames:
        return pd.DataFrame()

    # Concatenate once instead of growing the frame inside the loop: DataFrame.append was
    # removed in pandas 2.0, and repeated appends copy all accumulated data on every iteration.
    return pd.concat(processed_frames).reset_index(drop = True)

In [3]:
# Scalar (one value per event) branch names.
candidate_branches = [
    "PFMET",
    "nCleanedJetsPt30",
    "nCleanedJetsPt30BTagged_bTagSF",
    "nExtraLep",
    "ZZMass",
    "nExtraZ",
    "Z1Mass",
    "Z2Mass",
    "Z1Pt",
    "Z2Pt",
    "ZZMassErr",
    "ZZPt",
    "ZZEta",
    "ZZPhi",
    "Z1Flav",
    "Z2Flav",
    "costhetastar",
    "helphi",
    "helcosthetaZ1",
    "helcosthetaZ2",
    "phistarZ1",
    "phistarZ2",
    "xi",
    "xistar",
]

# Names of the matrix-element discriminant branches.
MELA_branches = [
    "D_VBF2j_ggH_ME",
    "D_VBF1j_ggH_ME",
    "D_WHh_ggH_ME",
    "D_ZHh_ggH_ME",
    "D_WHh_ZHh_ME",
    "D_VBF2j_WHh_ME",
    "D_VBF2j_ZHh_ME",
]

# Prefixes of the list-valued branches; the suffixes "Pt", "Eta" and "Phi" are appended further down.
list_branches = [
    "Jet",
    "Lep",
    "ExtraLep",
]

In [4]:
# Base directory of the input ntuples (absolute, site-specific path).
MC_path = "/data_CMS/cms/wind/CJLST_NTuples/"
# Build a collection from a single ROOT file, mapping its path to the cut `cuts.mZZ_cut`.
# NOTE(review): the meaning of the trailing 0.5 and 1.0 is defined by FileCollection, which is
# not visible here. The recorded cell output reports 31160 of 62320 entries being used, so they
# presumably select a fractional range of the entries -- confirm against trainlib.
H1_coll_val = FileCollection({MC_path + "VBFH125/ZZ4lAnalysis.root": cuts.mZZ_cut}, 0.5, 1.0)

skimming /data_CMS/cms/wind/CJLST_NTuples/VBFH125/ZZ4lAnalysis.root
collection set up: 1 files, 62320 entries in total, 31160 of which will be used


In [5]:
# Every list-branch prefix combined with every kinematic suffix, e.g. "JetPt", "JetEta", "JetPhi", ...
# The loop variable deliberately has its own name: reusing `list_branches` for it shadows the list
# being iterated and only works because of Python 3 comprehension scoping.
complete_list_branches = [branch + postfix for branch in list_branches for postfix in ["Pt", "Eta", "Phi"]]

In [6]:
# Fetch the Pt/Eta/Phi list branches plus "ZZMass" from the collection; the result is handed to
# prepare_data as a DataFrame further down.
# NOTE(review): the 0.0 and 1.0 arguments are interpreted by FileCollection.get_data, which is
# not visible here -- confirm their meaning.
H1_df = H1_coll_val.get_data(complete_list_branches + ["ZZMass"], 0.0, 1.0)

In [7]:
# Arguments for the prepare_data call further down.
col_basenames = ["Jet", "Lep", "ExtraLep"]  # column-name prefixes; same values as list_branches
sorted_column = "Pt"  # suffix of the column whose values define the ranking
columns = ["Pt", "Eta", "Phi"]  # suffixes of the columns to extract
# NOTE(review): `order` is not referenced by any code visible here (the call below passes range(4)).
order = 1

In [8]:
def extract_order(df, col_basename, sorted_column, columns, order):
    """
    For every row, pick the entry of rank `order` out of a group of parallel list-valued columns.

    The ranking is taken from the column `col_basename + sorted_column`, in descending order of
    its values (rank 0 is the largest value). The position found there is then used to read the
    matching entry from each column `col_basename + column`.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose cells in the relevant columns hold sequences (one sequence per row).
    col_basename : str
        Common prefix of the column names, e.g. "Jet".
    sorted_column : str
        Suffix of the column that defines the ranking, e.g. "Pt".
    columns : iterable of str
        Suffixes of the columns to read from, e.g. ["Pt", "Eta", "Phi"].
    order : int
        Rank of the entry to extract.

    Returns
    -------
    pandas.DataFrame
        One column per entry of `columns`, named `col_basename + column + "_" + str(order)`,
        carrying the index of `df`. Rows whose sequence has `order` or fewer entries get 0.
    """
    ranking_column = col_basename + sorted_column

    def rank_position(values):
        # -1 marks rows whose sequence is too short to contain an entry of this rank.
        if order >= len(values):
            return -1
        # argsort is ascending; flipping it yields positions in descending order of value.
        return np.flipud(np.argsort(values))[order]

    # Positions are computed from the frame that was passed in (not from a notebook global),
    # by walking the list-valued column directly instead of a row-wise DataFrame.transform.
    positions = [rank_position(values) for values in df[ranking_column]]

    extracted = {}
    for column in columns:
        source = df[col_basename + column]
        extracted[col_basename + column + "_" + str(order)] = [
            0 if position == -1 else values[position]
            for values, position in zip(source, positions)
        ]

    return pd.DataFrame(extracted, index = df.index)

In [9]:
def prepare_data(df, col_basenames, sorted_column, columns, orders, cut_column = "Pt", cut_threshold = 30.0):
    """
    Extract the ranked entries of several list-valued column groups and zero those failing a cut.

    For every prefix in `col_basenames` and every rank in `orders`, `extract_order` is called.
    Rows whose extracted `cut_column` value lies below `cut_threshold` are then set to 0.0 in all
    columns of that extraction. All extractions are joined side by side.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame, forwarded to `extract_order`.
    col_basenames : iterable of str
        Column-name prefixes to process, e.g. ["Jet", "Lep", "ExtraLep"].
    sorted_column : str
        Suffix of the column that defines the ranking (forwarded to `extract_order`).
    columns : iterable of str
        Suffixes of the columns to extract; must contain `cut_column`.
    orders : iterable of int
        Ranks to extract.
    cut_column : str, optional
        Suffix of the extracted column the cut is applied to (default "Pt").
    cut_threshold : float, optional
        Extractions with a `cut_column` value strictly below this are zeroed (default 30.0).

    Returns
    -------
    pandas.DataFrame
        All extracted columns, ordered by prefix and then by rank. An empty frame is returned
        when `col_basenames` or `orders` is empty.

    Raises
    ------
    KeyError
        If `cut_column` is not among the extracted `columns`.
    """
    pieces = []
    for col_basename in col_basenames:
        for order in orders:
            extracted = extract_order(df, col_basename, sorted_column, columns, order)

            # Zero the whole extracted entry (all of its columns) when it fails the cut.
            below_cut = extracted[col_basename + cut_column + "_" + str(order)] < cut_threshold
            extracted.loc[below_cut, :] = 0.0

            pieces.append(extracted)

    # pd.concat refuses an empty list; an empty frame matches what an empty loop used to give.
    if not pieces:
        return pd.DataFrame()

    # Join once at the end rather than re-copying the accumulated frame on every iteration.
    return pd.concat(pieces, axis = 1)

In [10]:
# Extract the entries of rank 0-3 (ranked by descending "Pt") for each of the Jet, Lep and
# ExtraLep collections; as the last expression in the cell, the resulting frame is displayed.
prepare_data(H1_df, col_basenames, sorted_column, columns, range(4))

Unnamed: 0,JetPt_0,JetEta_0,JetPhi_0,JetPt_1,JetEta_1,JetPhi_1,JetPt_2,JetEta_2,JetPhi_2,JetPt_3,...,ExtraLepPhi_0,ExtraLepPt_1,ExtraLepEta_1,ExtraLepPhi_1,ExtraLepPt_2,ExtraLepEta_2,ExtraLepPhi_2,ExtraLepPt_3,ExtraLepEta_3,ExtraLepPhi_3
0,114.725479,-1.693448,1.317638,86.071198,2.745670,-1.563440,49.529335,-0.670572,0.043922,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,100.943222,2.746336,-2.707756,66.715744,-1.923601,-0.269056,0.000000,0.000000,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,190.447678,2.274184,2.471648,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,80.918739,-2.724194,2.693447,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,63.885483,3.368660,-2.810166,34.853413,-0.697634,0.561523,0.000000,0.000000,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,107.242172,3.129260,-2.494584,55.543613,-0.054099,1.568728,0.000000,0.000000,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,62.505924,2.637863,-1.179873,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,77.487785,2.562668,-0.738574,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,51.917568,-2.601324,2.085942,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,47.240326,-3.206044,-2.534147,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
