In [50]:
from trainlib.FileCollection import FileCollection
import trainlib.cuts as cuts
from trainlib.utils import read_data
from trainlib.RNNPreprocessor import RNNPreprocessor
from trainlib.config import Config
from trainlib.PCAWhiteningPreprocessor import PCAWhiteningPreprocessor
import pandas as pd
import numpy as np
import pickle
import re

In [51]:
def extract_column(df, colstring):
    """Return the column described by ``colstring`` as a one-column DataFrame.

    Two forms of ``colstring`` are understood:

    * a plain column name, e.g. ``"PFMET"``: the column is returned unchanged
      as ``df[[colstring]]``;
    * ``"col[sorted_col|order]"``, e.g. ``"JetEta[JetPt|1]"``: the request is
      forwarded to ``extract_order_filtered(df, sorted_col, [col], order)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to read from.
    colstring : str
        Column specification in one of the two forms above.

    Raises
    ------
    ValueError
        If the bracketed form does not split into exactly three parts around
        ``[``/``]`` or exactly two parts around ``|``, or if ``order`` is not
        an integer literal.
    """
    if '[' in colstring and ']' in colstring:
        # Raw strings keep the regex escapes from being read as (invalid)
        # Python string escapes; the patterns themselves are unchanged.
        col, sorted_col_order, _ = re.split(r'[\[\]]', colstring)
        sorted_col, order = re.split(r'\|', sorted_col_order)
        cf = extract_order_filtered(df, sorted_col, [col], int(order))
    else:
        cf = df[[colstring]]
    return cf

In [52]:
def extract_order_filtered(df, sorted_column, columns, order, jet_pt_min=30.0):
    """Run ``extract_order`` and zero out entries that fail the jet-pt cut.

    The cut is only applied when at least one of the extracted column names
    contains the substring ``"Jet"``. In that case the ``JetPt`` value at the
    same ``order`` (ordered by ``JetPt`` itself) is extracted, and every row
    where it is below ``jet_pt_min`` is set to ``0.0`` in the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; must contain ``"JetPt"`` if any jet column is requested.
    sorted_column : str
        Column whose per-row values define the ordering.
    columns : list of str
        Columns to extract from.
    order : int
        Rank (0 = largest) of the element to pick in each row.
    jet_pt_min : float, optional
        Threshold of the jet-pt cut; defaults to the previously hard-coded 30.0.

    Returns
    -------
    pandas.DataFrame
        The frame returned by ``extract_order``, modified in place by the cut.
    """
    extracted_raw = extract_order(df, sorted_column, columns, order)

    if any("Jet" in col for col in extracted_raw.columns):
        # apply the jet-pt cut
        # NOTE(review): the mask is always ordered by JetPt, while the values
        # above are ordered by `sorted_column`; the two only refer to the same
        # jet when sorted_column == "JetPt" - confirm this is intended.
        mask_column = extract_order(df, "JetPt", ["JetPt"], order)
        mask = mask_column < jet_pt_min
        # `.values` replaces the removed DataFrame.as_matrix(); the 2-D boolean
        # array must have the same shape as `extracted_raw` (i.e. one column).
        extracted_raw[mask.values] = 0.0

    return extracted_raw

In [53]:
def extract_order(df, sorted_column, columns, order):
    """Pick, for every row, the element of rank ``order`` from list-valued columns.

    For each row the entries of ``row[sorted_column]`` are ranked in descending
    order; the position of the entry with rank ``order`` (0 = largest) is then
    used to index into ``row[column]`` for every ``column`` in ``columns``.
    Rows whose ``sorted_column`` entry has ``order`` or fewer elements yield 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. Presumably ``sorted_column`` and all ``columns`` hold
        per-row sequences of equal length - TODO confirm against the caller.
    sorted_column : str
        Name of the column that defines the ranking.
    columns : iterable of str
        Names of the columns to extract from.
    order : int
        Rank of the element to select in each row.

    Returns
    -------
    pandas.DataFrame
        One column per entry of ``columns``, named ``"<column>_<order>"``.
    """
    def get_index(row, order, sorted_column):
        """Return the position of the ``order``-th largest entry, or -1 if the row is too short."""
        # the parameter (a column name) is rebound to that column's value in this row
        sorted_column = row[sorted_column]
        if order >= len(sorted_column):
            return -1
        else:
            # argsort is ascending; flipud reverses it so that position 0 is the largest entry
            return np.flipud(np.argsort(sorted_column))[order]

    # NOTE(review): rows are indexed by column name although raw = True is passed;
    # this relies on how the installed pandas version treats `raw` - confirm before upgrading.
    index_column = pd.DataFrame(df.transform(lambda row: get_index(row, order, sorted_column), axis = 1, raw = True))
    index_column.columns = ["index"]
    # put the selected position next to the original data so both are visible in one row
    df_temp = pd.concat([index_column, df], axis = 1)

    def get_element(row, column_name):
        """Return ``row[column_name]`` at the stored position, or 0 when the position is -1."""
        if row["index"] == -1:
            return 0
        else:
            return row[column_name][row["index"]]

    # build the result column by column, each named "<column>_<order>"
    extracted_cols = pd.DataFrame()
    for column in columns:
        extracted_col = pd.DataFrame(df_temp.transform(lambda row: get_element(row, column), axis = 1, raw = True))
        extracted_col.columns = [column + "_" + str(order)]
        extracted_cols = pd.concat([extracted_cols, extracted_col], axis = 1)

    return extracted_cols

In [54]:
def j2cut(row):
    """Selection cut: True when ``row["nCleanedJetsPt30"]`` is at least 2."""
    return row["nCleanedJetsPt30"] >= 2

In [68]:
# Load the input sample: one ROOT file with the two-jet cut attached to it,
# then read the configured branches plus the JER / MET-JES variation branches.
fcoll = FileCollection(
    {"/data_CMS/cms/wind/CJLST_NTuples_prepared_systematics/VBFH125ext_JEC_UP/ZZ4lAnalysis.root" : j2cut},
    0.0,
    1.0,
)
setup_data = read_data(
    fcoll,
    0,
    62000,
    branches = Config.branches + ["JetJERUp", "JetJERDown", "PFMET_jesUp", "PFMET_jesDn"],
)

skimming /data_CMS/cms/wind/CJLST_NTuples_prepared_systematics/VBFH125ext_JEC_UP/ZZ4lAnalysis.root
collection set up: 1 files, 109964 entries in total, 109964 of which will be used


In [69]:
# Compare the nominal MET with its JES up/down variations; show only the first rows.
setup_data[["PFMET", "PFMET_jesUp", "PFMET_jesDn"]].head(10)

Unnamed: 0,PFMET,PFMET_jesUp,PFMET_jesDn
1,7.127293,7.127293,8.532790
4,24.864750,24.864750,19.789850
5,26.245907,26.245907,29.939737
7,39.274055,39.274055,44.029099
13,35.152088,35.152088,33.362717
15,53.843628,53.843628,60.994373
16,41.455864,41.455864,25.398987
17,51.089642,51.089642,43.431183
18,25.673473,25.673473,23.081484
22,29.313005,29.313005,20.894043


In [62]:
# Compare the nominal jet pt with its JER up/down variations; show only the first rows.
# NOTE(review): this cell's execution count (62) predates the data-loading cell (68),
# so its stored output may not match the current `setup_data` - re-run after loading.
setup_data[["JetPt", "JetJERUp", "JetJERDown"]].head(10)

Unnamed: 0,JetPt,JetJERUp,JetJERDown
1,"[47.9916, 45.4407]","[47.5933, 44.6546]","[47.9916, 45.4407]"
4,"[73.9409, 35.8368]","[73.3467, 36.3976]","[73.9409, 35.8368]"
5,"[126.279, 38.1255, 29.9515]","[125.685, 37.6384, 29.0073]","[126.279, 38.1255, 29.9515]"
7,"[107.992, 67.3538]","[105.423, 67.1606]","[107.992, 67.3538]"
13,"[82.5144, 51.9631]","[83.9068, 55.1883]","[82.5144, 51.9631]"
15,"[88.8903, 59.5798, 24.663]","[89.8798, 62.1438, 24.7259]","[88.8903, 59.5798, 24.663]"
16,"[62.3309, 37.9186, 24.4237]","[65.193, 39.566, 29.1304]","[62.3309, 37.9186, 24.4237]"
18,"[163.139, 74.7351]","[164.624, 71.0506]","[163.139, 74.7351]"
22,"[115.399, 101.611, 57.3584, 44.0641, 40.4843]","[115.675, 101.874, 57.4019, 42.6119, 41.0411]","[115.399, 101.611, 57.3584, 44.0641, 40.4843]"
23,"[110.478, 64.8607, 28.1815, 24.965]","[108.424, 66.1139, 28.2598, 25.7176]","[110.478, 64.8607, 28.1815, 24.965]"
