In [2]:
from ROOT import *
from root_numpy import tree2array
from ROOT import TFile
import pandas as pd
import numpy as np
import deepdish.io as io
from sklearn.preprocessing import StandardScaler
import os

In [3]:
data = TFile.Open("/home/minerva1993/public/v808/nosplit/ttHbb_PowhegPythia.root")
data2 = TFile.Open("/home/minerva1993/public/v808/nosplit/ttbar_PowhegPythiaBkg.root")
tree = data.Get("ttbbLepJets/tree")
tree2 = data2.Get("ttbbLepJets/tree")

In [4]:
def tree_to_df(tree, branch_names=[], index_name='', drop_roofit_labels=False):
    if tree is None:
        return None

    branch_list = tree.GetListOfBranches()
    all_branch_names = [branch_list.At(i).GetName() for i in range(branch_list.GetEntries())]
    if len(branch_names) == 0:
        branch_names = all_branch_names
    for bn in branch_names[:]:
        if bn not in all_branch_names:
            branch_names.remove(bn)
        if drop_roofit_labels:
            if bn.endswith('_lbl'):
                branch_names.remove(bn)

    arrs = tree2array(tree, branch_names, start = 0, stop = 10000)
    df = pd.DataFrame(arrs)

    if len(index_name) == 0:
        for col in df.columns:
            if col.startswith('__index__'):
                index_name = col
                break
    if len(index_name):
        try:
            df[index_name] = df[index_name].astype(np.int32)
            df.set_index(index_name, inplace=True)
        except BaseException:
            pass

    if drop_roofit_labels:
        df.columns = [col.replace('_idx', '') for col in df.columns]

    n_tree = tree.GetEntries()
    n_df = len(df.index)

    return df 

In [5]:
dftree = tree_to_df(tree)
dftree_bg = tree_to_df(tree2)

In [6]:
dftree

Unnamed: 0,event,run,luminumber,genweight,GoodPV,channel,PUWeight,MET,MET_phi,lepton_pT,...,genjet_mom,genjet_gencone_mom,addbjet1_pt,addbjet1_eta,addbjet1_phi,addbjet1_e,addbjet2_pt,addbjet2_eta,addbjet2_phi,addbjet2_e
0,123040,1,883,1.0,31,1,"[0.40484145, 0.6538616, 0.21446894]",49.436161,2.514606,58.858044,...,"[6, 24, 0, 24, -24, -6, 24]","[0, 0, 0, 24, 0, 6, 24]",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,123060,1,883,1.0,19,0,"[1.0498619, 1.0581169, 1.0192269]",78.730804,3.109999,74.936958,...,"[0, -24, 24, 24, 24, 6, 0, -6, 0, 6, 0, 0]","[0, 0, 0, 24, 24, 6, 0, 6, 0, 0, 0, 0]",23.452469,0.755852,-2.941486,31.069412,22.976763,1.160722,2.689981,40.604553
2,123061,1,883,1.0,22,0,"[1.0966326, 1.0272917, 1.1657113]",99.922394,-1.245461,32.057869,...,"[24, 0, 6, 0, 0, 24, 0, -6, -24, 0, 24, 0]","[6, 0, 0, 0, 0, 0, 0, 6, 0, 0, 24, 0]",96.468826,1.702264,-0.600232,274.260590,64.680901,1.183147,-0.895099,116.760696
3,123063,1,883,1.0,27,0,"[0.16368973, 0.32980123, 0.064325966]",330.668762,-1.084947,102.835823,...,"[24, 0, 6, -6, -24, 0, 6, 24]","[24, 0, 0, 6, 0, 0, 6, 24]",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,123082,1,883,1.0,22,1,"[1.0581728, 1.0854989, 1.0060143]",47.671928,-1.331177,154.841553,...,"[24, -24, -6, 0, -24, 0, 0]","[0, 24, 6, 0, 6, 0, 0]",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
5,123085,1,883,1.0,22,0,"[1.0498619, 1.0581169, 1.0192269]",85.362183,3.127869,71.661835,...,"[-24, 6, -6, -24, 24, -6, -6, -6]","[0, 6, 0, 24, 0, 6, 0, 0]",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
6,123112,1,883,1.0,18,0,"[1.0511662, 0.9979897, 1.0905966]",137.648026,-1.984573,94.082756,...,"[6, -24, 0, 24]","[6, 0, 0, 24]",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
7,123115,1,883,1.0,26,1,"[1.0946208, 1.2462693, 0.91461706]",80.219719,-2.071757,32.749702,...,"[6, -24, 24, 0, -24, 0]","[6, 24, 0, 0, 24, 0]",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
8,123127,1,883,-1.0,27,0,"[0.91080743, 1.1814939, 0.650952]",142.997971,-1.485821,194.104813,...,"[24, -24, 24, 0, 24, 0]","[6, 0, 24, 0, 24, 0]",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
9,123146,1,883,1.0,10,0,"[1.0511662, 0.9979897, 1.0905966]",105.785645,1.609540,31.532207,...,"[-6, -24, 0, -24, 0, 24, 0]","[6, 6, 0, 24, 0, 0, 0]",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [7]:
def process(df):
    columns = ['jet_pT','jet_eta','jet_phi','jet_E','lepton_pT','lepton_eta','lepton_phi','lepton_E','MET','MET_phi','jet_CvsB','channel']
    event_weight = []
    for i in range(len(df['lepton_SF'])):
        product = df['lepton_SF'][i][0] * df['jet_SF_CSV_30'][i][0] * df['PUWeight'][i][0] * df['genweight'][i]
        event_weight.append(product)
    print("completed")
    train_tree = df[columns].copy()
    train_tree['event_weight'] = event_weight
    return train_tree

In [8]:
train_tree = process(dftree)

completed


In [9]:
train_tree_2 = process(dftree_bg)

completed


In [10]:
train_tree['result'] = np.zeros(len(train_tree))

In [11]:
train_tree_2['result'] = [1 for i in range(len(train_tree_2))]

In [12]:
train_tree.append(train_tree_2, ignore_index=True)

Unnamed: 0,jet_pT,jet_eta,jet_phi,jet_E,lepton_pT,lepton_eta,lepton_phi,lepton_E,MET,MET_phi,jet_CvsB,channel,event_weight,result
0,"[42.661274, 65.645966, 22.911049, 125.97464, 2...","[-0.271016, 1.3844224, -0.6846375, 1.5187304, ...","[0.94530946, -2.9926286, 2.5624125, -1.8096906...","[44.8713, 139.70744, 28.783531, 301.89896, 100...",58.858044,0.657305,1.420835,72.037285,49.436161,2.514606,"[-0.813637, -0.025431635, 0.21034619, 0.127873...",1,0.438722,0.0
1,"[45.74305, 57.38898, 61.79478, 73.1204, 165.83...","[1.5573863, -0.06565438, 0.34177333, -0.130020...","[-0.9524832, 2.6832464, 1.1491051, 1.6261227, ...","[113.68012, 58.55352, 65.78093, 74.887215, 199...",74.936958,-0.601784,-1.900180,88.920494,78.730804,3.109999,"[-0.26433367, 0.25034687, 0.44398418, 0.034136...",0,1.432893,0.0
2,"[88.72132, 460.08844, 44.595196, 234.2631, 31....","[1.167819, 0.53193265, 1.184677, 0.80057067, 1...","[2.107498, 1.7357248, -0.88383263, 2.383072, -...","[156.82622, 531.2781, 80.14583, 316.00833, 89....",32.057869,0.271955,-2.687549,33.250851,99.922394,-1.245461,"[-0.9191747, -0.4551153, -0.6020838, -0.086795...",0,1.441029,0.0
3,"[22.590187, 45.902477, 164.23619, 133.37915, 3...","[-1.4246961, -1.3401238, -1.7458824, -0.517940...","[2.0270612, 1.2537354, 2.560874, -1.4086629, 1...","[49.86484, 94.119644, 485.32507, 152.96466, 36...",102.835823,-0.054636,-1.809997,102.989403,330.668762,-1.084947,"[-0.6482715, -0.64722025, -0.6196078, 0.254726...",0,0.162375,0.0
4,"[85.04708, 44.179707, 82.6381, 47.557316, 28.9...","[-0.83912593, -0.57244843, -0.17182124, 0.5823...","[0.7512298, 2.728741, 1.3593801, 0.020198429, ...","[117.38267, 52.18768, 85.164154, 56.453644, 12...",154.841553,0.155100,-2.643861,156.707703,47.671928,-1.331177,"[-0.85536873, 0.22078165, -0.45577872, -0.1573...",1,1.206927,0.0
5,"[30.258467, 267.78867, 43.378586, 118.986305, ...","[-0.055366836, -0.46139804, 0.26952937, 0.7246...","[-1.4464462, -2.5755446, 2.2641814, 0.86087847...","[30.841148, 297.69543, 45.44547, 152.35663, 26...",71.661835,0.161969,1.261273,72.603951,85.362183,3.127869,"[-0.9290271, -0.30875707, -0.28230354, 0.37945...",0,0.841446,0.0
6,"[54.16175, 141.90894, 60.835266]","[0.8170406, -0.13258718, -0.65846133]","[0.32017702, 1.1195123, -0.023466114]","[73.77612, 143.8163, 74.80518]",94.082756,0.637413,-2.487734,113.851418,137.648026,-1.984573,"[-0.705597, 0.12703606, 0.7795981]",0,0.878051,0.0
7,"[103.055855, 25.612146, 39.455177, 73.58129]","[-1.0279114, 0.5408919, -1.7285845, 0.33180013]","[2.076244, -0.44398242, 1.356959, -2.5992303]","[163.85724, 29.823355, 114.89598, 78.2507]",32.749702,-1.143762,-0.455254,56.610672,80.219719,-2.071757,"[-0.8741164, 0.47148502, 0.3597645, 0.26419595]",1,1.127149,0.0
8,"[66.75249, 302.4175, 71.99213, 26.656015, 46.6...","[0.07355192, -1.2523402, -1.8330383, 2.0137346...","[-2.7514453, 2.115291, 1.9689722, 0.18039836, ...","[67.52934, 573.1681, 231.31413, 101.76752, 59....",194.104813,0.489875,-0.950300,217.864716,142.997971,-1.485821,"[-0.66242737, 0.33789632, -0.14399838, -0.0359...",0,-1.044097,0.0
9,"[47.150536, 67.09775, 92.06267, 136.5527, 50.2...","[-0.4343695, 0.2776462, 0.5645252, 0.075170115...","[-3.1023426, 0.20798288, 2.0645509, -1.3290219...","[52.698715, 70.08499, 108.1978, 138.63322, 81....",31.532207,1.668708,3.080636,86.615898,105.785645,1.609540,"[-0.9435033, -0.7769106, 0.31706303, 0.4937684...",0,1.179015,0.0


In [None]:
for i in range(len(train_tree['jet_pT'])):
    train_tree['jet_eta'][i], train_tree['jet_phi'][i], train_tree['jet_E'][i], train_tree['jet_CvsB'][i], train_tree['jet_pT'][i] = sum(train_tree['jet_eta'][i]), sum(train_tree['jet_phi'][i]), sum(train_tree['jet_E'][i]), sum(train_tree['jet_CvsB'][i]), sum(train_tree['jet_pT'][i])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [None]:
train_tree

In [24]:
y_pred = train_tree['result']
train_tree = train_tree.drop('result',1)

In [29]:
def preprocess_data(X, scaler=None):
    if not scaler:
        scaler = StandardScaler()
        scaler.fit(X)
    X = scaler.transform(X)
    return X, scaler

In [39]:
max([len(x) for x in train_tree['jet_pT']])

17

In [40]:
min([len(x) for x in train_tree['jet_pT']])

0