In [1]:
import ROOT
import root_numpy as rnpy
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn
import pickle

Welcome to ROOTaaS 6.06/08


In [2]:
# Number of tree entries requested per read, for the jet tree and for the track tree.
batch_size_jets = batch_size_tracks = 25
# Entry offsets at which the next read of each tree starts.
read_pos_jets = read_pos_tracks = 0

In [3]:
# Read one batch of entries [read_pos_jets, read_pos_jets + batch_size_jets) from the
# "tagVars/ttree" tree and wrap the resulting array in a DataFrame.
d1 = pd.DataFrame(
    rnpy.root2array(
        "/mnt/t3nfs01/data01/shome/jpata/btv/gc/TagVarExtractor/GCa08e5e237323/TT_TuneCUETP8M1_13TeV-powheg-pythia8/job_0_out.root",
        treename="tagVars/ttree",
        start=read_pos_jets,
        stop=read_pos_jets + batch_size_jets,
    )
)

In [4]:
# Read one batch of entries [read_pos_tracks, read_pos_tracks + batch_size_tracks) from the
# "tagVars/ttree_track" tree of the same file and wrap the resulting array in a DataFrame.
d2 = pd.DataFrame(
    rnpy.root2array(
        "/mnt/t3nfs01/data01/shome/jpata/btv/gc/TagVarExtractor/GCa08e5e237323/TT_TuneCUETP8M1_13TeV-powheg-pythia8/job_0_out.root",
        treename="tagVars/ttree_track",
        start=read_pos_tracks,
        stop=read_pos_tracks + batch_size_tracks,
    )
)

  warn_missing_tree)


In [5]:
# Jet index one below the Track_jetIndex found in the final track row of this batch.
# NOTE(review): presumably the last jet is skipped because its tracks may be cut off
# by the batch boundary - confirm.
last_tracks = int(d2['Track_jetIndex'].iloc[-1] - 1)

In [6]:
# Jet index one below the Jet_jetIndex found in the final jet row of this batch.
# NOTE(review): this name is not read by any of the cells visible here.
last_jet = int(d1['Jet_jetIndex'].iloc[-1] - 1)

In [7]:
# Move both read offsets just past the last row that belongs to jet `last_tracks`.
# NOTE: this modifies the offsets in place, so running the cell again advances them again.
read_pos_jets = read_pos_jets + (d1[d1['Jet_jetIndex'] == last_tracks].index[-1] + 1)
read_pos_tracks = read_pos_tracks + (d2[d2['Track_jetIndex'] == last_tracks].index[-1] + 1)

In [8]:
# Give every jet row its own, independent empty list that will collect the jet's tracks.
# (A plain comprehension replaces the `pd.np` alias, which newer pandas no longer provides.)
d1['track_data'] = [[] for _ in range(len(d1.index))]

In [9]:
# iterate over the track list to join jets with the tracks belonging to them
track_columns = ["Track_pt", "Track_eta", "Track_phi", "Track_dxy", "Track_dz", "Track_IP", "Track_IP2D", "Track_length"]

# Look up every jet's track list once (the first row wins if a jet index repeats)
# instead of scanning d1 again for each individual track.
track_lists_by_jet = {}
for jet_key, track_list in zip(d1['Jet_jetIndex'], d1['track_data']):
    track_lists_by_jet.setdefault(jet_key, track_list)

for irow, row in d2.iterrows():
    # these are the track data of the current track
    # (.values replaces .as_matrix(), which newer pandas no longer provides):
    tracks = row[track_columns].values
    jet_index = int(row["Track_jetIndex"])
    # stop at the first track whose jet lies beyond last_tracks
    if jet_index > last_tracks:
        break

    # append the tracks data to the matching jet in the main table;
    # a track whose jet index is absent from d1 raises KeyError here
    track_lists_by_jet[jet_index].append(tracks)

In [10]:
def get_max_tracks(data):
    """Return the length of the longest entry in ``data['track_data']``.

    The result is 0 when there are no entries or when all of them are empty.
    """
    track_counts = [len(entry) for entry in data['track_data']]
    # Seeding with 0 keeps the result at 0 for an empty collection.
    return max([0] + track_counts)

In [11]:
def equalize_tracks(data, set_tracks, number_parameters=8):
    """Pad the non-empty track lists in ``data['track_data']`` with all-zero tracks.

    Every non-empty list is extended in place until it holds ``set_tracks``
    entries. Empty lists are left empty, and lists that already hold
    ``set_tracks`` or more entries are left unchanged.

    Parameters
    ----------
    data : mapping or DataFrame
        Must provide an iterable ``data['track_data']`` of Python lists.
    set_tracks : int
        Target number of entries per non-empty list.
    number_parameters : int, optional
        Length of each zero-filled padding track (default 8).

    Returns
    -------
    None
        The lists are modified in place.
    """
    for cur in data['track_data']:
        # take only these that are non-empty track lists
        if len(cur) > 0:
            missing = set_tracks - len(cur)
            # Extend the list object itself rather than looking it up again by
            # position, so frames whose index is not 0..n-1 work as well.
            # Each padding track is a separate array, so changing one later
            # cannot affect the others.
            cur.extend(np.zeros(number_parameters, dtype=float) for _ in range(missing))

In [12]:
def create_track_columns(set_tracks, number_parameters):
    """Build the flat column names 'T0', 'T1', ... for the track table.

    One name is produced per (track, parameter) slot, i.e.
    ``set_tracks * number_parameters`` names in ascending order.
    """
    total_columns = set_tracks * number_parameters
    return ['T' + str(position) for position in range(total_columns)]

In [13]:
def create_track_table(data):
    """Flatten the per-jet track lists into one wide DataFrame.

    The number of tracks per jet and the number of values per track are taken
    from the first entry of ``data['track_data']``. Each entry is converted to
    an array and flattened into one row with columns 'T0', 'T1', ...

    Parameters
    ----------
    data : mapping or DataFrame
        Must provide an iterable ``data['track_data']`` whose first entry is a
        non-empty sequence of tracks.

    Returns
    -------
    pandas.DataFrame
        One row per entry, in iteration order, with pandas' default index.

    Raises
    ------
    ValueError
        If ``data['track_data']`` has no entries.
    """
    track_lists = list(data['track_data'])
    if not track_lists:
        raise ValueError("data['track_data'] is empty; cannot infer the track table layout")

    # Use the first entry by position, not the one labelled 0, so that frames
    # with a filtered or otherwise non-default index are handled too.
    first = track_lists[0]
    set_tracks = len(first)
    number_parameters = len(first[0])

    colnames = create_track_columns(set_tracks, number_parameters)
    tracks = [np.array(cur).flatten() for cur in track_lists]

    return pd.DataFrame(tracks, columns=colnames)

In [14]:
def create_track_list(table, set_tracks, number_parameters):
    """Rebuild the (jets, tracks, parameters) array from the flat 'T*' columns.

    Parameters
    ----------
    table : pandas.DataFrame
        Must contain the columns 'T0' ... 'T<set_tracks * number_parameters - 1>'.
    set_tracks : int
        Number of tracks stored per jet.
    number_parameters : int
        Number of values stored per track.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(table), set_tracks, number_parameters)``.
    """
    number_jets = len(table)
    cols = create_track_columns(set_tracks, number_parameters)

    # extract raw matrix; .loc / .values work on both old and current pandas,
    # unlike .ix / .as_matrix(), which newer releases no longer provide
    tracks = table.loc[:, cols].values
    # Spell out the middle dimension so that an empty table reshapes cleanly too.
    return tracks.reshape(number_jets, set_tracks, number_parameters)

In [15]:
# Layout of the flattened track table: the number of tracks each jet is padded to,
# and the number of values stored per track.
set_tracks, number_parameters = 10, 8

In [22]:
# Pad the non-empty track lists of d1 up to set_tracks entries; the lists are modified in place.
equalize_tracks(d1, set_tracks)

In [23]:
# Flatten the per-jet track lists of d1 into one wide table with columns T0, T1, ...
track_table = create_track_table(d1)

In [24]:
# Put the flat T* columns next to every column of d1 except its last one.
# NOTE(review): the last column is presumably the nested 'track_data' lists - confirm
# that no other column was added to d1 after it.
# .iloc is the positional replacement for the .ix indexer, which newer pandas no longer provides.
joined = pd.concat([d1.iloc[:, 0:-1], track_table], axis=1)

In [25]:
# Rebuild the (jets, tracks, parameters) array from the flat T* columns of the joined table.
tracks_reconstructed = create_track_list(joined, set_tracks, number_parameters)

In [42]:
# Keep only the jets whose Jet_jetIndex is below 30.
matched = d1[d1['Jet_jetIndex'] < 30]

In [131]:
def save_dataset(file, data):
    """Write ``data`` to the HDF5 file ``file`` under the key 'data' in table format.

    Parameters
    ----------
    file : str
        Path of the HDF5 file to open.
    data : pandas.DataFrame
        Object to store.
    """
    # The context manager closes the store even when put() raises.
    with pd.HDFStore(file) as store:
        store.put('data', data, format='table')

In [517]:
# Store the joined jet/track table in an HDF5 file in the current working directory.
save_dataset('./padded_test.h5', joined)

In [38]:
# Split the jets into three lists by flavour:
# |flavour| == 5 goes to jets_b, |flavour| == 4 to jets_c, everything else to jets_l.
jets_b, jets_c, jets_l = [], [], []

# Walk the jet table, whose rows already carry their matched tracks.
for irow, row in d1.iterrows():
    jet_index = int(row["Jet_jetIndex"])
    # Stop at the first jet that lies beyond last_tracks.
    if jet_index > last_tracks:
        break

    flavour = int(row["Jet_flavour"])

    # Choose the list this jet belongs to.
    jets = jets_b if abs(flavour) == 5 else (jets_c if abs(flavour) == 4 else jets_l)

    # Record the jet as (pt, eta, phi, mass, flavour, tracks).
    jets.append((row["Jet_pt"], row["Jet_eta"], row["Jet_phi"], row["Jet_mass"], flavour, row["track_data"]))

In [76]:
def read_metadata(store):
    """Return the ``metadata`` attribute attached to the 'data' storer of ``store``."""
    storer = store.get_storer('data')
    attributes = storer.attrs
    return attributes.metadata

In [110]:
# Open the stored dataset just long enough to fetch its metadata and the
# number of tracks per jet recorded there.
with pd.HDFStore('/shome/phwindis/data/matched/1.h5') as store:
    metadata = read_metadata(store)
    number_tracks = metadata['number_tracks']

In [112]:
# Load the stored table from the same file the metadata was read from.
# NOTE(review): no key is passed here - presumably the file holds a single object
# (the 'data' key used by read_metadata); confirm.
readin = pd.read_hdf('/shome/phwindis/data/matched/1.h5')

In [114]:
# Rebuild the (jets, tracks, parameters) array from the loaded table, using the track
# count taken from the file's metadata and the number_parameters value set earlier.
tracks_reconstructed = create_track_list(readin, number_tracks, number_parameters)

In [119]:
# Show the reconstructed track array of the third-to-last jet.
tracks_reconstructed[-3]

array([[  1.18945312e+00,  -3.87829214e-01,   5.48250735e-01,
         -1.05224608e-03,  -2.87841796e-03,   2.87432224e-03,
         -1.05225050e-03,   1.00191003e-02],
       [  4.75000000e+00,  -6.38691366e-01,   5.50014794e-01,
          4.85351542e-03,   5.37109375e-03,  -6.57487568e-03,
          4.85352008e-03,   6.74351156e-02],
       [  8.97656250e+00,  -6.59199834e-01,   6.07341707e-01,
          3.51562491e-03,  -7.16796890e-03,   6.82539213e-03,
         -3.51562840e-03,   2.47787423e-02],
       [  6.42968750e+00,  -6.67439818e-01,   5.35174072e-01,
          3.08105466e-03,  -9.19921882e-03,   8.08249228e-03,
          3.08105606e-03,   2.28384376e-01],
       [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
          0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
          0.00000000e+00,   0.00000000e+00],
       [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
          0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
          0.00000000e+00,   0.0

In [113]:
# Display the loaded table.
readin

Unnamed: 0,Jet_jetIndex,Jet_pt,Jet_genpt,Jet_eta,Jet_phi,Jet_mass,Jet_flavour,Jet_nbHadrons,Jet_JP,Jet_JBP,...,T246,T247,T248,T249,T250,T251,T252,T253,T254,T255
0,0,28.059725,26.634577,-0.404541,-2.600135,6.500473,-5.0,1.0,0.768905,3.540938,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,20.876144,25.888498,-0.154551,2.651759,4.047904,21.0,0.0,0.197709,0.548262,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,85.293282,91.934029,-1.111687,-2.854859,16.675396,-3.0,0.0,0.023409,0.388970,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,55.315605,51.409073,-0.830396,0.505299,7.930476,5.0,1.0,1.524514,6.266030,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,49.338642,47.123440,-1.804011,-0.913971,8.289750,-5.0,1.0,0.505539,2.644597,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,5,83.293488,83.498001,1.152863,0.412678,9.843184,-3.0,0.0,0.058694,0.308320,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,6,68.837479,65.600845,0.879585,1.317408,8.306872,4.0,0.0,0.965140,4.376747,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,7,42.308193,39.237881,-0.367536,-2.845827,10.817450,-5.0,1.0,0.240841,1.218917,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,8,24.858631,24.699846,0.621653,-1.356849,3.892481,21.0,0.0,0.301631,0.902341,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,9,57.202858,53.362408,0.395276,1.335062,8.331734,5.0,1.0,0.230859,1.063146,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
