In [97]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader, random_split, SubsetRandomSampler
from pathlib import Path
import joblib
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import sys

from utils.model_dict import set_model_structure
from utils.hh_dataset import HH_Dataset_hdf5
from NN.neural_network import Model, NeuralNetwork
from utils.model_dict import set_model_structure
import uproot
import pandas as pd
import awkward as ak
from pathlib import Path
import multiprocessing as mp
from copy import deepcopy
import json
import seaborn as sns
import yaml

from sklearn.model_selection import StratifiedKFold, KFold, train_test_split
from sklearn.utils.class_weight import compute_class_weight
import sklearn.metrics as metrics

import ROOT
import uproot

%matplotlib widget

In [98]:
channel = '0l2tau'

features = [
        'pt_H',
        'lep_phi0_1',
        'met', 
        'lep_pt_1',
        'N_j_central',
        'Dphi_metyy'
    ]
    
if channel == '1l0tau':
    features += [
        'y1_phi0',
        'minDphi_metjl',
        'Dr_lv', 
        'Dr_yyW', 
        'eta_W',
    ]

elif channel == '0l1tau':
    features += [
        'y1_phi0',
        'y1_eta',
    ]

else:
    features += [
        'phi_H', 
        'lep_pt_2', 
        'met_phi',
        'minDphi_metjl',
        'Dphi_metll',
        'Dr_lv', 
        'm_ll',
        'Dr_ll',
        'Dphi_ll',
        'Dr_yyll',
        'Jet_pt1',
    ]

processes = [
    'SH', 
    'Sherpa', 
    'Vyy',
    'signal_ggF', 
    'signal_VBF'
]

In [99]:
with HH_Dataset_hdf5(f'../Input_Files/{channel}.hdf5') as hf:
    X = hf.tensors(features)
    y = hf.labels()
    w = hf.tensors('weight')

In [100]:
X.shape

torch.Size([20617, 17])

In [101]:
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

ds = TensorDataset(X, y, w)

for train_index, test_index in skf.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    w_train, w_test = w[train_index], w[test_index]
    
    train_sampler = SubsetRandomSampler(train_index)
    test_sampler = SubsetRandomSampler(test_index)
    
    train_dl = DataLoader(ds, batch_size=1024, sampler=SubsetRandomSampler(train_index))
    test_dl = DataLoader(ds, batch_size=1024, sampler=SubsetRandomSampler(test_index))

In [102]:
train_sampler.indices

array([    0,     1,     2, ..., 20613, 20614, 20616])

In [103]:
for i, (x, y, w) in enumerate(test_dl):
    print(i, y)

0 tensor([1, 1, 1,  ..., 0, 1, 1])
1 tensor([1, 0, 1,  ..., 1, 0, 1])
2 tensor([1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1])


In [104]:
indices = test_dl.sampler.indices

test_dl.dataset.tensors[1][indices]

tensor([1, 1, 1,  ..., 0, 0, 0])

In [105]:
dl = DataLoader(ds, batch_size=1024)

In [106]:
dl.dataset.tensors

(tensor([[ 7.5875e+01,  1.4099e-01,  6.6462e+01,  ..., -1.0000e+02,
          -1.0000e+02,  4.9757e+01],
         [ 1.2326e+02, -2.3055e+00,  6.7364e+01,  ..., -1.0000e+02,
          -1.0000e+02, -1.0000e+02],
         [ 1.4237e+02, -2.8732e+00,  4.3757e+01,  ..., -1.0000e+02,
          -1.0000e+02,  6.4320e+01],
         ...,
         [ 8.2454e+01,  1.6778e+00,  6.5940e+01,  ..., -1.0000e+02,
          -1.0000e+02, -1.0000e+02],
         [ 6.8065e+01, -1.5038e+00,  6.6608e+01,  ..., -1.0000e+02,
          -1.0000e+02, -1.0000e+02],
         [ 5.6029e+01,  1.8414e-01,  1.4011e+02,  ..., -1.0000e+02,
          -1.0000e+02,  2.1631e+02]], dtype=torch.float64),
 tensor([1, 1, 1,  ..., 0, 0, 0]),
 tensor([3.9951e-06, 3.8305e-06, 1.9007e-06,  ..., 3.8511e-11, 3.7783e-10,
         2.3908e-10], dtype=torch.float64))

In [107]:
with open('../Configs/NN/bc_1l0tau_config.yaml', 'r') as f:
    config = yaml.safe_load(f)

In [108]:
for layer in config['model']['layers']:
    print(layer)
  

{'type': 'dense', 'neurons': 2048, 'activation': 'relu'}
{'type': 'dense', 'neurons': 2048, 'activation': 'relu'}
{'type': 'dense', 'neurons': 2048, 'activation': 'relu'}
{'type': 'dense', 'neurons': 2048, 'activation': 'relu'}
{'type': 'dense', 'neurons': 2048, 'activation': 'relu'}


In [109]:
model = config['model']

md = set_model_structure(
    model['input_shape'],
    *model['layers'],
    model['output_shape'],
)

Model(md)

KeyError: 'units'

In [None]:
t = torch.randn(10)

sampler = SubsetRandomSampler(torch.arange(0, 10))

ds = TensorDataset(t)
dl = DataLoader(ds, batch_size=1, sampler=sampler)

In [None]:
for x in dl:
    print(x)

[tensor([0.3583])]
[tensor([-0.8014])]
[tensor([-0.0680])]
[tensor([-0.7626])]
[tensor([0.7407])]
[tensor([-0.0673])]
[tensor([0.7940])]
[tensor([0.5903])]
[tensor([1.2902])]
[tensor([0.7305])]


In [None]:
skf = KFold(n_splits=2, shuffle=True, random_state=42)

for train_index, test_index in skf.split(t):
    np.random.shuffle(train_index)
    np.random.shuffle(test_index)
    print(train_index, test_index)

[9 2 6 4 3] [8 0 7 1 5]
[7 8 1 0 5] [6 9 4 2 3]


In [None]:
a = np.arange(0, 10)

np.random.shuffle(a)

a

array([9, 3, 1, 4, 2, 0, 8, 5, 7, 6])