In [1]:
# Load IPython's autoreload extension and select mode 2, which re-imports
# modified modules before each cell's code is executed. Edits to the local
# utils modules imported in the next cell therefore take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# General imports
import os
import tensorflow as tf
from scipy import stats
from xgboost import XGBClassifier

# Utility imports
from utils.losses import *
from utils.plotting import *
from utils.training import *

# Seed every random number generator this notebook has direct access to.
# NumPy alone is not enough here: the BCE networks are trained with
# TensorFlow, which keeps its own global seed.
# NOTE(review): run-to-run identical results may still need more than this
# (e.g. deterministic GPU kernels, seeding inside the utils helpers) - confirm.
np.random.seed(666)
tf.random.set_seed(666)

In [3]:
# Restrict TensorFlow to a single physical GPU. This is done before the
# devices are queried below so that the restriction is already in place.
os.environ["CUDA_VISIBLE_DEVICES"] = "3" # pick a number < 4 on ML4HEP; < 3 on Voltan
physical_devices = tf.config.list_physical_devices('GPU')

# Enable on-demand memory allocation on the selected GPU. The list is empty
# when the chosen index does not exist on this host (or on a CPU-only machine),
# so guard the lookup instead of failing with an IndexError.
if physical_devices:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
else:
    print('No GPU visible to TensorFlow; skipping memory-growth setup.')

# $d = 1$

In [4]:
# Experiment parameters
num = 0                     # index of this experiment set (selects the set_{num} folder)
reps = 100                  # number of trainings per sample size
d = 1                       # passed to the BCE model as `d`; also selects the data/model folders
Ns = 10**np.arange(2, 8)    # sample sizes 10^2, 10^3, ..., 10^7

# Model parameters
bce_params = {'loss':bce, 'd': d}

# Output locations. The two placeholders in the file templates are filled
# with (N, repetition index) by the training loop.
filestr = 'models/trees/{}/set_{}/'.format(d, num)
bce_filestr = filestr + 'bce/model_{}_{}.h5'
bdt_filestr = filestr + 'bdt/model_{}_{}.h5'

# Create the output folders. makedirs also creates missing parent folders
# (os.mkdir raises FileNotFoundError if e.g. models/trees/<d>/ does not exist
# yet) and exist_ok makes the cell safe to re-run.
os.makedirs(filestr + 'bce/', exist_ok = True)
os.makedirs(filestr + 'bdt/', exist_ok = True)

# Data parameters
# One row per sample, d columns per row, so that X[:N] lines up with y[:N].
# Assumes the file stores d values per sample - TODO confirm.
X = np.load('data/trees/{}/X_trn.npy'.format(d)).reshape(-1, d)
y = np.load('data/trees/{}/y_trn.npy'.format(d)).astype('float32')

In [None]:
# Scan over the sample sizes. For every size, train `reps` BCE networks and
# `reps` XGBoost classifiers on the same split and save each fitted model.
# NOTE(review): in the recorded output for N = 100 the last two columns (which
# appear to be the BDT ones) are identical for every repetition, so repeating
# the BDT fit on a fixed split may be redundant - confirm.
for N in Ns:
    print('===================================================\n{}'.format(N))
    # Take the first N samples. m and s are returned by split_data but are
    # not used in this cell.
    data, m, s = split_data(X[:N], y[:N])
    # The split is fixed for this N, so unpack it once instead of once per
    # repetition.
    X_trn, X_vld, y_trn, y_vld = data

    for i in range(reps):
        print(i, end = ':\t')
        # Train BCE model. Its trace gets a dedicated name so it is no longer
        # overwritten by the BDT evaluation history below.
        bce_model, bce_trace = train(data, **bce_params)

        # Train BDT model, evaluating on the validation part of the split and
        # stopping early after 10 rounds without improvement.
        bdt_model = XGBClassifier(early_stopping_rounds = 10)
        bdt_model.fit(X_trn, y_trn, eval_set = [(X_vld, y_vld)], verbose = 0)
        trace = bdt_model.evals_result()['validation_0']
        # Report the log-loss of the last boosting round and the number of
        # rounds that were run.
        print(trace['logloss'][-1], '\t', len(trace['logloss']), end = '\n')

        # Persist both models, indexed by sample size and repetition.
        bce_model.save_weights(bce_filestr.format(N, i))
        bdt_model.save_model(bdt_filestr.format(N, i))

100
0:	0.736082136631012 	 11	0.6216746115684509 	 24
1:	0.7378633618354797 	 11	0.6216746115684509 	 24
2:	0.7381986975669861 	 11	0.6216746115684509 	 24
3:	0.7234733700752258 	 11	0.6216746115684509 	 24
4:	0.7252333164215088 	 12	0.6216746115684509 	 24
5:	0.7472240924835205 	 11	0.6216746115684509 	 24
6:	0.747083306312561 	 11	0.6216746115684509 	 24
7:	0.733532726764679 	 12	0.6216746115684509 	 24
8:	0.7416378855705261 	 13	0.6216746115684509 	 24
9:	0.7370964884757996 	 11	0.6216746115684509 	 24
10:	0.7291405200958252 	 12	0.6216746115684509 	 24
11:	0.7292354106903076 	 11	0.6216746115684509 	 24
12:	0.7382006049156189 	 11	0.6216746115684509 	 24
13:	0.7446950674057007 	 11	0.6216746115684509 	 24
14:	0.7312747836112976 	 11	0.6216746115684509 	 24
15:	0.7449672222137451 	 11	0.6216746115684509 	 24
16:	

# $d = 2$

In [None]:
# Experiment parameters
num = 0                     # index of this experiment set (selects the set_{num} folder)
reps = 100                  # number of trainings per sample size
d = 2                       # passed to the BCE model as `d`; also selects the data/model folders
Ns = 10**np.arange(2, 8)    # sample sizes 10^2, 10^3, ..., 10^7

# Model parameters
bce_params = {'loss':bce, 'd': d}

# Output locations. The two placeholders in the file templates are filled
# with (N, repetition index) by the training loop.
filestr = 'models/trees/{}/set_{}/'.format(d, num)
bce_filestr = filestr + 'bce/model_{}_{}.h5'
bdt_filestr = filestr + 'bdt/model_{}_{}.h5'

# Create the output folders. makedirs also creates missing parent folders
# (os.mkdir raises FileNotFoundError if e.g. models/trees/<d>/ does not exist
# yet) and exist_ok makes the cell safe to re-run.
os.makedirs(filestr + 'bce/', exist_ok = True)
os.makedirs(filestr + 'bdt/', exist_ok = True)

# Data parameters
# One row per sample, d columns per row. Reshaping to (-1, 1) here would
# flatten the features, so X[:N] would no longer line up with y[:N].
# Assumes the file stores d values per sample - TODO confirm.
X = np.load('data/trees/{}/X_trn.npy'.format(d)).reshape(-1, d)
y = np.load('data/trees/{}/y_trn.npy'.format(d)).astype('float32')

In [None]:
# Scan over the sample sizes. For every size, train `reps` BCE networks and
# `reps` XGBoost classifiers on the same split and save each fitted model.
for N in Ns:
    print('===================================================\n{}'.format(N))
    # Take the first N samples. m and s are returned by split_data but are
    # not used in this cell.
    data, m, s = split_data(X[:N], y[:N])
    # The split is fixed for this N, so unpack it once instead of once per
    # repetition.
    X_trn, X_vld, y_trn, y_vld = data

    for i in range(reps):
        print(i, end = ':\t')
        # Train BCE model. Its trace gets a dedicated name so it is no longer
        # overwritten by the BDT evaluation history below.
        bce_model, bce_trace = train(data, **bce_params)

        # Train BDT model, evaluating on the validation part of the split and
        # stopping early after 10 rounds without improvement.
        bdt_model = XGBClassifier(early_stopping_rounds = 10)
        bdt_model.fit(X_trn, y_trn, eval_set = [(X_vld, y_vld)], verbose = 0)
        trace = bdt_model.evals_result()['validation_0']
        # Report the log-loss of the last boosting round and the number of
        # rounds that were run.
        print(trace['logloss'][-1], '\t', len(trace['logloss']), end = '\n')

        # Persist both models, indexed by sample size and repetition.
        bce_model.save_weights(bce_filestr.format(N, i))
        bdt_model.save_model(bdt_filestr.format(N, i))

# $d=4$

In [None]:
# Experiment parameters
num = 0                     # index of this experiment set (selects the set_{num} folder)
reps = 100                  # number of trainings per sample size
d = 4                       # passed to the BCE model as `d`; also selects the data/model folders
Ns = 10**np.arange(2, 8)    # sample sizes 10^2, 10^3, ..., 10^7

# Model parameters
bce_params = {'loss':bce, 'd': d}

# Output locations. The two placeholders in the file templates are filled
# with (N, repetition index) by the training loop.
filestr = 'models/trees/{}/set_{}/'.format(d, num)
bce_filestr = filestr + 'bce/model_{}_{}.h5'
bdt_filestr = filestr + 'bdt/model_{}_{}.h5'

# Create the output folders. makedirs also creates missing parent folders
# (os.mkdir raises FileNotFoundError if e.g. models/trees/<d>/ does not exist
# yet) and exist_ok makes the cell safe to re-run.
os.makedirs(filestr + 'bce/', exist_ok = True)
os.makedirs(filestr + 'bdt/', exist_ok = True)

# Data parameters
# One row per sample, d columns per row. Reshaping to (-1, 1) here would
# flatten the features, so X[:N] would no longer line up with y[:N].
# Assumes the file stores d values per sample - TODO confirm.
X = np.load('data/trees/{}/X_trn.npy'.format(d)).reshape(-1, d)
y = np.load('data/trees/{}/y_trn.npy'.format(d)).astype('float32')

In [None]:
# Scan over the sample sizes. For every size, train `reps` BCE networks and
# `reps` XGBoost classifiers on the same split and save each fitted model.
for N in Ns:
    print('===================================================\n{}'.format(N))
    # Take the first N samples. m and s are returned by split_data but are
    # not used in this cell.
    data, m, s = split_data(X[:N], y[:N])
    # The split is fixed for this N, so unpack it once instead of once per
    # repetition.
    X_trn, X_vld, y_trn, y_vld = data

    for i in range(reps):
        print(i, end = ':\t')
        # Train BCE model. Its trace gets a dedicated name so it is no longer
        # overwritten by the BDT evaluation history below.
        bce_model, bce_trace = train(data, **bce_params)

        # Train BDT model, evaluating on the validation part of the split and
        # stopping early after 10 rounds without improvement.
        bdt_model = XGBClassifier(early_stopping_rounds = 10)
        bdt_model.fit(X_trn, y_trn, eval_set = [(X_vld, y_vld)], verbose = 0)
        trace = bdt_model.evals_result()['validation_0']
        # Report the log-loss of the last boosting round and the number of
        # rounds that were run.
        print(trace['logloss'][-1], '\t', len(trace['logloss']), end = '\n')

        # Persist both models, indexed by sample size and repetition.
        bce_model.save_weights(bce_filestr.format(N, i))
        bdt_model.save_model(bdt_filestr.format(N, i))

# $d=8$

In [None]:
# Experiment parameters
num = 0                     # index of this experiment set (selects the set_{num} folder)
reps = 100                  # number of trainings per sample size
d = 8                       # passed to the BCE model as `d`; also selects the data/model folders
Ns = 10**np.arange(2, 8)    # sample sizes 10^2, 10^3, ..., 10^7

# Model parameters
bce_params = {'loss':bce, 'd': d}

# Output locations. The two placeholders in the file templates are filled
# with (N, repetition index) by the training loop.
filestr = 'models/trees/{}/set_{}/'.format(d, num)
bce_filestr = filestr + 'bce/model_{}_{}.h5'
bdt_filestr = filestr + 'bdt/model_{}_{}.h5'

# Create the output folders. makedirs also creates missing parent folders
# (os.mkdir raises FileNotFoundError if e.g. models/trees/<d>/ does not exist
# yet) and exist_ok makes the cell safe to re-run.
os.makedirs(filestr + 'bce/', exist_ok = True)
os.makedirs(filestr + 'bdt/', exist_ok = True)

# Data parameters
# One row per sample, d columns per row. Reshaping to (-1, 1) here would
# flatten the features, so X[:N] would no longer line up with y[:N].
# Assumes the file stores d values per sample - TODO confirm.
X = np.load('data/trees/{}/X_trn.npy'.format(d)).reshape(-1, d)
y = np.load('data/trees/{}/y_trn.npy'.format(d)).astype('float32')

In [None]:
# Scan over the sample sizes. For every size, train `reps` BCE networks and
# `reps` XGBoost classifiers on the same split and save each fitted model.
for N in Ns:
    print('===================================================\n{}'.format(N))
    # Take the first N samples. m and s are returned by split_data but are
    # not used in this cell.
    data, m, s = split_data(X[:N], y[:N])
    # The split is fixed for this N, so unpack it once instead of once per
    # repetition.
    X_trn, X_vld, y_trn, y_vld = data

    for i in range(reps):
        print(i, end = ':\t')
        # Train BCE model. Its trace gets a dedicated name so it is no longer
        # overwritten by the BDT evaluation history below.
        bce_model, bce_trace = train(data, **bce_params)

        # Train BDT model, evaluating on the validation part of the split and
        # stopping early after 10 rounds without improvement.
        bdt_model = XGBClassifier(early_stopping_rounds = 10)
        bdt_model.fit(X_trn, y_trn, eval_set = [(X_vld, y_vld)], verbose = 0)
        trace = bdt_model.evals_result()['validation_0']
        # Report the log-loss of the last boosting round and the number of
        # rounds that were run.
        print(trace['logloss'][-1], '\t', len(trace['logloss']), end = '\n')

        # Persist both models, indexed by sample size and repetition.
        bce_model.save_weights(bce_filestr.format(N, i))
        bdt_model.save_model(bdt_filestr.format(N, i))

# $d=16$

In [None]:
# Experiment parameters
num = 0                     # index of this experiment set (selects the set_{num} folder)
reps = 100                  # number of trainings per sample size
d = 16                      # passed to the BCE model as `d`; also selects the data/model folders
Ns = 10**np.arange(2, 8)    # sample sizes 10^2, 10^3, ..., 10^7

# Model parameters
bce_params = {'loss':bce, 'd': d}

# Output locations. The two placeholders in the file templates are filled
# with (N, repetition index) by the training loop.
filestr = 'models/trees/{}/set_{}/'.format(d, num)
bce_filestr = filestr + 'bce/model_{}_{}.h5'
bdt_filestr = filestr + 'bdt/model_{}_{}.h5'

# Create the output folders. makedirs also creates missing parent folders
# (os.mkdir raises FileNotFoundError if e.g. models/trees/<d>/ does not exist
# yet) and exist_ok makes the cell safe to re-run.
os.makedirs(filestr + 'bce/', exist_ok = True)
os.makedirs(filestr + 'bdt/', exist_ok = True)

# Data parameters
# One row per sample, d columns per row. Reshaping to (-1, 1) here would
# flatten the features, so X[:N] would no longer line up with y[:N].
# Assumes the file stores d values per sample - TODO confirm.
X = np.load('data/trees/{}/X_trn.npy'.format(d)).reshape(-1, d)
y = np.load('data/trees/{}/y_trn.npy'.format(d)).astype('float32')

In [None]:
# Scan over the sample sizes. For every size, train `reps` BCE networks and
# `reps` XGBoost classifiers on the same split and save each fitted model.
for N in Ns:
    print('===================================================\n{}'.format(N))
    # Take the first N samples. m and s are returned by split_data but are
    # not used in this cell.
    data, m, s = split_data(X[:N], y[:N])
    # The split is fixed for this N, so unpack it once instead of once per
    # repetition.
    X_trn, X_vld, y_trn, y_vld = data

    for i in range(reps):
        print(i, end = ':\t')
        # Train BCE model. Its trace gets a dedicated name so it is no longer
        # overwritten by the BDT evaluation history below.
        bce_model, bce_trace = train(data, **bce_params)

        # Train BDT model, evaluating on the validation part of the split and
        # stopping early after 10 rounds without improvement.
        bdt_model = XGBClassifier(early_stopping_rounds = 10)
        bdt_model.fit(X_trn, y_trn, eval_set = [(X_vld, y_vld)], verbose = 0)
        trace = bdt_model.evals_result()['validation_0']
        # Report the log-loss of the last boosting round and the number of
        # rounds that were run.
        print(trace['logloss'][-1], '\t', len(trace['logloss']), end = '\n')

        # Persist both models, indexed by sample size and repetition.
        bce_model.save_weights(bce_filestr.format(N, i))
        bdt_model.save_model(bdt_filestr.format(N, i))