In [3]:
import plotly.io as pio
import plotly.graph_objects as go
import plotly.express as px
from plotly.offline import init_notebook_mode, iplot
import numpy as np

# Initialise Plotly's offline notebook mode once, before any figure is drawn.
# NOTE(review): connected=True is understood to load plotly.js from a CDN
# rather than embedding it in the notebook — confirm for the installed version.
init_notebook_mode(connected=True)


def get_results(model):
    """Return ``model.history`` flattened into a plain Python list.

    The history is converted to a NumPy array and reshaped to one dimension
    with exactly one element per history entry. Each entry must therefore
    hold a single value; otherwise NumPy raises during the conversion/reshape.

    Parameters
    ----------
    model : object
        Any object exposing a sized ``history`` attribute.

    Returns
    -------
    list
        One element per history entry (NumPy scalars, in recorded order).
    """
    history = model.history
    flat = np.array(history).reshape(len(history))
    return [value for value in flat]

In [1]:
## Higgs dataset

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# NOTE(review): read_csv treats the first line of the file as a header, and
# that header is overwritten by the column names assigned below. If HIGGS.csv
# has no header row, the first sample is silently dropped — confirm, and pass
# header=None if that is the case.
df = pd.read_csv('HIGGS.csv', nrows=300000)

# Column 0 is the class label; the remaining 28 columns are the features.
df.columns = [
    'label',
    'lepton pT', 'lepton eta', 'lepton phi',
    'missing energy magnitude', 'missing energy phi',
    'jet 1 pt', 'jet 1 eta', 'jet 1 phi', 'jet 1 b-tag',
    'jet 2 pt', 'jet 2 eta', 'jet 2 phi', 'jet 2 b-tag',
    'jet 3 pt', 'jet 3 eta', 'jet 3 phi', 'jet 3 b-tag',
    'jet 4 pt', 'jet 4 eta', 'jet 4 phi', 'jet 4 b-tag',
    'm_jj', 'm_jjj', 'm_lv', 'm_jlv', 'm_bb', 'm_wbb', 'm_wwbb',
]
# Vectorised cast of the label column to integers (replaces a per-row apply(int)).
df["label"] = df["label"].astype("int64")

df_train = df

X = df_train.iloc[:, 1:].to_numpy()
y = df_train['label'].to_numpy()

# Pre-processing

# Split the *raw* features first so the scaler never sees validation/test rows.
# With a fixed random_state and no stratification the partition should depend
# only on the number of samples, so the same rows are expected in each split
# as when splitting an already-scaled array.
X_t_raw, X_test_raw, y_t, y_test = train_test_split(X, y, test_size=0.20, random_state=42)
X_train_raw, X_val_raw, y_train, y_val = train_test_split(X_t_raw, y_t, test_size=0.20, random_state=42)

# Fit the scaler on the training split only; fitting on the full dataset leaks
# validation/test statistics into the training features.
sc = StandardScaler()
X_train = sc.fit_transform(X_train_raw)
X_val = sc.transform(X_val_raw)
X_test = sc.transform(X_test_raw)

# Kept so any other cell that refers to these names still works; both now use
# the train-only scaling statistics.
X_t = sc.transform(X_t_raw)
X_std = sc.transform(X)

# The raw split copies are no longer needed; release their memory.
del X_t_raw, X_train_raw, X_val_raw, X_test_raw

print(X_train.shape, X_val.shape, X_test.shape)
print(y_train.shape, y_val.shape, y_test.shape)

(192000, 28) (48000, 28) (60000, 28)
(192000,) (48000,) (60000,)


In [4]:
import time

from py_boost import GradientBoosting

# Tree depth for the run below, and the data scored during training
# (here the training split itself is the only evaluation set).
m_d = 8
eval_sets = [{'X': X_train, 'y': y_train}]

## Order 2

# The timed region covers both constructing the model and fitting it.
start = time.time()

model2 = GradientBoosting(
    loss='bce',
    ntrees=10000,
    max_depth=m_d,
    lambda_l2=0.001,
    verbose=1000,
)
model2.fit(X_train, y_train, eval_sets=eval_sets)

end = time.time()
time2 = end - start

# Average elapsed seconds over the number of entries recorded in the history.
time_per_iter = time2 / len(get_results(model2))

print(f"time per iteration: {time_per_iter}")

[13:23:52] Stdout logging level is INFO.
[13:23:52] GDBT train starts. Max iter 10000, early stopping rounds 100
[13:23:52] Iter 0; Sample 0, BCE = 0.6807392195769906; 
[13:24:07] Iter 1000; Sample 0, BCE = 0.3011814162915884; 
[13:24:22] Iter 2000; Sample 0, BCE = 0.1848192891043696; 
[13:24:36] Iter 3000; Sample 0, BCE = 0.11393504194846692; 
[13:24:50] Iter 4000; Sample 0, BCE = 0.06883267563064051; 
[13:25:04] Iter 5000; Sample 0, BCE = 0.04175218486565787; 
[13:25:18] Iter 6000; Sample 0, BCE = 0.025579250367220287; 
[13:25:32] Iter 7000; Sample 0, BCE = 0.015260950897855535; 
[13:25:45] Iter 8000; Sample 0, BCE = 0.00930962845560892; 
[13:25:59] Iter 9000; Sample 0, BCE = 0.005702202016376766; 
[13:26:14] Iter 9999; Sample 0, BCE = 0.003463196877648185; 
time per iteration: 0.014185046601295472
