In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.special import expit
from lightgbm import LGBMClassifier
from sklearn.metrics import log_loss, accuracy_score
from sklearn.model_selection import train_test_split
from perpetual import PerpetualBooster

In [2]:
# Allow up to 1000 rows in rendered pandas output before truncation.
pd.options.display.max_rows = 1000

In [None]:
# Report the interpreter that is actually running this kernel. A shelled-out
# `python` is resolved through PATH and can be a different installation.
import platform

print(f"Python {platform.python_version()}")

In [None]:
from importlib.metadata import PackageNotFoundError, version


def package_version(name):
    """Return the installed version of distribution ``name``.

    ``importlib.metadata.version`` raises ``PackageNotFoundError`` for a
    distribution that is not installed. That case is reported as the string
    ``"not installed"`` so one missing package does not abort the report.
    """
    try:
        return version(name)
    except PackageNotFoundError:
        return "not installed"


# One "name: version" line per package.
for package in ("numpy", "optuna", "lightgbm", "scikit-learn", "perpetual"):
    print(f"{package}: {package_version(package)}")

In [5]:
# Load the "titanic" dataset through seaborn's dataset loader.
# NOTE(review): the loader may need network access on first use — confirm.
df = sns.load_dataset("titanic")

In [6]:
# Remove the `alive` column without mutating in place. `errors="ignore"`
# keeps the cell re-runnable: a second execution finds the column already
# gone instead of raising KeyError.
# NOTE(review): presumably dropped because `alive` mirrors the `survived`
# target — confirm.
df = df.drop(columns=["alive"], errors="ignore")

In [7]:
# Features are every column except `survived`; `survived` is the target.
X, y = df.drop("survived", axis="columns"), df["survived"]

In [None]:
# Dimensions (rows, columns) of the feature frame.
X.shape

In [None]:
# Per-column dtypes of the features, inspected before the encoding cell runs.
X.dtypes

In [None]:
# Number of distinct values in each feature column.
X.nunique()

In [None]:
# Preview the first rows of the features before encoding.
X.head()

In [None]:
# Replace each of these columns with a single float indicator. With
# drop_first=True, get_dummies drops the indicator for the first level
# (presumably each column has exactly two levels — confirm).
for binary_col in ("sex", "adult_male", "alone"):
    indicator = pd.get_dummies(X[binary_col], drop_first=True, dtype=float)
    X[binary_col] = indicator.to_numpy()

# The columns listed in `cols` are cast to the pandas `category` dtype.
# NOTE(review): the list includes `age` and `fare`, which look continuous —
# confirm that treating them as categories is intended.
cols = [
    'pclass', 'sibsp', 'parch', 'embarked', 'class',
    'who', 'deck', 'embark_town', 'age', 'fare',
]
X[cols] = X[cols].astype('category')
X.head()

In [13]:
# Run configuration; `seed` is the random state for the train/test split.
seed, n_estimators, n_trials = 42, 100, 1

In [14]:
# Metric configuration: `metric_function` and `metric_name` drive the final
# test-score cell.
scoring, metric_name, objective_type = "neg_log_loss", "log_loss", "LogLoss"
metric_function = log_loss

In [None]:
# Hold out 20% of the rows for testing; `seed` fixes the shuffle.
split_parts = train_test_split(X, y, test_size=0.2, random_state=seed)
X_train, X_test, y_train, y_test = split_parts

print(f"X_train.shape: {X_train.shape}")
print(f"X_test.shape: {X_test.shape}")

In [None]:
# Preview the first rows of the training features.
X_train.head()

In [None]:
# Distinct values of the `who` column present in the training split.
set(X_train["who"])

In [None]:
# LightGBM classifier trained on the same split. The shared `n_estimators`
# and `seed` settings are passed explicitly so the configuration cell
# governs this model.
model_lgbm = LGBMClassifier(
    objective="binary",
    n_estimators=n_estimators,
    random_state=seed,
)
model_lgbm.fit(X_train, y_train)

In [None]:
# PerpetualBooster trained on the same split. The objective comes from the
# `objective_type` setting rather than a duplicated string literal.
model = PerpetualBooster(objective=objective_type)
model.fit(X_train, y_train, budget=0.1)

In [None]:
# Test accuracy: apply the sigmoid to the model output, then round to 0/1.
test_scores = model.predict(X_test)
y_pred = np.round(expit(test_scores))
print(accuracy_score(y_test, y_pred))

In [None]:
# Training accuracy, computed the same way as the test accuracy.
train_scores = model.predict(X_train)
y_pred = np.round(expit(train_scores))
print(accuracy_score(y_train, y_pred))

In [None]:
# Log loss is scored on the sigmoid outputs themselves; any other metric is
# scored on those outputs rounded to 0/1.
test_proba = expit(model.predict(X_test))
y_pred = test_proba if metric_name == "log_loss" else np.round(test_proba)
print(f"Test {metric_name}: {round(metric_function(y_test, y_pred), 6)}")

In [23]:
# Export the fitted PerpetualBooster's trees via `trees_to_dataframe()`.
df_trees = model.trees_to_dataframe()

In [None]:
# Show the first 10 rows of the PerpetualBooster tree table.
df_trees.head(10)

In [None]:
# Show the first 10 rows of the LightGBM booster's tree table for comparison.
model_lgbm.booster_.trees_to_dataframe().head(10)