In [None]:
# Enable IPython's autoreload extension. Mode 2 re-imports all loaded modules
# before each cell is executed, so edits to local helper modules (e.g.
# auxiliary_functions) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Third-party libraries: array/data handling plus the scikit-learn pieces used below.
import numpy as np
import pandas as pd
# NOTE: no trailing comma after the imported name -- an unparenthesised trailing
# comma in a `from ... import ...` statement is a SyntaxError.
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
# Project-local helper class; its evaluate_model and create_roc_auc_curve
# methods are called further down in this notebook.
from auxiliary_functions import Utils

In [None]:
# Single shared instance of the project helper class; the evaluation and
# ROC-plot cells below call its evaluate_model / create_roc_auc_curve methods.
utils = Utils()

In [None]:
# Load the cleaned sessions table (path is relative to the notebook's directory)
# and preview the first rows.
sessions_csv_path = '../data/clean_data/df_sessions_w_feats.csv'
df = pd.read_csv(sessions_csv_path)
df.head()

In [None]:
# Separate the predictors from the target column 'CR'.
x = df.drop(columns=['CR'])
y = df['CR']

# Hold out 30% of the rows for testing; random_state fixes the shuffle.
# stratify=y keeps the proportion of each 'CR' class the same in the train and
# test partitions. This notebook later up-weights CR == 1 as the minority class,
# and an unstratified split can leave the test set with too few positive rows
# for stable metrics.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42, stratify=y
)

Baseline model: initial training without any hyperparameter tuning

In [None]:
# Baseline gradient-boosting classifier with default hyperparameters.
# random_state is pinned (same seed as the train/test split) because the
# estimator randomly permutes features when searching for splits; without a
# seed, repeated runs of this notebook can produce slightly different models.
gbc = GradientBoostingClassifier(random_state=42)
gbc.fit(x_train, y_train)

In [None]:
# Score the held-out set with the baseline model.
# predict_proba returns one column per class (ordered as gbc.classes_); the
# second column is kept -- presumably the positive class CR == 1.
baseline_class_probs = gbc.predict_proba(x_test)
y_pred_prob = baseline_class_probs[:, 1]
# Hard class labels for the same rows.
y_pred = gbc.predict(x_test)

In [None]:
# Evaluate the baseline model via the project helper, passing the true labels,
# the hard predictions and the predicted probabilities. The returned object is
# indexed with 'roc_auc' in the next cell.
# NOTE(review): the exact set of metrics computed lives in auxiliary_functions -- confirm there.
metrics_result = utils.evaluate_model(y_test, y_pred, y_pred_prob)

In [None]:
# Pass the baseline 'roc_auc' value, the true labels and the predicted
# probabilities to the project helper.
# NOTE(review): presumably this draws the ROC curve annotated with the AUC -- confirm in auxiliary_functions.
utils.create_roc_auc_curve(metrics_result['roc_auc'], y_test, y_pred_prob)

Retraining with sample weights that give more importance to the minority class (CR = 1), to see whether this improves model performance over the baseline

In [None]:
# Weight given to every training row whose target equals 1; all other rows keep weight 1.
# NOTE(review): 25 is a hand-picked value -- confirm it against the actual class ratio.
POSITIVE_CLASS_WEIGHT = 25

# Build the per-row weight vector positionally. y_train is a pandas Series whose
# index is shuffled by the split, so it is converted to a NumPy array first to
# make the positional alignment with x_train explicit.
sample_weights = np.where(y_train.to_numpy() == 1, POSITIVE_CLASS_WEIGHT, 1.0)

# Same estimator configuration as the baseline; only the sample weights differ.
# random_state is pinned (same seed as the train/test split) so repeated runs
# produce the same model and the comparison with the baseline is reproducible.
gbc_weighted = GradientBoostingClassifier(random_state=42)
gbc_weighted.fit(x_train, y_train, sample_weight=sample_weights)

In [None]:
# Score the held-out set with the sample-weighted model.
# predict_proba returns one column per class (ordered as gbc_weighted.classes_);
# the second column is kept -- presumably the positive class CR == 1.
weighted_class_probs = gbc_weighted.predict_proba(x_test)
y_pred_prob_weighted = weighted_class_probs[:, 1]
# Hard class labels for the same rows.
y_pred_weighted = gbc_weighted.predict(x_test)

In [None]:
# Evaluate the sample-weighted model with the same helper and argument order as
# the baseline (true labels, hard predictions, predicted probabilities). The
# returned object is indexed with 'roc_auc' in the next cell.
# NOTE(review): the exact set of metrics computed lives in auxiliary_functions -- confirm there.
metrics_result_weighted = utils.evaluate_model(y_test, y_pred_weighted, y_pred_prob_weighted)

In [None]:
# Pass the weighted model's 'roc_auc' value, the true labels and the predicted
# probabilities to the project helper.
# NOTE(review): presumably this draws the ROC curve annotated with the AUC -- confirm in auxiliary_functions.
utils.create_roc_auc_curve(metrics_result_weighted['roc_auc'], y_test, y_pred_prob_weighted)