In [1]:
from sklearn.model_selection import cross_validate, train_test_split, StratifiedKFold
from sklearn.metrics import roc_auc_score
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import numpy as np
import optuna
from optuna import Trial, visualization
from optuna.samplers import TPESampler

# Relative directory of the processed data files.
# NOTE(review): not referenced by any of the visible cells; presumably used by
# a data-loading/export cell elsewhere in the notebook -- confirm.
path_import_and_export = "../../../../Thesis_data/processed_data/"

# Seed value intended for the randomized steps of the notebook.
random_state = 101

In [None]:
def objective_rf(trial):
    """Optuna objective: mean cross-validated ROC AUC of a random forest.

    Samples one hyperparameter configuration from ``trial``, evaluates it with
    stratified 3-fold cross-validation and returns the mean validation ROC AUC.

    Parameters
    ----------
    trial : optuna.Trial
        Trial object used to sample the hyperparameters.

    Returns
    -------
    float
        Mean ROC AUC over the three validation folds (higher is better).

    Notes
    -----
    ``X_train`` and ``y_train`` are read from the notebook's global namespace;
    they are not defined in this cell and must exist before the study runs.
    The plain ``"roc_auc"`` scorer is used, so ``y_train`` is assumed to be a
    binary target -- TODO confirm.
    """
    params = {
        "criterion": trial.suggest_categorical("criterion", ["entropy", "gini"]),
        "max_depth": trial.suggest_int("max_depth", 10, 200, step=10),
        # 'auto' was an alias of 'sqrt' for classifiers and has been removed
        # from recent scikit-learn releases, so it is no longer offered.
        "max_features": trial.suggest_categorical("max_features", ["sqrt", "log2", None]),
        # Fractional values must be strictly positive; 0.0 is rejected by
        # scikit-learn, so the grid starts at 0.1 instead of 0.
        "min_samples_leaf": trial.suggest_float("min_samples_leaf", 0.1, 0.5, step=0.1),
        # Same constraint: the lower bound is inclusive in Optuna, so it must
        # be a small positive fraction rather than 0.
        "min_samples_split": trial.suggest_float("min_samples_split", 0.001, 1.0),
        "n_estimators": trial.suggest_int("n_estimators", 10, 810, step=50),
    }

    model_rfClass = RandomForestClassifier(**params, random_state=random_state, n_jobs=-1)
    skfold = StratifiedKFold(n_splits=3, shuffle=True, random_state=random_state)

    # scoring="roc_auc" ranks samples by predicted probability; scoring hard
    # class labels would collapse the ROC curve to a single operating point.
    # error_score="raise" keeps a failing fit loud instead of yielding NaN.
    cv_results = cross_validate(
        model_rfClass,
        X_train,
        y_train,
        cv=skfold,
        scoring="roc_auc",
        error_score="raise",
    )

    return float(np.mean(cv_results["test_score"]))

In [None]:
# Seed the TPE sampler so the hyperparameter search is reproducible after a
# kernel restart (an unseeded sampler draws different configurations each run).
study = optuna.create_study(
    direction="maximize",
    sampler=TPESampler(seed=random_state),
)
# NOTE(review): n_trials=1 evaluates a single sampled configuration, so no
# actual search takes place -- presumably a smoke-test setting; confirm.
study.optimize(objective_rf, n_trials=1)

In [None]:
# Refit a random forest on the full training set using the best
# hyperparameters found by the study. The seed comes from the notebook-level
# `random_state` constant instead of a repeated literal.
# X_train / y_train are expected to exist in the notebook namespace.
best_params = study.best_params
final_rf_model = RandomForestClassifier(
    **best_params,
    random_state=random_state,
    n_jobs=-1,
    verbose=2,
)
final_rf_model.fit(X_train, y_train)

In [None]:
# Hard class predictions are kept under their original name for any later use.
y_pred = final_rf_model.predict(X_test)

# ROC AUC needs a ranking score, not thresholded labels: use the predicted
# probability of the second class in `classes_` (the positive label).
# Assumes a binary target -- TODO confirm.
y_score = final_rf_model.predict_proba(X_test)[:, 1]
test_auroc = roc_auc_score(y_test, y_score)

In [None]:
# Show the AUROC computed on X_test / y_test in the previous cell.
print(test_auroc)