In [1]:
# run model for predict f1 races

#imports 
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, log_loss, mean_squared_error

In [2]:
# get data from csv
raw_data = pd.read_csv('../../datasets/f1_data.csv')
raw_data.head(20)

Unnamed: 0,driver_number,broadcast_name,country_code_x,year,session_name,date_start,time_start,date_end,time_end,gmt_offset,starting_position,wins_before,pit_stops_count,avg_pit_stop_duration,qualifying_position,position_category
0,1,M VERSTAPPEN,NED,2023,Race,2023-03-19,17:00:00,2023-03-19,19:00:00,03:00:00,15,1,0,0.0,1,top3
1,2,L SARGEANT,USA,2023,Race,2023-03-19,17:00:00,2023-03-19,19:00:00,03:00:00,20,0,0,0.0,16,no_points
2,4,L NORRIS,GBR,2023,Race,2023-03-19,17:00:00,2023-03-19,19:00:00,03:00:00,19,0,0,0.0,7,no_points
3,10,P GASLY,FRA,2023,Race,2023-03-19,17:00:00,2023-03-19,19:00:00,03:00:00,9,0,0,0.0,9,points
4,11,S PEREZ,MEX,2023,Race,2023-03-19,17:00:00,2023-03-19,19:00:00,03:00:00,1,0,0,0.0,2,winner
5,14,F ALONSO,ESP,2023,Race,2023-03-19,17:00:00,2023-03-19,19:00:00,03:00:00,2,0,0,0.0,3,top3
6,16,C LECLERC,MON,2023,Race,2023-03-19,17:00:00,2023-03-19,19:00:00,03:00:00,12,0,0,0.0,6,points
7,18,L STROLL,CAN,2023,Race,2023-03-19,17:00:00,2023-03-19,19:00:00,03:00:00,5,0,0,0.0,4,no_points
8,20,K MAGNUSSEN,DEN,2023,Race,2023-03-19,17:00:00,2023-03-19,19:00:00,03:00:00,13,0,0,0.0,17,points
9,21,N DE VRIES,NED,2023,Race,2023-03-19,17:00:00,2023-03-19,19:00:00,03:00:00,18,0,0,0.0,20,no_points


In [3]:
# split into X and y
# data without position category 
X = raw_data.drop(columns=['position_category'])
y = raw_data['position_category']

In [4]:
# wyswitel informacje o zbiorze danych
print("Shape of X:", X.shape)
print("Shape of y:", y.shape)

#wyswietl cechy
print("Features in X:", X.columns.tolist())
# wyswietl unikalne kategorie pozycji
print("Unique position categories in y:", y.unique())
print(X.dtypes)

Shape of X: (1635, 15)
Shape of y: (1635,)
Features in X: ['driver_number', 'broadcast_name', 'country_code_x', 'year', 'session_name', 'date_start', 'time_start', 'date_end', 'time_end', 'gmt_offset', 'starting_position', 'wins_before', 'pit_stops_count', 'avg_pit_stop_duration', 'qualifying_position']
Unique position categories in y: ['top3' 'no_points' 'points' 'winner']
driver_number              int64
broadcast_name            object
country_code_x            object
year                       int64
session_name              object
date_start                object
time_start                object
date_end                  object
time_end                  object
gmt_offset                object
starting_position          int64
wins_before                int64
pit_stops_count            int64
avg_pit_stop_duration    float64
qualifying_position        int64
dtype: object


In [5]:
# encode data
from sklearn.preprocessing import LabelEncoder, StandardScaler

for col in X.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col])

In [6]:
#scaling data 
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [7]:
#split into train and validation sets

X_train, X_valid, y_train, y_valid = train_test_split(X_scaled, y, test_size=0.3, stratify=y, random_state=42)

In [8]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold

In [9]:
# compute specificity   
def compute_specificity(conf_mat):
    specificity = []
    for i in range(conf_mat.shape[0]):
        TP = conf_mat[i, i]
        FP = conf_mat[:, i].sum() - TP
        FN = conf_mat[i, :].sum() - TP
        TN = conf_mat.sum() - (TP + FP + FN)
        spec = TN / (TN + FP) if (TN + FP) > 0 else 0
        specificity.append(spec)
    return specificity

In [10]:
# hyperparameter tuning with Optuna

def objective(trial):
    # hyperparameters to tune
    rf_n_estimators = trial.suggest_int("rf_n_estimators", 50, 150)
    rf_max_depth = trial.suggest_int("rf_max_depth", 3, 15)

    gb_n_estimators = trial.suggest_int("gb_n_estimators", 50, 150)
    gb_learning_rate = trial.suggest_float("gb_learning_rate", 0.01, 0.3)
    gb_max_depth = trial.suggest_int("gb_max_depth", 2, 10)

    svm_C = trial.suggest_float("svm_C", 0.1, 10.0, log=True)
    svm_kernel = trial.suggest_categorical("svm_kernel", ["linear", "rbf"])

    # models to ensemble
    clf_rf = RandomForestClassifier(n_estimators=rf_n_estimators, max_depth=rf_max_depth, random_state=42)
    clf_gb = GradientBoostingClassifier(n_estimators=gb_n_estimators, learning_rate=gb_learning_rate,
                                        max_depth=gb_max_depth, random_state=42)
    clf_svm = SVC(C=svm_C, kernel=svm_kernel, probability=True, random_state=42)

    # ensemble model 
    ensemble = VotingClassifier(
        estimators=[('rf', clf_rf), ('gb', clf_gb), ('svm', clf_svm)],
        voting='soft'
    )

    # encode labels
    le = LabelEncoder()
    y_train_enc = le.fit_transform(y_train)
    y_valid_enc = le.transform(y_valid)

    class_labels = le.classes_
    print("Klasy w macierzy pomyÅ‚ek:", class_labels)

    # cross-validation setup
    kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    # lists to store metrics for each fold
    acc_list, prec_list, rec_list, f1_list, spec_list = [], [], [], [], []
    mse_list, rmse_list, logloss_list = [], [], []

    # new metrics lists
    prec_macro_list, prec_weighted_list = [], []
    rec_macro_list, rec_weighted_list = [], []
    f1_macro_list, f1_weighted_list = [], []

    # globally summed confusion matrix
    conf_mat_sum = np.zeros((len(np.unique(y_train_enc)), len(np.unique(y_train_enc))), dtype=int)

    # cross-validation loop
    for train_index, test_index in kf.split(X_train, y_train_enc):
        X_tr, X_te = X_train[train_index], X_train[test_index]
        y_tr, y_te = y_train_enc[train_index], y_train_enc[test_index]

        ensemble.fit(X_tr, y_tr)
        y_pred_cv = ensemble.predict(X_te)
        y_proba_cv = ensemble.predict_proba(X_te)

        conf_mat_cv = confusion_matrix(y_te, y_pred_cv)
        conf_mat_sum += conf_mat_cv  # each fold summed to global confusion matrix

        acc_list.append(accuracy_score(y_te, y_pred_cv))
        prec_list.append(precision_score(y_te, y_pred_cv, average="micro", zero_division=0))
        rec_list.append(recall_score(y_te, y_pred_cv, average="micro", zero_division=0))
        f1_list.append(f1_score(y_te, y_pred_cv, average="micro", zero_division=0))

        # new metrics calculations
        prec_macro_list.append(precision_score(y_te, y_pred_cv, average="macro", zero_division=0))
        prec_weighted_list.append(precision_score(y_te, y_pred_cv, average="weighted", zero_division=0))

        rec_macro_list.append(recall_score(y_te, y_pred_cv, average="macro", zero_division=0))
        rec_weighted_list.append(recall_score(y_te, y_pred_cv, average="weighted", zero_division=0))

        f1_macro_list.append(f1_score(y_te, y_pred_cv, average="macro", zero_division=0))
        f1_weighted_list.append(f1_score(y_te, y_pred_cv, average="weighted", zero_division=0))

        spec_cv = np.mean(compute_specificity(conf_mat_cv))
        spec_list.append(spec_cv)

        mse_list.append(mean_squared_error(y_te, y_pred_cv))
        rmse_list.append(np.sqrt(mse_list[-1]))
        logloss_list.append(log_loss(y_te, y_proba_cv))

    # average and std of metrics across folds
    acc, acc_std = np.mean(acc_list), np.std(acc_list, ddof=0)
    prec, prec_std = np.mean(prec_list), np.std(prec_list, ddof=0)
    rec, rec_std = np.mean(rec_list), np.std(rec_list, ddof=0)
    f1, f1_std = np.mean(f1_list), np.std(f1_list, ddof=0)
    spec, spec_std = np.mean(spec_list), np.std(spec_list, ddof=0)

    mse, mse_std = np.mean(mse_list), np.std(mse_list, ddof=0)
    rmse, rmse_std = np.mean(rmse_list), np.std(rmse_list, ddof=0)
    logloss, logloss_std = np.mean(logloss_list), np.std(logloss_list, ddof=0)

    # new metrics averages and stds
    prec_macro, prec_macro_std = np.mean(prec_macro_list), np.std(prec_macro_list, ddof=0)
    prec_weighted, prec_weighted_std = np.mean(prec_weighted_list), np.std(prec_weighted_list, ddof=0)

    rec_macro, rec_macro_std = np.mean(rec_macro_list), np.std(rec_macro_list, ddof=0)
    rec_weighted, rec_weighted_std = np.mean(rec_weighted_list), np.std(rec_weighted_list, ddof=0)

    f1_macro, f1_macro_std = np.mean(f1_macro_list), np.std(f1_macro_list, ddof=0)
    f1_weighted, f1_weighted_std = np.mean(f1_weighted_list), np.std(f1_weighted_list, ddof=0)

    # set metrics to return 
    trial.set_user_attr("accuracy", acc)
    trial.set_user_attr("accuracy_std", acc_std)
    trial.set_user_attr("precision", prec)
    trial.set_user_attr("precision_std", prec_std)
    trial.set_user_attr("recall", rec)
    trial.set_user_attr("recall_std", rec_std)
    trial.set_user_attr("f1_score", f1)
    trial.set_user_attr("f1_std", f1_std)
    trial.set_user_attr("specificity_avg", spec)
    trial.set_user_attr("specificity_std", spec_std)

    trial.set_user_attr("mse", mse)
    trial.set_user_attr("mse_std", mse_std)
    trial.set_user_attr("rmse", rmse)
    trial.set_user_attr("rmse_std", rmse_std)
    trial.set_user_attr("logloss", logloss)
    trial.set_user_attr("logloss_std", logloss_std)

    # set new metrics to return
    trial.set_user_attr("precision_macro", prec_macro)
    trial.set_user_attr("precision_macro_std", prec_macro_std)
    trial.set_user_attr("precision_weighted", prec_weighted)
    trial.set_user_attr("precision_weighted_std", prec_weighted_std)

    trial.set_user_attr("recall_macro", rec_macro)
    trial.set_user_attr("recall_macro_std", rec_macro_std)
    trial.set_user_attr("recall_weighted", rec_weighted)
    trial.set_user_attr("recall_weighted_std", rec_weighted_std)

    trial.set_user_attr("f1_macro", f1_macro)
    trial.set_user_attr("f1_macro_std", f1_macro_std)
    trial.set_user_attr("f1_weighted", f1_weighted)
    trial.set_user_attr("f1_weighted_std", f1_weighted_std)

    # save confusion matrix
    trial.set_user_attr("confusion_matrix", conf_mat_sum.tolist())

    # return metric to optimize
    return f1  # or acc if you are optimizing accuracy


In [11]:
import optuna

# run the optimization
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)

  from .autonotebook import tqdm as notebook_tqdm
[I 2025-09-21 22:36:01,850] A new study created in memory with name: no-name-25668c09-bfd0-491f-8afe-4cbb7139ccef


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:36:06,107] Trial 0 finished with value: 0.7691948211139202 and parameters: {'rf_n_estimators': 122, 'rf_max_depth': 15, 'gb_n_estimators': 67, 'gb_learning_rate': 0.03954693916454726, 'gb_max_depth': 4, 'svm_C': 2.313609966033525, 'svm_kernel': 'rbf'}. Best is trial 0 with value: 0.7691948211139202.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:36:12,575] Trial 1 finished with value: 0.8015475369646824 and parameters: {'rf_n_estimators': 136, 'rf_max_depth': 13, 'gb_n_estimators': 68, 'gb_learning_rate': 0.19175708504361783, 'gb_max_depth': 6, 'svm_C': 7.147781815466811, 'svm_kernel': 'rbf'}. Best is trial 1 with value: 0.8015475369646824.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:36:39,945] Trial 2 finished with value: 0.7718417222094537 and parameters: {'rf_n_estimators': 54, 'rf_max_depth': 4, 'gb_n_estimators': 131, 'gb_learning_rate': 0.014978056595348756, 'gb_max_depth': 10, 'svm_C': 5.420546660924338, 'svm_kernel': 'linear'}. Best is trial 1 with value: 0.8015475369646824.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:37:00,673] Trial 3 finished with value: 0.8076802267677928 and parameters: {'rf_n_estimators': 57, 'rf_max_depth': 11, 'gb_n_estimators': 98, 'gb_learning_rate': 0.06658209640396702, 'gb_max_depth': 10, 'svm_C': 2.5326040636381366, 'svm_kernel': 'linear'}. Best is trial 3 with value: 0.8076802267677928.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:37:07,435] Trial 4 finished with value: 0.8024094077989735 and parameters: {'rf_n_estimators': 73, 'rf_max_depth': 10, 'gb_n_estimators': 142, 'gb_learning_rate': 0.2653525309810649, 'gb_max_depth': 4, 'svm_C': 1.0559435002802147, 'svm_kernel': 'rbf'}. Best is trial 3 with value: 0.8076802267677928.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:37:29,071] Trial 5 finished with value: 0.8006856661303916 and parameters: {'rf_n_estimators': 121, 'rf_max_depth': 7, 'gb_n_estimators': 130, 'gb_learning_rate': 0.09554832013738912, 'gb_max_depth': 9, 'svm_C': 3.524587440954072, 'svm_kernel': 'rbf'}. Best is trial 3 with value: 0.8076802267677928.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:37:39,914] Trial 6 finished with value: 0.7989274496284379 and parameters: {'rf_n_estimators': 105, 'rf_max_depth': 7, 'gb_n_estimators': 137, 'gb_learning_rate': 0.09347732842120801, 'gb_max_depth': 6, 'svm_C': 9.545457241428032, 'svm_kernel': 'rbf'}. Best is trial 3 with value: 0.8076802267677928.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:37:46,973] Trial 7 finished with value: 0.7919290584539953 and parameters: {'rf_n_estimators': 71, 'rf_max_depth': 8, 'gb_n_estimators': 120, 'gb_learning_rate': 0.160915195311529, 'gb_max_depth': 5, 'svm_C': 0.14227514857926957, 'svm_kernel': 'linear'}. Best is trial 3 with value: 0.8076802267677928.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:37:51,120] Trial 8 finished with value: 0.7997854899256875 and parameters: {'rf_n_estimators': 51, 'rf_max_depth': 11, 'gb_n_estimators': 59, 'gb_learning_rate': 0.2476414263212554, 'gb_max_depth': 5, 'svm_C': 5.152302985984261, 'svm_kernel': 'rbf'}. Best is trial 3 with value: 0.8076802267677928.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:37:55,933] Trial 9 finished with value: 0.7928139125105339 and parameters: {'rf_n_estimators': 116, 'rf_max_depth': 14, 'gb_n_estimators': 114, 'gb_learning_rate': 0.22767402222968058, 'gb_max_depth': 3, 'svm_C': 0.8399859812003606, 'svm_kernel': 'linear'}. Best is trial 3 with value: 0.8076802267677928.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:38:08,000] Trial 10 finished with value: 0.7910633570826631 and parameters: {'rf_n_estimators': 85, 'rf_max_depth': 4, 'gb_n_estimators': 91, 'gb_learning_rate': 0.09238030955208901, 'gb_max_depth': 8, 'svm_C': 0.31039764308166096, 'svm_kernel': 'linear'}. Best is trial 3 with value: 0.8076802267677928.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:38:11,243] Trial 11 finished with value: 0.7674634183712555 and parameters: {'rf_n_estimators': 78, 'rf_max_depth': 11, 'gb_n_estimators': 91, 'gb_learning_rate': 0.2960308369167067, 'gb_max_depth': 2, 'svm_C': 1.2685653468414009, 'svm_kernel': 'linear'}. Best is trial 3 with value: 0.8076802267677928.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:38:21,291] Trial 12 finished with value: 0.8129127403661993 and parameters: {'rf_n_estimators': 67, 'rf_max_depth': 10, 'gb_n_estimators': 146, 'gb_learning_rate': 0.2927153269805989, 'gb_max_depth': 8, 'svm_C': 0.7518719972686361, 'svm_kernel': 'rbf'}. Best is trial 12 with value: 0.8129127403661993.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:38:35,144] Trial 13 finished with value: 0.8094269516586226 and parameters: {'rf_n_estimators': 63, 'rf_max_depth': 12, 'gb_n_estimators': 106, 'gb_learning_rate': 0.1395529568643979, 'gb_max_depth': 8, 'svm_C': 0.46117166206498045, 'svm_kernel': 'linear'}. Best is trial 12 with value: 0.8129127403661993.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:38:52,680] Trial 14 finished with value: 0.8120202252355782 and parameters: {'rf_n_estimators': 89, 'rf_max_depth': 13, 'gb_n_estimators': 148, 'gb_learning_rate': 0.14863742338466537, 'gb_max_depth': 8, 'svm_C': 0.4173105454553678, 'svm_kernel': 'rbf'}. Best is trial 12 with value: 0.8129127403661993.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:39:06,605] Trial 15 finished with value: 0.8085267754539187 and parameters: {'rf_n_estimators': 89, 'rf_max_depth': 9, 'gb_n_estimators': 150, 'gb_learning_rate': 0.20434102216576688, 'gb_max_depth': 8, 'svm_C': 0.2690477848030219, 'svm_kernel': 'rbf'}. Best is trial 12 with value: 0.8129127403661993.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:39:21,183] Trial 16 finished with value: 0.8094116295104573 and parameters: {'rf_n_estimators': 96, 'rf_max_depth': 13, 'gb_n_estimators': 150, 'gb_learning_rate': 0.15053559214787787, 'gb_max_depth': 7, 'svm_C': 0.6038841110056608, 'svm_kernel': 'rbf'}. Best is trial 12 with value: 0.8129127403661993.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:39:31,165] Trial 17 finished with value: 0.8146594652570289 and parameters: {'rf_n_estimators': 103, 'rf_max_depth': 15, 'gb_n_estimators': 122, 'gb_learning_rate': 0.29649295406868903, 'gb_max_depth': 7, 'svm_C': 0.12754019278346054, 'svm_kernel': 'rbf'}. Best is trial 17 with value: 0.8146594652570289.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:39:41,419] Trial 18 finished with value: 0.8164023596108174 and parameters: {'rf_n_estimators': 107, 'rf_max_depth': 15, 'gb_n_estimators': 122, 'gb_learning_rate': 0.2852631078707718, 'gb_max_depth': 7, 'svm_C': 0.1795022590132043, 'svm_kernel': 'rbf'}. Best is trial 18 with value: 0.8164023596108174.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:39:52,414] Trial 19 finished with value: 0.7971730636635256 and parameters: {'rf_n_estimators': 146, 'rf_max_depth': 15, 'gb_n_estimators': 121, 'gb_learning_rate': 0.26029663748792264, 'gb_max_depth': 7, 'svm_C': 0.10864655259278849, 'svm_kernel': 'rbf'}. Best is trial 18 with value: 0.8164023596108174.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:40:02,548] Trial 20 finished with value: 0.8059105186547153 and parameters: {'rf_n_estimators': 107, 'rf_max_depth': 15, 'gb_n_estimators': 113, 'gb_learning_rate': 0.2780585372343583, 'gb_max_depth': 7, 'svm_C': 0.17422172600792696, 'svm_kernel': 'rbf'}. Best is trial 18 with value: 0.8164023596108174.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:40:13,091] Trial 21 finished with value: 0.808545928139125 and parameters: {'rf_n_estimators': 111, 'rf_max_depth': 14, 'gb_n_estimators': 131, 'gb_learning_rate': 0.28694310430344094, 'gb_max_depth': 9, 'svm_C': 0.20889673317876217, 'svm_kernel': 'rbf'}. Best is trial 18 with value: 0.8164023596108174.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:40:24,596] Trial 22 finished with value: 0.7989197885543553 and parameters: {'rf_n_estimators': 99, 'rf_max_depth': 3, 'gb_n_estimators': 124, 'gb_learning_rate': 0.22943403305352195, 'gb_max_depth': 7, 'svm_C': 0.11256004178446305, 'svm_kernel': 'rbf'}. Best is trial 18 with value: 0.8164023596108174.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:40:35,045] Trial 23 finished with value: 0.8015513675017237 and parameters: {'rf_n_estimators': 131, 'rf_max_depth': 6, 'gb_n_estimators': 139, 'gb_learning_rate': 0.2978190170432012, 'gb_max_depth': 9, 'svm_C': 0.230305641415185, 'svm_kernel': 'rbf'}. Best is trial 18 with value: 0.8164023596108174.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:40:43,615] Trial 24 finished with value: 0.8067953727112542 and parameters: {'rf_n_estimators': 95, 'rf_max_depth': 14, 'gb_n_estimators': 109, 'gb_learning_rate': 0.23614777566261563, 'gb_max_depth': 6, 'svm_C': 0.33829053269015436, 'svm_kernel': 'rbf'}. Best is trial 18 with value: 0.8164023596108174.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:40:50,029] Trial 25 finished with value: 0.8041637937638857 and parameters: {'rf_n_estimators': 79, 'rf_max_depth': 12, 'gb_n_estimators': 102, 'gb_learning_rate': 0.26852944077030494, 'gb_max_depth': 5, 'svm_C': 1.6384736174133452, 'svm_kernel': 'rbf'}. Best is trial 18 with value: 0.8164023596108174.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:40:59,345] Trial 26 finished with value: 0.8024285604841799 and parameters: {'rf_n_estimators': 104, 'rf_max_depth': 10, 'gb_n_estimators': 90, 'gb_learning_rate': 0.2040491811784626, 'gb_max_depth': 7, 'svm_C': 0.6387719629735615, 'svm_kernel': 'rbf'}. Best is trial 18 with value: 0.8164023596108174.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:41:09,119] Trial 27 finished with value: 0.8033019229295947 and parameters: {'rf_n_estimators': 126, 'rf_max_depth': 12, 'gb_n_estimators': 127, 'gb_learning_rate': 0.24967524373869945, 'gb_max_depth': 6, 'svm_C': 0.15932636193765945, 'svm_kernel': 'rbf'}. Best is trial 18 with value: 0.8164023596108174.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:41:22,524] Trial 28 finished with value: 0.8146594652570289 and parameters: {'rf_n_estimators': 63, 'rf_max_depth': 15, 'gb_n_estimators': 80, 'gb_learning_rate': 0.18044915291766728, 'gb_max_depth': 9, 'svm_C': 0.13760353879221768, 'svm_kernel': 'rbf'}. Best is trial 18 with value: 0.8164023596108174.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:41:35,354] Trial 29 finished with value: 0.805029495135218 and parameters: {'rf_n_estimators': 114, 'rf_max_depth': 15, 'gb_n_estimators': 74, 'gb_learning_rate': 0.1922532718321059, 'gb_max_depth': 9, 'svm_C': 0.10209206468394925, 'svm_kernel': 'rbf'}. Best is trial 18 with value: 0.8164023596108174.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:41:49,215] Trial 30 finished with value: 0.8137822722745728 and parameters: {'rf_n_estimators': 121, 'rf_max_depth': 15, 'gb_n_estimators': 83, 'gb_learning_rate': 0.21583463158267374, 'gb_max_depth': 10, 'svm_C': 0.14309068889129536, 'svm_kernel': 'rbf'}. Best is trial 18 with value: 0.8164023596108174.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:42:05,523] Trial 31 finished with value: 0.8120163946985368 and parameters: {'rf_n_estimators': 119, 'rf_max_depth': 15, 'gb_n_estimators': 80, 'gb_learning_rate': 0.17040620153585986, 'gb_max_depth': 10, 'svm_C': 0.14904628155283445, 'svm_kernel': 'rbf'}. Best is trial 18 with value: 0.8164023596108174.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:42:23,933] Trial 32 finished with value: 0.8120278863096605 and parameters: {'rf_n_estimators': 134, 'rf_max_depth': 14, 'gb_n_estimators': 85, 'gb_learning_rate': 0.12780276847738803, 'gb_max_depth': 10, 'svm_C': 0.20351644930365295, 'svm_kernel': 'rbf'}. Best is trial 18 with value: 0.8164023596108174.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:42:34,252] Trial 33 finished with value: 0.8120355473837432 and parameters: {'rf_n_estimators': 145, 'rf_max_depth': 14, 'gb_n_estimators': 54, 'gb_learning_rate': 0.21288021871789503, 'gb_max_depth': 9, 'svm_C': 0.127784117072192, 'svm_kernel': 'rbf'}. Best is trial 18 with value: 0.8164023596108174.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:42:48,959] Trial 34 finished with value: 0.8137899333486555 and parameters: {'rf_n_estimators': 124, 'rf_max_depth': 13, 'gb_n_estimators': 68, 'gb_learning_rate': 0.18076758115971314, 'gb_max_depth': 10, 'svm_C': 0.18677784520165175, 'svm_kernel': 'rbf'}. Best is trial 18 with value: 0.8164023596108174.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:43:01,148] Trial 35 finished with value: 0.80941929058454 and parameters: {'rf_n_estimators': 108, 'rf_max_depth': 13, 'gb_n_estimators': 67, 'gb_learning_rate': 0.18125941236395313, 'gb_max_depth': 9, 'svm_C': 0.2570047017577737, 'svm_kernel': 'rbf'}. Best is trial 18 with value: 0.8164023596108174.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:43:16,940] Trial 36 finished with value: 0.7954301693097372 and parameters: {'rf_n_estimators': 127, 'rf_max_depth': 14, 'gb_n_estimators': 73, 'gb_learning_rate': 0.025935463091563887, 'gb_max_depth': 10, 'svm_C': 0.35599930152640513, 'svm_kernel': 'rbf'}. Best is trial 18 with value: 0.8164023596108174.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:43:23,042] Trial 37 finished with value: 0.7971807247376083 and parameters: {'rf_n_estimators': 138, 'rf_max_depth': 13, 'gb_n_estimators': 63, 'gb_learning_rate': 0.12641768438446147, 'gb_max_depth': 6, 'svm_C': 0.18001216077685114, 'svm_kernel': 'rbf'}. Best is trial 18 with value: 0.8164023596108174.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:43:35,684] Trial 38 finished with value: 0.8085382670650425 and parameters: {'rf_n_estimators': 101, 'rf_max_depth': 15, 'gb_n_estimators': 99, 'gb_learning_rate': 0.18275622955559634, 'gb_max_depth': 8, 'svm_C': 0.4655973627025619, 'svm_kernel': 'rbf'}. Best is trial 18 with value: 0.8164023596108174.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:43:38,598] Trial 39 finished with value: 0.7753275109170306 and parameters: {'rf_n_estimators': 58, 'rf_max_depth': 12, 'gb_n_estimators': 51, 'gb_learning_rate': 0.11247962958394751, 'gb_max_depth': 4, 'svm_C': 0.24981988015364434, 'svm_kernel': 'linear'}. Best is trial 18 with value: 0.8164023596108174.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:43:51,711] Trial 40 finished with value: 0.8085382670650425 and parameters: {'rf_n_estimators': 113, 'rf_max_depth': 13, 'gb_n_estimators': 72, 'gb_learning_rate': 0.06154745755943197, 'gb_max_depth': 9, 'svm_C': 0.10221367491531484, 'svm_kernel': 'rbf'}. Best is trial 18 with value: 0.8164023596108174.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:44:02,926] Trial 41 finished with value: 0.8146632957940703 and parameters: {'rf_n_estimators': 122, 'rf_max_depth': 15, 'gb_n_estimators': 80, 'gb_learning_rate': 0.2780736262796775, 'gb_max_depth': 10, 'svm_C': 0.13818186771155122, 'svm_kernel': 'rbf'}. Best is trial 18 with value: 0.8164023596108174.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:44:15,636] Trial 42 finished with value: 0.8102964835669961 and parameters: {'rf_n_estimators': 140, 'rf_max_depth': 15, 'gb_n_estimators': 61, 'gb_learning_rate': 0.28029838991090844, 'gb_max_depth': 10, 'svm_C': 0.12970797437377096, 'svm_kernel': 'rbf'}. Best is trial 18 with value: 0.8164023596108174.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:44:32,408] Trial 43 finished with value: 0.8199034704665594 and parameters: {'rf_n_estimators': 128, 'rf_max_depth': 14, 'gb_n_estimators': 77, 'gb_learning_rate': 0.2560481189664057, 'gb_max_depth': 10, 'svm_C': 0.1706824675188329, 'svm_kernel': 'rbf'}. Best is trial 43 with value: 0.8199034704665594.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:44:46,203] Trial 44 finished with value: 0.8129050792921169 and parameters: {'rf_n_estimators': 118, 'rf_max_depth': 14, 'gb_n_estimators': 77, 'gb_learning_rate': 0.25811052644208177, 'gb_max_depth': 9, 'svm_C': 0.15804032377743582, 'svm_kernel': 'rbf'}. Best is trial 43 with value: 0.8199034704665594.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:44:58,098] Trial 45 finished with value: 0.8094116295104573 and parameters: {'rf_n_estimators': 132, 'rf_max_depth': 15, 'gb_n_estimators': 86, 'gb_learning_rate': 0.2689802029998402, 'gb_max_depth': 10, 'svm_C': 0.12677519886384342, 'svm_kernel': 'linear'}. Best is trial 43 with value: 0.8199034704665594.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:45:04,743] Trial 46 finished with value: 0.806780050563089 and parameters: {'rf_n_estimators': 91, 'rf_max_depth': 14, 'gb_n_estimators': 96, 'gb_learning_rate': 0.2438370010007208, 'gb_max_depth': 5, 'svm_C': 0.29500844666324866, 'svm_kernel': 'rbf'}. Best is trial 43 with value: 0.8199034704665594.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:45:15,595] Trial 47 finished with value: 0.8111583544012871 and parameters: {'rf_n_estimators': 80, 'rf_max_depth': 15, 'gb_n_estimators': 136, 'gb_learning_rate': 0.2782248686081048, 'gb_max_depth': 8, 'svm_C': 2.333356919060961, 'svm_kernel': 'rbf'}. Best is trial 43 with value: 0.8199034704665594.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:45:28,303] Trial 48 finished with value: 0.8181567455757296 and parameters: {'rf_n_estimators': 110, 'rf_max_depth': 14, 'gb_n_estimators': 116, 'gb_learning_rate': 0.25417107972995484, 'gb_max_depth': 9, 'svm_C': 3.4798207554818212, 'svm_kernel': 'linear'}. Best is trial 43 with value: 0.8199034704665594.


Klasy w macierzy pomyÅ‚ek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-21 22:45:40,458] Trial 49 finished with value: 0.8076649046196277 and parameters: {'rf_n_estimators': 111, 'rf_max_depth': 11, 'gb_n_estimators': 116, 'gb_learning_rate': 0.25582054693180734, 'gb_max_depth': 8, 'svm_C': 3.3387834271544357, 'svm_kernel': 'linear'}. Best is trial 43 with value: 0.8199034704665594.


In [12]:
# get the best hyperparameters
best_params = study.best_trial.params

# show best hyperparameters
print("ðŸŽ¯ Najlepsze hiperparametry:")
for param, value in best_params.items():
    print(f"{param}: {value}")

ðŸŽ¯ Najlepsze hiperparametry:
rf_n_estimators: 128
rf_max_depth: 14
gb_n_estimators: 77
gb_learning_rate: 0.2560481189664057
gb_max_depth: 10
svm_C: 0.1706824675188329
svm_kernel: rbf


In [13]:
best_trial = study.best_trial


print("\nðŸ“Š Metryki najlepszej prÃ³by:")

print("--- Mikro ---")
print(f"F1-score (micro): {best_trial.user_attrs['f1_score']:.4f} Â± {best_trial.user_attrs['f1_std']:.4f}")
print(f"Accuracy: {best_trial.user_attrs['accuracy']:.4f} Â± {best_trial.user_attrs['accuracy_std']:.4f}")
print(f"Precision (micro): {best_trial.user_attrs['precision']:.4f} Â± {best_trial.user_attrs['precision_std']:.4f}")
print(f"Recall / Sensitivity (micro): {best_trial.user_attrs['recall']:.4f} Â± {best_trial.user_attrs['recall_std']:.4f}")

print("\n--- Makro ---")
print(f"F1-score (macro): {best_trial.user_attrs['f1_macro']:.4f} Â± {best_trial.user_attrs['f1_macro_std']:.4f}")
print(f"Precision (macro): {best_trial.user_attrs['precision_macro']:.4f} Â± {best_trial.user_attrs['precision_macro_std']:.4f}")
print(f"Recall / Sensitivity (macro): {best_trial.user_attrs['recall_macro']:.4f} Â± {best_trial.user_attrs['recall_macro_std']:.4f}")

print("\n--- Weighted ---")
print(f"F1-score (weighted): {best_trial.user_attrs['f1_weighted']:.4f} Â± {best_trial.user_attrs['f1_weighted_std']:.4f}")
print(f"Precision (weighted): {best_trial.user_attrs['precision_weighted']:.4f} Â± {best_trial.user_attrs['precision_weighted_std']:.4f}")
print(f"Recall / Sensitivity (weighted): {best_trial.user_attrs['recall_weighted']:.4f} Â± {best_trial.user_attrs['recall_weighted_std']:.4f}")

print("\n--- PozostaÅ‚e ---")
print(f"Specificity (avg): {best_trial.user_attrs['specificity_avg']:.4f} Â± {best_trial.user_attrs['specificity_std']:.4f}")
print(f"MSE: {best_trial.user_attrs['mse']:.4f} Â± {best_trial.user_attrs['mse_std']:.4f}")
print(f"RMSE: {best_trial.user_attrs['rmse']:.4f} Â± {best_trial.user_attrs['rmse_std']:.4f}")
print(f"LogLoss: {best_trial.user_attrs['logloss']:.4f} Â± {best_trial.user_attrs['logloss_std']:.4f}")

# metrics for raport master thesis (macro, confusion matrix)
print("\nðŸ“Š Do raportu ---")
print(f"F1-score (macro): {best_trial.user_attrs['f1_macro']:.4f} Â± {best_trial.user_attrs['f1_macro_std']:.4f}")
print(f"Precision (macro): {best_trial.user_attrs['precision_macro']:.4f} Â± {best_trial.user_attrs['precision_macro_std']:.4f}")
print(f"Recall / Sensitivity (macro): {best_trial.user_attrs['recall_macro']:.4f} Â± {best_trial.user_attrs['recall_macro_std']:.4f}")
print(f"Specificity (avg): {best_trial.user_attrs['specificity_avg']:.4f} Â± {best_trial.user_attrs['specificity_std']:.4f}")
print(f"Accuracy: {best_trial.user_attrs['accuracy']:.4f} Â± {best_trial.user_attrs['accuracy_std']:.4f}")

print("\nðŸ§® Confusion Matrix:")
print(np.array(best_trial.user_attrs["confusion_matrix"]))


ðŸ“Š Metryki najlepszej prÃ³by:
--- Mikro ---
F1-score (micro): 0.8199 Â± 0.0266
Accuracy: 0.8199 Â± 0.0266
Precision (micro): 0.8199 Â± 0.0266
Recall / Sensitivity (micro): 0.8199 Â± 0.0266

--- Makro ---
F1-score (macro): 0.7691 Â± 0.0299
Precision (macro): 0.7949 Â± 0.0410
Recall / Sensitivity (macro): 0.7515 Â± 0.0231

--- Weighted ---
F1-score (weighted): 0.8191 Â± 0.0263
Precision (weighted): 0.8216 Â± 0.0258
Recall / Sensitivity (weighted): 0.8199 Â± 0.0266

--- PozostaÅ‚e ---
Specificity (avg): 0.9279 Â± 0.0105
MSE: 0.2465 Â± 0.0338
RMSE: 0.4952 Â± 0.0355
LogLoss: 0.5306 Â± 0.0512

ðŸ“Š Do raportu ---
F1-score (macro): 0.7691 Â± 0.0299
Precision (macro): 0.7949 Â± 0.0410
Recall / Sensitivity (macro): 0.7515 Â± 0.0231
Specificity (avg): 0.9279 Â± 0.0105
Accuracy: 0.8199 Â± 0.0266

ðŸ§® Confusion Matrix:
[[505  61   3   1]
 [ 59 322  17   4]
 [  5  34  71   5]
 [  1   8   8  40]]


In [14]:
print(optuna.__version__)

4.5.0
