In [42]:
# read data from CSV

import pandas as pd

data = pd.read_csv('f1_data.csv')

data.head()


Unnamed: 0,driver_number,broadcast_name,country_code_x,year,session_name,date_start,time_start,date_end,time_end,gmt_offset,starting_position,wins_before,pit_stops_count,avg_pit_stop_duration,qualifying_position,position_category
0,1,M VERSTAPPEN,NED,2023,Race,2023-03-19,17:00:00,2023-03-19,19:00:00,03:00:00,15,1,0,0.0,1,top3
1,2,L SARGEANT,USA,2023,Race,2023-03-19,17:00:00,2023-03-19,19:00:00,03:00:00,20,0,0,0.0,16,no_points
2,4,L NORRIS,GBR,2023,Race,2023-03-19,17:00:00,2023-03-19,19:00:00,03:00:00,19,0,0,0.0,7,no_points
3,10,P GASLY,FRA,2023,Race,2023-03-19,17:00:00,2023-03-19,19:00:00,03:00:00,9,0,0,0.0,9,points
4,11,S PEREZ,MEX,2023,Race,2023-03-19,17:00:00,2023-03-19,19:00:00,03:00:00,1,0,0,0.0,2,winner


In [43]:
# data without position category 
X = data.drop(columns=['position_category'])
y = data['position_category']



In [44]:
# wyswitel informacje o zbiorze danych
print("Shape of X:", X.shape)
print("Shape of y:", y.shape)

#wyswietl cechy
print("Features in X:", X.columns.tolist())
# wyswietl unikalne kategorie pozycji
print("Unique position categories in y:", y.unique())

Shape of X: (1635, 15)
Shape of y: (1635,)
Features in X: ['driver_number', 'broadcast_name', 'country_code_x', 'year', 'session_name', 'date_start', 'time_start', 'date_end', 'time_end', 'gmt_offset', 'starting_position', 'wins_before', 'pit_stops_count', 'avg_pit_stop_duration', 'qualifying_position']
Unique position categories in y: ['top3' 'no_points' 'points' 'winner']


In [45]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

for col in X.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col])

In [46]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, log_loss, mean_squared_error


In [47]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [48]:
X_train, X_valid, y_train, y_valid = train_test_split(X_scaled, y, test_size=0.3, stratify=y, random_state=42)

In [49]:
from sklearn.preprocessing import LabelEncoder


In [51]:
def compute_specificity(conf_mat):
    specificity = []
    for i in range(conf_mat.shape[0]):
        TP = conf_mat[i, i]
        FP = conf_mat[:, i].sum() - TP
        FN = conf_mat[i, :].sum() - TP
        TN = conf_mat.sum() - (TP + FP + FN)
        spec = TN / (TN + FP) if (TN + FP) > 0 else 0
        specificity.append(spec)
    return specificity

In [52]:
from sklearn.model_selection import StratifiedKFold
import numpy as np

def objective(trial):
    # === Hiperparametry ===
    rf_n_estimators = trial.suggest_int("rf_n_estimators", 50, 150)
    rf_max_depth = trial.suggest_int("rf_max_depth", 3, 15)

    gb_n_estimators = trial.suggest_int("gb_n_estimators", 50, 150)
    gb_learning_rate = trial.suggest_float("gb_learning_rate", 0.01, 0.3)
    gb_max_depth = trial.suggest_int("gb_max_depth", 2, 10)

    svm_C = trial.suggest_float("svm_C", 0.1, 10.0, log=True)
    svm_kernel = trial.suggest_categorical("svm_kernel", ["linear", "rbf"])

    # === Modele ===
    clf_rf = RandomForestClassifier(n_estimators=rf_n_estimators, max_depth=rf_max_depth, random_state=42)
    clf_gb = GradientBoostingClassifier(n_estimators=gb_n_estimators, learning_rate=gb_learning_rate,
                                        max_depth=gb_max_depth, random_state=42)
    clf_svm = SVC(C=svm_C, kernel=svm_kernel, probability=True, random_state=42)

    # === Ensemble ===
    ensemble = VotingClassifier(
        estimators=[('rf', clf_rf), ('gb', clf_gb), ('svm', clf_svm)],
        voting='soft'
    )

    # === Label encoding ===
    le = LabelEncoder()
    y_train_enc = le.fit_transform(y_train)
    y_valid_enc = le.transform(y_valid)

    class_labels = le.classes_
    print("Klasy w macierzy pomy≈Çek:", class_labels)

    # === k-Fold Cross-Validation ===
    kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    acc_list, prec_list, rec_list, f1_list, spec_list = [], [], [], [], []
    mse_list, rmse_list, logloss_list = [], [], []

    # nowe metryki
    prec_macro_list, prec_weighted_list = [], []
    rec_macro_list, rec_weighted_list = [], []
    f1_macro_list, f1_weighted_list = [], []

    # globalna macierz pomy≈Çek
    conf_mat_sum = np.zeros((len(np.unique(y_train_enc)), len(np.unique(y_train_enc))), dtype=int)

    for train_index, test_index in kf.split(X_train, y_train_enc):
        X_tr, X_te = X_train[train_index], X_train[test_index]
        y_tr, y_te = y_train_enc[train_index], y_train_enc[test_index]

        ensemble.fit(X_tr, y_tr)
        y_pred_cv = ensemble.predict(X_te)
        y_proba_cv = ensemble.predict_proba(X_te)

        conf_mat_cv = confusion_matrix(y_te, y_pred_cv)
        conf_mat_sum += conf_mat_cv  # sumowanie po wszystkich foldach

        acc_list.append(accuracy_score(y_te, y_pred_cv))
        prec_list.append(precision_score(y_te, y_pred_cv, average="micro", zero_division=0))
        rec_list.append(recall_score(y_te, y_pred_cv, average="micro", zero_division=0))
        f1_list.append(f1_score(y_te, y_pred_cv, average="micro", zero_division=0))

        # nowe metryki
        prec_macro_list.append(precision_score(y_te, y_pred_cv, average="macro", zero_division=0))
        prec_weighted_list.append(precision_score(y_te, y_pred_cv, average="weighted", zero_division=0))

        rec_macro_list.append(recall_score(y_te, y_pred_cv, average="macro", zero_division=0))
        rec_weighted_list.append(recall_score(y_te, y_pred_cv, average="weighted", zero_division=0))

        f1_macro_list.append(f1_score(y_te, y_pred_cv, average="macro", zero_division=0))
        f1_weighted_list.append(f1_score(y_te, y_pred_cv, average="weighted", zero_division=0))

        spec_cv = np.mean(compute_specificity(conf_mat_cv))
        spec_list.append(spec_cv)

        mse_list.append(mean_squared_error(y_te, y_pred_cv))
        rmse_list.append(np.sqrt(mse_list[-1]))
        logloss_list.append(log_loss(y_te, y_proba_cv))

    # === ≈örednie i SD dla metryk ===
    acc, acc_std = np.mean(acc_list), np.std(acc_list, ddof=0)
    prec, prec_std = np.mean(prec_list), np.std(prec_list, ddof=0)
    rec, rec_std = np.mean(rec_list), np.std(rec_list, ddof=0)
    f1, f1_std = np.mean(f1_list), np.std(f1_list, ddof=0)
    spec, spec_std = np.mean(spec_list), np.std(spec_list, ddof=0)

    mse, mse_std = np.mean(mse_list), np.std(mse_list, ddof=0)
    rmse, rmse_std = np.mean(rmse_list), np.std(rmse_list, ddof=0)
    logloss, logloss_std = np.mean(logloss_list), np.std(logloss_list, ddof=0)

    # === ≈örednie i SD dla nowych metryk ===
    prec_macro, prec_macro_std = np.mean(prec_macro_list), np.std(prec_macro_list, ddof=0)
    prec_weighted, prec_weighted_std = np.mean(prec_weighted_list), np.std(prec_weighted_list, ddof=0)

    rec_macro, rec_macro_std = np.mean(rec_macro_list), np.std(rec_macro_list, ddof=0)
    rec_weighted, rec_weighted_std = np.mean(rec_weighted_list), np.std(rec_weighted_list, ddof=0)

    f1_macro, f1_macro_std = np.mean(f1_macro_list), np.std(f1_macro_list, ddof=0)
    f1_weighted, f1_weighted_std = np.mean(f1_weighted_list), np.std(f1_weighted_list, ddof=0)

    # === Zapis do triala ===
    trial.set_user_attr("accuracy", acc)
    trial.set_user_attr("accuracy_std", acc_std)
    trial.set_user_attr("precision", prec)
    trial.set_user_attr("precision_std", prec_std)
    trial.set_user_attr("recall", rec)
    trial.set_user_attr("recall_std", rec_std)
    trial.set_user_attr("f1_score", f1)
    trial.set_user_attr("f1_std", f1_std)
    trial.set_user_attr("specificity_avg", spec)
    trial.set_user_attr("specificity_std", spec_std)

    trial.set_user_attr("mse", mse)
    trial.set_user_attr("mse_std", mse_std)
    trial.set_user_attr("rmse", rmse)
    trial.set_user_attr("rmse_std", rmse_std)
    trial.set_user_attr("logloss", logloss)
    trial.set_user_attr("logloss_std", logloss_std)

    # nowe metryki
    trial.set_user_attr("precision_macro", prec_macro)
    trial.set_user_attr("precision_macro_std", prec_macro_std)
    trial.set_user_attr("precision_weighted", prec_weighted)
    trial.set_user_attr("precision_weighted_std", prec_weighted_std)

    trial.set_user_attr("recall_macro", rec_macro)
    trial.set_user_attr("recall_macro_std", rec_macro_std)
    trial.set_user_attr("recall_weighted", rec_weighted)
    trial.set_user_attr("recall_weighted_std", rec_weighted_std)

    trial.set_user_attr("f1_macro", f1_macro)
    trial.set_user_attr("f1_macro_std", f1_macro_std)
    trial.set_user_attr("f1_weighted", f1_weighted)
    trial.set_user_attr("f1_weighted_std", f1_weighted_std)

    # zapis ca≈Çej macierzy pomy≈Çek (zsumowanej z 5 fold√≥w)
    trial.set_user_attr("confusion_matrix", conf_mat_sum.tolist())

    return f1  # lub acc je≈õli optymalizujesz dok≈Çadno≈õƒá


In [53]:
import optuna

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)

[I 2025-09-14 01:13:12,636] A new study created in memory with name: no-name-e7c2ba2e-d7f7-4173-9f0a-1f830d48a247


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:13:22,770] Trial 0 finished with value: 0.8015513675017237 and parameters: {'rf_n_estimators': 100, 'rf_max_depth': 15, 'gb_n_estimators': 78, 'gb_learning_rate': 0.09707853710220406, 'gb_max_depth': 6, 'svm_C': 0.8532952121933164, 'svm_kernel': 'linear'}. Best is trial 0 with value: 0.8015513675017237.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:13:42,973] Trial 1 finished with value: 0.8024208994100972 and parameters: {'rf_n_estimators': 58, 'rf_max_depth': 3, 'gb_n_estimators': 144, 'gb_learning_rate': 0.1875845773770531, 'gb_max_depth': 9, 'svm_C': 0.41242558617397584, 'svm_kernel': 'rbf'}. Best is trial 1 with value: 0.8024208994100972.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:13:50,320] Trial 2 finished with value: 0.7438634796598483 and parameters: {'rf_n_estimators': 136, 'rf_max_depth': 13, 'gb_n_estimators': 134, 'gb_learning_rate': 0.022506005860690643, 'gb_max_depth': 3, 'svm_C': 0.7763250499999047, 'svm_kernel': 'linear'}. Best is trial 1 with value: 0.8024208994100972.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:13:56,961] Trial 3 finished with value: 0.7272657626599249 and parameters: {'rf_n_estimators': 103, 'rf_max_depth': 3, 'gb_n_estimators': 124, 'gb_learning_rate': 0.09467582010176248, 'gb_max_depth': 3, 'svm_C': 2.123220233014084, 'svm_kernel': 'linear'}. Best is trial 1 with value: 0.8024208994100972.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:14:06,290] Trial 4 finished with value: 0.7464873975331342 and parameters: {'rf_n_estimators': 85, 'rf_max_depth': 9, 'gb_n_estimators': 77, 'gb_learning_rate': 0.017742851144165996, 'gb_max_depth': 6, 'svm_C': 0.3600835184541513, 'svm_kernel': 'rbf'}. Best is trial 1 with value: 0.8024208994100972.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:14:15,189] Trial 5 finished with value: 0.7910556960085804 and parameters: {'rf_n_estimators': 59, 'rf_max_depth': 10, 'gb_n_estimators': 107, 'gb_learning_rate': 0.16083015830921998, 'gb_max_depth': 5, 'svm_C': 8.491868302705123, 'svm_kernel': 'rbf'}. Best is trial 1 with value: 0.8024208994100972.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:14:28,726] Trial 6 finished with value: 0.8059143491917566 and parameters: {'rf_n_estimators': 90, 'rf_max_depth': 3, 'gb_n_estimators': 56, 'gb_learning_rate': 0.29429349397746274, 'gb_max_depth': 10, 'svm_C': 0.6954077876997617, 'svm_kernel': 'linear'}. Best is trial 6 with value: 0.8059143491917566.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:14:47,610] Trial 7 finished with value: 0.8085382670650425 and parameters: {'rf_n_estimators': 130, 'rf_max_depth': 7, 'gb_n_estimators': 138, 'gb_learning_rate': 0.21647963222494848, 'gb_max_depth': 10, 'svm_C': 0.5953985002460054, 'svm_kernel': 'linear'}. Best is trial 7 with value: 0.8085382670650425.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:14:52,559] Trial 8 finished with value: 0.7639776296636789 and parameters: {'rf_n_estimators': 109, 'rf_max_depth': 11, 'gb_n_estimators': 74, 'gb_learning_rate': 0.11416409918112166, 'gb_max_depth': 3, 'svm_C': 2.052022748816438, 'svm_kernel': 'linear'}. Best is trial 7 with value: 0.8085382670650425.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:15:01,892] Trial 9 finished with value: 0.7657128629433846 and parameters: {'rf_n_estimators': 128, 'rf_max_depth': 7, 'gb_n_estimators': 139, 'gb_learning_rate': 0.0691844347368668, 'gb_max_depth': 4, 'svm_C': 0.9732431418880085, 'svm_kernel': 'rbf'}. Best is trial 7 with value: 0.8085382670650425.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:15:17,848] Trial 10 finished with value: 0.8006588523711023 and parameters: {'rf_n_estimators': 149, 'rf_max_depth': 6, 'gb_n_estimators': 110, 'gb_learning_rate': 0.2361767554297945, 'gb_max_depth': 8, 'svm_C': 0.13284313084881902, 'svm_kernel': 'linear'}. Best is trial 7 with value: 0.8085382670650425.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:15:30,145] Trial 11 finished with value: 0.8076534130085038 and parameters: {'rf_n_estimators': 83, 'rf_max_depth': 6, 'gb_n_estimators': 55, 'gb_learning_rate': 0.2947191633690759, 'gb_max_depth': 10, 'svm_C': 0.2620314621500204, 'svm_kernel': 'linear'}. Best is trial 7 with value: 0.8085382670650425.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:15:39,697] Trial 12 finished with value: 0.800666513445185 and parameters: {'rf_n_estimators': 75, 'rf_max_depth': 6, 'gb_n_estimators': 53, 'gb_learning_rate': 0.2988560947808983, 'gb_max_depth': 8, 'svm_C': 0.14329270390681806, 'svm_kernel': 'linear'}. Best is trial 7 with value: 0.8085382670650425.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:15:56,457] Trial 13 finished with value: 0.8120393779207845 and parameters: {'rf_n_estimators': 121, 'rf_max_depth': 8, 'gb_n_estimators': 94, 'gb_learning_rate': 0.23641970846368035, 'gb_max_depth': 10, 'svm_C': 0.2597346751184316, 'svm_kernel': 'linear'}. Best is trial 13 with value: 0.8120393779207845.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:16:11,993] Trial 14 finished with value: 0.8076649046196277 and parameters: {'rf_n_estimators': 120, 'rf_max_depth': 8, 'gb_n_estimators': 93, 'gb_learning_rate': 0.22761961342256265, 'gb_max_depth': 8, 'svm_C': 0.23732471054084464, 'svm_kernel': 'linear'}. Best is trial 13 with value: 0.8120393779207845.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:16:30,076] Trial 15 finished with value: 0.8111736765494524 and parameters: {'rf_n_estimators': 145, 'rf_max_depth': 11, 'gb_n_estimators': 120, 'gb_learning_rate': 0.2252518251987284, 'gb_max_depth': 9, 'svm_C': 2.048451834276172, 'svm_kernel': 'linear'}. Best is trial 13 with value: 0.8120393779207845.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:16:45,539] Trial 16 finished with value: 0.8120393779207845 and parameters: {'rf_n_estimators': 150, 'rf_max_depth': 12, 'gb_n_estimators': 118, 'gb_learning_rate': 0.255331124601011, 'gb_max_depth': 7, 'svm_C': 2.587523249523148, 'svm_kernel': 'linear'}. Best is trial 13 with value: 0.8120393779207845.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:17:00,123] Trial 17 finished with value: 0.80853060599096 and parameters: {'rf_n_estimators': 116, 'rf_max_depth': 14, 'gb_n_estimators': 95, 'gb_learning_rate': 0.2614069661886794, 'gb_max_depth': 7, 'svm_C': 6.316142752680977, 'svm_kernel': 'linear'}. Best is trial 13 with value: 0.8120393779207845.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:17:13,340] Trial 18 finished with value: 0.8006703439822264 and parameters: {'rf_n_estimators': 139, 'rf_max_depth': 12, 'gb_n_estimators': 87, 'gb_learning_rate': 0.18218824471025882, 'gb_max_depth': 7, 'svm_C': 3.375494436777288, 'svm_kernel': 'rbf'}. Best is trial 13 with value: 0.8120393779207845.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:17:24,355] Trial 19 finished with value: 0.8050333256722594 and parameters: {'rf_n_estimators': 122, 'rf_max_depth': 9, 'gb_n_estimators': 119, 'gb_learning_rate': 0.25042344999788996, 'gb_max_depth': 5, 'svm_C': 4.104728324204769, 'svm_kernel': 'linear'}. Best is trial 13 with value: 0.8120393779207845.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:17:29,810] Trial 20 finished with value: 0.772707423580786 and parameters: {'rf_n_estimators': 138, 'rf_max_depth': 12, 'gb_n_estimators': 106, 'gb_learning_rate': 0.19718576812616492, 'gb_max_depth': 2, 'svm_C': 1.4252799099017972, 'svm_kernel': 'linear'}. Best is trial 13 with value: 0.8120393779207845.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:17:45,950] Trial 21 finished with value: 0.8207768329119742 and parameters: {'rf_n_estimators': 150, 'rf_max_depth': 11, 'gb_n_estimators': 120, 'gb_learning_rate': 0.26548198115479055, 'gb_max_depth': 9, 'svm_C': 2.362048869342139, 'svm_kernel': 'linear'}. Best is trial 21 with value: 0.8207768329119742.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:18:01,932] Trial 22 finished with value: 0.8207768329119742 and parameters: {'rf_n_estimators': 149, 'rf_max_depth': 10, 'gb_n_estimators': 128, 'gb_learning_rate': 0.2692134072339164, 'gb_max_depth': 9, 'svm_C': 3.678079118915089, 'svm_kernel': 'linear'}. Best is trial 21 with value: 0.8207768329119742.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:18:20,504] Trial 23 finished with value: 0.8120393779207845 and parameters: {'rf_n_estimators': 129, 'rf_max_depth': 10, 'gb_n_estimators': 130, 'gb_learning_rate': 0.2719408391419464, 'gb_max_depth': 9, 'svm_C': 5.592880692413843, 'svm_kernel': 'linear'}. Best is trial 21 with value: 0.8207768329119742.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:18:42,814] Trial 24 finished with value: 0.8129127403661993 and parameters: {'rf_n_estimators': 142, 'rf_max_depth': 9, 'gb_n_estimators': 150, 'gb_learning_rate': 0.2797955820303543, 'gb_max_depth': 9, 'svm_C': 1.4837659033415846, 'svm_kernel': 'linear'}. Best is trial 21 with value: 0.8207768329119742.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:19:04,663] Trial 25 finished with value: 0.807657243545545 and parameters: {'rf_n_estimators': 144, 'rf_max_depth': 10, 'gb_n_estimators': 150, 'gb_learning_rate': 0.27308889890276317, 'gb_max_depth': 9, 'svm_C': 1.5523414134015285, 'svm_kernel': 'linear'}. Best is trial 21 with value: 0.8207768329119742.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:19:25,518] Trial 26 finished with value: 0.812908909829158 and parameters: {'rf_n_estimators': 137, 'rf_max_depth': 9, 'gb_n_estimators': 150, 'gb_learning_rate': 0.27973795830635206, 'gb_max_depth': 8, 'svm_C': 1.2551791130407965, 'svm_kernel': 'rbf'}. Best is trial 21 with value: 0.8207768329119742.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:19:53,612] Trial 27 finished with value: 0.8111621849383284 and parameters: {'rf_n_estimators': 150, 'rf_max_depth': 11, 'gb_n_estimators': 128, 'gb_learning_rate': 0.14148520885732183, 'gb_max_depth': 9, 'svm_C': 3.8453063986067746, 'svm_kernel': 'linear'}. Best is trial 21 with value: 0.8207768329119742.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:20:14,016] Trial 28 finished with value: 0.8085382670650425 and parameters: {'rf_n_estimators': 132, 'rf_max_depth': 13, 'gb_n_estimators': 114, 'gb_learning_rate': 0.20802145595272875, 'gb_max_depth': 8, 'svm_C': 9.825867609746616, 'svm_kernel': 'linear'}. Best is trial 21 with value: 0.8207768329119742.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:20:29,617] Trial 29 finished with value: 0.8076649046196277 and parameters: {'rf_n_estimators': 142, 'rf_max_depth': 15, 'gb_n_estimators': 142, 'gb_learning_rate': 0.15359137940516965, 'gb_max_depth': 6, 'svm_C': 2.7581947218610363, 'svm_kernel': 'linear'}. Best is trial 21 with value: 0.8207768329119742.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:20:45,751] Trial 30 finished with value: 0.8094154600474986 and parameters: {'rf_n_estimators': 111, 'rf_max_depth': 8, 'gb_n_estimators': 132, 'gb_learning_rate': 0.24274976359881065, 'gb_max_depth': 7, 'svm_C': 5.428276877294115, 'svm_kernel': 'linear'}. Best is trial 21 with value: 0.8207768329119742.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:21:00,162] Trial 31 finished with value: 0.811158354401287 and parameters: {'rf_n_estimators': 136, 'rf_max_depth': 9, 'gb_n_estimators': 145, 'gb_learning_rate': 0.2791767835235319, 'gb_max_depth': 8, 'svm_C': 1.1840342685080647, 'svm_kernel': 'rbf'}. Best is trial 21 with value: 0.8207768329119742.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:21:15,658] Trial 32 finished with value: 0.8041791159120508 and parameters: {'rf_n_estimators': 142, 'rf_max_depth': 10, 'gb_n_estimators': 147, 'gb_learning_rate': 0.27783184595326305, 'gb_max_depth': 9, 'svm_C': 1.6168772444351043, 'svm_kernel': 'rbf'}. Best is trial 21 with value: 0.8207768329119742.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:21:30,601] Trial 33 finished with value: 0.8146709568681529 and parameters: {'rf_n_estimators': 134, 'rf_max_depth': 9, 'gb_n_estimators': 136, 'gb_learning_rate': 0.26710664398064654, 'gb_max_depth': 9, 'svm_C': 1.124198430249209, 'svm_kernel': 'rbf'}. Best is trial 21 with value: 0.8207768329119742.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:21:46,440] Trial 34 finished with value: 0.8129127403661993 and parameters: {'rf_n_estimators': 146, 'rf_max_depth': 11, 'gb_n_estimators': 136, 'gb_learning_rate': 0.26075149885512094, 'gb_max_depth': 10, 'svm_C': 0.9073163926127977, 'svm_kernel': 'rbf'}. Best is trial 21 with value: 0.8207768329119742.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:22:08,300] Trial 35 finished with value: 0.8041752853750095 and parameters: {'rf_n_estimators': 126, 'rf_max_depth': 13, 'gb_n_estimators': 125, 'gb_learning_rate': 0.1779183467128005, 'gb_max_depth': 9, 'svm_C': 2.9310503151221456, 'svm_kernel': 'rbf'}. Best is trial 21 with value: 0.8207768329119742.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:22:27,679] Trial 36 finished with value: 0.8050563088945071 and parameters: {'rf_n_estimators': 134, 'rf_max_depth': 7, 'gb_n_estimators': 134, 'gb_learning_rate': 0.20324912332724251, 'gb_max_depth': 9, 'svm_C': 1.811650027951256, 'svm_kernel': 'rbf'}. Best is trial 21 with value: 0.8207768329119742.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:22:43,947] Trial 37 finished with value: 0.8111621849383284 and parameters: {'rf_n_estimators': 51, 'rf_max_depth': 9, 'gb_n_estimators': 142, 'gb_learning_rate': 0.2515585194817339, 'gb_max_depth': 10, 'svm_C': 0.6484437460257425, 'svm_kernel': 'rbf'}. Best is trial 21 with value: 0.8207768329119742.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:22:58,349] Trial 38 finished with value: 0.8129127403661993 and parameters: {'rf_n_estimators': 99, 'rf_max_depth': 10, 'gb_n_estimators': 125, 'gb_learning_rate': 0.29997556523684854, 'gb_max_depth': 9, 'svm_C': 2.3763978947522917, 'svm_kernel': 'linear'}. Best is trial 21 with value: 0.8207768329119742.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:23:16,741] Trial 39 finished with value: 0.804167624300927 and parameters: {'rf_n_estimators': 141, 'rf_max_depth': 4, 'gb_n_estimators': 138, 'gb_learning_rate': 0.22379164908965152, 'gb_max_depth': 10, 'svm_C': 0.4982020558796629, 'svm_kernel': 'rbf'}. Best is trial 21 with value: 0.8207768329119742.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:23:29,651] Trial 40 finished with value: 0.8015322148165172 and parameters: {'rf_n_estimators': 100, 'rf_max_depth': 8, 'gb_n_estimators': 105, 'gb_learning_rate': 0.2833270760738653, 'gb_max_depth': 7, 'svm_C': 1.0710309378658949, 'svm_kernel': 'linear'}. Best is trial 21 with value: 0.8207768329119742.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:23:45,197] Trial 41 finished with value: 0.8199073010036008 and parameters: {'rf_n_estimators': 146, 'rf_max_depth': 11, 'gb_n_estimators': 136, 'gb_learning_rate': 0.2640994111102331, 'gb_max_depth': 10, 'svm_C': 0.8114740849258611, 'svm_kernel': 'rbf'}. Best is trial 21 with value: 0.8207768329119742.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:24:01,788] Trial 42 finished with value: 0.8111775070864937 and parameters: {'rf_n_estimators': 147, 'rf_max_depth': 11, 'gb_n_estimators': 132, 'gb_learning_rate': 0.2636967984859988, 'gb_max_depth': 10, 'svm_C': 0.7995941582671431, 'svm_kernel': 'rbf'}. Best is trial 21 with value: 0.8207768329119742.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:24:34,827] Trial 43 finished with value: 0.8050486478204244 and parameters: {'rf_n_estimators': 135, 'rf_max_depth': 12, 'gb_n_estimators': 141, 'gb_learning_rate': 0.058807068731689865, 'gb_max_depth': 9, 'svm_C': 0.49041416260829435, 'svm_kernel': 'rbf'}. Best is trial 21 with value: 0.8207768329119742.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:24:51,606] Trial 44 finished with value: 0.805918179728798 and parameters: {'rf_n_estimators': 150, 'rf_max_depth': 10, 'gb_n_estimators': 113, 'gb_learning_rate': 0.24032649252193986, 'gb_max_depth': 8, 'svm_C': 4.658220538670296, 'svm_kernel': 'rbf'}. Best is trial 21 with value: 0.8207768329119742.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:25:06,485] Trial 45 finished with value: 0.813782272274573 and parameters: {'rf_n_estimators': 143, 'rf_max_depth': 13, 'gb_n_estimators': 62, 'gb_learning_rate': 0.2869830297073981, 'gb_max_depth': 10, 'svm_C': 1.9013597694478264, 'svm_kernel': 'rbf'}. Best is trial 21 with value: 0.8207768329119742.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:25:21,220] Trial 46 finished with value: 0.8155251666283613 and parameters: {'rf_n_estimators': 131, 'rf_max_depth': 13, 'gb_n_estimators': 63, 'gb_learning_rate': 0.29095867482285753, 'gb_max_depth': 10, 'svm_C': 3.168845218768298, 'svm_kernel': 'rbf'}. Best is trial 21 with value: 0.8207768329119742.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:25:37,098] Trial 47 finished with value: 0.8129127403661993 and parameters: {'rf_n_estimators': 124, 'rf_max_depth': 14, 'gb_n_estimators': 82, 'gb_learning_rate': 0.26494711996198134, 'gb_max_depth': 10, 'svm_C': 3.302679285587625, 'svm_kernel': 'rbf'}. Best is trial 21 with value: 0.8207768329119742.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:25:53,930] Trial 48 finished with value: 0.815532827702444 and parameters: {'rf_n_estimators': 132, 'rf_max_depth': 14, 'gb_n_estimators': 65, 'gb_learning_rate': 0.24715651197141453, 'gb_max_depth': 10, 'svm_C': 7.583374988108422, 'svm_kernel': 'rbf'}. Best is trial 21 with value: 0.8207768329119742.


Klasy w macierzy pomy≈Çek: ['no_points' 'points' 'top3' 'winner']


[I 2025-09-14 01:26:12,301] Trial 49 finished with value: 0.8225158967287214 and parameters: {'rf_n_estimators': 116, 'rf_max_depth': 15, 'gb_n_estimators': 70, 'gb_learning_rate': 0.21815227101702045, 'gb_max_depth': 10, 'svm_C': 6.512159685772161, 'svm_kernel': 'rbf'}. Best is trial 49 with value: 0.8225158967287214.


In [54]:
best_params = study.best_trial.params

print("üéØ Najlepsze hiperparametry:")
for param, value in best_params.items():
    print(f"{param}: {value}")


üéØ Najlepsze hiperparametry:
rf_n_estimators: 116
rf_max_depth: 15
gb_n_estimators: 70
gb_learning_rate: 0.21815227101702045
gb_max_depth: 10
svm_C: 6.512159685772161
svm_kernel: rbf


In [55]:
best_trial = study.best_trial


print("\nüìä Metryki najlepszej pr√≥by:")

# --- Mikro ---
print("--- Mikro ---")
print(f"F1-score (micro): {best_trial.user_attrs['f1_score']:.4f} ¬± {best_trial.user_attrs['f1_std']:.4f}")
print(f"Accuracy: {best_trial.user_attrs['accuracy']:.4f} ¬± {best_trial.user_attrs['accuracy_std']:.4f}")
print(f"Precision (micro): {best_trial.user_attrs['precision']:.4f} ¬± {best_trial.user_attrs['precision_std']:.4f}")
print(f"Recall / Sensitivity (micro): {best_trial.user_attrs['recall']:.4f} ¬± {best_trial.user_attrs['recall_std']:.4f}")

# --- Makro ---
print("\n--- Makro ---")
print(f"F1-score (macro): {best_trial.user_attrs['f1_macro']:.4f} ¬± {best_trial.user_attrs['f1_macro_std']:.4f}")
print(f"Precision (macro): {best_trial.user_attrs['precision_macro']:.4f} ¬± {best_trial.user_attrs['precision_macro_std']:.4f}")
print(f"Recall / Sensitivity (macro): {best_trial.user_attrs['recall_macro']:.4f} ¬± {best_trial.user_attrs['recall_macro_std']:.4f}")

# --- Weighted ---
print("\n--- Weighted ---")
print(f"F1-score (weighted): {best_trial.user_attrs['f1_weighted']:.4f} ¬± {best_trial.user_attrs['f1_weighted_std']:.4f}")
print(f"Precision (weighted): {best_trial.user_attrs['precision_weighted']:.4f} ¬± {best_trial.user_attrs['precision_weighted_std']:.4f}")
print(f"Recall / Sensitivity (weighted): {best_trial.user_attrs['recall_weighted']:.4f} ¬± {best_trial.user_attrs['recall_weighted_std']:.4f}")

# --- Pozosta≈Çe ---
print("\n--- Pozosta≈Çe ---")
print(f"Specificity (avg): {best_trial.user_attrs['specificity_avg']:.4f} ¬± {best_trial.user_attrs['specificity_std']:.4f}")
print(f"MSE: {best_trial.user_attrs['mse']:.4f} ¬± {best_trial.user_attrs['mse_std']:.4f}")
print(f"RMSE: {best_trial.user_attrs['rmse']:.4f} ¬± {best_trial.user_attrs['rmse_std']:.4f}")
print(f"LogLoss: {best_trial.user_attrs['logloss']:.4f} ¬± {best_trial.user_attrs['logloss_std']:.4f}")

print("\nüßÆ Confusion Matrix:")
print(np.array(best_trial.user_attrs["confusion_matrix"]))



üìä Metryki najlepszej pr√≥by:
--- Mikro ---
F1-score (micro): 0.8225 ¬± 0.0402
Accuracy: 0.8225 ¬± 0.0402
Precision (micro): 0.8225 ¬± 0.0402
Recall / Sensitivity (micro): 0.8225 ¬± 0.0402

--- Makro ---
F1-score (macro): 0.7728 ¬± 0.0415
Precision (macro): 0.8020 ¬± 0.0482
Recall / Sensitivity (macro): 0.7534 ¬± 0.0374

--- Weighted ---
F1-score (weighted): 0.8219 ¬± 0.0401
Precision (weighted): 0.8250 ¬± 0.0396
Recall / Sensitivity (weighted): 0.8225 ¬± 0.0402

--- Pozosta≈Çe ---
Specificity (avg): 0.9292 ¬± 0.0166
MSE: 0.2238 ¬± 0.0534
RMSE: 0.4694 ¬± 0.0591
LogLoss: 0.5116 ¬± 0.0477

üßÆ Confusion Matrix:
[[505  62   2   1]
 [ 57 325  18   2]
 [  4  35  71   5]
 [  0   7  10  40]]


In [None]:
# Wy≈õwietlenie etykiet klas w kolejno≈õci odpowiadajƒÖcej macierzy pomy≈Çek
class_labels = le.classes_
print("Klasy w macierzy pomy≈Çek:", class_labels)

# Macierz pomy≈Çek zsumowana po 5 foldach
conf_mat_global = np.array(best_trial.user_attrs["confusion_matrix"])
print("Globalna macierz pomy≈Çek:\n", conf_mat_global)

Klasy w macierzy pomy≈Çek: ['-03:00:00' '-04:00:00' '-05:00:00' '-06:00:00' '-08:00:00' '01:00:00'
 '02:00:00' '03:00:00' '04:00:00' '08:00:00' '09:00:00' '10:00:00'
 '11:00:00']
Globalna macierz pomy≈Çek:
 [[509  55   5   1]
 [ 63 315  20   4]
 [  5  45  60   5]
 [  1  10   8  38]]


In [None]:
!pip freeze > requirements.txt

In [None]:
# # Definicja klasyfikator√≥w
# svm_clf = SVC(probability=True, kernel='rbf', random_state=42)
# rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)
# gb_clf = GradientBoostingClassifier(n_estimators=100, random_state=42)

In [None]:
# Model zespo≈Çowy VotingClassifier
# ensemble_clf = VotingClassifier(
#     estimators=[('svm', svm_clf), ('rf', rf_clf), ('gb', gb_clf)],
#     voting='soft'
# )

In [None]:
# ensemble_clf.fit(X_train, y_train)

In [None]:
# y_pred = ensemble_clf.predict(X_test)

In [None]:
# # Predykcje klasowe i probabilistyczne
# y_pred = ensemble_clf.predict(X_test)
# y_proba = ensemble_clf.predict_proba(X_test)  # potrzebne do LogLoss

In [None]:

# # Macierz pomy≈Çek
# cm = confusion_matrix(y_test, y_pred)
# print("Macierz pomy≈Çek:\n", cm)

In [None]:
# # Obliczamy metryki na poziomie makro (≈õrednia po klasach)
# accuracy = accuracy_score(y_test, y_pred)
# precision = precision_score(y_test, y_pred, average='macro')
# recall = recall_score(y_test, y_pred, average='macro')
# f1 = f1_score(y_test, y_pred, average='macro')

# print(f"Accuracy: {accuracy:.4f}")
# print(f"Precision (macro): {precision:.4f}")
# print(f"Recall / Sensitivity (macro): {recall:.4f}")
# print(f"F1-score (macro): {f1:.4f}")

In [None]:
# specificity_per_class = []
# for i in range(len(cm)):
#     TP = cm[i, i]
#     FP = cm[:, i].sum() - TP
#     FN = cm[i, :].sum() - TP
#     TN = cm.sum() - (TP + FP + FN)
#     specificity = TN / (TN + FP) if (TN + FP) > 0 else 0
#     specificity_per_class.append(specificity)

# print("Specificity per class:", specificity_per_class)
# print(f"Average specificity: {np.mean(specificity_per_class):.4f}")

In [None]:
# # Konwersja etykiet na numeryczne, je≈õli trzeba
# from sklearn.preprocessing import LabelEncoder
# le = LabelEncoder()
# y_test_num = le.fit_transform(y_test)
# y_pred_num = le.transform(y_pred)

# mse = mean_squared_error(y_test_num, y_pred_num)
# rmse = np.sqrt(mse)

# print(f"MSE: {mse:.4f}")
# print(f"RMSE: {rmse:.4f}")

In [None]:
# # LogLoss (wymaga y_test w formie numerycznej oraz prawdopodobie≈Ñstw z modelu)
# logloss = log_loss(y_test_num, y_proba)
# print(f"LogLoss: {logloss:.4f}")