In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score

# Import Twojej klasy
from mini_auto_ml import MiniAutoML

In [6]:
# --------------------------------------------------
# 1. Load the data
# --------------------------------------------------
# Features and target live in two separate CSV files next to the notebook tree.
X = pd.read_csv('../Datasets/X.csv')
y = pd.read_csv('../Datasets/y.csv')

# The target comes back as a one-column DataFrame; keep only its first column
# so that downstream code receives a Series.
y = y.iloc[:, 0] if isinstance(y, pd.DataFrame) else y

print(f"Dane wczytane: X={X.shape}, y={y.shape}")

# --------------------------------------------------
# Train/test split: 20% held out, fixed seed so the split is repeatable.
# --------------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(f"Train: {X_train.shape}, Test: {X_test.shape}")

Dane wczytane: X=(3481, 16), y=(3481,)
Train: (2784, 16), Test: (697, 16)


In [7]:
from sklearn.linear_model import LogisticRegression

In [8]:

# --------------------------------------------------
# 2. Run MiniAutoML
# --------------------------------------------------
# NOTE(review): the output stored under this cell ends with
# "ValueError: Input X contains NaN" right after the "Krok 4: Refit" log line.
# The cause is inside mini_auto_ml (not visible here) -- confirm how missing
# values are handled before the refit step.
mini = MiniAutoML(
    time_budget=30,                    # seconds allowed for the model search (e.g. 120)
    cv_folds=5,                        # number of cross-validation folds
    task='classification',             # task type -- important to set correctly
    method='xgboost',                  # feature-selection method
    final_estimator=LogisticRegression(random_state=42, penalty='l2', C=0.1),
    ensemble_type='pseudo_autogluon',
)

# Fit runs the whole chain: preprocessing -> feature selection -> model search -> refit.
result = mini.fit(X_train, y_train)

# List the models that were selected as the stacking base (if the attribute exists).
print("\n>>> Wybrane modele (Baza do stackingu):")
for position, model in enumerate(getattr(mini, 'final_models', ()), start=1):
    print(f"   {position}. {model.__class__.__name__}")


[I 2026-01-26 11:50:25,749] A new study created in memory with name: no-name-801d9012-aa76-4152-8117-1dfbb7191bbd


[MiniAutoML] Krok 3: Szukanie Modeli (Budżet: 30s)...
[Sanitizer] Czyszczenie danych dla CatBoosta (klucz: 'cat_not_enc')...
[Etap 1: Hyperband] Start. Budżet: 12.0s


[I 2026-01-26 11:50:26,180] Trial 5 finished with value: -0.6957063127243088 and parameters: {'config_name': 'LogisticRegression_5'}. Best is trial 5 with value: -0.6957063127243088.
[I 2026-01-26 11:50:26,196] Trial 9 finished with value: -0.6957063127243088 and parameters: {'config_name': 'LogisticRegression_5'}. Best is trial 5 with value: -0.6957063127243088.
[I 2026-01-26 11:50:27,314] Trial 4 pruned. 
[I 2026-01-26 11:50:27,362] Trial 13 pruned. 
[I 2026-01-26 11:50:27,831] Trial 7 pruned. 
[I 2026-01-26 11:50:27,869] Trial 8 finished with value: -0.6926335162386557 and parameters: {'config_name': 'LightGBM_6'}. Best is trial 8 with value: -0.6926335162386557.
[I 2026-01-26 11:50:27,869] Trial 11 finished with value: -0.6926335162386557 and parameters: {'config_name': 'LightGBM_6'}. Best is trial 8 with value: -0.6926335162386557.
[I 2026-01-26 11:50:27,883] Trial 1 finished with value: -0.6934823618289329 and parameters: {'config_name': 'LightGBM_4'}. Best is trial 8 with value:

[Etap 2: Full CV] Weryfikacja 4 modeli.
[Etap 2] Selekcja finałowa:
 Akceptacja: LightGBM_6 (Grupa: fast_boostings) -> Score: 0.5321
 Akceptacja: LogisticRegression_5 (Grupa: simple_models) -> Score: 0.5317
 Akceptacja: XGBoost_7 (Grupa: fast_boostings) -> Score: 0.5041
[Etap 3: Greedy] Start. Kandydaci: 3. Limit: -129.28s
[Etap 3] Koniec. Wybrano 3 modeli.
[ModelSelector] Koniec procesu. Wybrano 3 modeli.
[MiniAutoML] Krok 4: Refit (3 modeli)...


ValueError: Input X contains NaN.
LogisticRegression does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values

In [5]:
# Show the value returned by mini.fit(); the stored repr below is a MiniAutoML object.
# NOTE(review): this cell's execution count is lower than the fit cell's, so the
# notebook was run out of order -- confirm with Restart Kernel -> Run All.
result

<mini_auto_ml.MiniAutoML at 0x1dd435bb910>

In [6]:
# Training-set accuracy: share of predictions that equal the true label.
(mini.predict(X_train) == y_train).mean()

np.float64(0.6954022988505747)

In [7]:
# Predictions for the held-out test split; compared against y_test in later cells.
y_pred = mini.predict(X_test)

In [8]:
# Test-set accuracy: share of predictions that equal the true label.
(y_pred == y_test).mean()

np.float64(0.5710186513629842)

In [9]:
# Probability output for the test split; indexed as a 2-D array ([:, 1]) when
# computing ROC AUC later.
# NOTE(review): presumably column 1 is the positive class -- confirm class order.
y_pred_probas = mini.predict_proba(X_test)

In [10]:
from sklearn.metrics import roc_auc_score

In [11]:
# ROC AUC on the test split, scored with the second probability column.
roc_auc_score(y_test, y_pred_probas[:, 1])

np.float64(0.5771344845284019)

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of true vs. predicted labels on the test split.
# confusion_matrix expects label predictions through `y_pred`; it has no
# `y_score` parameter, so the previous keyword raised a TypeError.
confusion_matrix(y_true=y_test, y_pred=y_pred)