# In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Модели «по умолчанию», которые обычно работают хорошо из коробки
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

class DefaultClassifier:
    def __init__(self, test_size=0.2, random_state=42):
        self.test_size = test_size
        self.random_state = random_state
        self.scaler = StandardScaler()
        self.models = {
            'LogisticRegression': LogisticRegression(max_iter=1000),
            'RandomForest': RandomForestClassifier(n_estimators=200, random_state=random_state),
            'SVM': SVC(kernel='rbf', probability=True),
            'KNN': KNeighborsClassifier(),
            'NaiveBayes': GaussianNB()
        }
        self.best_model = None
        self.best_score = 0
        self.results = {}

    def fit(self, X, y):
        # Разбиение данных
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=self.test_size, random_state=self.random_state, stratify=y
        )

        # Масштабирование (важно для логистической регрессии, SVM, KNN)
        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scalер.transform(X_test)

        print("Обучение моделей...\n")
        print("-" * 60)

        for name, model in self.models.items():
            # Для моделей, чувствительных к масштабу, используем отмасштабированные данные
            if name in ['LogisticRegression', 'SVM', 'KNN']:
                X_tr = X_train_scaled
                X_te = X_test_scaled
            else:
                X_tr = X_train
                X_te = X_test

            # Обучение
            model.fit(X_tr, y_train)
            y_pred = model.predict(X_te)

            # Оценка
            score = accuracy_score(y_test, y_pred)
            self.results[name] = score

            print(f"{name:20} → Accuracy: {score:.4f}")

            # Сохраняем лучшую модель
            if score > self.best_score:
                self.best_score = score
                self.best_model = model
                self.best_model_name = name
                self.X_test = X_te
                self.y_test = y_test
                self.y_pred = y_pred

        print("-" * 60)
        print(f"Лучшая модель: {self.best_model_name} → {self.best_score:.4f}")

    def report(self):
        if self.best_model is None:
            print("Сначала запустите .fit()")
            return

        print(f"\nПодробный отчёт для лучшей модели: {self.best_model_name}")
        print("\nClassification Report:")
        print(classification_report(self.y_test, self.y_pred))
        print("\nConfusion Matrix:")
        print(confusion_matrix(self.y_test, self.y_pred))

    def predict(self, X_new):
        # Предобрабатываем новые данные так же
        if hasattr(self.scaler, 'scale_'):
            if self.best_model_name in ['LogisticRegression', 'SVM', 'KNN']:
                X_new = self.scaler.transform(X_new)
        return self.best_model.predict(X_new)

    def predict_proba(self, X_new):
        if hasattr(self.scaler, 'scale_'):
            if self.best_model_name in ['LogisticRegression', 'SVM', 'KNN']:
                X_new = self.scaler.transform(X_new)
        return self.best_model.predict_proba(X_new)


# Usage example: compare the default models on the Iris dataset.
if __name__ == "__main__":
    from sklearn.datasets import load_iris

    # Load the features and class labels of the Iris dataset.
    data = load_iris()
    X, y = data.data, data.target

    # Train all candidate models, then print the report for the best one.
    clf = DefaultClassifier()
    clf.fit(X, y)
    clf.report()