In [120]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split

from sklearn import svm
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier

In [121]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)

In [122]:
# One (label, estimator) row per model. `names` and `classifiers` are both
# derived from this table so the two lists always stay aligned.
# NOTE(review): no feature scaling is visible before these estimators are
# fitted; with max_iter=1000 the linear SVC may stop before converging on
# unscaled inputs - confirm.
model_table = [
    ('SVM linear', svm.SVC(kernel='linear', max_iter=1000)),
    ('Nearest Neighbors 3', KNeighborsClassifier(n_neighbors=3)),
    ('Linear Discriminant Analysis', LinearDiscriminantAnalysis(solver='svd')),
]

names = [label for label, _ in model_table]
classifiers = [estimator for _, estimator in model_table]

In [123]:
from sklearn.datasets import load_wine

# Fetch the wine dataset object shipped with scikit-learn.
data = load_wine()

# Feature matrix, class labels and the column names of the features.
X = data.data
y = data.target
feature_names = data.feature_names

In [124]:
# Hold out 30% of the samples for testing; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [125]:
# Per-classifier results, keyed by classifier name:
# the winning feature-name pair and the accuracy it reached.
best_features, best_accuracies = {}, {}

In [126]:
from itertools import combinations
from sklearn.metrics import accuracy_score, classification_report

# Exhaustive search: for every classifier, fit on each pair of feature columns
# and remember the pair with the highest accuracy on the held-out split.
# Results are stored in best_features / best_accuracies under the classifier name.
# NOTE(review): the winning pair is chosen by its score on X_test / y_test, so
# the reported accuracy is an optimistic estimate. Consider selecting the pair
# with cross-validation on the training split and using the test split only
# for the final report.
for clf_name, clf in zip(names, classifiers):
    # Start below any reachable accuracy so the first pair is always recorded.
    # Starting at 0 left best_pair as None (and crashed further down) whenever
    # no pair scored strictly above zero.
    best_accuracy = -1.0
    best_pair = None

    for feature_pair in combinations(range(X.shape[1]), 2):
        pair_columns = list(feature_pair)  # build the column index list once per pair
        X_train_pair = X_train[:, pair_columns]
        X_test_pair = X_test[:, pair_columns]

        clf.fit(X_train_pair, y_train)
        y_pred = clf.predict(X_test_pair)
        accuracy = accuracy_score(y_test, y_pred)

        # Strict comparison: on ties the earliest pair in iteration order wins.
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_pair = feature_pair

    if best_pair is None:
        # Only reachable when there are no pairs to try at all.
        raise ValueError("At least two feature columns are required to search feature pairs")

    best_features[clf_name] = [feature_names[i] for i in best_pair]
    best_accuracies[clf_name] = best_accuracy
    print(f"{clf_name} - Лучшая пара признаков: {best_features[clf_name]}, Точность: {best_accuracy:.4f}")

    # The search leaves clf fitted on the last pair tried, so refit it on the
    # winning pair before producing the per-class report.
    best_columns = list(best_pair)
    X_train_best = X_train[:, best_columns]
    X_test_best = X_test[:, best_columns]
    clf.fit(X_train_best, y_train)
    y_pred_best = clf.predict(X_test_best)
    print(f"\nClassification Report for {clf_name}:\n{classification_report(y_test, y_pred_best)}\n")

SVM linear - Лучшая пара признаков: ['alcohol', 'flavanoids'], Точность: 0.9444

Classification Report for SVM linear:
              precision    recall  f1-score   support

           0       0.90      1.00      0.95        19
           1       1.00      0.90      0.95        21
           2       0.93      0.93      0.93        14

    accuracy                           0.94        54
   macro avg       0.94      0.94      0.94        54
weighted avg       0.95      0.94      0.94        54


Nearest Neighbors 3 - Лучшая пара признаков: ['alcohol', 'flavanoids'], Точность: 0.9074

Classification Report for Nearest Neighbors 3:
              precision    recall  f1-score   support

           0       0.90      1.00      0.95        19
           1       0.94      0.81      0.87        21
           2       0.87      0.93      0.90        14

    accuracy                           0.91        54
   macro avg       0.91      0.91      0.91        54
weighted avg       0.91      0.91   

In [127]:
## Summary
print("\nИтоговые лучшие пары признаков и точности:")
for clf_name, top_pair in best_features.items():
    top_accuracy = best_accuracies[clf_name]
    print(f"{clf_name}:\n\tПара признаков - {top_pair},\n\taccuracy - {top_accuracy:.4f}")

## LDA with the pair 'hue', 'proline' performed best.
## Most likely this is because that pair separates the classes best.
## Nearest Neighbors 3 performed worst, presumably because the chosen number of
## neighbors is not optimal for this dataset (not verified in this notebook).


Итоговые лучшие пары признаков и точности:
SVM linear:
	Пара признаков - ['alcohol', 'flavanoids'],
	accuracy - 0.9444
Nearest Neighbors 3:
	Пара признаков - ['alcohol', 'flavanoids'],
	accuracy - 0.9074
Linear Discriminant Analysis:
	Пара признаков - ['hue', 'proline'],
	accuracy - 0.9630
