**оценка классификатора и поиск хороших гиперпараметров для модели**


Евгений Борисов <esborisov@sevsu.ru>


# загружаем датасет

In [1]:
from sklearn.datasets import load_wine

# Request the wine dataset as a (features, labels) pair.
X, target = load_wine(return_X_y=True)

# Show the dimensions of the feature matrix.
display(X.shape)

(178, 13)

In [2]:
# Split the dataset into a training part and a test part of equal size.
from sklearn.model_selection import train_test_split

# NOTE(review): no random_state is passed, so the partition (and every metric
# computed from it afterwards) differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    target,
    test_size=0.5,
)
display((X_train.shape, X_test.shape))

# The unsplit arrays are not used after this point; drop the names.
del X, target

((89, 13), (89, 13))

# создаём и обучаем модель классификатора

In [3]:
from sklearn.neighbors import KNeighborsClassifier

# Baseline: a k-nearest-neighbours classifier with its default
# hyperparameters, fitted on the training part of the data.
model = KNeighborsClassifier().fit(
    X=X_train,
    y=y_train,
)

# тестируем классификатор

In [4]:
from sklearn.metrics import classification_report

# Predict labels for the held-out test samples with the baseline model.
predicted = model.predict(X_test)

# Print the table of classification quality metrics for the test set.
print(classification_report(y_true=y_test, y_pred=predicted))

              precision    recall  f1-score   support

           0       0.70      0.96      0.81        24
           1       0.79      0.66      0.72        35
           2       0.63      0.57      0.60        30

    accuracy                           0.71        89
   macro avg       0.71      0.73      0.71        89
weighted avg       0.71      0.71      0.70        89



# поиск хороших гиперпараметров модели

In [5]:
# Print the class documentation to see which parameters the model accepts.
help(KNeighborsClassifier)

Help on class KNeighborsClassifier in module sklearn.neighbors._classification:

class KNeighborsClassifier(sklearn.neighbors._base.KNeighborsMixin, sklearn.base.ClassifierMixin, sklearn.neighbors._base.NeighborsBase)
 |  KNeighborsClassifier(n_neighbors=5, *, weights='uniform', algorithm='auto', leaf_size=30, p=2, metric='minkowski', metric_params=None, n_jobs=None)
 |  
 |  Classifier implementing the k-nearest neighbors vote.
 |  
 |  Read more in the :ref:`User Guide <classification>`.
 |  
 |  Parameters
 |  ----------
 |  n_neighbors : int, default=5
 |      Number of neighbors to use by default for :meth:`kneighbors` queries.
 |  
 |  weights : {'uniform', 'distance'}, callable or None, default='uniform'
 |      Weight function used in prediction.  Possible values:
 |  
 |      - 'uniform' : uniform weights.  All points in each neighborhood
 |        are weighted equally.
 |      - 'distance' : weight points by the inverse of their distance.
 |        in this case, closer neighb

In [6]:
from sklearn.model_selection import GridSearchCV

# Candidate values for each hyperparameter; every combination is evaluated.
# NOTE(review): with the classifier's default p=2, 'minkowski' presumably
# duplicates 'euclidean' — confirm before trimming the list.
param_grid = {
    'metric': [
        'euclidean',
        'manhattan',
        'chebyshev',
        'minkowski',
        'hamming',
        'canberra',
        'braycurtis',
    ],
    'n_neighbors': [1, 3, 5, 9],
}

# Set up the search over the grid, then run it on the training data only.
grid = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=param_grid,
)
grid.fit(X_train, y_train)

# Report the best score found by the search and the parameters that gave it.
display(grid.best_score_)
display(grid.best_params_)

# Keep the estimator selected by the search for evaluation on the test set.
model_ = grid.best_estimator_

0.9549019607843137

{'metric': 'canberra', 'n_neighbors': 3}

In [7]:
# Check the outcome with the tuned model (compare with the earlier report
# that was produced with the default parameters).
predicted_ = model_.predict(X_test)

# Print the table of classification quality metrics for the test set.
print(classification_report(y_true=y_test, y_pred=predicted_))

              precision    recall  f1-score   support

           0       0.86      1.00      0.92        24
           1       1.00      0.86      0.92        35
           2       0.97      1.00      0.98        30

    accuracy                           0.94        89
   macro avg       0.94      0.95      0.94        89
weighted avg       0.95      0.94      0.94        89



In [8]:
# Optional (disabled): view three random rows of the grid-search results table.
# import pandas as pd
# pd.DataFrame(grid.cv_results_).sample(3)

In [9]:
# Optional (disabled): inspect DistanceMetric interactively (the trailing `?`
# is IPython help syntax, not plain Python).
# from sklearn.metrics import DistanceMetric

# # dir(DistanceMetric)
# DistanceMetric.get_metric?