# 超参数：运行机器学习算法之前需要指定的参数
模型参数：在算法运行过程中学习到的参数

In [1]:
import numpy as np
from sklearn import datasets

In [2]:
# Load the digits dataset bundled with scikit-learn and split the returned
# object into the feature matrix (X) and the label vector (y).
digits = datasets.load_digits()
X, y = digits.data, digits.target

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
# Hold out 20% of the samples as a test split; the fixed random_state keeps
# the split (and therefore every score below) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=666,
)

In [5]:
from sklearn.neighbors import KNeighborsClassifier

In [6]:
# Baseline kNN classifier with k = 3; every other hyperparameter keeps its default.
KN_Classifier = KNeighborsClassifier(n_neighbors=3)

In [7]:
# Fit the baseline classifier on the training split; the value returned by
# fit() is displayed as the cell output.
KN_Classifier.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=3, p=2,
           weights='uniform')

In [8]:
# Score the fitted baseline on the held-out test split (for a classifier,
# score() reports mean accuracy).
KN_Classifier.score(X_test, y_test)

0.9888888888888889

# 调参：寻找最好score的超参数k

In [9]:
# Tune k: fit one classifier per k in 1..10 and remember the k with the
# highest score on the test split.
# NOTE(review): the winner is chosen by test-set score, so best_score is an
# optimistic estimate; consider a validation split or cross-validation.
best_score, best_k = 0.0, -1
for k in range(1, 11):
    KN_Classifier = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)
    score = KN_Classifier.score(X_test, y_test)
    # Strict comparison: on a tie the smaller (earlier) k is kept.
    if score > best_score:
        best_score, best_k = score, k

print("best_score=", best_score)
print("best_k=", best_k)

best_score= 0.9916666666666667
best_k= 4


# 是否考虑距离权重weights 

In [10]:
# Tune the voting scheme together with k: for each weights option
# ("uniform", then "distance") try k in 1..10 and keep the best-scoring pair.
# NOTE(review): the winner is chosen by test-set score, so best_score is an
# optimistic estimate; consider a validation split or cross-validation.
best_method, best_score, best_k = "", 0.0, -1
for method in ("uniform", "distance"):
    for k in range(1, 11):
        KN_Classifier = KNeighborsClassifier(n_neighbors=k, weights=method)
        KN_Classifier.fit(X_train, y_train)
        score = KN_Classifier.score(X_test, y_test)
        # Strict comparison: on a tie the combination seen first is kept.
        if score > best_score:
            best_method, best_score, best_k = method, score, k

print("best_method=", best_method)
print("best_score=", best_score)
print("best_k=", best_k)

best_method= uniform
best_score= 0.9916666666666667
best_k= 4


# 搜索明可夫斯基距离的超参数 p

In [11]:
%%time

# Tune the Minkowski exponent p together with k. weights is fixed to
# "distance" for this search; k runs over 1..10 (outer loop) and p over 1..5
# (inner loop).
# NOTE(review): the winner is chosen by test-set score, so best_score is an
# optimistic estimate; consider a validation split or cross-validation.
best_p, best_score, best_k = -1, 0.0, -1
for k in range(1, 11):
    for p in range(1, 6):
        KN_Classifier = KNeighborsClassifier(n_neighbors=k, weights="distance", p=p)
        KN_Classifier.fit(X_train, y_train)
        score = KN_Classifier.score(X_test, y_test)
        # Strict comparison: on a tie the (k, p) pair seen first is kept.
        if score > best_score:
            best_p, best_score, best_k = p, score, k

print("best_p=", best_p)
print("best_score=", best_score)
print("best_k=", best_k)

best_p= 2
best_score= 0.9888888888888889
best_k= 3
Wall time: 23 s
