# 超参数和模型参数
* 超参数: 在算法运行前需要确定的参数
* 模型参数: 算法过程中学习的参数

* kNN算法没有模型参数
* kNN算法中的k是典型的超参数

In [10]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

In [11]:
# Load scikit-learn's bundled digits dataset and split it into the
# feature matrix (X) and the label vector (y).
digits = datasets.load_digits()
X, y = digits.data, digits.target

In [12]:
# Hold out 20% of the samples as the test set.
# random_state pins the shuffle so the split -- and therefore every score
# computed from it -- is identical on each Restart & Run All. Without it the
# "best" hyperparameters reported by the search cells change from run to run.
# NOTE: outputs recorded before this seed was added came from an unseeded
# split and will differ after re-running.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=666
)

In [13]:
# Baseline: a kNN classifier with a hand-picked k (a hyperparameter) of 4.
# fit() returns the estimator itself, so construction and training are chained.
my_knn_clf = KNeighborsClassifier(n_neighbors=4).fit(x_train, y_train)

# Accuracy on the held-out test split; shown as the cell's output.
my_knn_clf.score(x_test, y_test)

0.9888888888888889

## 寻找好的超参数
- 领域知识
- 经验数值
- 实验搜索

## 寻找最好的k

In [14]:
# Exhaustive search over k = 1..10, keeping the k with the highest test accuracy.
# The strict ">" means that on ties the smallest k wins.
# NOTE(review): k is selected using scores on the test split, so best_score is
# an optimistic estimate; a validation split or cross-validation would avoid this.
best_k, best_score = -1, 0.0
for k in range(1, 11):
    knn_clf = KNeighborsClassifier(n_neighbors=k).fit(x_train, y_train)
    score = knn_clf.score(x_test, y_test)
    if score > best_score:
        best_k, best_score = k, score

print("best_k=", best_k)
print("best_score=", best_score)

best_k= 1
best_score= 0.9916666666666667


## kNN的距离问题

In [16]:
# Search jointly over the neighbor weighting scheme and k (1..10).
# "uniform" and "distance" are passed as the `weights` argument of
# KNeighborsClassifier. "uniform" is tried first, so with the strict ">"
# comparison it wins any tie against "distance".
# NOTE(review): the winner is chosen on the test split, so best_score is optimistic.
best_method, best_k, best_score = "", -1, 0.0
for method in ["uniform", "distance"]:
    for k in range(1, 11):
        knn_clf = KNeighborsClassifier(n_neighbors=k, weights=method).fit(x_train, y_train)
        score = knn_clf.score(x_test, y_test)
        if score > best_score:
            best_k, best_score, best_method = k, score, method

print("best_k=", best_k)
print("best_score=", best_score)
print("best_method=", best_method)

best_k= 1
best_score= 0.9916666666666667
best_method= uniform


## 明可夫斯基距离（Minkowski distance）

In [19]:
%%time
best_score = 0.0
best_k=-1
best_p = -1

for k in range(1,11):
    for p in range(1,6):    
        knn_clf = KNeighborsClassifier(n_neighbors=k,weights="distance",p=p)
        knn_clf.fit(x_train,y_train)
        score = knn_clf.score(x_test,y_test)
        if score>best_score:
            best_k=k
            best_score=score
            best_p=p
        
print("best_k=",best_k)
print("best_score=",best_score)
print("best_p=",best_p)

best_k= 5
best_score= 0.9944444444444445
best_p= 4
CPU times: user 44 s, sys: 3.94 ms, total: 44 s
Wall time: 44 s
