## 交叉验证(cross validation)

## 超参数搜索-网格搜索(Grid Search)

In [15]:
import sklearn 
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

In [16]:
# 1. Load the data: the iris dataset bundled with scikit-learn
iris = load_iris()

In [17]:
# 2. Split the dataset into training and test subsets.
# No test_size is passed, so scikit-learn's default split proportion applies;
# random_state pins the shuffle so the split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    random_state=22,
)

In [18]:
# 3. Feature engineering: standardization
transfer = StandardScaler()

# Learn the scaling statistics from the training data, then rescale that data.
transfer.fit(x_train)
x_train = transfer.transform(x_train)

# The test set must receive exactly the same transformation as the training
# set, so it is only transformed with the already-fitted scaler -- no fit().
x_test = transfer.transform(x_test)

In [23]:
# 4. KNN estimator
# n_neighbors=7 is only the starting value: the grid-search cell below searches
# over n_neighbors itself.
estimator = KNeighborsClassifier(n_neighbors=7)

In [24]:
# 5. Grid search with cross-validation
# Candidate neighbor counts to try: the odd values 1, 3, ..., 11.
param_dict = {"n_neighbors": list(range(1, 12, 2))}

# Wrap the KNN estimator in a grid search evaluated with cv=10 folds.
# Note that the name `estimator` is rebound to the search object here.
estimator = GridSearchCV(
    estimator=estimator,
    param_grid=param_dict,
    cv=10,
)

In [25]:
# 6. Fit: run the grid search on the (standardized) training data.
# Left as the cell's trailing expression so the notebook shows the fitted object's repr.
estimator.fit(x_train, y_train)



GridSearchCV(cv=10, error_score='raise-deprecating',
             estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30,
                                            metric='minkowski',
                                            metric_params=None, n_jobs=None,
                                            n_neighbors=7, p=2,
                                            weights='uniform'),
             iid='warn', n_jobs=None,
             param_grid={'n_neighbors': [1, 3, 5, 7, 9, 11]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [26]:
# 7. Model evaluation
# Method 1: compare the predicted labels with the true labels directly
y_predict = estimator.predict(x_test)

print("y_predict:\n", y_predict)
print("直接比对真实值和预测值:\n", y_test == y_predict)

# Method 2: compute the score on the held-out test set
score = estimator.score(x_test, y_test)
print("准确率为:\n", score)

# Inspect what the grid search found.
# The labels below end in "\n"; the original used "/n", which is not an escape
# sequence and was printed literally instead of starting a new line.
print("最佳参数：\n", estimator.best_params_)
print("最佳结果：\n", estimator.best_score_)
print("最佳估计器：\n", estimator.best_estimator_)
print("交叉验证结果：\n", estimator.cv_results_)

y_predict:
 [0 2 1 2 1 1 1 1 1 0 2 1 2 2 0 2 1 1 1 1 0 2 0 1 2 0 2 2 2 2 0 0 1 1 1 0 0
 0]
直接比对真实值和预测值:
 [ True  True  True  True  True  True  True False  True  True  True  True
  True  True  True  True  True  True False  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True]
准确率为:
 0.9473684210526315
最佳参数：/n {'n_neighbors': 7}
最佳结果：/n 0.9642857142857143
最佳估计器：/n KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=7, p=2,
                     weights='uniform')
交叉验证结果：/n {'mean_fit_time': array([0.00040574, 0.00032601, 0.00028596, 0.00025895, 0.00025907,
       0.00027893]), 'std_fit_time': array([1.08551002e-04, 6.02981468e-05, 4.21912243e-05, 9.95519625e-06,
       5.12372565e-06, 8.12103834e-05]), 'mean_score_time': array([0.00102944, 0.00086834, 0.00073111, 0.00074193, 0.00078964,
       0.00071754]), 'std_score_time': array([2.29774804