In [1]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier


# 加载数据

In [2]:
data_raw = load_iris()

## 获取特征，标签数据 + 特征工程（标准化）

In [3]:
# 标准化数据
# 特征
data_feature = StandardScaler().fit_transform(data_raw.data)
# 标签
data_label = data_raw.target

In [4]:
data_feature[:5]

array([[-0.90068117,  1.01900435, -1.34022653, -1.3154443 ],
       [-1.14301691, -0.13197948, -1.34022653, -1.3154443 ],
       [-1.38535265,  0.32841405, -1.39706395, -1.3154443 ],
       [-1.50652052,  0.09821729, -1.2833891 , -1.3154443 ],
       [-1.02184904,  1.24920112, -1.34022653, -1.3154443 ]])

In [5]:
data_label

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [6]:
data_raw.target_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

# 训练集、测试集分割

In [7]:
# 验证集占数据集的20%
x_train, x_test, y_train, y_test = train_test_split(data_feature, data_label, test_size=0.2)

# 训练模型+交叉验证+网格搜索

## 实例化模型对象以及网格搜索

In [8]:
estimator = KNeighborsClassifier()
# 需要搜索的参数：
parameter = {"n_neighbors":[3, 5, 10, 15]}
# 3折交叉验证+网格搜索
estimator = GridSearchCV(estimator, param_grid=parameter, cv=3)

## 使用网格搜索得到的参数进行训练 

In [9]:
estimator.fit(x_train, y_train)

# 预测

In [10]:
result = estimator.predict(x_test)

compare = [i==j for i,j in zip(result, y_test)]

print(result)
print(y_test)
print(compare)

[2 1 2 0 1 0 2 1 0 2 2 2 0 1 0 0 0 0 2 0 0 1 2 1 0 1 2 1 2 0]
[2 1 2 0 1 0 2 1 0 2 2 2 0 2 0 0 0 0 2 0 0 1 2 1 0 1 2 1 2 0]
[np.True_, np.True_, np.True_, np.True_, np.True_, np.True_, np.True_, np.True_, np.True_, np.True_, np.True_, np.True_, np.True_, np.False_, np.True_, np.True_, np.True_, np.True_, np.True_, np.True_, np.True_, np.True_, np.True_, np.True_, np.True_, np.True_, np.True_, np.True_, np.True_, np.True_]


In [11]:
print(estimator.best_params_)

{'n_neighbors': 10}


In [12]:
score = estimator.score(x_test,y_test)
print(score)

0.9666666666666667
