# 支持向量机实现人像分类


In [None]:
# Image preprocessing
# Start by downloading the face dataset
from sklearn.datasets import fetch_lfw_people

# Load the dataset; min_faces_per_person=60 restricts it to people with at
# least 60 pictures.
faces = fetch_lfw_people(min_faces_per_person=60)
# Display the class names and the shape of the image array as the cell output.
faces.target_names, faces.images.shape

需要注意，在机器学习建模过程中，要尽量避免特征数量远大于样本数量的情形：这种情况下模型容易过拟合，从而影响所训练模型的质量。

In [None]:
from matplotlib import pyplot as plt
%matplotlib inline

### 代码开始 ### (≈4 行代码)
fig, axes = plt.subplots(1, 5, figsize=(12, 6))
for i, image in enumerate(faces.images[:5]):
    axes[i].imshow(image)
    axes[i].set_xlabel(faces.target_names[faces.target[i]])
### 代码结束 ###

In [None]:
# Reduce the number of features
from sklearn.decomposition import PCA

# Run as-is: reduce the data to 150 features.
# NOTE(review): the PCA is fitted on all of faces.data, i.e. before the
# train/test split performed later in this file, so held-out samples influence
# the projection — consider fitting on the training split only.
pca = PCA(n_components=150, whiten=True, random_state=42)
pca_data = pca.fit_transform(faces.data)
# Display the shape of the reduced data as the cell output.
pca_data.shape

In [None]:
# Split the dataset
from sklearn.model_selection import train_test_split

### Code starts ### (≈1 line of code)
# Hold out 20% of the PCA-reduced samples for testing; the fixed seed keeps
# the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    pca_data,
    faces.target,
    test_size=0.2,
    random_state=42,
)
### Code ends ###

# Display the shapes of the four resulting arrays as the cell output.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

In [None]:
# Train the model
### Code starts ### (≈4 lines of code)
from sklearn.svm import SVC

# fit() returns the estimator itself, so construction and training are chained.
model = SVC(C=10, gamma=0.001).fit(X_train, y_train)
# Score on the held-out test split; the value is displayed as the cell output.
model.score(X_test, y_test)
### Code ends ###