In [None]:
from sklearn import svm

In [None]:
# Minimal SVC sanity check: two training points, one per class.
X = [[0, 0], [1, 1]]
y = [0, 1]
# fit() returns the estimator itself, so construction and training are chained.
ss = svm.SVC().fit(X, y)
# Predict the class of an unseen point; as the last expression it is the cell output.
ss.predict([[2, 2]])

# 手写数字识别

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.model_selection import train_test_split

# 1 获取数据

In [None]:
# Read the training table from a CSV path relative to the working directory.
train = pd.read_csv('./data/svm/train.csv')
# Preview the first rows to inspect the column layout.
train.head()

## 1.1 确定目标值、特征值

In [None]:
# Feature values: every column of `train` except the first one.
train_image = train.iloc[:,1:]
# Preview the selected feature columns.
train_image.head()

In [None]:
# Target values: the first column of `train`.
train_label = train.iloc[:,0]

## 1.2 查看具体图像

In [None]:
def to_plot(n):
    """Display row ``n`` of ``train_image`` as a 28x28 image with the axes hidden.

    ``train_image`` starts out as a pandas DataFrame but is later rebound to a
    NumPy array by the normalisation step. Indexing through ``np.asarray``
    works for both, so this helper stays usable after that rebinding instead
    of failing on the missing ``.iloc`` attribute.

    Args:
        n: Positional index of the row to display.
    """
    # Positional row lookup that is valid for DataFrame and ndarray alike.
    pixels = np.asarray(train_image)[n].reshape(28, 28)
    plt.imshow(pixels)
    plt.axis('off')
    plt.show()

In [None]:
# Render the sample at positional index 3.
to_plot(3)

# 2 数据基本处理
## 2.1 数据归一化处理

In [None]:
# Normalise the feature values and convert features and labels to NumPy arrays.
# Both arrays are derived from `train` (same column selection as used when
# `train_image` / `train_label` were first created) rather than from their own
# previous values, so re-running this cell yields the same result instead of
# failing on `ndarray.values`.
train_image = train.iloc[:, 1:].values / 255  # divide by 255; assumes 8-bit pixel values — TODO confirm
train_label = train.iloc[:, 0].values

## 2.2 训练集划分

In [None]:
# Split features and labels into training and validation parts with a fixed seed.
# NOTE(review): test_size=0.8 puts 80% of the rows into x_val/y_val and leaves only
# 20% for training. This may be deliberate to keep SVC training fast; if an 80/20
# train/validation split was intended, train_size=0.8 is the setting to use — confirm.
x_train, x_val, y_train, y_val = train_test_split(train_image, train_label, test_size=0.8, random_state=0) 

# 3 特征降维和模型训练

In [None]:
# 多次使用pca,确定最后的最优模型
import time
from sklearn.decomposition import PCA

def n_component_analysis(n, x_train, y_train, x_val, y_val):
    """Fit PCA followed by an SVC for one ``n_components`` setting and score it.

    Args:
        n: Value forwarded to ``PCA(n_components=n)``.
        x_train: Training features; the PCA is fitted on these only.
        y_train: Training labels.
        x_val: Validation features, projected with the PCA fitted on ``x_train``.
        y_val: Validation labels.

    Returns:
        The result of ``SVC.score`` on the projected validation data.

    The timer spans PCA fitting, both projections, SVC training and scoring;
    the elapsed time is printed truncated to whole seconds.
    """
    started_at = time.time()

    # Dimensionality reduction, fitted on the training split only.
    reducer = PCA(n_components=n)
    print('特征降维，传递的参数为{}'.format(n))
    reducer.fit(x_train)

    # Project both splits with the same fitted transform.
    train_reduced = reducer.transform(x_train)
    val_reduced = reducer.transform(x_val)

    # Train an SVC with default settings on the reduced training data.
    print('开始使用svc训练')
    classifier = svm.SVC()
    classifier.fit(train_reduced, y_train)

    # Score on the reduced validation data.
    accuracy = classifier.score(val_reduced, y_val)

    # Stop the timer and report the result.
    elapsed_seconds = int(time.time() - started_at)
    print('准确率是:{},消耗时间:{}'.format(accuracy, elapsed_seconds))

    return accuracy

In [None]:
# Try several n_components values to find a reasonable one:
# five evenly spaced candidates from 0.70 to 0.9.
n_s = np.linspace(0.70, 0.9, num=5)

# Validation accuracy for each candidate, in the same order as n_s.
accuracy = [
    n_component_analysis(n, x_train, y_train, x_val, y_val)
    for n in n_s
]

In [None]:
# Visualise the accuracy: one red line of accuracy against the n_components candidates.
accuracy_curve = np.array(accuracy)
plt.plot(n_s, accuracy_curve, 'r')
plt.show()

经过图形展示，选择合理的 `n_components`，最后选择 0.8。

# 4 确定最优模型

In [None]:
# Fit the final PCA on the training split with the chosen setting;
# fit() returns the fitted estimator, so it is bound directly.
pca = PCA(n_components=0.80).fit(x_train)
# Display the number of components the fitted PCA kept.
pca.n_components_

In [None]:
# Project the training and validation features with the PCA fitted on x_train.
x_train_pca = pca.transform(x_train)
# Note: `x_var_pca` holds the transformed validation split (x_val).
x_var_pca = pca.transform(x_val)

In [None]:
# Shape of the reduced training features.
x_train_pca.shape

In [None]:
# Shape of the reduced validation features.
x_var_pca.shape

In [None]:
# Train the final model: an SVC with default settings on the PCA-reduced
# training split. fit() returns the estimator itself, so it is bound directly.
ss1 = svm.SVC().fit(x_train_pca, y_train)
# Score on the reduced validation split; shown as the cell output.
ss1.score(x_var_pca, y_val)