In [15]:
from sklearn.datasets import load_digits
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn import preprocessing
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.model_selection import train_test_split
from sklearn.externals import joblib
from sklearn import metrics
import numpy as np
digits = load_digits()

In [16]:
#划分训练集和测试集
x_train,x_test,y_train,y_test = train_test_split(digits.data,digits.target)

In [18]:
#PCA降维再svm分类
pipe = Pipeline([('reduce_dim',PCA()),('classify',SVC())])
N_FEATURES_OPTIONS = [2,4,8,16]
C_OPTIONS = [0.01,0.1,1,10]
KERNEL = ['linear','rbf']
param_grid = [
    {
        'reduce_dim':[PCA(iterated_power=7)],
        'reduce_dim__n_components':N_FEATURES_OPTIONS,
        'classify__C':C_OPTIONS,
        'classify__kernel':KERNEL
    },
    {
        'reduce_dim':[SelectKBest(chi2)],
        'reduce_dim__k':N_FEATURES_OPTIONS,
        'classify__C':C_OPTIONS,
        'classify__kernel':KERNEL
    },
]
grid = GridSearchCV(pipe, cv=3, n_jobs=2, param_grid=param_grid)
grid.fit(x_train, y_train)
#保存模型
joblib.dump(grid,'pca_svm_model.m')

['pca_svm_model.m']

In [24]:
#预测
y_hat = grid.predict(x_test)
precision = metrics.precision_score(y_test,y_hat,average=None)
print('Precision:%s'%(precision))

[ 1.          0.97959184  1.          0.90243902  0.97619048  1.          1.
  1.          0.97826087  0.975     ]


In [25]:
#最好的estimator
grid.best_estimator_

Pipeline(steps=[('reduce_dim', PCA(copy=True, iterated_power=7, n_components=16, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('classify', SVC(C=0.01, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False))])

In [26]:
#数据进行缩放预处理再使用LinearSVC分类
pipe = Pipeline([('scaler',preprocessing.MinMaxScaler()),('classify',LinearSVC())])
# C_OPTIONS = [0.01,0.1,1,10]
C_OPTIONS = np.logspace(-3, 4, 8)
# RANDOM_STATE = [0,42]
# MAX_ITER = [800,1000,1200]
# PENALTY = ['l1','l2']
# LOSS = ['hinge','squared_hinge']
param_grid = {
        'classify__C':C_OPTIONS,
}
grid = GridSearchCV(pipe, param_grid ,cv=3, n_jobs=2)
grid.fit(x_train, y_train)
#保存模型
joblib.dump(grid,'scaler_linearsvm_model.m')

['scaler_linearsvm_model.m']

In [27]:
#预测
y_hat = grid.predict(x_test)
precision = metrics.precision_score(y_test,y_hat,average=None)
print('Precision:%s'%(precision))

Precision:[ 0.97674419  0.97916667  1.          0.97297297  0.97619048  0.97619048
  0.97826087  0.95744681  0.97916667  0.975     ]
