In [1]:
# Load the Iris dataset and print the names of its feature columns.
from sklearn.datasets import load_iris
iris = load_iris()
print(iris.feature_names)

['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


In [2]:
# Print the names of the class labels.
print(iris.target_names)
# Uncomment to also dump the raw feature matrix and the label vector:
# print(iris.data)
# print(iris.target)

['setosa' 'versicolor' 'virginica']


In [4]:
# 加载包
from sklearn.pipeline import Pipeline
from sklearn.pipeline import FeatureUnion
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest

In [5]:
# Step 1: load the data.
# X holds the feature matrix, y the class labels.
iris = load_iris()
X = iris.data
y = iris.target

In [6]:
# Step 2: feature extraction.
# Principal component analysis: reduces dimensionality via singular value decomposition.
pca = PCA(n_components=2)

In [7]:
# Univariate selection by highest score.
# Computes the ANOVA F-value between each feature and the class labels,
# then keeps the k features with the largest F-value.
selection = SelectKBest(k=1)

In [8]:
# Combined approach: join PCA and the univariate selector into a single
# feature-extraction estimator.
# transformer_list is a list of tuples; in each tuple the first item is the
# transformer's name and the second item is the transformer itself.
combined_features = FeatureUnion(
    transformer_list=[
        ("pca", pca),
        ("univ_select", selection),
    ]
)

In [10]:
# With the combined estimator built, fit it on the data and then use it to
# transform the dataset into the combined feature space.
combined_features.fit(X, y)
X_features = combined_features.transform(X)

In [11]:
# Build a C-support vector classifier with a linear kernel.
svm = SVC(kernel="linear")

In [15]:
# Pipeline: the combined feature extraction step followed by the SVM step.
# The step names ("features", "svm") are the prefixes used to address
# nested parameters, e.g. "svm__C".
pipeline = Pipeline(
    steps=[
        ("features", combined_features),
        ("svm", svm),
    ]
)

In [13]:
# Finally, define the grid to search: the number of PCA components, the
# number of univariately selected features, and the SVM penalty C.
# Keys follow the <step>__<sub-step>__<parameter> naming of the pipeline.
param_grid = {
    "features__pca__n_components": [1, 2, 3],
    "features__univ_select__k": [1, 2],
    "svm__C": [0.1, 1, 10],
}

In [16]:
# Exhaustive grid search over param_grid: every parameter combination is
# fitted and scored with cross-validation on (X, y).
# cv is pinned to 3 folds explicitly: the library default changed from 3 to 5
# in scikit-learn 0.22, so relying on the default would no longer reproduce
# the 3-fold / 54-fit search on newer versions.
# verbose=10 prints one progress line per individual fit.
grid_search = GridSearchCV(
    pipeline,
    param_grid=param_grid,
    cv=3,
    verbose=10,
)
grid_search.fit(X, y)
# Show the pipeline refitted with the best-scoring parameter combination.
print(grid_search.best_estimator_)

Fitting 3 folds for each of 18 candidates, totalling 54 fits
[CV] features__univ_select__k=1, features__pca__n_components=1, svm__C=0.1 
[CV]  features__univ_select__k=1, features__pca__n_components=1, svm__C=0.1, score=0.9607843137254902, total=   0.0s
[CV] features__univ_select__k=1, features__pca__n_components=1, svm__C=0.1 
[CV]  features__univ_select__k=1, features__pca__n_components=1, svm__C=0.1, score=0.9019607843137255, total=   0.0s
[CV] features__univ_select__k=1, features__pca__n_components=1, svm__C=0.1 
[CV]  features__univ_select__k=1, features__pca__n_components=1, svm__C=0.1, score=0.9791666666666666, total=   0.0s
[CV] features__univ_select__k=1, features__pca__n_components=1, svm__C=1 
[CV]  features__univ_select__k=1, features__pca__n_components=1, svm__C=1, score=0.9411764705882353, total=   0.0s
[CV] features__univ_select__k=1, features__pca__n_components=1, svm__C=1 
[CV]  features__univ_select__k=1, features__pca__n_components=1, svm__C=1, score=0.92156862745098

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.1s remaining:    0.0s


[CV]  features__univ_select__k=1, features__pca__n_components=1, svm__C=10, score=0.9791666666666666, total=   0.0s
[CV] features__univ_select__k=2, features__pca__n_components=1, svm__C=0.1 
[CV]  features__univ_select__k=2, features__pca__n_components=1, svm__C=0.1, score=0.9607843137254902, total=   0.0s
[CV] features__univ_select__k=2, features__pca__n_components=1, svm__C=0.1 
[CV]  features__univ_select__k=2, features__pca__n_components=1, svm__C=0.1, score=0.9215686274509803, total=   0.0s
[CV] features__univ_select__k=2, features__pca__n_components=1, svm__C=0.1 
[CV]  features__univ_select__k=2, features__pca__n_components=1, svm__C=0.1, score=0.9791666666666666, total=   0.0s
[CV] features__univ_select__k=2, features__pca__n_components=1, svm__C=1 
[CV]  features__univ_select__k=2, features__pca__n_components=1, svm__C=1, score=0.9607843137254902, total=   0.0s
[CV] features__univ_select__k=2, features__pca__n_components=1, svm__C=1 
[CV]  features__univ_select__k=2, features

[CV] features__univ_select__k=2, features__pca__n_components=3, svm__C=10 
[CV]  features__univ_select__k=2, features__pca__n_components=3, svm__C=10, score=1.0, total=   0.0s
Pipeline(memory=None,
     steps=[('features', FeatureUnion(n_jobs=1,
       transformer_list=[('pca', PCA(copy=True, iterated_power='auto', n_components=2, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('univ_select', SelectKBest(k=2, score_func=<function f_classif at 0x000001C75E4D6048>))],
       transfo...,
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False))])


[Parallel(n_jobs=1)]: Done  54 out of  54 | elapsed:    0.6s finished
