In [1]:
import os
import numpy as np
from scipy import sparse
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import mglearn
plt.rcParams['font.family'] = 'Malgun Gothic'
plt.rcParams['axes.unicode_minus'] = False
%config InlineBackend.figure_format = 'svg'
from mpl_toolkits.mplot3d import Axes3D, axes3d
from scipy.cluster import hierarchy
import seaborn as sns

In [41]:
from sklearn.svm import SVC
from sklearn.linear_model import Ridge
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.pipeline import make_pipeline
from sklearn.feature_selection import SelectPercentile, f_regression

In [6]:
from sklearn.datasets import load_breast_cancer

In [7]:
# Load the breast cancer dataset and hold out a test split.
# random_state=0 keeps the split reproducible across runs.
cancer = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    random_state=0,
)

In [8]:
# Learn the min/max scaling from the training split only, then apply the
# same fitted transformation to both the training and the test split.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled, X_test_scaled = scaler.transform(X_train), scaler.transform(X_test)

In [9]:
# Baseline: an SVC with default hyper-parameters trained on the scaled
# training data; the cell displays its score on the scaled test split.
svm = SVC()
svm.fit(X_train_scaled, y_train)
svm.score(X_test_scaled, y_test)

0.972027972027972

In [12]:
# Candidate values shared by both SVC hyper-parameters being tuned.
kind = [0.001, 0.01, 0.1, 1, 10, 100]
param_grid = dict(C=kind, gamma=kind)

# NOTE: X_train_scaled comes from a scaler that was fit on *all* of X_train,
# so every validation fold inside this 5-fold search has already influenced
# the scaling. The cross-validation score is therefore computed with
# information leaked from the held-out folds.
grid = GridSearchCV(SVC(), param_grid=param_grid, cv=5)
grid.fit(X_train_scaled, y_train)
grid.best_score_, grid.best_params_, grid.score(X_test_scaled, y_test)

(0.9812311901504789, {'C': 1, 'gamma': 1}, 0.972027972027972)

In [15]:
# Chain scaling and classification so the scaler is fit as part of the
# estimator, on whatever data the pipeline itself is fit on (raw X_train here).
pipe = Pipeline(steps=[
    ('scaler', MinMaxScaler()),
    ('svm', SVC()),
])
pipe.fit(X_train, y_train)
pipe.score(X_test, y_test)

0.972027972027972

In [17]:
# Pipeline parameters are addressed as "<step name>__<parameter name>".
param_grid = {
    'svm__C': kind,
    'svm__gamma': kind,
}

# Searching over the whole pipeline on the unscaled data means the scaler is
# re-fit inside each cross-validation split rather than once up front.
grid = GridSearchCV(pipe, param_grid=param_grid, cv=5)
grid.fit(X_train, y_train)
grid.best_score_, grid.score(X_test, y_test), grid.best_params_

(0.9812311901504789, 0.972027972027972, {'svm__C': 1, 'svm__gamma': 1})

In [21]:
# Synthetic data: X and y are independent draws from a normal distribution,
# so there is no real relationship between features and target.
rnd = np.random.RandomState(seed=0)
X = rnd.normal(size=(100, 10000))
y = rnd.normal(size=(100,))

# Keep the top 5% of features ranked by f_regression. The selector is fit on
# ALL rows of X and y, i.e. outside of any cross-validation.
select = SelectPercentile(score_func=f_regression, percentile=5)
select.fit(X, y)
X_selected = select.transform(X)
X_selected.shape

(100, 500)

In [29]:
# Mean 5-fold score of Ridge on the pre-selected features. X_selected was
# chosen using every row of y, so the held-out folds already influenced the
# feature selection and this score is optimistic.
cross_val_score(Ridge(), X_selected, y, cv=5).mean()

0.9057953065239822

In [32]:
# Feature selection and regression bundled into one estimator so that the
# selection step is re-fit inside every cross-validation split.
#
# The pipeline is deliberately NOT fitted here: the previous
# `.fit(X_selected, y)` trained it on the already-reduced 500-column matrix
# (so it would pick 5% of those columns), which does not match the
# full-width X it is evaluated on below. cross_val_score fits its own clones
# of the estimator, so dropping that call leaves the scores unchanged.
pipe = Pipeline([
    ('select', SelectPercentile(score_func=f_regression, percentile=5)),
    ('ridge', Ridge()),
])
cross_val_score(pipe, X, y, cv=5)

array([-0.97502994, -0.03166358, -0.03989415,  0.03018385, -0.2163673 ])

In [34]:
# Show the pipeline's (name, estimator) pairs in the order they are applied.
pipe.steps

[('select',
  SelectPercentile(percentile=5,
                   score_func=<function f_regression at 0x000001BD30196F70>)),
 ('ridge', Ridge())]

In [38]:
# Two ways to build the same scaler -> SVC chain:
# Pipeline takes explicit (name, estimator) pairs ...
pipe_long = Pipeline(steps=[
    ('scaler', MinMaxScaler()),
    ('svm', SVC(C=100)),
])
# ... while make_pipeline takes only the estimators and names the steps itself.
pipe_short = make_pipeline(
    MinMaxScaler(),
    SVC(C=100),
)

In [39]:
# Show the step names that make_pipeline generated for each estimator.
pipe_short.steps

[('minmaxscaler', MinMaxScaler()), ('svc', SVC(C=100))]