In [None]:
import numpy as np
import pandas
from sklearn import svm
from sklearn.datasets import make_classification
# NOTE(review): `sklearn.datasets.samples_generator` was deprecated in
# scikit-learn 0.22 and removed in 0.24; this import only works on older
# releases. Kept for backward compatibility -- drop it once nothing else needs it.
from sklearn.datasets import samples_generator
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [None]:
# generate dataset
# Synthetic classification data (two classes by default). `make_classification`
# is taken from the public `sklearn.datasets` namespace because the
# `samples_generator` submodule was deprecated in scikit-learn 0.22 and
# removed in 0.24.
N_SAMPLES = 100
N_FEATURES = 10
X, y = make_classification(
    n_samples=N_SAMPLES,
    n_features=N_FEATURES,
    n_informative=3,  # only 3 of the N_FEATURES columns carry class signal
    n_redundant=0,    # no linear combinations of the informative columns
    random_state=1,   # fixed seed so a fresh kernel reproduces the same data
)

# split dataset
# Hold out 20% of the samples for the final evaluation; the seed is fixed so
# the same rows land in each split on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# pipeline
# Step names are the prefixes used by the grid-search parameter keys
# ('pca__...', 'svm__...'), so they must stay in sync with the grid.
pipeline = Pipeline(steps=[
    ('std_scale', StandardScaler()),  # preprocessing: standardize features
    ('pca', PCA()),                   # preprocessing: project onto principal components
    ('svm', svm.SVC()),               # classifier
])

# hyper parameters
# The grid is given as a list of two sub-grids (accepted by GridSearchCV's
# `param_grid`). SVC ignores `gamma` for the linear kernel, so sweeping gamma
# there only refits identical models; the linear kernel therefore gets its
# own grid without `svm__gamma`. This cuts the search from 300 to 210
# candidates without dropping any distinct model.
# NOTE(review): range(2, 3) yields only n_components=2 -- confirm whether a
# wider range was intended.
hyper_parameters = [
    {
        'pca__n_components': range(2, 3),
        'svm__kernel': ['linear'],
        'svm__C': np.logspace(0, 2, 10).tolist(),  # 10 values, 1 .. 100
    },
    {
        'pca__n_components': range(2, 3),
        'svm__kernel': ['poly', 'sigmoid'],
        'svm__C': np.logspace(0, 2, 10).tolist(),  # 10 values, 1 .. 100
        'svm__gamma': np.logspace(-3, 0, 10).tolist(),  # 10 values, 0.001 .. 1
    },
]

# grid search cross validation
# Folds: 5 shuffled splits with a fixed seed so fold membership is repeatable.
cv = KFold(n_splits=5, shuffle=True, random_state=1)

# Search object: evaluates `pipeline` over `hyper_parameters` on the folds
# above. n_jobs=-1 runs fits in parallel on all cores; verbose=1 prints progress.
clf = GridSearchCV(pipeline, hyper_parameters, cv=cv, n_jobs=-1, verbose=1)

# train
# Runs the full grid search on the training split only; the test split is
# never seen during model selection.
clf.fit(X_train, y_train)

# predict
# GridSearchCV.predict delegates to the best estimator found by the search
# (refit on the whole training split by default).
y_train_pred = clf.predict(X_train)
y_test_pred = clf.predict(X_test)

# report
# classification_report returns a multi-line table. The split label is printed
# on its own line (sep='\n') so it no longer shifts the table's header row out
# of alignment with the rows beneath it.
print('train', classification_report(y_train, y_train_pred), sep='\n')
print('test', classification_report(y_test, y_test_pred), sep='\n')