In [1]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
from sklearn import svm
from sklearn.decomposition import PCA
from sklearn.datasets import samples_generator
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import numpy as np
import pandas



In [2]:
# generate dataset
N_SAMPLES = 100
N_FEATURES = 10
X, y = samples_generator.make_classification(
    n_samples=N_SAMPLES,
    n_features=N_FEATURES,
    n_informative=3,
    n_redundant=0,
    random_state=1
)

# split dataset
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0
)

# pipline
pipeline = Pipeline([
    # preprocessing
    ('std_scale', StandardScaler()),
    ('pca', PCA()),
    # classifier
    ('svm', svm.SVC()),
])

# hyper parameters
hyper_parameters = {
    'pca__n_components': range(2, 3),
    'svm__kernel': ['linear', 'poly', 'sigmoid'],
    'svm__C': np.logspace(0, 2, 10).tolist(),
    'svm__gamma': np.logspace(-3, 0, 10).tolist()
}

# grid search cross validation
cv = KFold(
    n_splits=5,
    shuffle=True,
    random_state=1,
)
clf = GridSearchCV(
    estimator=pipeline,
    param_grid=hyper_parameters,
    cv=cv,
    n_jobs=-1,
    verbose=1,
)

# train
clf.fit(X_train, y_train)

# predict
y_train_pred = clf.predict(X_train)
y_test_pred = clf.predict(X_test)

# report
print('train', classification_report(y_train, y_train_pred))
print('test', classification_report(y_test, y_test_pred))

Fitting 5 folds for each of 300 candidates, totalling 1500 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    1.4s


[Parallel(n_jobs=-1)]: Done 1400 tasks      | elapsed:    2.7s


train               precision    recall  f1-score   support

           0       0.68      0.61      0.64        44
           1       0.57      0.64      0.61        36

    accuracy                           0.62        80
   macro avg       0.62      0.63      0.62        80
weighted avg       0.63      0.62      0.63        80

test               precision    recall  f1-score   support

           0       0.55      0.86      0.67         7
           1       0.89      0.62      0.73        13

    accuracy                           0.70        20
   macro avg       0.72      0.74      0.70        20
weighted avg       0.77      0.70      0.71        20



[Parallel(n_jobs=-1)]: Done 1500 out of 1500 | elapsed:    3.3s finished
