In [9]:
# Grid Search
import numpy as np
from sklearn.datasets import make_swiss_roll

# Sample a noisy swiss-roll dataset (seeded for reproducibility).
# `t` is the per-sample value returned alongside the points by make_swiss_roll.
X, t = make_swiss_roll(
    n_samples=1000,
    noise=0.2,
    random_state=42,
)

# Boolean classification target: True where t is above the 6.9 threshold.
y = np.greater(t, 6.9)

In [10]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.decomposition import KernelPCA

# Two-stage pipeline: KernelPCA reduces the data to 2 components,
# then logistic regression classifies the reduced points.
clf = Pipeline(steps=[
    ("kpca", KernelPCA(n_components=2)),
    ("log_reg", LogisticRegression()),
])

# Candidate hyperparameters, addressed as "<step name>__<parameter>":
# 10 evenly spaced gamma values in [0.03, 0.05], each tried with both kernels.
param_grid = [
    {
        "kpca__gamma": np.linspace(0.03, 0.05, num=10),
        "kpca__kernel": ["rbf", "sigmoid"],
    }
]

# Try every combination in the grid, scoring each with 3-fold cross-validation.
grid_search = GridSearchCV(estimator=clf, param_grid=param_grid, cv=3)
grid_search.fit(X, y)

GridSearchCV(cv=3,
             estimator=Pipeline(steps=[('kpca', KernelPCA(n_components=2)),
                                       ('log_reg', LogisticRegression())]),
             param_grid=[{'kpca__gamma': array([0.03      , 0.03222222, 0.03444444, 0.03666667, 0.03888889,
       0.04111111, 0.04333333, 0.04555556, 0.04777778, 0.05      ]),
                          'kpca__kernel': ['rbf', 'sigmoid']}])

In [11]:
# Display the hyperparameter combination (KernelPCA gamma and kernel) selected by the grid search
grid_search.best_params_

{'kpca__gamma': 0.043333333333333335, 'kpca__kernel': 'rbf'}

In [12]:
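# Retrain the best pipeline on the whole dataset. This is only needed if the grid search was run on a subset
# of the data to save time (or with refit=False): with the default refit=True used above, GridSearchCV has
# already refit best_estimator_ on all the data passed to fit().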
# grid_search.best_estimator_.fit(X, y)

In [14]:
from sklearn.metrics import accuracy_score, mean_squared_error

# Predict with the best pipeline found by the grid search and measure its accuracy.
# NOTE: X, y are the same samples the grid search was fitted on (no held-out test split
# is made in the cells above), so this is a training-set accuracy and is optimistic;
# grid_search.best_score_ holds the mean cross-validated score of the best parameters.
y_pred = grid_search.best_estimator_.predict(X)
accuracy_score(y, y_pred)

In [16]:
from sklearn.metrics import mean_squared_error
# Unsupervised way to judge KernelPCA hyperparameters: map the reduced points back to the
# original space (the reconstruction pre-image) and measure how far they land from the inputs.
# The alternative is the supervised route: feed the reduced data and the target values to a
# downstream model and compare the end results across different KernelPCA hyperparameters.

rbf_pca = KernelPCA(
    n_components=2,
    kernel="rbf",
    gamma=0.0433,  # rounded best gamma reported by the grid search
    fit_inverse_transform=True,  # required so that inverse_transform() can be called
)
X_reduced = rbf_pca.fit_transform(X)
X_preimage = rbf_pca.inverse_transform(X_reduced)

# Reconstruction pre-image error: mean squared error between the inputs and their pre-images.
mean_squared_error(y_true=X, y_pred=X_preimage)

32.7863087957661