In [1]:
import numpy as np
import pandas as pd

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Read the Iris table from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='iris.csv')

In [4]:
# Remove the 'Id' column; it is dropped here and not used afterwards.
# Rebinding (instead of inplace=True) plus errors='ignore' keeps this cell
# re-runnable: a second execution is a no-op rather than a KeyError.
df = df.drop(columns='Id', errors='ignore')

In [5]:
# List the distinct class labels present in the 'Species' column.
df.loc[:, 'Species'].unique()

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

In [6]:
# Map each species name to an integer class code (0, 1, 2) in listed order.
dict1 = {
    species: code
    for code, species in enumerate(
        ('Iris-setosa', 'Iris-versicolor', 'Iris-virginica')
    )
}

In [7]:
# Display the species-name -> integer-code mapping.
dict1

{'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}

In [8]:
# Encode the species names as integer class labels using dict1.
# Series.map turns every value missing from the mapping into NaN, so running
# this cell a second time on the already-encoded column would wipe all labels.
# Only map while the column is still non-numeric to keep the cell re-runnable.
if not pd.api.types.is_numeric_dtype(df['Species']):
    df['Species'] = df['Species'].map(dict1)

In [9]:
# Preview the first five rows after dropping 'Id' and encoding 'Species'.
df.head(n=5)

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [10]:
# Split the frame into the feature matrix (every column except 'Species')
# and the target vector (the encoded 'Species' column).
X = df.drop(columns=['Species'])
y = df.loc[:, 'Species']

**As KPCA is an unsupervised learning algorithm, there is no obvious performance measure to help you select the best kernel and hyperparameter values. That said, dimensionality reduction is often a preparation step for a supervised learning task, so you can use grid search to select the kernel and hyperparameters that lead to the best performance on that task.**

In [15]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.decomposition import KernelPCA

# Two-stage model: reduce the features to 2 kernel principal components,
# then classify the projected points with logistic regression.
clf = Pipeline(steps=[
    ('kpca', KernelPCA(n_components=2)),
    ('log_reg', LogisticRegression()),
])

# Candidate settings for the 'kpca' step: 10 evenly spaced gamma values
# between 0.03 and 0.05, crossed with two kernel choices (20 combinations).
# NOTE(review): the recorded search result picks gamma=0.03, the lower edge
# of this range, so a wider gamma range may be worth exploring.
parameter = dict(
    kpca__gamma=np.linspace(0.03, 0.05, num=10),
    kpca__kernel=['rbf', 'sigmoid'],
)

# Score every combination with 5-fold cross-validation on the full data set.
grid = GridSearchCV(estimator=clf, param_grid=parameter, cv=5)
grid.fit(X, y)

GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('kpca', KernelPCA(n_components=2)),
                                       ('log_reg', LogisticRegression())]),
             param_grid={'kpca__gamma': array([0.03      , 0.03222222, 0.03444444, 0.03666667, 0.03888889,
       0.04111111, 0.04333333, 0.04555556, 0.04777778, 0.05      ]),
                         'kpca__kernel': ['rbf', 'sigmoid']})

In [16]:
# Show the parameter combination that scored best in the grid search.
grid.best_params_

{'kpca__gamma': 0.03, 'kpca__kernel': 'rbf'}

In [17]:
# Apply the winning grid-search settings to the pipeline by reading them from
# grid.best_params_ instead of retyping the values, so this cell cannot drift
# from the search result when the grid or the data changes.
# The keys already use the 'kpca__<param>' form that Pipeline.set_params expects.
# Note: clf itself is still unfitted after this call; with GridSearchCV's
# default refit behaviour the fitted copy is available as grid.best_estimator_.
clf.set_params(**grid.best_params_)
# Display the reconfigured KernelPCA step.
clf.named_steps['kpca']

KernelPCA(gamma=0.03, kernel='rbf', n_components=2)

In [18]:
# Display the pipeline to confirm the KernelPCA step now carries the chosen settings.
clf

Pipeline(steps=[('kpca', KernelPCA(gamma=0.03, kernel='rbf', n_components=2)),
                ('log_reg', LogisticRegression())])