In [1]:
# Uncomment to install the dependencies into the running kernel's environment:
# %pip install gokinjo scikit-learn

In [2]:
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_validate
from sklearn.manifold import TSNE

from gokinjo import knn_kfold_extract

### Prepare

In [3]:
# Load the digits dataset and separate the feature matrix from the labels.
dataset = datasets.load_digits()
X = dataset.data
y = dataset.target

# One shared CV splitter and one shared estimator are reused by every step
# below so the scores are comparable; both are seeded with 42.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
clf = RandomForestClassifier(n_estimators=100, random_state=42)

### Step 1: raw data

In [4]:
# Baseline: cross-validate the classifier directly on the raw features,
# using the shared stratified splitter, and show the mean test score.
score = cross_validate(estimator=clf, X=X, y=y, cv=skf)
score['test_score'].mean()

0.975493504158074

### Step 2: raw -> k-NN feature

In [5]:
# Build k-NN features from the raw data with gokinjo, then cross-validate
# the same classifier on those features instead of the raw ones.
# NOTE(review): knn_kfold_extract is called with its library defaults; if it
# shuffles internal folds without a fixed seed, this score can vary between
# runs -- confirm against the gokinjo documentation.
X_knn_feature_raw = knn_kfold_extract(X, y)
score = cross_validate(estimator=clf, X=X_knn_feature_raw, y=y, cv=skf)
score['test_score'].mean()

0.9788486920319253

### Step 3: raw -> t-SNE feature

In [6]:
# Embed the raw features with t-SNE and cross-validate the classifier on the
# embedding.
#
# random_state is pinned (same seed as the classifier and CV splitter) because
# t-SNE is stochastic: without it the embedding, this score, and every later
# step built on X_transformed change on each run. Scores saved from an
# unseeded run will therefore differ slightly from a fresh execution.
#
# NOTE(review): the embedding is fit on all samples before the CV split, so
# test-fold points influence the features (no labels are used, but the score
# is transductive rather than a true hold-out estimate).
tsne = TSNE(random_state=42)
X_transformed = tsne.fit_transform(X)
score = cross_validate(clf, X_transformed, y, cv=skf)
score['test_score'].mean()

0.9872038651829402

### Step 4: raw -> t-SNE -> k-NN stacking

In [7]:
# Step 4: stack the two transforms -- feed the t-SNE embedding into gokinjo's
# k-NN feature extraction, then cross-validate the classifier on the result.
# NOTE(review): knn_kfold_extract is called with its library defaults; whether
# its internal fold assignment is seeded is not visible here -- confirm.
X_knn_feature_tsne = knn_kfold_extract(X_transformed, y)
score = cross_validate(estimator=clf, X=X_knn_feature_tsne, y=y, cv=skf)
score['test_score'].mean()

0.9899682038544867