In [1]:
# %pip install gokinjo scikit-learn  # uncomment to install into the kernel's environment

In [2]:
import numpy as np
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_validate

from gokinjo.backend_sklearn import ScikitTransformer
# from gokinjo.backend_annoy import AnnoyTransformer  # alternatively

### Prepare

In [3]:
# Load the data once and split it into the feature matrix and label vector.
dataset = datasets.load_digits()
X = dataset.data
y = dataset.target

# Estimator and CV splitter reused by both evaluation cells below; both are
# seeded so that repeated runs give the same scores.
clf = RandomForestClassifier(random_state=42, n_estimators=100)
skf = StratifiedKFold(shuffle=True, random_state=42, n_splits=5)

### Extract features using the scikit-learn interface

In [4]:
# Feature extractor from gokinjo's scikit-learn backend.
# `k` is the neighbor count handed to the transformer; it is also used later
# to size the extracted feature matrix.
k = 1
transformer = ScikitTransformer(n_neighbors=k)

In [5]:
# Out-of-fold feature extraction: every sample's k-NN features are produced by
# a transformer that was fitted on folds which do not contain that sample.

# working space: one row per sample, pre-allocated once instead of being
# regrown with np.append (which copies the whole array on every fold).
# The width keeps the original assumption that the transformer emits
# len(class_labels) * k columns.
class_labels = np.unique(y)
X_knn = np.empty((len(X), len(class_labels) * k))

# feature extraction
kf = KFold(n_splits=20, shuffle=False)  # use normal k-Fold to simplify
for train_index, test_index in kf.split(X, y):
    X_train, y_train = X[train_index], y[train_index]
    X_test = X[test_index]

    # fit to training data
    transformer.fit(X_train, y_train)

    # extract feature to test data
    X_test_knn = transformer.transform(X_test)

    # store feature fragment at the rows it was computed for, so X_knn stays
    # aligned with y regardless of the order in which folds are produced.
    # The test folds of a k-fold split partition the sample indices, so each
    # row is written exactly once.
    X_knn[test_index] = X_test_knn

### Evaluate: raw data

In [6]:
# Baseline: cross-validate the classifier on the raw features and report the
# mean of the per-fold test scores.
score = cross_validate(estimator=clf, X=X, y=y, cv=skf)
score['test_score'].mean()

0.975493504158074

### Evaluate: raw -> k-NN features

In [7]:
# Same classifier and splitter as the baseline, but trained on the extracted
# k-NN features; report the mean of the per-fold test scores.
score = cross_validate(estimator=clf, X=X_knn, y=y, cv=skf)
score['test_score'].mean()

0.9782572815114076