# Introduction
This notebook uses the iris dataset to demonstrate nested cross-validation.

In [1]:
import numpy as np
from sklearn.base import clone
from sklearn.datasets import load_iris, make_classification
from sklearn.model_selection import LeaveOneGroupOut, KFold
from sklearn.svm import SVC

# Create the dataset.
iris = load_iris()
X_iris = iris.data
# Generate random binary labels, one per iris sample. Only the labels returned
# by make_classification are kept; its synthetic feature matrix is discarded.
_, y_iris = make_classification(n_samples=iris.data.shape[0], random_state=10)
groups = iris.target  # use the iris flower types as machine (group) IDs

# Create an SVM model. It is used as an unfitted template and cloned per fold.
svm = SVC(kernel="rbf")


# Create the splitters: the outer loop holds out one group at a time, the
# inner loop is a shuffled 2-fold split of the remaining samples.
outer_cv = LeaveOneGroupOut()
inner_cv = KFold(n_splits=2, shuffle=True, random_state=10)

# Maps the outer fold index to the held-out score of the model chosen by the
# inner loop for that fold.
nested_scores = {}

# NOTE(review): the loop previously stored a reference to the one shared
# estimator, which is refit on every inner fold, so the "best" model was always
# the last one fitted. Scores recorded from earlier runs may therefore differ.
for inx, (train_index, test_index) in enumerate(outer_cv.split(X_iris, y_iris, groups)):
    X_inner = X_iris[train_index]
    y_inner = y_iris[train_index]

    best_model = None  # TODO: use GridSearchCV
    # Start below any attainable score so that a fold scoring 0.0 is still
    # selected and best_model can never remain None.
    best_score = -np.inf
    for inner_train_index, inner_test_index in inner_cv.split(X_inner):
        # Fit an independent copy per inner fold; refitting the shared `svm`
        # would overwrite whichever model had been remembered as the best.
        candidate = clone(svm)
        candidate.fit(X_inner[inner_train_index], y_inner[inner_train_index])
        current_score = candidate.score(X_inner[inner_test_index], y_inner[inner_test_index])
        if current_score > best_score:
            best_model = candidate
            best_score = current_score

    # Evaluate the selected model on the group held out by the outer split.
    nested_scores[inx] = best_model.score(X_iris[test_index], y_iris[test_index])

In [2]:
# Display the collected scores: keys are the outer split indices, values are
# the scores on the corresponding held-out group.
nested_scores

{0: 0.54, 1: 0.52, 2: 0.52}

# Using the `NestedCVWrapper` wrapper

In [3]:
from utils.cv_wrapper import NestedCVWrapper

# Configure the wrapper with a leave-one-group-out splitter and a shuffled
# 2-fold splitter, passed positionally in that order.
outer_splitter = LeaveOneGroupOut()
inner_splitter = KFold(n_splits=2, shuffle=True, random_state=10)
nestedCV = NestedCVWrapper(outer_splitter, inner_splitter)

# Run the wrapper on a fresh RBF-kernel SVC with the iris features, the random
# binary labels and the flower-type groups.
rbf_svc = SVC(kernel="rbf")
nestedCV.run(rbf_svc, X_iris, y_iris, groups)

In [4]:
# Display the scores stored on the wrapper after run().
# NOTE(review): presumably keyed by outer split index — confirm against NestedCVWrapper.
nestedCV.nested_scores

{0: 0.54, 1: 0.52, 2: 0.52}

In [5]:
# Display the models stored on the wrapper after run().
# NOTE(review): presumably one selected estimator per outer split — confirm against NestedCVWrapper.
nestedCV.nested_models

{0: SVC(), 1: SVC(), 2: SVC()}