In [1]:
from sklearn import cross_validation, datasets

import numpy as np

In [2]:
iris = datasets.load_iris()

In [3]:
train_data, test_data, train_labels, test_labels = \
cross_validation.train_test_split(iris.data, iris.target, test_size=0.3)

In [4]:
print(len(test_data)/len(iris.data))

0.3


In [5]:
print('train_data size: {}\ntest_data size: {}'.format(len(train_data), len(test_data)))

train_data size: 105
test_data size: 45


In [7]:
print('train_data:\n{}\ntest_data:\n{}'.format(train_data[:5], test_data[:5]))

train_data:
[[ 5.2  4.1  1.5  0.1]
 [ 5.   3.6  1.4  0.2]
 [ 6.7  3.1  5.6  2.4]
 [ 6.4  2.9  4.3  1.3]
 [ 6.1  2.8  4.7  1.2]]
test_data:
[[ 5.5  3.5  1.3  0.2]
 [ 5.5  2.4  3.8  1.1]
 [ 6.7  3.3  5.7  2.5]
 [ 5.1  3.7  1.5  0.4]
 [ 7.3  2.9  6.3  1.8]]


In [8]:
print('train_labels:\n{}\ntest_labels:\n{}'.format(train_labels[:5], test_labels[:5]))

train_labels:
[0 0 2 1 1]
test_labels:
[0 1 2 0 2]


# Kfold

In [10]:
for train_indices, test_indices in cross_validation.KFold(10, n_folds=5):
    print(train_indices, test_indices)

[2 3 4 5 6 7 8 9] [0 1]
[0 1 4 5 6 7 8 9] [2 3]
[0 1 2 3 6 7 8 9] [4 5]
[0 1 2 3 4 5 8 9] [6 7]
[0 1 2 3 4 5 6 7] [8 9]


In [13]:
for train_indices, test_indices in cross_validation.KFold(10, n_folds=2, shuffle=True):
    print(train_indices, test_indices)

[2 3 4 7 9] [0 1 5 6 8]
[0 1 5 6 8] [2 3 4 7 9]


In [12]:
for train_indices, test_indices in cross_validation.KFold(10, n_folds=2, 
                                                          shuffle=True, random_state=1):
    print(train_indices, test_indices)

[1 3 5 7 8] [0 2 4 6 9]
[0 2 4 6 9] [1 3 5 7 8]


# StratifiedKFold

In [15]:
target = np.array([0]*5 + [1]*5)
print(target)

for train_indices, test_indices in cross_validation.StratifiedKFold(target, 
                                                                    n_folds=2, 
                                                                    shuffle=True,
                                                                   random_state=0):
    print(train_indices, test_indices)

[0 0 0 0 0 1 1 1 1 1]
[3 4 8 9] [0 1 2 5 6 7]
[0 1 2 5 6 7] [3 4 8 9]


In [16]:
target = np.array([0, 1]*5)
print(target)

for train_indices, test_indices in cross_validation.StratifiedKFold(target, 
                                                                    n_folds=2, 
                                                                    shuffle=True,
                                                                   random_state=0):
    print(train_indices, test_indices)

[0 1 0 1 0 1 0 1 0 1]
[6 7 8 9] [0 1 2 3 4 5]
[0 1 2 3 4 5] [6 7 8 9]


# ShuffleSplit

In [17]:
for train_indices, test_indices in cross_validation.ShuffleSplit(10, 
                                                                 n_iter=10, 
                                                                 test_size=0.2):
    print(train_indices, test_indices)

[0 2 4 3 9 7 6 5] [1 8]
[2 8 6 5 1 7 4 0] [9 3]
[1 5 9 6 0 3 8 4] [7 2]
[5 2 3 9 6 4 0 7] [1 8]
[5 7 1 2 0 8 6 9] [3 4]
[1 3 5 0 2 9 7 8] [4 6]
[4 9 7 2 0 3 1 8] [6 5]
[9 0 1 7 2 4 3 6] [5 8]
[0 9 5 6 7 2 4 8] [1 3]
[2 8 1 3 7 0 6 5] [4 9]


# StratifiedShuffleSplit

In [19]:
target = np.array([0]*5 + [1]*5)
print(target)
for train_indices, test_indices in cross_validation.StratifiedShuffleSplit(target, 
                                                                 n_iter=4, 
                                                                 test_size=0.2):
    print(train_indices, test_indices)

[0 0 0 0 0 1 1 1 1 1]
[8 6 4 5 7 3 2 0] [9 1]
[2 4 5 0 3 7 8 9] [1 6]
[3 8 6 2 7 0 5 4] [9 1]
[8 3 0 1 4 6 7 5] [2 9]


# Leave_one_Out

In [20]:
for train_indices, test_indices in cross_validation.LeaveOneOut(10):
    print(train_indices, test_indices)

[1 2 3 4 5 6 7 8 9] [0]
[0 2 3 4 5 6 7 8 9] [1]
[0 1 3 4 5 6 7 8 9] [2]
[0 1 2 4 5 6 7 8 9] [3]
[0 1 2 3 5 6 7 8 9] [4]
[0 1 2 3 4 6 7 8 9] [5]
[0 1 2 3 4 5 7 8 9] [6]
[0 1 2 3 4 5 6 8 9] [7]
[0 1 2 3 4 5 6 7 9] [8]
[0 1 2 3 4 5 6 7 8] [9]
