In [1]:
from sklearn.datasets import make_classification
import pandas as pd
import numpy as np

In [2]:
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

In [3]:
def k_fold_indices(data, k):
    """Split the row positions of ``data`` into ``k`` contiguous train/test folds.

    The positions ``0 .. len(data) - 1`` are cut, in order and without
    shuffling, into ``k`` consecutive test blocks. When ``len(data)`` is not a
    multiple of ``k`` the first ``len(data) % k`` folds receive one extra
    sample, so every sample lands in exactly one test fold (the previous
    implementation silently left the trailing remainder out of all test folds).

    Parameters
    ----------
    data : sized object
        Anything supporting ``len()``; only its length is used.
    k : int
        Number of folds, with ``1 <= k <= len(data)``.

    Returns
    -------
    list of (train_indices, test_indices)
        One tuple of integer NumPy arrays per fold; ``train_indices`` holds
        every position that is not in ``test_indices``.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1 or larger than the number of samples, since
        that would yield a division by zero or empty test folds.
    """
    n_samples = len(data)
    if not 1 <= k <= n_samples:
        raise ValueError(
            f"k must be between 1 and the number of samples ({n_samples}), got {k}"
        )

    indices = np.arange(n_samples)
    base_size, remainder = divmod(n_samples, k)

    folds = []
    start = 0
    for i in range(k):
        # The first `remainder` folds absorb one leftover sample each.
        stop = start + base_size + (1 if i < remainder else 0)
        test_indices = indices[start:stop]
        train_indices = np.concatenate([indices[:start], indices[stop:]])
        folds.append((train_indices, test_indices))
        start = stop

    return folds

In [4]:
# Synthetic binary-classification toy set: 10 samples with 4 features each.
# NOTE(review): no random_state is passed, so the generated data presumably
# differs between runs - consider seeding for reproducibility.
X, y = make_classification(n_samples=10, n_features=4, n_classes=2)

In [5]:
# Display the generated feature matrix (10 samples x 4 features).
X

array([[ 0.26873299,  1.87842041,  1.25802716,  0.07917083],
       [-0.79789995,  1.17266552,  1.23437875,  0.10342124],
       [-1.09808556, -0.70831385, -0.01090763,  0.02588079],
       [-0.53230188, -1.9094698 , -1.1583332 , -0.06599012],
       [-0.5204572 ,  1.71119242,  1.50186279,  0.11491328],
       [-0.0941372 , -1.22116717, -0.85530947, -0.0559742 ],
       [ 0.76744182,  1.75070859,  0.93211053,  0.04488059],
       [-1.74156476, -1.11637901, -0.0121403 ,  0.04139837],
       [ 0.36411633, -1.15530867, -1.01989955, -0.07829535],
       [ 2.27619091, -0.71624786, -1.58571657, -0.1631935 ]])

In [6]:
# Five folds over the 10 samples, i.e. two test rows per fold.
k = 5
fold_indices = k_fold_indices(data=X, k=k)

In [7]:
# Cross-validate a decision tree: train on each fold's training rows and
# record the accuracy obtained on that fold's held-out rows.
scores = []
model = DecisionTreeClassifier()

for train_indices, test_indices in fold_indices:
    # Slice features and labels with this fold's index arrays.
    X_train, X_test = X[train_indices], X[test_indices]
    y_train, y_test = y[train_indices], y[test_indices]

    # One call, newline-separated: same text as four separate prints.
    print("Train Data:", X_train, "Test Data:", X_test, sep="\n")

    # fit() hands back the estimator, so prediction can be chained onto it.
    y_pred = model.fit(X_train, y_train).predict(X_test)
    fold_score = accuracy_score(y_test, y_pred)
    scores.append(fold_score)
    print()

Train Data:
[[-1.09808556 -0.70831385 -0.01090763  0.02588079]
 [-0.53230188 -1.9094698  -1.1583332  -0.06599012]
 [-0.5204572   1.71119242  1.50186279  0.11491328]
 [-0.0941372  -1.22116717 -0.85530947 -0.0559742 ]
 [ 0.76744182  1.75070859  0.93211053  0.04488059]
 [-1.74156476 -1.11637901 -0.0121403   0.04139837]
 [ 0.36411633 -1.15530867 -1.01989955 -0.07829535]
 [ 2.27619091 -0.71624786 -1.58571657 -0.1631935 ]]
Test Data:
[[ 0.26873299  1.87842041  1.25802716  0.07917083]
 [-0.79789995  1.17266552  1.23437875  0.10342124]]

Train Data:
[[ 0.26873299  1.87842041  1.25802716  0.07917083]
 [-0.79789995  1.17266552  1.23437875  0.10342124]
 [-0.5204572   1.71119242  1.50186279  0.11491328]
 [-0.0941372  -1.22116717 -0.85530947 -0.0559742 ]
 [ 0.76744182  1.75070859  0.93211053  0.04488059]
 [-1.74156476 -1.11637901 -0.0121403   0.04139837]
 [ 0.36411633 -1.15530867 -1.01989955 -0.07829535]
 [ 2.27619091 -0.71624786 -1.58571657 -0.1631935 ]]
Test Data:
[[-1.09808556 -0.70831385 -0.010

In [8]:
# Average the per-fold accuracies into a single cross-validation score.
mean_score = np.asarray(scores).mean()

In [9]:
# Display the accuracy recorded for each of the folds.
scores

[1.0, 1.0, 1.0, 1.0, 1.0]

In [10]:
# Display the mean of the per-fold accuracies.
mean_score

1.0

In [11]:
# Smaller synthetic set: only 5 samples with 4 features each.
# NOTE(review): no random_state is passed, so the generated data presumably
# differs between runs - consider seeding for reproducibility.
X, y = make_classification(n_samples=5, n_features=4, n_classes=2)

In [12]:
# Display the generated feature matrix (5 samples x 4 features).
X

array([[-0.88862148, -0.61982482, -0.43226031, -0.49932794],
       [-0.04595153,  0.13127699, -0.12473522, -0.13499417],
       [ 0.05028376,  2.48049073, -1.50844908, -1.60632854],
       [ 1.46430106,  1.01728086,  0.71485586,  0.82554217],
       [ 2.64012228, -0.63396495,  2.83601535,  3.13820003]])

In [13]:
# Five folds over 5 samples: every fold holds out exactly one row.
k = 5
fold_indices = k_fold_indices(data=X, k=k)

In [14]:
# Inspect the (train_indices, test_indices) pair built for each fold.
fold_indices

[(array([1, 2, 3, 4]), array([0])),
 (array([0, 2, 3, 4]), array([1])),
 (array([0, 1, 3, 4]), array([2])),
 (array([0, 1, 2, 4]), array([3])),
 (array([0, 1, 2, 3]), array([4]))]

In [15]:
# Cross-validate a decision tree: train on each fold's training rows and
# record the accuracy obtained on that fold's held-out rows.
scores = []
model = DecisionTreeClassifier()

for train_indices, test_indices in fold_indices:
    # Slice features and labels with this fold's index arrays.
    X_train, X_test = X[train_indices], X[test_indices]
    y_train, y_test = y[train_indices], y[test_indices]

    # One call, newline-separated: same text as four separate prints.
    print("Train Data:", X_train, "Test Data:", X_test, sep="\n")

    # fit() hands back the estimator, so prediction can be chained onto it.
    y_pred = model.fit(X_train, y_train).predict(X_test)
    fold_score = accuracy_score(y_test, y_pred)
    scores.append(fold_score)
    print()

Train Data:
[[-0.04595153  0.13127699 -0.12473522 -0.13499417]
 [ 0.05028376  2.48049073 -1.50844908 -1.60632854]
 [ 1.46430106  1.01728086  0.71485586  0.82554217]
 [ 2.64012228 -0.63396495  2.83601535  3.13820003]]
Test Data:
[[-0.88862148 -0.61982482 -0.43226031 -0.49932794]]

Train Data:
[[-0.88862148 -0.61982482 -0.43226031 -0.49932794]
 [ 0.05028376  2.48049073 -1.50844908 -1.60632854]
 [ 1.46430106  1.01728086  0.71485586  0.82554217]
 [ 2.64012228 -0.63396495  2.83601535  3.13820003]]
Test Data:
[[-0.04595153  0.13127699 -0.12473522 -0.13499417]]

Train Data:
[[-0.88862148 -0.61982482 -0.43226031 -0.49932794]
 [-0.04595153  0.13127699 -0.12473522 -0.13499417]
 [ 1.46430106  1.01728086  0.71485586  0.82554217]
 [ 2.64012228 -0.63396495  2.83601535  3.13820003]]
Test Data:
[[ 0.05028376  2.48049073 -1.50844908 -1.60632854]]

Train Data:
[[-0.88862148 -0.61982482 -0.43226031 -0.49932794]
 [-0.04595153  0.13127699 -0.12473522 -0.13499417]
 [ 0.05028376  2.48049073 -1.50844908 -1.60

In [16]:
# Average the per-fold accuracies into a single cross-validation score.
mean_score = np.asarray(scores).mean()

In [17]:
# Display the accuracy recorded for each of the folds.
scores

[1.0, 1.0, 1.0, 0.0, 0.0]

In [18]:
# Display the mean of the per-fold accuracies.
mean_score

0.6

In [19]:
# Larger synthetic set: 20 samples with 4 features each.
# NOTE(review): no random_state is passed, so the generated data presumably
# differs between runs - consider seeding for reproducibility.
X, y = make_classification(n_samples=20, n_features=4, n_classes=2)

In [20]:
# Ten folds over the 20 samples, i.e. two test rows per fold.
k = 10
fold_indices = k_fold_indices(data=X, k=k)

In [21]:
# Inspect the (train_indices, test_indices) pair built for each fold.
fold_indices

[(array([ 2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
         19]),
  array([0, 1])),
 (array([ 0,  1,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
         19]),
  array([2, 3])),
 (array([ 0,  1,  2,  3,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
         19]),
  array([4, 5])),
 (array([ 0,  1,  2,  3,  4,  5,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
         19]),
  array([6, 7])),
 (array([ 0,  1,  2,  3,  4,  5,  6,  7, 10, 11, 12, 13, 14, 15, 16, 17, 18,
         19]),
  array([8, 9])),
 (array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 12, 13, 14, 15, 16, 17, 18,
         19]),
  array([10, 11])),
 (array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 14, 15, 16, 17, 18,
         19]),
  array([12, 13])),
 (array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 17, 18,
         19]),
  array([14, 15])),
 (array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 18,
         19]),
  array([16, 17])),
 (

In [22]:
# Cross-validate a decision tree: train on each fold's training rows and
# record the accuracy obtained on that fold's held-out rows.
scores = []
model = DecisionTreeClassifier()

for train_indices, test_indices in fold_indices:
    # Slice features and labels with this fold's index arrays.
    X_train, X_test = X[train_indices], X[test_indices]
    y_train, y_test = y[train_indices], y[test_indices]

    # One call, newline-separated: same text as four separate prints.
    print("Train Data:", X_train, "Test Data:", X_test, sep="\n")

    # fit() hands back the estimator, so prediction can be chained onto it.
    y_pred = model.fit(X_train, y_train).predict(X_test)
    fold_score = accuracy_score(y_test, y_pred)
    scores.append(fold_score)
    print()

Train Data:
[[-2.99003142 -2.10303871 -2.79449539 -1.49989063]
 [ 0.57789222  1.19575619 -0.05495273 -0.90869696]
 [ 1.72669956  0.77092431  1.94817437  1.53971784]
 [ 1.37662127  1.64975242  0.77280587 -0.34434583]
 [ 0.44285383 -0.67028384  1.15404955  1.71300872]
 [ 1.77081465  1.3297011   1.59153288  0.76043472]
 [-0.89371165  0.29571255 -1.53210344 -1.85191615]
 [-0.01573629  0.55222982 -0.43937975 -0.86329011]
 [-0.39818879 -0.7084339  -0.04920068  0.45075448]
 [-0.57624547 -1.21278432  0.07020257  0.93713994]
 [-3.15922096 -2.05848479 -3.07592417 -1.83312533]
 [-1.61347211 -0.15815557 -2.2442805  -2.29250459]
 [-0.33946779  0.46748658 -0.84971358 -1.2427655 ]
 [-0.3365713   0.41032634 -0.80237728 -1.15141815]
 [-0.34605448 -1.08950046  0.31445651  1.11125865]
 [-0.02123848 -0.94705056  0.68287371  1.4048059 ]
 [ 0.8233793   0.79062243  0.610084    0.09186085]
 [ 2.75229292  1.41572027  2.96441329  2.17043951]]
Test Data:
[[ 1.04175586  0.53089523  1.12578498  0.82905601]
 [-0.97

In [23]:
# Average the per-fold accuracies into a single cross-validation score.
mean_score = np.asarray(scores).mean()

In [24]:
# Display the accuracy recorded for each of the folds.
scores

[0.5, 1.0, 0.5, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.5]

In [25]:
# Display the mean of the per-fold accuracies.
mean_score

0.75