In [124]:
"""
Testing of generate_k_fold_set() function with test set modification
""";

In [1]:
import numpy as np
import os
import pickle
import argparse
import numpy as np
import time
import tqdm

In [2]:
def traffic_sign(aligned=True):
    """Load the traffic-sign data set through ``load_traffic``.

    Parameters
    ----------
    aligned : bool, default True
        A truthy value selects the ``'aligned'`` variant, a falsy value the
        ``'unaligned'`` one.

    Returns
    -------
    Whatever ``load_traffic('data', kind=<variant>)`` returns.
    """
    variant = 'aligned' if aligned else 'unaligned'
    return load_traffic('data', kind=variant)

In [3]:
def load_traffic(path, kind='train', subclass=None):
    """Load pickled traffic data and flatten every image into one row.

    The file that is read is ``./train_wb_<kind>.p``, resolved against the
    current working directory.

    Parameters
    ----------
    path : str
        Currently ignored: the file location is hard-coded relative to the
        working directory. The parameter is kept so existing callers keep
        working.
    kind : str, default 'train'
        Inserted into the file name to choose which pickle is opened.
    subclass : object, optional
        Currently ignored; kept for interface compatibility.

    Returns
    -------
    images
        The object stored under ``'features'``, reshaped to
        ``(images.shape[0], -1)``.
    labels
        The object stored under ``'labels'``, returned unchanged.

    Raises
    ------
    FileNotFoundError
        If the pickle file does not exist in the working directory.
    KeyError
        If the unpickled object lacks a ``'features'`` or ``'labels'`` entry.
    """
    t_file = "./train" + "_wb_" + kind + ".p"

    # SECURITY: pickle.load can execute arbitrary code while deserialising.
    # Only point this at files produced by a trusted source.
    with open(t_file, mode='rb') as f:
        train = pickle.load(f)

    images, labels = train['features'], train['labels']
    # Keep the leading (sample) axis and collapse every other axis into one.
    images = images.reshape((images.shape[0], -1))

    return images, labels

In [4]:
# Generic loader alias used by the cells below; it currently points at the traffic-sign loader.
load_data = traffic_sign

In [5]:
# Load the data set with the loader's default arguments and unpack the returned pair.
X, y = load_data()

In [83]:
"""
Original function
"""
def generate_k_fold_set(dataset, k = 5):
    """Yield ``k`` (train, validation) splits of a randomly shuffled data set.

    The samples are shuffled once with ``np.random.permutation``. Fold ``i``
    uses shuffled positions ``[i * w, (i + 1) * w)`` as validation data, where
    ``w = len(X) // k``; every other position forms the training data. When
    ``len(X)`` is not a multiple of ``k`` the trailing ``len(X) % k``
    positions are never used for validation and stay in every training set.

    Parameters
    ----------
    dataset : tuple
        ``(X, y)`` pair; both are indexed with integer index arrays.
    k : int, default 5
        Number of splits to yield.

    Yields
    ------
    train : tuple
        ``(X_train, y_train)``.
    validation : tuple
        ``(X_val, y_val)``.
    """
    features, targets = dataset
    n_samples = len(features)
    shuffled = np.random.permutation(n_samples)
    width = n_samples // k

    def outside(values, lo, hi):
        # Rows at every shuffled position except lo..hi-1, in shuffled order.
        return np.concatenate([values[shuffled[:lo]], values[shuffled[hi:]]])

    for fold in range(k):
        lo = fold * width
        hi = lo + width
        held_out = shuffled[lo:hi]
        yield (
            (outside(features, lo, hi), outside(targets, lo, hi)),
            (features[held_out], targets[held_out]),
        )


In [None]:
"""
Modified function to include test set generation
"""
def generate_k_fold_set(dataset, k = 5):
    """Yield ``k`` (train, validation, test) splits of a shuffled data set.

    The samples are shuffled once with ``np.random.permutation`` and the
    first ``k * w`` shuffled positions (``w = len(X) // k``) are cut into
    ``k`` consecutive folds of width ``w``. Split ``i`` uses fold ``i`` as the
    validation set and fold ``(i + 1) % k`` as the test set; every remaining
    position, including the ``len(X) % k`` leftover samples at the end, forms
    the training set. The three parts of a split are therefore always
    disjoint and together cover the whole data set.

    With ``k == 2`` both folds are consumed by validation and test, so the
    training set only holds the leftover samples (it is empty when ``len(X)``
    is even).

    Parameters
    ----------
    dataset : tuple
        ``(X, y)`` pair of NumPy arrays with the same length along axis 0.
    k : int, default 5
        Number of folds / splits. Must satisfy ``2 <= k <= len(X)``.

    Yields
    ------
    train, validation, test : tuple
        Each is an ``(X_part, y_part)`` pair.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 2 or larger than the number of samples.
        Because this is a generator, the error surfaces on the first
        iteration rather than at call time.
    """
    X, y = dataset
    m = len(X)
    if not 2 <= k <= m:
        raise ValueError(
            "k must be between 2 and the number of samples (%d), got %r" % (m, k)
        )

    order = np.random.permutation(m)
    fold_width = m // k

    for i in range(k):
        val_lo = i * fold_width
        # The test fold is the next fold, wrapping to fold 0 on the last split.
        # Wrapping by fold number (not by raw index) keeps the slice bounds
        # ordered, so the test set is never empty and never leaks into train.
        test_lo = ((i + 1) % k) * fold_width

        val_idx = order[val_lo:val_lo + fold_width]
        test_idx = order[test_lo:test_lo + fold_width]

        # Train on every shuffled position that is in neither held-out fold;
        # a mask keeps the remaining samples in their shuffled order.
        in_train = np.ones(m, dtype=bool)
        in_train[val_lo:val_lo + fold_width] = False
        in_train[test_lo:test_lo + fold_width] = False
        train_idx = order[in_train]

        train = X[train_idx], y[train_idx]
        validation = X[val_idx], y[val_idx]
        test = X[test_idx], y[test_idx]
        yield train, validation, test
    

##### Testing generation of k-fold sets using a subset of the training data

In [102]:
# Small feature subset for exercising the k-fold generator: first 20 rows, first 3 columns.
tmpX = X[:20, :3]

In [103]:
# Sanity check: the slice above should leave 20 rows and 3 columns.
tmpX.shape

(20, 3)

In [104]:
# Labels for the same first 20 samples, so they stay aligned with tmpX.
tmpY = y[:20]

In [105]:
# Sanity check: one label per row of the feature subset.
tmpY.shape

(20,)

In [125]:
# Draw only the first split for k=2 and print its three parts.
train, valid, test = next(generate_k_fold_set((tmpX, tmpY), k=2))
print("Train: ", train)
print("Valid: ", valid)
print("Test: ", test)
print("")

Train:  (array([], shape=(0, 3), dtype=uint8), array([], dtype=uint8))
Valid:  (array([[29, 31, 32],
       [28, 29, 29],
       [28, 27, 27],
       [29, 27, 28],
       [25, 24, 25],
       [27, 27, 29],
       [26, 25, 25],
       [26, 27, 28],
       [31, 33, 31],
       [29, 27, 28]], dtype=uint8), array([41, 41, 41, 41, 41, 41, 41, 41, 41, 41], dtype=uint8))
Test:  (array([[31, 32, 31],
       [32, 30, 30],
       [30, 31, 30],
       [31, 31, 30],
       [29, 28, 27],
       [33, 32, 32],
       [29, 29, 30],
       [32, 33, 32],
       [27, 27, 27],
       [33, 31, 30]], dtype=uint8), array([41, 41, 41, 41, 41, 41, 41, 41, 41, 41], dtype=uint8))



In [126]:
# Draw only the first split for k=4 and print its three parts.
train, valid, test = next(generate_k_fold_set((tmpX, tmpY), k=4))
print("Train: ", train)
print("Valid: ", valid)
print("Test: ", test)
print("")

Train:  (array([[30, 31, 30],
       [28, 29, 29],
       [27, 27, 27],
       [29, 29, 30],
       [26, 25, 25],
       [32, 30, 30],
       [31, 32, 31],
       [28, 27, 27],
       [33, 31, 30],
       [32, 33, 32]], dtype=uint8), array([41, 41, 41, 41, 41, 41, 41, 41, 41, 41], dtype=uint8))
Valid:  (array([[29, 27, 28],
       [26, 27, 28],
       [29, 28, 27],
       [27, 27, 29],
       [29, 31, 32]], dtype=uint8), array([41, 41, 41, 41, 41], dtype=uint8))
Test:  (array([[31, 33, 31],
       [25, 24, 25],
       [33, 32, 32],
       [31, 31, 30],
       [29, 27, 28]], dtype=uint8), array([41, 41, 41, 41, 41], dtype=uint8))



In [122]:
# Draw only the first split for k=6 and print its three parts.
train, valid, test = next(generate_k_fold_set((tmpX, tmpY), k=6))
print("Train: ", train)
print("Valid: ", valid)
print("Test: ", test)
print("")

Train:  (array([[28, 29, 29],
       [30, 31, 30],
       [27, 27, 27],
       [33, 32, 32],
       [29, 29, 30],
       [29, 27, 28],
       [28, 27, 27],
       [31, 31, 30],
       [26, 25, 25],
       [31, 33, 31],
       [25, 24, 25],
       [32, 33, 32],
       [29, 27, 28],
       [32, 30, 30]], dtype=uint8), array([41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41],
      dtype=uint8))
Valid:  (array([[29, 31, 32],
       [29, 28, 27],
       [33, 31, 30]], dtype=uint8), array([41, 41, 41], dtype=uint8))
Test:  (array([[27, 27, 29],
       [26, 27, 28],
       [31, 32, 31]], dtype=uint8), array([41, 41, 41], dtype=uint8))

