In [1]:
# Display the value of every bare expression statement in a cell, not only the
# last one. Later cells depend on this to show several values from a single cell.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
import numpy as np
from sklearn.model_selection import TimeSeriesSplit

In [3]:
# Toy data: six consecutive observations (rows of X), one target value each.
X = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
    [7, 8],
    [9, 10],
    [11, 12],
])
y = np.array([1, 2, 3, 4, 5, 6])

# Five splits over six samples: every test fold is the single next observation.
tscv = TimeSeriesSplit(n_splits=5)

for train_index, test_index in tscv.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)
    # Select the rows of each fold with the generated index arrays.
    X_train = X[train_index]
    X_test = X[test_index]
    y_train = y[train_index]
    y_test = y[test_index]

TRAIN: [0] TEST: [1]
TRAIN: [0 1] TEST: [2]
TRAIN: [0 1 2] TEST: [3]
TRAIN: [0 1 2 3] TEST: [4]
TRAIN: [0 1 2 3 4] TEST: [5]


In [4]:
# Four splits on the same six samples: the first training window holds two rows.
tscv = TimeSeriesSplit(n_splits=4)

for train_index, test_index in tscv.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)
    # Features and targets of the training part, then of the test part.
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

TRAIN: [0 1] TEST: [2]
TRAIN: [0 1 2] TEST: [3]
TRAIN: [0 1 2 3] TEST: [4]
TRAIN: [0 1 2 3 4] TEST: [5]


In [5]:
# Three splits on the same six samples: the first training window holds three rows.
tscv = TimeSeriesSplit(n_splits=3)

for train_index, test_index in tscv.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train = X[train_index]
    y_train = y[train_index]
    X_test = X[test_index]
    y_test = y[test_index]

TRAIN: [0 1 2] TEST: [3]
TRAIN: [0 1 2 3] TEST: [4]
TRAIN: [0 1 2 3 4] TEST: [5]


In [6]:
# Two splits on the same six samples: each test fold now holds two rows.
tscv = TimeSeriesSplit(n_splits=2)

for train_index, test_index in tscv.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)
    # Pair up the feature and target slices of each part of the fold.
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

TRAIN: [0 1] TEST: [2 3]
TRAIN: [0 1 2 3] TEST: [4 5]


### Tutorial

In [7]:
# Tutorial data: X has one row per observation, y one target per observation.
X = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
    [7, 8],
    [9, 10],
    [11, 12],
])
y = np.array([1, 2, 3, 4, 5, 6])

# Each bare expression below is displayed (shape first, then the values).
X.shape
X
y.shape
y

(6, 2)

array([[ 1,  2],
       [ 3,  4],
       [ 5,  6],
       [ 7,  8],
       [ 9, 10],
       [11, 12]])

(6,)

array([1, 2, 3, 4, 5, 6])

### `n_splits=2`

In [8]:
tscv = TimeSeriesSplit(n_splits=2)
print(tscv)

for train_index, test_index in tscv.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)

    # Feature folds: the shapes are displayed first, then the arrays themselves.
    print("TRAIN ............")
    X_train = X[train_index]
    X_test = X[test_index]
    X_train.shape, X_test.shape
    X_train, X_test

    # Target folds. The banner reads "TEST", but what follows is the y split
    # (train part and test part), mirroring the X section above.
    print("TEST ............")
    y_train = y[train_index]
    y_test = y[test_index]
    y_train.shape, y_test.shape
    y_train, y_test

TimeSeriesSplit(max_train_size=None, n_splits=2)
TRAIN: [0 1] TEST: [2 3]
TRAIN ............


((2, 2), (2, 2))

(array([[1, 2],
        [3, 4]]),
 array([[5, 6],
        [7, 8]]))

TEST ............


((2,), (2,))

(array([1, 2]), array([3, 4]))

TRAIN: [0 1 2 3] TEST: [4 5]
TRAIN ............


((4, 2), (2, 2))

(array([[1, 2],
        [3, 4],
        [5, 6],
        [7, 8]]),
 array([[ 9, 10],
        [11, 12]]))

TEST ............


((4,), (2,))

(array([1, 2, 3, 4]), array([5, 6]))

### `n_splits=5`

In [9]:
tscv = TimeSeriesSplit(n_splits=5)
print(tscv)

for train_index, test_index in tscv.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)

    # Feature folds (only the arrays are displayed here, not their shapes).
    print("TRAIN ............")
    X_train = X[train_index]
    X_test = X[test_index]
    X_train, X_test

    # Target folds. The banner reads "TEST", but what follows is the y split.
    print("TEST ............")
    y_train = y[train_index]
    y_test = y[test_index]
    y_train, y_test

TimeSeriesSplit(max_train_size=None, n_splits=5)
TRAIN: [0] TEST: [1]
TRAIN ............


(array([[1, 2]]), array([[3, 4]]))

TEST ............


(array([1]), array([2]))

TRAIN: [0 1] TEST: [2]
TRAIN ............


(array([[1, 2],
        [3, 4]]),
 array([[5, 6]]))

TEST ............


(array([1, 2]), array([3]))

TRAIN: [0 1 2] TEST: [3]
TRAIN ............


(array([[1, 2],
        [3, 4],
        [5, 6]]),
 array([[7, 8]]))

TEST ............


(array([1, 2, 3]), array([4]))

TRAIN: [0 1 2 3] TEST: [4]
TRAIN ............


(array([[1, 2],
        [3, 4],
        [5, 6],
        [7, 8]]),
 array([[ 9, 10]]))

TEST ............


(array([1, 2, 3, 4]), array([5]))

TRAIN: [0 1 2 3 4] TEST: [5]
TRAIN ............


(array([[ 1,  2],
        [ 3,  4],
        [ 5,  6],
        [ 7,  8],
        [ 9, 10]]),
 array([[11, 12]]))

TEST ............


(array([1, 2, 3, 4, 5]), array([6]))

### `n_splits=3`

In [10]:
tscv = TimeSeriesSplit(n_splits=3)
print(tscv)

for train_index, test_index in tscv.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)

    # Feature folds (arrays only).
    print("TRAIN ............")
    X_train, X_test = X[train_index], X[test_index]
    X_train, X_test

    # Target folds. The banner reads "TEST", but what follows is the y split.
    print("TEST ............")
    y_train, y_test = y[train_index], y[test_index]
    y_train, y_test

TimeSeriesSplit(max_train_size=None, n_splits=3)
TRAIN: [0 1 2] TEST: [3]
TRAIN ............


(array([[1, 2],
        [3, 4],
        [5, 6]]),
 array([[7, 8]]))

TEST ............


(array([1, 2, 3]), array([4]))

TRAIN: [0 1 2 3] TEST: [4]
TRAIN ............


(array([[1, 2],
        [3, 4],
        [5, 6],
        [7, 8]]),
 array([[ 9, 10]]))

TEST ............


(array([1, 2, 3, 4]), array([5]))

TRAIN: [0 1 2 3 4] TEST: [5]
TRAIN ............


(array([[ 1,  2],
        [ 3,  4],
        [ 5,  6],
        [ 7,  8],
        [ 9, 10]]),
 array([[11, 12]]))

TEST ............


(array([1, 2, 3, 4, 5]), array([6]))

### Get the last element of the generator

In [11]:
X = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
    [7, 8],
    [9, 10],
    [11, 12],
])
y = np.array([1, 2, 3, 4, 5, 6])

tscv = TimeSeriesSplit(n_splits=5)

# split() is consumed lazily, so collect all folds into a list and keep
# only the final (train_index, test_index) pair.
train_index, test_index = list(tscv.split(X))[-1]
print("TRAIN:", train_index, "TEST:", test_index)
X_train = X[train_index]
X_test = X[test_index]
y_train = y[train_index]
y_test = y[test_index]

TRAIN: [0 1 2 3 4] TEST: [5]


### Splitting 3D array

In [12]:
# Labelled cube laid out as [SKU x LOC x DAY]; each entry names its own position.
X_3D = np.array([
    [
        ['S0-L0-D0', 'S0-L0-D1', 'S0-L0-D2', 'S0-L0-D3', 'S0-L0-D4'],
        ['S0-L1-D0', 'S0-L1-D1', 'S0-L1-D2', 'S0-L1-D3', 'S0-L1-D4'],
        ['S0-L2-D0', 'S0-L2-D1', 'S0-L2-D2', 'S0-L2-D3', 'S0-L2-D4'],
    ],
    [
        ['S1-L0-D0', 'S1-L0-D1', 'S1-L0-D2', 'S1-L0-D3', 'S1-L0-D4'],
        ['S1-L1-D0', 'S1-L1-D1', 'S1-L1-D2', 'S1-L1-D3', 'S1-L1-D4'],
        ['S1-L2-D0', 'S1-L2-D1', 'S1-L2-D2', 'S1-L2-D3', 'S1-L2-D4'],
    ],
])

# Size of each axis of the cube.
print('********** d_sku,d_loc,d_day **********')
d_sku, d_loc, d_day = X_3D.shape
d_sku, d_loc, d_day

# Handle one SKU at a time.
for s in range(d_sku):
    # The LOC x DAY slice belonging to this SKU.
    print('***** X_2D : LOC x DAY ******')
    X_2D = X_3D[s]
    X_2D.shape
    X_2D

    print('******* X_2D.T :  DAY x LOC *******')
    X_2D.T

    # One split per day after the first, so every test fold is a single day.
    n_splits = d_day - 1
    n_splits
    tscv = TimeSeriesSplit(n_splits=n_splits)

    # Split along days: hand the splitter the DAY x LOC view ...
    for train_index, test_index in tscv.split(X_2D.T):
        print("TRAIN:", train_index, "TEST:", test_index)
        # ... pick the days of each fold, then flip back to LOC x DAY.
        X_train = X_2D.T[train_index].T
        X_test = X_2D.T[test_index].T
        print('X_train: \n', X_train)
        print('X_test: \n', X_test)

********** d_sku,d_loc,d_day **********


(2, 3, 5)

***** X_2D : LOC x DAY ******


(3, 5)

array([['S0-L0-D0', 'S0-L0-D1', 'S0-L0-D2', 'S0-L0-D3', 'S0-L0-D4'],
       ['S0-L1-D0', 'S0-L1-D1', 'S0-L1-D2', 'S0-L1-D3', 'S0-L1-D4'],
       ['S0-L2-D0', 'S0-L2-D1', 'S0-L2-D2', 'S0-L2-D3', 'S0-L2-D4']],
      dtype='<U8')

******* X_2D.T :  DAY x LOC *******


array([['S0-L0-D0', 'S0-L1-D0', 'S0-L2-D0'],
       ['S0-L0-D1', 'S0-L1-D1', 'S0-L2-D1'],
       ['S0-L0-D2', 'S0-L1-D2', 'S0-L2-D2'],
       ['S0-L0-D3', 'S0-L1-D3', 'S0-L2-D3'],
       ['S0-L0-D4', 'S0-L1-D4', 'S0-L2-D4']], dtype='<U8')

4

TRAIN: [0] TEST: [1]
X_train: 
 [['S0-L0-D0']
 ['S0-L1-D0']
 ['S0-L2-D0']]
X_test: 
 [['S0-L0-D1']
 ['S0-L1-D1']
 ['S0-L2-D1']]
TRAIN: [0 1] TEST: [2]
X_train: 
 [['S0-L0-D0' 'S0-L0-D1']
 ['S0-L1-D0' 'S0-L1-D1']
 ['S0-L2-D0' 'S0-L2-D1']]
X_test: 
 [['S0-L0-D2']
 ['S0-L1-D2']
 ['S0-L2-D2']]
TRAIN: [0 1 2] TEST: [3]
X_train: 
 [['S0-L0-D0' 'S0-L0-D1' 'S0-L0-D2']
 ['S0-L1-D0' 'S0-L1-D1' 'S0-L1-D2']
 ['S0-L2-D0' 'S0-L2-D1' 'S0-L2-D2']]
X_test: 
 [['S0-L0-D3']
 ['S0-L1-D3']
 ['S0-L2-D3']]
TRAIN: [0 1 2 3] TEST: [4]
X_train: 
 [['S0-L0-D0' 'S0-L0-D1' 'S0-L0-D2' 'S0-L0-D3']
 ['S0-L1-D0' 'S0-L1-D1' 'S0-L1-D2' 'S0-L1-D3']
 ['S0-L2-D0' 'S0-L2-D1' 'S0-L2-D2' 'S0-L2-D3']]
X_test: 
 [['S0-L0-D4']
 ['S0-L1-D4']
 ['S0-L2-D4']]
***** X_2D : LOC x DAY ******


(3, 5)

array([['S1-L0-D0', 'S1-L0-D1', 'S1-L0-D2', 'S1-L0-D3', 'S1-L0-D4'],
       ['S1-L1-D0', 'S1-L1-D1', 'S1-L1-D2', 'S1-L1-D3', 'S1-L1-D4'],
       ['S1-L2-D0', 'S1-L2-D1', 'S1-L2-D2', 'S1-L2-D3', 'S1-L2-D4']],
      dtype='<U8')

******* X_2D.T :  DAY x LOC *******


array([['S1-L0-D0', 'S1-L1-D0', 'S1-L2-D0'],
       ['S1-L0-D1', 'S1-L1-D1', 'S1-L2-D1'],
       ['S1-L0-D2', 'S1-L1-D2', 'S1-L2-D2'],
       ['S1-L0-D3', 'S1-L1-D3', 'S1-L2-D3'],
       ['S1-L0-D4', 'S1-L1-D4', 'S1-L2-D4']], dtype='<U8')

4

TRAIN: [0] TEST: [1]
X_train: 
 [['S1-L0-D0']
 ['S1-L1-D0']
 ['S1-L2-D0']]
X_test: 
 [['S1-L0-D1']
 ['S1-L1-D1']
 ['S1-L2-D1']]
TRAIN: [0 1] TEST: [2]
X_train: 
 [['S1-L0-D0' 'S1-L0-D1']
 ['S1-L1-D0' 'S1-L1-D1']
 ['S1-L2-D0' 'S1-L2-D1']]
X_test: 
 [['S1-L0-D2']
 ['S1-L1-D2']
 ['S1-L2-D2']]
TRAIN: [0 1 2] TEST: [3]
X_train: 
 [['S1-L0-D0' 'S1-L0-D1' 'S1-L0-D2']
 ['S1-L1-D0' 'S1-L1-D1' 'S1-L1-D2']
 ['S1-L2-D0' 'S1-L2-D1' 'S1-L2-D2']]
X_test: 
 [['S1-L0-D3']
 ['S1-L1-D3']
 ['S1-L2-D3']]
TRAIN: [0 1 2 3] TEST: [4]
X_train: 
 [['S1-L0-D0' 'S1-L0-D1' 'S1-L0-D2' 'S1-L0-D3']
 ['S1-L1-D0' 'S1-L1-D1' 'S1-L1-D2' 'S1-L1-D3']
 ['S1-L2-D0' 'S1-L2-D1' 'S1-L2-D2' 'S1-L2-D3']]
X_test: 
 [['S1-L0-D4']
 ['S1-L1-D4']
 ['S1-L2-D4']]


### Make into a function
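- input: 2D `[SKU x STEP]` or 3D `[SKU x LOC x STEP]` array `X` (time on the last axis), number of splits `n_splits`
- return: `dict` of `dict`s, accessed as `result[str(sku)][str(split)]['X_train']` / `['X_test']`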

In [13]:
def split_it(d, train_test_splits, X_2D, tscv):
    """Store every train/test split of one slice under ``train_test_splits[str(d)]``.

    The last axis of ``X_2D`` is treated as the axis to split along: the array
    is transposed so that axis comes first, ``tscv.split`` produces index
    arrays over it, and every selection is transposed back so the stored
    pieces keep the orientation of ``X_2D``.

    Parameters
    ----------
    d : index of the slice being processed; ``str(d)`` is the outer key.
    train_test_splits : dict that is updated in place. An existing entry for
        ``str(d)`` is replaced.
    X_2D : array whose last axis is split. The name suggests a 2D array, but a
        1D array works too because transposing it changes nothing.
    tscv : splitter whose ``split(X)`` yields ``(train_index, test_index)`` pairs.

    Returns
    -------
    The same ``train_test_splits`` object, where ``[str(d)][str(k)]`` is
    ``{'X_train': ..., 'X_test': ...}`` for the k-th split.
    """
    # View with the split axis first, which is what gets indexed below.
    steps_first = X_2D.T

    # Register the (initially empty) inner dict before splitting starts.
    per_split = train_test_splits[str(d)] = {}

    for split_no, (train_index, test_index) in enumerate(tscv.split(steps_first)):
        print("TRAIN:", train_index, "TEST:", test_index)
        # Select the steps of each part, then restore the original orientation.
        per_split[str(split_no)] = {
            'X_train': steps_first[train_index].T,
            'X_test': steps_first[test_index].T,
        }

    return train_test_splits

def time_series_split(X, n_splits):
    '''
    Build every TimeSeriesSplit train/test pair for each first-axis slice of X.

    input:
        X: ndarray laid out as [SKU x LOC x STEP] (3D) or [SKU x STEP] (2D).
           The LAST axis is the one that gets split and the FIRST axis
           provides the outer dictionary keys, so data stored as
           [LOC x SKU x STEP] must be transposed to [SKU x LOC x STEP]
           before calling this function.
        n_splits: number of splits passed to TimeSeriesSplit.
    output:
        dict of dicts where result[str(i)][str(k)] is
        {'X_train': ..., 'X_test': ...} for first-axis slice i and split k.
    raises:
        ValueError if X is neither 2D nor 3D (such input previously produced
        an empty dict without any warning).
    '''
    # Reject unsupported ranks up front instead of silently returning {}.
    n_dims = len(X.shape)
    if n_dims not in (2, 3):
        raise ValueError(
            'X must be a 2D [SKU x STEP] or 3D [SKU x LOC x STEP] array, '
            'got an array with {} dimension(s)'.format(n_dims)
        )

    # Create a dict to hold the splits
    train_test_splits = {} # dict of dicts

    # Create a TimeSeriesSplit
    tscv = TimeSeriesSplit(n_splits=n_splits)

    # X[d] equals X[d, :, :] for 3D input and X[d, :] for 2D input, so a
    # single loop over the first axis serves both layouts.
    for d in range(X.shape[0]):
        if n_dims == 2:
            # Progress line that has always been printed for 2D input only;
            # kept so existing output is unchanged.
            print(d)
        train_test_splits = split_it(d=d, train_test_splits=train_test_splits, X_2D=X[d], tscv=tscv)

    return train_test_splits


#### use case 1: `[SKU, LOC, DAY]`

In [14]:
# Labelled cube laid out as [SKU x LOC x DAY].
X_3D = np.array([
    [
        ['S0-L0-D0', 'S0-L0-D1', 'S0-L0-D2', 'S0-L0-D3', 'S0-L0-D4'],
        ['S0-L1-D0', 'S0-L1-D1', 'S0-L1-D2', 'S0-L1-D3', 'S0-L1-D4'],
        ['S0-L2-D0', 'S0-L2-D1', 'S0-L2-D2', 'S0-L2-D3', 'S0-L2-D4'],
    ],
    [
        ['S1-L0-D0', 'S1-L0-D1', 'S1-L0-D2', 'S1-L0-D3', 'S1-L0-D4'],
        ['S1-L1-D0', 'S1-L1-D1', 'S1-L1-D2', 'S1-L1-D3', 'S1-L1-D4'],
        ['S1-L2-D0', 'S1-L2-D1', 'S1-L2-D2', 'S1-L2-D3', 'S1-L2-D4'],
    ],
])

X_3D.shape
X_3D

# One split per day after the first (the last axis is DAY).
n_splits = X_3D.shape[-1] - 1
n_splits

train_test_splits = time_series_split(X_3D, n_splits=n_splits)

(2, 3, 5)

array([[['S0-L0-D0', 'S0-L0-D1', 'S0-L0-D2', 'S0-L0-D3', 'S0-L0-D4'],
        ['S0-L1-D0', 'S0-L1-D1', 'S0-L1-D2', 'S0-L1-D3', 'S0-L1-D4'],
        ['S0-L2-D0', 'S0-L2-D1', 'S0-L2-D2', 'S0-L2-D3', 'S0-L2-D4']],

       [['S1-L0-D0', 'S1-L0-D1', 'S1-L0-D2', 'S1-L0-D3', 'S1-L0-D4'],
        ['S1-L1-D0', 'S1-L1-D1', 'S1-L1-D2', 'S1-L1-D3', 'S1-L1-D4'],
        ['S1-L2-D0', 'S1-L2-D1', 'S1-L2-D2', 'S1-L2-D3', 'S1-L2-D4']]],
      dtype='<U8')

4

TRAIN: [0] TEST: [1]
TRAIN: [0 1] TEST: [2]
TRAIN: [0 1 2] TEST: [3]
TRAIN: [0 1 2 3] TEST: [4]
TRAIN: [0] TEST: [1]
TRAIN: [0 1] TEST: [2]
TRAIN: [0 1 2] TEST: [3]
TRAIN: [0 1 2 3] TEST: [4]


In [15]:
#train_test_splits_dict_of_dicts

In [16]:
# Walk every (sku, split) pair and display its train and test arrays.
for sku in range(X_3D.shape[0]):
    for split in range(n_splits):
        sku, split
        fold = train_test_splits[str(sku)][str(split)]
        fold['X_train']
        fold['X_test']

(0, 0)

array([['S0-L0-D0'],
       ['S0-L1-D0'],
       ['S0-L2-D0']], dtype='<U8')

array([['S0-L0-D1'],
       ['S0-L1-D1'],
       ['S0-L2-D1']], dtype='<U8')

(0, 1)

array([['S0-L0-D0', 'S0-L0-D1'],
       ['S0-L1-D0', 'S0-L1-D1'],
       ['S0-L2-D0', 'S0-L2-D1']], dtype='<U8')

array([['S0-L0-D2'],
       ['S0-L1-D2'],
       ['S0-L2-D2']], dtype='<U8')

(0, 2)

array([['S0-L0-D0', 'S0-L0-D1', 'S0-L0-D2'],
       ['S0-L1-D0', 'S0-L1-D1', 'S0-L1-D2'],
       ['S0-L2-D0', 'S0-L2-D1', 'S0-L2-D2']], dtype='<U8')

array([['S0-L0-D3'],
       ['S0-L1-D3'],
       ['S0-L2-D3']], dtype='<U8')

(0, 3)

array([['S0-L0-D0', 'S0-L0-D1', 'S0-L0-D2', 'S0-L0-D3'],
       ['S0-L1-D0', 'S0-L1-D1', 'S0-L1-D2', 'S0-L1-D3'],
       ['S0-L2-D0', 'S0-L2-D1', 'S0-L2-D2', 'S0-L2-D3']], dtype='<U8')

array([['S0-L0-D4'],
       ['S0-L1-D4'],
       ['S0-L2-D4']], dtype='<U8')

(1, 0)

array([['S1-L0-D0'],
       ['S1-L1-D0'],
       ['S1-L2-D0']], dtype='<U8')

array([['S1-L0-D1'],
       ['S1-L1-D1'],
       ['S1-L2-D1']], dtype='<U8')

(1, 1)

array([['S1-L0-D0', 'S1-L0-D1'],
       ['S1-L1-D0', 'S1-L1-D1'],
       ['S1-L2-D0', 'S1-L2-D1']], dtype='<U8')

array([['S1-L0-D2'],
       ['S1-L1-D2'],
       ['S1-L2-D2']], dtype='<U8')

(1, 2)

array([['S1-L0-D0', 'S1-L0-D1', 'S1-L0-D2'],
       ['S1-L1-D0', 'S1-L1-D1', 'S1-L1-D2'],
       ['S1-L2-D0', 'S1-L2-D1', 'S1-L2-D2']], dtype='<U8')

array([['S1-L0-D3'],
       ['S1-L1-D3'],
       ['S1-L2-D3']], dtype='<U8')

(1, 3)

array([['S1-L0-D0', 'S1-L0-D1', 'S1-L0-D2', 'S1-L0-D3'],
       ['S1-L1-D0', 'S1-L1-D1', 'S1-L1-D2', 'S1-L1-D3'],
       ['S1-L2-D0', 'S1-L2-D1', 'S1-L2-D2', 'S1-L2-D3']], dtype='<U8')

array([['S1-L0-D4'],
       ['S1-L1-D4'],
       ['S1-L2-D4']], dtype='<U8')

In [17]:
# Direct lookup of one entry: sku '1', split '2'.
fold = train_test_splits['1']['2']
fold['X_train']
fold['X_test']

array([['S1-L0-D0', 'S1-L0-D1', 'S1-L0-D2'],
       ['S1-L1-D0', 'S1-L1-D1', 'S1-L1-D2'],
       ['S1-L2-D0', 'S1-L2-D1', 'S1-L2-D2']], dtype='<U8')

array([['S1-L0-D3'],
       ['S1-L1-D3'],
       ['S1-L2-D3']], dtype='<U8')

#### use case 2: `[LOC, SKU, DAY]`

In [26]:
# Labelled cube laid out as [LOC x SKU x DAY] this time.
X_3D = np.array([
    [
        ['L0-S0-D0', 'L0-S0-D1', 'L0-S0-D2', 'L0-S0-D3', 'L0-S0-D4'],
        ['L0-S1-D0', 'L0-S1-D1', 'L0-S1-D2', 'L0-S1-D3', 'L0-S1-D4'],
    ],
    [
        ['L1-S0-D0', 'L1-S0-D1', 'L1-S0-D2', 'L1-S0-D3', 'L1-S0-D4'],
        ['L1-S1-D0', 'L1-S1-D1', 'L1-S1-D2', 'L1-S1-D3', 'L1-S1-D4'],
    ],
    [
        ['L2-S0-D0', 'L2-S0-D1', 'L2-S0-D2', 'L2-S0-D3', 'L2-S0-D4'],
        ['L2-S1-D0', 'L2-S1-D1', 'L2-S1-D2', 'L2-S1-D3', 'L2-S1-D4'],
    ],
])

X_3D.shape
X_3D

# Swap the first two axes: [LOC x SKU x DAY] -> [SKU x LOC x DAY], so that the
# SKU index becomes the outer key of the result.
X_3DT = X_3D.transpose(1, 0, 2)
X_3DT.shape
X_3DT

# One split per day after the first (the last axis is DAY).
n_splits = X_3DT.shape[-1] - 1
n_splits

train_test_splits = time_series_split(X_3DT, n_splits=n_splits)

(3, 2, 5)

array([[['L0-S0-D0', 'L0-S0-D1', 'L0-S0-D2', 'L0-S0-D3', 'L0-S0-D4'],
        ['L0-S1-D0', 'L0-S1-D1', 'L0-S1-D2', 'L0-S1-D3', 'L0-S1-D4']],

       [['L1-S0-D0', 'L1-S0-D1', 'L1-S0-D2', 'L1-S0-D3', 'L1-S0-D4'],
        ['L1-S1-D0', 'L1-S1-D1', 'L1-S1-D2', 'L1-S1-D3', 'L1-S1-D4']],

       [['L2-S0-D0', 'L2-S0-D1', 'L2-S0-D2', 'L2-S0-D3', 'L2-S0-D4'],
        ['L2-S1-D0', 'L2-S1-D1', 'L2-S1-D2', 'L2-S1-D3', 'L2-S1-D4']]],
      dtype='<U8')

(2, 3, 5)

array([[['L0-S0-D0', 'L0-S0-D1', 'L0-S0-D2', 'L0-S0-D3', 'L0-S0-D4'],
        ['L1-S0-D0', 'L1-S0-D1', 'L1-S0-D2', 'L1-S0-D3', 'L1-S0-D4'],
        ['L2-S0-D0', 'L2-S0-D1', 'L2-S0-D2', 'L2-S0-D3', 'L2-S0-D4']],

       [['L0-S1-D0', 'L0-S1-D1', 'L0-S1-D2', 'L0-S1-D3', 'L0-S1-D4'],
        ['L1-S1-D0', 'L1-S1-D1', 'L1-S1-D2', 'L1-S1-D3', 'L1-S1-D4'],
        ['L2-S1-D0', 'L2-S1-D1', 'L2-S1-D2', 'L2-S1-D3', 'L2-S1-D4']]],
      dtype='<U8')

4

TRAIN: [0] TEST: [1]
TRAIN: [0 1] TEST: [2]
TRAIN: [0 1 2] TEST: [3]
TRAIN: [0 1 2 3] TEST: [4]
TRAIN: [0] TEST: [1]
TRAIN: [0 1] TEST: [2]
TRAIN: [0 1 2] TEST: [3]
TRAIN: [0 1 2 3] TEST: [4]


In [28]:
# Show the whole nested result: outer key = first-axis (SKU) index,
# inner key = split number, innermost keys = 'X_train' / 'X_test'.
train_test_splits

{'0': {'0': {'X_train': array([['L0-S0-D0'],
          ['L1-S0-D0'],
          ['L2-S0-D0']], dtype='<U8'),
   'X_test': array([['L0-S0-D1'],
          ['L1-S0-D1'],
          ['L2-S0-D1']], dtype='<U8')},
  '1': {'X_train': array([['L0-S0-D0', 'L0-S0-D1'],
          ['L1-S0-D0', 'L1-S0-D1'],
          ['L2-S0-D0', 'L2-S0-D1']], dtype='<U8'),
   'X_test': array([['L0-S0-D2'],
          ['L1-S0-D2'],
          ['L2-S0-D2']], dtype='<U8')},
  '2': {'X_train': array([['L0-S0-D0', 'L0-S0-D1', 'L0-S0-D2'],
          ['L1-S0-D0', 'L1-S0-D1', 'L1-S0-D2'],
          ['L2-S0-D0', 'L2-S0-D1', 'L2-S0-D2']], dtype='<U8'),
   'X_test': array([['L0-S0-D3'],
          ['L1-S0-D3'],
          ['L2-S0-D3']], dtype='<U8')},
  '3': {'X_train': array([['L0-S0-D0', 'L0-S0-D1', 'L0-S0-D2', 'L0-S0-D3'],
          ['L1-S0-D0', 'L1-S0-D1', 'L1-S0-D2', 'L1-S0-D3'],
          ['L2-S0-D0', 'L2-S0-D1', 'L2-S0-D2', 'L2-S0-D3']], dtype='<U8'),
   'X_test': array([['L0-S0-D4'],
          ['L1-S0-D4'],
          ['L2-

In [31]:
# X_3D is still [LOC x SKU x DAY] here, so the SKU count sits on axis 1.
for sku in range(X_3D.shape[1]):
    for split in range(n_splits):
        sku, split
        fold = train_test_splits[str(sku)][str(split)]
        print('X_train shape: ', fold['X_train'].shape) # [LOC x DAY]
        fold['X_train']
        print('X_test shape: ', fold['X_test'].shape) # [LOC x DAY]
        fold['X_test']

(0, 0)

X_train shape:  (3, 1)


array([['L0-S0-D0'],
       ['L1-S0-D0'],
       ['L2-S0-D0']], dtype='<U8')

X_test shape:  (3, 1)


array([['L0-S0-D1'],
       ['L1-S0-D1'],
       ['L2-S0-D1']], dtype='<U8')

(0, 1)

X_train shape:  (3, 2)


array([['L0-S0-D0', 'L0-S0-D1'],
       ['L1-S0-D0', 'L1-S0-D1'],
       ['L2-S0-D0', 'L2-S0-D1']], dtype='<U8')

X_test shape:  (3, 1)


array([['L0-S0-D2'],
       ['L1-S0-D2'],
       ['L2-S0-D2']], dtype='<U8')

(0, 2)

X_train shape:  (3, 3)


array([['L0-S0-D0', 'L0-S0-D1', 'L0-S0-D2'],
       ['L1-S0-D0', 'L1-S0-D1', 'L1-S0-D2'],
       ['L2-S0-D0', 'L2-S0-D1', 'L2-S0-D2']], dtype='<U8')

X_test shape:  (3, 1)


array([['L0-S0-D3'],
       ['L1-S0-D3'],
       ['L2-S0-D3']], dtype='<U8')

(0, 3)

X_train shape:  (3, 4)


array([['L0-S0-D0', 'L0-S0-D1', 'L0-S0-D2', 'L0-S0-D3'],
       ['L1-S0-D0', 'L1-S0-D1', 'L1-S0-D2', 'L1-S0-D3'],
       ['L2-S0-D0', 'L2-S0-D1', 'L2-S0-D2', 'L2-S0-D3']], dtype='<U8')

X_test shape:  (3, 1)


array([['L0-S0-D4'],
       ['L1-S0-D4'],
       ['L2-S0-D4']], dtype='<U8')

(1, 0)

X_train shape:  (3, 1)


array([['L0-S1-D0'],
       ['L1-S1-D0'],
       ['L2-S1-D0']], dtype='<U8')

X_test shape:  (3, 1)


array([['L0-S1-D1'],
       ['L1-S1-D1'],
       ['L2-S1-D1']], dtype='<U8')

(1, 1)

X_train shape:  (3, 2)


array([['L0-S1-D0', 'L0-S1-D1'],
       ['L1-S1-D0', 'L1-S1-D1'],
       ['L2-S1-D0', 'L2-S1-D1']], dtype='<U8')

X_test shape:  (3, 1)


array([['L0-S1-D2'],
       ['L1-S1-D2'],
       ['L2-S1-D2']], dtype='<U8')

(1, 2)

X_train shape:  (3, 3)


array([['L0-S1-D0', 'L0-S1-D1', 'L0-S1-D2'],
       ['L1-S1-D0', 'L1-S1-D1', 'L1-S1-D2'],
       ['L2-S1-D0', 'L2-S1-D1', 'L2-S1-D2']], dtype='<U8')

X_test shape:  (3, 1)


array([['L0-S1-D3'],
       ['L1-S1-D3'],
       ['L2-S1-D3']], dtype='<U8')

(1, 3)

X_train shape:  (3, 4)


array([['L0-S1-D0', 'L0-S1-D1', 'L0-S1-D2', 'L0-S1-D3'],
       ['L1-S1-D0', 'L1-S1-D1', 'L1-S1-D2', 'L1-S1-D3'],
       ['L2-S1-D0', 'L2-S1-D1', 'L2-S1-D2', 'L2-S1-D3']], dtype='<U8')

X_test shape:  (3, 1)


array([['L0-S1-D4'],
       ['L1-S1-D4'],
       ['L2-S1-D4']], dtype='<U8')

In [21]:
# Direct lookup of one entry: sku '0', split '3' (the last split).
fold = train_test_splits['0']['3']
fold['X_train']
fold['X_test']

array([['L0-S0-D0', 'L0-S0-D1', 'L0-S0-D2', 'L0-S0-D3'],
       ['L1-S0-D0', 'L1-S0-D1', 'L1-S0-D2', 'L1-S0-D3'],
       ['L2-S0-D0', 'L2-S0-D1', 'L2-S0-D2', 'L2-S0-D3']], dtype='<U8')

array([['L0-S0-D4'],
       ['L1-S0-D4'],
       ['L2-S0-D4']], dtype='<U8')

#### use case 3: `[SKU, DAY]`

In [22]:
# Labelled [SKU x DAY] grid: 3 SKUs by 5 days, each entry naming its position.
X_2D = np.array([
    ['S{}-D{}'.format(sku_no, day_no) for day_no in range(5)]
    for sku_no in range(3)
])
X_2D.shape
X_2D

(3, 5)

array([['S0-D0', 'S0-D1', 'S0-D2', 'S0-D3', 'S0-D4'],
       ['S1-D0', 'S1-D1', 'S1-D2', 'S1-D3', 'S1-D4'],
       ['S2-D0', 'S2-D1', 'S2-D2', 'S2-D3', 'S2-D4']], dtype='<U5')

In [23]:
# Two splits per SKU row of the [SKU x DAY] grid.
n_splits = 2
train_test_splits = time_series_split(X_2D, n_splits)

0
TRAIN: [0 1 2] TEST: [3]
TRAIN: [0 1 2 3] TEST: [4]
1
TRAIN: [0 1 2] TEST: [3]
TRAIN: [0 1 2 3] TEST: [4]
2
TRAIN: [0 1 2] TEST: [3]
TRAIN: [0 1 2 3] TEST: [4]


In [24]:
#train_test_splits_dict_of_dicts

In [25]:
# Walk every (sku, split) pair; show the shape and contents of both parts.
for sku in range(X_2D.shape[0]):
    for split in range(n_splits):
        sku, split
        fold = train_test_splits[str(sku)][str(split)]
        fold['X_train'].shape
        fold['X_train']
        fold['X_test'].shape
        fold['X_test']

(0, 0)

(3,)

array(['S0-D0', 'S0-D1', 'S0-D2'], dtype='<U5')

(1,)

array(['S0-D3'], dtype='<U5')

(0, 1)

(4,)

array(['S0-D0', 'S0-D1', 'S0-D2', 'S0-D3'], dtype='<U5')

(1,)

array(['S0-D4'], dtype='<U5')

(1, 0)

(3,)

array(['S1-D0', 'S1-D1', 'S1-D2'], dtype='<U5')

(1,)

array(['S1-D3'], dtype='<U5')

(1, 1)

(4,)

array(['S1-D0', 'S1-D1', 'S1-D2', 'S1-D3'], dtype='<U5')

(1,)

array(['S1-D4'], dtype='<U5')

(2, 0)

(3,)

array(['S2-D0', 'S2-D1', 'S2-D2'], dtype='<U5')

(1,)

array(['S2-D3'], dtype='<U5')

(2, 1)

(4,)

array(['S2-D0', 'S2-D1', 'S2-D2', 'S2-D3'], dtype='<U5')

(1,)

array(['S2-D4'], dtype='<U5')