In [1]:
from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every bare expression statement in a cell, not only the last one,
# so the cells below can show several results (e.g. `X.shape` followed by `X`) at once.
InteractiveShell.ast_node_interactivity = "all"

In [2]:
import numpy as np
from sklearn.model_selection import TimeSeriesSplit

In [3]:
# Toy data: six time-ordered samples with two features each, and one target per sample.
X = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
    [7, 8],
    [9, 10],
    [11, 12],
])
y = np.array([1, 2, 3, 4, 5, 6])

# Five splits over six samples: the training window grows by one sample per
# split and each test window holds exactly one sample.
tscv = TimeSeriesSplit(n_splits=5)

for train_index, test_index in tscv.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)
    # Index features and targets with the same positions so they stay aligned.
    X_train = X[train_index]
    X_test = X[test_index]
    y_train = y[train_index]
    y_test = y[test_index]

TRAIN: [0] TEST: [1]
TRAIN: [0 1] TEST: [2]
TRAIN: [0 1 2] TEST: [3]
TRAIN: [0 1 2 3] TEST: [4]
TRAIN: [0 1 2 3 4] TEST: [5]


In [4]:
# Same data, one split fewer: the first training window now starts with two samples.
tscv = TimeSeriesSplit(n_splits=4)

for train_index, test_index in tscv.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)
    # Build the training pair first, then the held-out pair.
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

TRAIN: [0 1] TEST: [2]
TRAIN: [0 1 2] TEST: [3]
TRAIN: [0 1 2 3] TEST: [4]
TRAIN: [0 1 2 3 4] TEST: [5]


In [5]:
# Three splits: the first training window starts with three samples.
tscv = TimeSeriesSplit(n_splits=3)

for train_index, test_index in tscv.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)
    # Apply both index arrays to the features, then to the targets.
    X_train, X_test = (X[rows] for rows in (train_index, test_index))
    y_train, y_test = (y[rows] for rows in (train_index, test_index))

TRAIN: [0 1 2] TEST: [3]
TRAIN: [0 1 2 3] TEST: [4]
TRAIN: [0 1 2 3 4] TEST: [5]


In [6]:
# Two splits over six samples: each test window now holds two samples.
tscv = TimeSeriesSplit(n_splits=2)

for train_index, test_index in tscv.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)
    # Features for this split ...
    X_train = X[train_index]
    X_test = X[test_index]
    # ... and the matching targets.
    y_train = y[train_index]
    y_test = y[test_index]

TRAIN: [0 1] TEST: [2 3]
TRAIN: [0 1 2 3] TEST: [4 5]


### Tutorial

In [7]:
# Rebuild the toy data: row i (1-based) of X is [2*i - 1, 2*i], and y counts 1..6.
X = np.array([[2 * i - 1, 2 * i] for i in range(1, 7)])
y = np.array(range(1, 7))

# Show shape and contents of the features, then of the targets.
X.shape
X
y.shape
y

(6, 2)

array([[ 1,  2],
       [ 3,  4],
       [ 5,  6],
       [ 7,  8],
       [ 9, 10],
       [11, 12]])

(6,)

array([1, 2, 3, 4, 5, 6])

### `n_splits=2`

In [8]:
# Two-split walkthrough that also shows the arrays produced by each split.
tscv = TimeSeriesSplit(n_splits=2)
print(tscv)

for train_index, test_index in tscv.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)

    # NOTE: this banner introduces the feature arrays (X_train and X_test).
    print("TRAIN ............")
    X_train = X[train_index]
    X_test = X[test_index]
    (X_train.shape, X_test.shape)
    (X_train, X_test)

    # NOTE: this banner introduces the target arrays (y_train and y_test).
    print("TEST ............")
    y_train = y[train_index]
    y_test = y[test_index]
    (y_train.shape, y_test.shape)
    (y_train, y_test)

TimeSeriesSplit(max_train_size=None, n_splits=2)
TRAIN: [0 1] TEST: [2 3]
TRAIN ............


((2, 2), (2, 2))

(array([[1, 2],
        [3, 4]]),
 array([[5, 6],
        [7, 8]]))

TEST ............


((2,), (2,))

(array([1, 2]), array([3, 4]))

TRAIN: [0 1 2 3] TEST: [4 5]
TRAIN ............


((4, 2), (2, 2))

(array([[1, 2],
        [3, 4],
        [5, 6],
        [7, 8]]),
 array([[ 9, 10],
        [11, 12]]))

TEST ............


((4,), (2,))

(array([1, 2, 3, 4]), array([5, 6]))

### `n_splits=5`

In [9]:
# Five-split walkthrough; only the array contents are shown, not their shapes.
tscv = TimeSeriesSplit(n_splits=5)
print(tscv)

for train_index, test_index in tscv.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)

    # NOTE: this banner introduces the feature arrays (X_train and X_test).
    print("TRAIN ............")
    X_train, X_test = (X[rows] for rows in (train_index, test_index))
    (X_train, X_test)

    # NOTE: this banner introduces the target arrays (y_train and y_test).
    print("TEST ............")
    y_train, y_test = (y[rows] for rows in (train_index, test_index))
    (y_train, y_test)

TimeSeriesSplit(max_train_size=None, n_splits=5)
TRAIN: [0] TEST: [1]
TRAIN ............


(array([[1, 2]]), array([[3, 4]]))

TEST ............


(array([1]), array([2]))

TRAIN: [0 1] TEST: [2]
TRAIN ............


(array([[1, 2],
        [3, 4]]),
 array([[5, 6]]))

TEST ............


(array([1, 2]), array([3]))

TRAIN: [0 1 2] TEST: [3]
TRAIN ............


(array([[1, 2],
        [3, 4],
        [5, 6]]),
 array([[7, 8]]))

TEST ............


(array([1, 2, 3]), array([4]))

TRAIN: [0 1 2 3] TEST: [4]
TRAIN ............


(array([[1, 2],
        [3, 4],
        [5, 6],
        [7, 8]]),
 array([[ 9, 10]]))

TEST ............


(array([1, 2, 3, 4]), array([5]))

TRAIN: [0 1 2 3 4] TEST: [5]
TRAIN ............


(array([[ 1,  2],
        [ 3,  4],
        [ 5,  6],
        [ 7,  8],
        [ 9, 10]]),
 array([[11, 12]]))

TEST ............


(array([1, 2, 3, 4, 5]), array([6]))

### `n_splits=3`

In [10]:
# Three-split walkthrough; only the array contents are shown, not their shapes.
tscv = TimeSeriesSplit(n_splits=3)
print(tscv)

for train_index, test_index in tscv.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)

    # NOTE: this banner introduces the feature arrays (X_train and X_test).
    print("TRAIN ............")
    X_train = X[train_index]
    X_test = X[test_index]
    (X_train, X_test)

    # NOTE: this banner introduces the target arrays (y_train and y_test).
    print("TEST ............")
    y_train = y[train_index]
    y_test = y[test_index]
    (y_train, y_test)

TimeSeriesSplit(max_train_size=None, n_splits=3)
TRAIN: [0 1 2] TEST: [3]
TRAIN ............


(array([[1, 2],
        [3, 4],
        [5, 6]]),
 array([[7, 8]]))

TEST ............


(array([1, 2, 3]), array([4]))

TRAIN: [0 1 2 3] TEST: [4]
TRAIN ............


(array([[1, 2],
        [3, 4],
        [5, 6],
        [7, 8]]),
 array([[ 9, 10]]))

TEST ............


(array([1, 2, 3, 4]), array([5]))

TRAIN: [0 1 2 3 4] TEST: [5]
TRAIN ............


(array([[ 1,  2],
        [ 3,  4],
        [ 5,  6],
        [ 7,  8],
        [ 9, 10]]),
 array([[11, 12]]))

TEST ............


(array([1, 2, 3, 4, 5]), array([6]))

### Get the last split from the generator

In [11]:
# Same toy data as in the earlier cells.
X = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
    [7, 8],
    [9, 10],
    [11, 12],
])
y = np.array([1, 2, 3, 4, 5, 6])

tscv = TimeSeriesSplit(n_splits=5)

# split() yields its (train, test) pairs lazily and cannot be indexed, so
# unpack it into a list and keep a one-element slice holding only the final
# pair (the split with the largest training window).
for train_index, test_index in [*tscv.split(X)][-1:]:
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

TRAIN: [0 1 2 3 4] TEST: [5]


### Splitting 3D array

In [57]:
# Axis lengths for a toy [LOC x SKU x DAY] cube.
SKU, LOC, DAY = 2, 3, 5

# Seed the global RNG so the random digits (0-9) are reproducible.
np.random.seed(8)
X_3D = np.random.randint(10, size=(LOC, SKU, DAY))
X_3D.shape
X_3D

# Read the three axis lengths back from the array.
d_loc, d_sku, d_day = X_3D.shape

# Swap the first two axes: [LOC x SKU x DAY] -> [SKU x LOC x DAY].
print('********** [SKU X LOC X DAY] **************')
X_3DT = X_3D.transpose(1, 0, 2)
X_3DT.shape
X_3DT

# Handle one SKU at a time.
for s in range(d_sku):
    # Take this SKU's [LOC x DAY] slab and flip it to [DAY x LOC], because
    # TimeSeriesSplit splits along the first axis.
    print('***** [DAY x LOC] *******')
    X_2D = X_3DT[s].T
    X_2D.shape
    X_2D

    # One split fewer than there are days -> single-day test windows.
    n_splits = d_day - 1
    n_splits
    tscv = TimeSeriesSplit(n_splits=n_splits)

    # Iterate over the splits of the [DAY x LOC] matrix.
    for train_index, test_index in tscv.split(X_2D):
        print("TRAIN:", train_index, "TEST:", test_index)
        # Select the days, then transpose back to [LOC x DAY].
        X_train = X_2D[train_index].T
        X_test = X_2D[test_index].T
        (X_train.shape, X_test.shape)
        print('X_train: \n', X_train)
        print('X_test: \n', X_test)

(3, 2, 5)

array([[[3, 4, 1, 9, 5],
        [8, 3, 8, 0, 5]],

       [[1, 3, 9, 2, 2],
        [6, 8, 9, 3, 4]],

       [[5, 5, 7, 9, 2],
        [6, 9, 5, 1, 6]]])

********** [SKU X LOC X DAY] **************


(2, 3, 5)

array([[[3, 4, 1, 9, 5],
        [1, 3, 9, 2, 2],
        [5, 5, 7, 9, 2]],

       [[8, 3, 8, 0, 5],
        [6, 8, 9, 3, 4],
        [6, 9, 5, 1, 6]]])

***** [DAY x LOC] *******


(5, 3)

array([[3, 1, 5],
       [4, 3, 5],
       [1, 9, 7],
       [9, 2, 9],
       [5, 2, 2]])

4

TRAIN: [0] TEST: [1]


((3, 1), (3, 1))

X_train: 
 [[3]
 [1]
 [5]]
X_test: 
 [[4]
 [3]
 [5]]
TRAIN: [0 1] TEST: [2]


((3, 2), (3, 1))

X_train: 
 [[3 4]
 [1 3]
 [5 5]]
X_test: 
 [[1]
 [9]
 [7]]
TRAIN: [0 1 2] TEST: [3]


((3, 3), (3, 1))

X_train: 
 [[3 4 1]
 [1 3 9]
 [5 5 7]]
X_test: 
 [[9]
 [2]
 [9]]
TRAIN: [0 1 2 3] TEST: [4]


((3, 4), (3, 1))

X_train: 
 [[3 4 1 9]
 [1 3 9 2]
 [5 5 7 9]]
X_test: 
 [[5]
 [2]
 [2]]
***** [DAY x LOC] *******


(5, 3)

array([[8, 6, 6],
       [3, 8, 9],
       [8, 9, 5],
       [0, 3, 1],
       [5, 4, 6]])

4

TRAIN: [0] TEST: [1]


((3, 1), (3, 1))

X_train: 
 [[8]
 [6]
 [6]]
X_test: 
 [[3]
 [8]
 [9]]
TRAIN: [0 1] TEST: [2]


((3, 2), (3, 1))

X_train: 
 [[8 3]
 [6 8]
 [6 9]]
X_test: 
 [[8]
 [9]
 [5]]
TRAIN: [0 1 2] TEST: [3]


((3, 3), (3, 1))

X_train: 
 [[8 3 8]
 [6 8 9]
 [6 9 5]]
X_test: 
 [[0]
 [3]
 [1]]
TRAIN: [0 1 2 3] TEST: [4]


((3, 4), (3, 1))

X_train: 
 [[8 3 8 0]
 [6 8 9 3]
 [6 9 5 1]]
X_test: 
 [[5]
 [4]
 [6]]


In [57]:
# NOTE(review): this cell repeats the 3D-splitting walkthrough of the cell
# directly above it (same code, same execution count); consider keeping only one.

# Axis lengths for a toy [LOC x SKU x DAY] cube.
SKU = 2
LOC = 3
DAY = 5

# Seed the global RNG so the random digits (0-9) are reproducible.
np.random.seed(8)
X_3D = np.random.randint(10, size=(LOC, SKU, DAY))
X_3D.shape
X_3D

# Read the three axis lengths back from the array.
d_loc, d_sku, d_day = X_3D.shape

# Reorder the axes from [LOC x SKU x DAY] to [SKU x LOC x DAY].
print('********** [SKU X LOC X DAY] **************')
X_3DT = np.transpose(X_3D, axes=(1, 0, 2))
X_3DT.shape
X_3DT

# Handle one SKU at a time.
for s in range(d_sku):
    # This SKU's [LOC x DAY] slab, flipped to [DAY x LOC] so that days are
    # the axis being split.
    print('***** [DAY x LOC] *******')
    X_2D = X_3DT[s, ...].T
    X_2D.shape
    X_2D

    # One split fewer than there are days -> single-day test windows.
    n_splits = d_day - 1
    n_splits
    tscv = TimeSeriesSplit(n_splits=n_splits)

    for train_index, test_index in tscv.split(X_2D):
        print("TRAIN:", train_index, "TEST:", test_index)
        # Select the days for each side, then transpose back to [LOC x DAY].
        X_train, X_test = (X_2D[days].T for days in (train_index, test_index))
        (X_train.shape, X_test.shape)
        print('X_train: \n', X_train)
        print('X_test: \n', X_test)

(3, 2, 5)

array([[[3, 4, 1, 9, 5],
        [8, 3, 8, 0, 5]],

       [[1, 3, 9, 2, 2],
        [6, 8, 9, 3, 4]],

       [[5, 5, 7, 9, 2],
        [6, 9, 5, 1, 6]]])

********** [SKU X LOC X DAY] **************


(2, 3, 5)

array([[[3, 4, 1, 9, 5],
        [1, 3, 9, 2, 2],
        [5, 5, 7, 9, 2]],

       [[8, 3, 8, 0, 5],
        [6, 8, 9, 3, 4],
        [6, 9, 5, 1, 6]]])

***** [DAY x LOC] *******


(5, 3)

array([[3, 1, 5],
       [4, 3, 5],
       [1, 9, 7],
       [9, 2, 9],
       [5, 2, 2]])

4

TRAIN: [0] TEST: [1]


((3, 1), (3, 1))

X_train: 
 [[3]
 [1]
 [5]]
X_test: 
 [[4]
 [3]
 [5]]
TRAIN: [0 1] TEST: [2]


((3, 2), (3, 1))

X_train: 
 [[3 4]
 [1 3]
 [5 5]]
X_test: 
 [[1]
 [9]
 [7]]
TRAIN: [0 1 2] TEST: [3]


((3, 3), (3, 1))

X_train: 
 [[3 4 1]
 [1 3 9]
 [5 5 7]]
X_test: 
 [[9]
 [2]
 [9]]
TRAIN: [0 1 2 3] TEST: [4]


((3, 4), (3, 1))

X_train: 
 [[3 4 1 9]
 [1 3 9 2]
 [5 5 7 9]]
X_test: 
 [[5]
 [2]
 [2]]
***** [DAY x LOC] *******


(5, 3)

array([[8, 6, 6],
       [3, 8, 9],
       [8, 9, 5],
       [0, 3, 1],
       [5, 4, 6]])

4

TRAIN: [0] TEST: [1]


((3, 1), (3, 1))

X_train: 
 [[8]
 [6]
 [6]]
X_test: 
 [[3]
 [8]
 [9]]
TRAIN: [0 1] TEST: [2]


((3, 2), (3, 1))

X_train: 
 [[8 3]
 [6 8]
 [6 9]]
X_test: 
 [[8]
 [9]
 [5]]
TRAIN: [0 1 2] TEST: [3]


((3, 3), (3, 1))

X_train: 
 [[8 3 8]
 [6 8 9]
 [6 9 5]]
X_test: 
 [[0]
 [3]
 [1]]
TRAIN: [0 1 2 3] TEST: [4]


((3, 4), (3, 1))

X_train: 
 [[8 3 8 0]
 [6 8 9 3]
 [6 9 5 1]]
X_test: 
 [[5]
 [4]
 [6]]


In [59]:
# The seeded random cube from the cells above, written out by hand.
# NOTE: this rebinds X_3D to a plain nested Python list (no `.shape`),
# not a NumPy array.
X_3D = [
    [[3, 4, 1, 9, 5], [8, 3, 8, 0, 5]],
    [[1, 3, 9, 2, 2], [6, 8, 9, 3, 4]],
    [[5, 5, 7, 9, 2], [6, 9, 5, 1, 6]],
]

X_3D

[[[3, 4, 1, 9, 5], [8, 3, 8, 0, 5]],
 [[1, 3, 9, 2, 2], [6, 8, 9, 3, 4]],
 [[5, 5, 7, 9, 2], [6, 9, 5, 1, 6]]]

### Make into a function
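- input: 3D array `X_3D` and the number of splits `n_splits`
- returns: a nested `dict` keyed first by the index along the first axis, then by split number; each entry is a `dict` holding the `X_train` and `X_test` arrays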

In [270]:
def timeseriessplit_3d(X_3D, n_splits, verbose=True):
    """Apply ``TimeSeriesSplit`` along the last axis of every 2D slab of a 3D array.

    The array is walked along its first axis (e.g. one slab per SKU). Each
    slab is split along its last axis (e.g. DAY), and the train/test pieces
    are returned in the slab's original orientation (e.g. LOC x DAY).

    Parameters
    ----------
    X_3D : array-like with three dimensions
        Input data. Anything ``np.asarray`` can convert is accepted, so a
        nested list works as well as an ``ndarray``.
    n_splits : int
        Number of splits, passed straight to ``TimeSeriesSplit``.
    verbose : bool, default True
        When true, print the train/test indices of every split (the original
        behaviour). Pass ``False`` to use the function silently.

    Returns
    -------
    dict
        Nested dictionary ``result[str(d)][str(i)]`` where ``d`` is the index
        along the first axis and ``i`` the split number. Each entry is
        ``{'X_train': ndarray, 'X_test': ndarray}``.

    Raises
    ------
    ValueError
        If ``X_3D`` does not have exactly three dimensions.
    """
    X_3D = np.asarray(X_3D)
    if X_3D.ndim != 3:
        raise ValueError(
            "X_3D must have exactly 3 dimensions, got %d" % X_3D.ndim
        )

    # The splitter only stores its settings, so one instance serves every slab.
    tscv = TimeSeriesSplit(n_splits=n_splits)

    train_test_splits = {}  # first-axis index -> split number -> train/test arrays

    for d, X_2D in enumerate(X_3D):
        # TimeSeriesSplit splits along the first axis, so put time there once.
        time_major = X_2D.T

        splits_for_slab = {}
        for i, (train_index, test_index) in enumerate(tscv.split(time_major)):
            if verbose:
                print("TRAIN:", train_index, "TEST:", test_index)

            # Select the time steps, then transpose back to the slab's layout.
            splits_for_slab[str(i)] = {
                'X_train': time_major[train_index].T,
                'X_test': time_major[test_index].T,
            }

        train_test_splits[str(d)] = splits_for_slab

    return train_test_splits

In [277]:
# Axis lengths for a toy [SKU x LOC x DAY] cube.
SKU, LOC, DAY = 2, 3, 5

# Seed the global RNG so the random digits (0-9) are reproducible.
np.random.seed(8)
X_3D = np.random.randint(10, size=(SKU, LOC, DAY))
X_3D.shape
X_3D

# One split fewer than the length of the last (DAY) axis -> single-day test windows.
n_splits = X_3D.shape[-1] - 1
n_splits

# Split every first-axis slab; the result is keyed by slab index, then split number.
train_test_splits_dict_of_dicts = timeseriessplit_3d(X_3D, n_splits=n_splits)

(2, 3, 5)

array([[[3, 4, 1, 9, 5],
        [8, 3, 8, 0, 5],
        [1, 3, 9, 2, 2]],

       [[6, 8, 9, 3, 4],
        [5, 5, 7, 9, 2],
        [6, 9, 5, 1, 6]]])

4

TRAIN: [0] TEST: [1]
TRAIN: [0 1] TEST: [2]
TRAIN: [0 1 2] TEST: [3]
TRAIN: [0 1 2 3] TEST: [4]
TRAIN: [0] TEST: [1]
TRAIN: [0 1] TEST: [2]
TRAIN: [0 1 2] TEST: [3]
TRAIN: [0 1 2 3] TEST: [4]


In [278]:
# Show the whole nested result: outer key = index along the first axis,
# inner key = split number, innermost keys = 'X_train' / 'X_test'.
train_test_splits_dict_of_dicts

{'0': {'0': {'X_train': array([[3],
          [8],
          [1]]),
   'X_test': array([[4],
          [3],
          [3]])},
  '1': {'X_train': array([[3, 4],
          [8, 3],
          [1, 3]]),
   'X_test': array([[1],
          [8],
          [9]])},
  '2': {'X_train': array([[3, 4, 1],
          [8, 3, 8],
          [1, 3, 9]]),
   'X_test': array([[9],
          [0],
          [2]])},
  '3': {'X_train': array([[3, 4, 1, 9],
          [8, 3, 8, 0],
          [1, 3, 9, 2]]),
   'X_test': array([[5],
          [5],
          [2]])}},
 '1': {'0': {'X_train': array([[6],
          [5],
          [6]]),
   'X_test': array([[8],
          [5],
          [9]])},
  '1': {'X_train': array([[6, 8],
          [5, 5],
          [6, 9]]),
   'X_test': array([[9],
          [7],
          [5]])},
  '2': {'X_train': array([[6, 8, 9],
          [5, 5, 7],
          [6, 9, 5]]),
   'X_test': array([[3],
          [9],
          [1]])},
  '3': {'X_train': array([[6, 8, 9, 3],
          [5, 5, 7, 9

In [279]:
# Look up a single entry: first-axis index '0', split '3'. All keys are strings.
train_test_splits_dict_of_dicts['0']['3']['X_train']  # sku, split, X_train
train_test_splits_dict_of_dicts['0']['3']['X_test']  # sku, split, X_test

array([[3, 4, 1, 9],
       [8, 3, 8, 0],
       [1, 3, 9, 2]])

array([[5],
       [5],
       [2]])

In [30]:
# Scratch check of the axis swap. This cell sets no seed, so the values
# depend on whatever state the global RNG is in when it runs.
x = np.random.randint(10, size=(2, 3, 5))
x.shape
x

# Exchange the first two axes (2 x 3 x 5 -> 3 x 2 x 5); the last axis stays put.
xT = x.transpose(1, 0, 2)
xT.shape
xT

(2, 3, 5)

array([[[7, 2, 8, 6, 5],
        [2, 3, 3, 8, 7],
        [9, 7, 3, 8, 0]],

       [[7, 3, 0, 9, 0],
        [1, 2, 6, 7, 3],
        [8, 2, 9, 6, 5]]])

(3, 2, 5)

array([[[7, 2, 8, 6, 5],
        [7, 3, 0, 9, 0]],

       [[2, 3, 3, 8, 7],
        [1, 2, 6, 7, 3]],

       [[9, 7, 3, 8, 0],
        [8, 2, 9, 6, 5]]])