In [1]:
# Suppress every warning for the rest of the session
# (presumably to keep the example output below uncluttered -- confirm).
import warnings; warnings.filterwarnings('ignore')
import oxyba as ox

### Example 1
* one input argument `data`. Thus, only 3 output args
* 60/20/20 split with `test_size=0.2`
* no random draws, `shuffle=False`

In [2]:
# Toy dataset: the integers 0..9 in ascending order.
data = list(range(10))

# Unshuffled three-way partition; test_size=0.2 yields the 60/20/20 split.
train, valid, test = ox.threeway_split(data, shuffle=False, test_size=0.2)

# Print each subset transposed so it is displayed on a single row.
for label, subset in [
    ("training set:", train),
    ("validation set:", valid),
    ("test set:", test),
]:
    print(label, subset.T)

training set: [[0 1 2 3 4 5]]
validation set: [[6 7]]
test set: [[8 9]]


### Example 2 - Shuffle
Same as Example 1, but with shuffling

* one input argument `data`. Thus, only 3 output args
* 60/20/20 split with `test_size=0.2`
* random draws, `shuffle=True`

Shuffle

* initial state set externally with `np.random.seed`

In [3]:
import numpy as np

# Seed NumPy's global random generator *outside* of the split call.
np.random.seed(23)

# Toy dataset: the integers 0..9.
data = list(range(10))

# Shuffled three-way partition; no random_state is passed here.
train, valid, test = ox.threeway_split(data, shuffle=True, test_size=0.2)

# Print each subset transposed so it is displayed on a single row.
for label, subset in [
    ("training set:", train),
    ("validation set:", valid),
    ("test set:", test),
]:
    print(label, subset.T)

training set: [[5 8 2 9 4 7]]
validation set: [[1 0]]
test set: [[6 3]]


### Example 3 - Shuffle
Same as Example 2

* one input argument `data`. Thus, only 3 output args
* 60/20/20 split with `test_size=0.2`
* random draws, `shuffle=True`

Shuffle 

* initial state set internally with `random_state` (should be the same as in Example 2)

In [4]:
# Toy dataset: the integers 0..9.
data = list(range(10))

# Shuffled three-way partition; the seed is handed over via random_state
# instead of being set globally beforehand.
train, valid, test = ox.threeway_split(
    data,
    test_size=0.2,
    shuffle=True,
    random_state=23,
)

# Print each subset transposed so it is displayed on a single row.
for label, subset in [
    ("training set:", train),
    ("validation set:", valid),
    ("test set:", test),
]:
    print(label, subset.T)

training set: [[5 8 2 9 4 7]]
validation set: [[1 0]]
test set: [[6 3]]


### Example 4 - Array with many variables as 1 input argument
Same as Example 1

* one input argument `data`. Thus, only 3 output args
* 60/20/20 split with `test_size=0.2`
* no random draws, `shuffle=False`


The `data` variable is an array 

* with 3 variables (columns) 
* and 10 observations (rows).

Use case

* there is no dependent variable `y` (e.g. correlation matrices for different subsets)
* it is not yet decided which variable is `y` (e.g. selection algorithms choose different variables to be `y`)

In [5]:
# 10 observations (rows) x 3 variables (columns): the columns hold
# 0..9, 10..19 and 20..29 respectively.
data = np.arange(30).reshape(3, 10).transpose()

# Unshuffled three-way partition of the whole array (rows are split).
train, valid, test = ox.threeway_split(data, shuffle=False, test_size=0.2)

# Each label ends with a newline so the 2-D array starts on its own line.
for label, subset in [
    ("training set:\n", train),
    ("validation set:\n", valid),
    ("test set:\n", test),
]:
    print(label, subset)

training set:
 [[ 0 10 20]
 [ 1 11 21]
 [ 2 12 22]
 [ 3 13 23]
 [ 4 14 24]
 [ 5 15 25]]
validation set:
 [[ 6 16 26]
 [ 7 17 27]]
test set:
 [[ 8 18 28]
 [ 9 19 29]]


### Example 5 - Two input arguments
* two input arguments `x` and `y`. Thus, 6 output args
* 60/20/20 split with `test_size=0.2`
* no random draws, `shuffle=False`

In [6]:
# 10 observations (rows) x 3 variables (columns).
data = np.arange(30).reshape(3, 10).transpose()

# First column is the target, the remaining columns are the features.
y, x = data[:, 0], data[:, 1:]

# Two inputs -> six outputs: the three x subsets first, then the three y subsets.
x_train, x_valid, x_test, y_train, y_valid, y_test = ox.threeway_split(
    x, y, shuffle=False, test_size=0.2)

# Target subsets, transposed so each is displayed on a single row.
for label, subset in [
    ("y training set:", y_train),
    ("y validation set:", y_valid),
    ("y test set:", y_test),
]:
    print(label, subset.T)

# Feature subsets; the leading "\n" of the first label separates them from
# the y output above.
for label, subset in [
    ("\nx training set:\n", x_train),
    ("x validation set:\n", x_valid),
    ("x test set:\n", x_test),
]:
    print(label, subset)

y training set: [[0 1 2 3 4 5]]
y validation set: [[6 7]]
y test set: [[8 9]]

x training set:
 [[10 20]
 [11 21]
 [12 22]
 [13 23]
 [14 24]
 [15 25]]
x validation set:
 [[16 26]
 [17 27]]
x test set:
 [[18 28]
 [19 29]]


### Example 6
Default usage

* 60/20/20 split, `test_size=0.2`
* random draws, `shuffle=True`
* initial seed is **not** set, `random_state=None`


This is most similar to how scikit-learn's `train_test_split` is typically called, but with 6 output arguments

In [7]:
# 10 observations (rows) x 3 variables (columns).
data = np.arange(30).reshape(3, 10).transpose()

# First column is the target, the remaining columns are the features.
y, x = data[:, 0], data[:, 1:]

# Rely purely on the function's defaults. No seed is supplied, so the
# printed subsets are not expected to be identical across runs.
(x_train, x_valid, x_test,
 y_train, y_valid, y_test) = ox.threeway_split(x, y)

# Target subsets, transposed so each is displayed on a single row.
for label, subset in [
    ("y training set:", y_train),
    ("y validation set:", y_valid),
    ("y test set:", y_test),
]:
    print(label, subset.T)

# Feature subsets; the leading "\n" of the first label separates them from
# the y output above.
for label, subset in [
    ("\nx training set:\n", x_train),
    ("x validation set:\n", x_valid),
    ("x test set:\n", x_test),
]:
    print(label, subset)

y training set: [[0 7 8 2 1 4]]
y validation set: [[5 9]]
y test set: [[6 3]]

x training set:
 [[10 20]
 [17 27]
 [18 28]
 [12 22]
 [11 21]
 [14 24]]
x validation set:
 [[15 25]
 [19 29]]
x test set:
 [[16 26]
 [13 23]]
