In [1]:
import numpy as np
import scipy.io as sio
import pickle
import os

In [2]:
# Load the Lenk dataset
lenk_name = "data/lenk/lenk_data.mat"
lenk_dataset = sio.loadmat(lenk_name)
print(lenk_dataset.keys())
lenk_train = lenk_dataset['Traindata']
lenk_test = lenk_dataset['Testdata']

dict_keys(['__header__', '__version__', '__globals__', 'Traindata', 'Testdata'])


In [3]:
# Convert Lenk train dataset into T x N x d1 form
n_train = np.unique(lenk_train[:, :-1], axis=0).shape[0]
tasks = []
labels = []
for task_begin in range(0, lenk_train.shape[0], n_train):
    task_end = task_begin + n_train
    task = lenk_train[task_begin:task_end,:-1]
    label = lenk_train[task_begin:task_end,-1]
    tasks.append(task)
    labels.append(label)

# Add Lenk test dataset into the mix
n_test = np.unique(lenk_test[:, :-1], axis=0).shape[0]
task_i = 0
for task_begin in range(0, lenk_test.shape[0], n_test):
    task_end = task_begin + n_test
    task = lenk_test[task_begin:task_end,:-1]
    label = lenk_test[task_begin:task_end,-1]
    tasks[task_i] = np.vstack((tasks[task_i], task))
    labels[task_i] = np.append(labels[task_i], label)
    task_i += 1
tasks = np.asarray(tasks)
print(tasks.shape)
labels = np.asarray(labels)
print(labels.shape)

(180, 20, 14)
(180, 20)


In [4]:
# Extract the initial parameters
T = tasks.shape[0]
d1 = tasks[0].shape[0]
print("T = {}".format(T))
print("d1 = {}".format(d1))

T = 180
d1 = 20


In [5]:
def transform_Y(task, labels):
    d2 = 2 * task.shape[1]
    sigma = 100

    # Generate the parameters for the transform
    sum_phi = np.zeros(d2)
    for i in range(task.shape[0]):
        vec_operand = np.reshape(task[i], (14, 1)) @ np.reshape(np.transpose(np.array([labels[i], 1])), (1, 2))
        vec_op_1 = vec_operand[:,0]
        vec_op_2 = vec_operand[:,1]
        phi = np.concatenate((vec_op_1, vec_op_2))
        sum_phi += phi
    avg_phi = np.divide(sum_phi, task.shape[0])
    return avg_phi

In [35]:
# Choose test task and the rest is training
test_task = tasks[0]        # Can also choose this randomly
test_labels = labels[0]
train_tasks = tasks[1:]
train_labels = labels[1:]

# Loop over N2 for this new test task
parent_dir = "data/lenk/"
for N2 in range(2, 22, 2):
    # Get and store the test data
    random_indices = np.random.choice(test_task.shape[0], size=N2, replace=False)
    X0 = test_task[random_indices, :]
    Y0 = transform_Y(test_task, test_labels) # Independent of X
    R0 = test_labels[random_indices]
    path = os.path.join(parent_dir, "N2_{}/".format(N2))
    try:
        os.mkdir(path)
    except OSError as error:
        print(error)
    pickle.dump(X0, open(path + "X.pkl", "wb"))
    pickle.dump(Y0, open(path + "Y0.pkl", "wb"))
    pickle.dump(R0, open(path + "R0.pkl", "wb"))

    # Get and store the training data
    d1 = test_task.shape[1]
    d2 = 2 * d1
    X_full = np.ones((1, d1))
    Y_full = np.ones((1, d2))
    R_full = np.ones((1, 1))
    task_function = []
    index_total = 0
    for i in range(train_tasks.shape[0]):
        Y = transform_Y(train_tasks[i], train_labels[i])
        Y_full = np.vstack((Y_full, Y))
        for j in range(test_task.shape[0]):
            X = train_tasks[i][j]
            R = train_labels[i][j]
            X_full = np.vstack((X_full, X))
            R_full = np.vstack((R_full, R))

            prev_index_total = index_total
            index_total += X.shape[0]
            for j in range(prev_index_total, index_total):
                task_function.append(i)

    task_function = np.asarray(task_function)
    R_full = R_full[1:]     # N x 1
    print(R_full.shape)
    X_full = X_full[1:]     # N x d1
    print(X_full.shape)
    Y_full = Y_full[1:]     # T x d2
    print(Y_full.shape)
    pickle.dump(X_full, open(path + "X.pkl", "wb"))
    pickle.dump(Y_full, open(path + "Y.pkl", "wb"))
    pickle.dump(R_full, open(path + "R.pkl", "wb"))
    pickle.dump(task_function, open(path + "task_function.pkl", "wb"))

[Errno 17] File exists: 'data/lenk/N2_2/'
(3580, 1)
(3580, 14)
(179, 28)
[Errno 17] File exists: 'data/lenk/N2_4/'
(3580, 1)
(3580, 14)
(179, 28)
[Errno 17] File exists: 'data/lenk/N2_6/'
(3580, 1)
(3580, 14)
(179, 28)
[Errno 17] File exists: 'data/lenk/N2_8/'
(3580, 1)
(3580, 14)
(179, 28)
[Errno 17] File exists: 'data/lenk/N2_10/'
(3580, 1)
(3580, 14)
(179, 28)
[Errno 17] File exists: 'data/lenk/N2_12/'
(3580, 1)
(3580, 14)
(179, 28)
[Errno 17] File exists: 'data/lenk/N2_14/'
(3580, 1)
(3580, 14)
(179, 28)
[Errno 17] File exists: 'data/lenk/N2_16/'
(3580, 1)
(3580, 14)
(179, 28)
[Errno 17] File exists: 'data/lenk/N2_18/'
(3580, 1)
(3580, 14)
(179, 28)
[Errno 17] File exists: 'data/lenk/N2_20/'
(3580, 1)
(3580, 14)
(179, 28)
