In [1]:
import numpy as np
import sklearn
import os
import random
import math

from tqdm import tqdm
from scipy.io import loadmat

np.random.seed(1)


# Data Processing

In [2]:
full_train_data = loadmat('Data/sarcos_inv.mat')['sarcos_inv']
full_test_data = loadmat('Data/sarcos_inv_test.mat')['sarcos_inv_test']

In [3]:
def getData (full_train_data, full_test_data, num_train, num_validation, num_test):
    
    idx_train = np.random.randint(full_train_data.shape[0], size=num_train)
    idx_validation = np.random.randint(full_train_data.shape[0], size=num_validation)
    idx_test = np.random.randint(full_test_data.shape[0], size=num_test)
    
    train_data = full_train_data[idx_train, : ]
    validation_data = full_train_data[idx_validation, :]
    test_data = full_test_data[idx_test, : ]
    return (train_data, validation_data, test_data)

def Sep_X_and_Y(data, x_dim, y_dim):
    X = data[:, :x_dim]
    Y = data[:, x_dim: x_dim + y_dim ]
    return X, Y

def Unfold_Y(X, Y):
    num_tasks = Y.shape[1]
    X_new = np.vstack([X]*num_tasks)
    Y_new = np.ndarray.flatten(Y, 'F')
    T = np.arange(num_tasks)
    T_new = np.repeat(T, X.shape[0])
    return X_new, Y_new, T_new

def append_one(X):
    n = len(X)
    ones = np.zeros((n, 1)) + 1
    X_new = np.concatenate([ones, X], axis= 1)
    return X_new

def normalize_input(X):
    A = (X - np.mean(X, axis=0))
    B = A/np.std(A, axis=0)
    return B

def normalize_Y_matrix(Y):
    A = (Y - np.mean(Y, axis=0))
    return Y

In [4]:
(train_data, validation_data, test_data) = getData(full_train_data, full_test_data, 500, 100, 500)

X_train_org, Y_train_org = Sep_X_and_Y(train_data, 21, 2)
X_valid_org, Y_valid_org = Sep_X_and_Y(validation_data, 21, 2)
X_test_org , Y_test_org  = Sep_X_and_Y(test_data, 21, 2)


X_train, Y_train, T_train = Unfold_Y(X_train_org, Y_train_org)
X_valid, Y_valid, T_valid = Unfold_Y(X_valid_org, Y_valid_org)
X_test , Y_test , T_test  = Unfold_Y(X_test_org , Y_test_org )


X_train = append_one(X_train)
X_valid = append_one(X_valid)
X_test = append_one(X_test)

# Multitask Gaussian Process

In [5]:
def Kernel_input(x1, x2):  # K_input part of ICM kernel
    numerator    = 2*np.dot(np.dot(x1, Sigma_u), x2 )
    denominator1 = 1 + 2*np.dot(np.dot(x1, Sigma_u), x1 )
    denominator2 = 1 + 2*np.dot(np.dot(x2, Sigma_u), x2 )
    denominator  = math.sqrt(denominator1*denominator2) # See "Computing with infinite networks" for calculation of Expectation term 
    
    Expectation_term = (2/np.pi) * math.asin( numerator/denominator )
    similarity       = C_term +  Expectation_term  # See "Multitask Neural networks meet Multitask Gaussian Process" Paper for notation of C-term and Expectation term
    return similarity 


def Kernel_task (t1, t2):   # K_task part of the ICM Kernels
    return Omega2[t1, t2]


def Kernel(x1, t1, x2, t2): # ICM Kernel - product of input and task dependent components
    return Kernel_input(x1, x2)*Kernel_task(t1, t2)


def mtgp_fit (X_train, T_train, Y_train, Noise_variance ): # Simple MTGP implementation with specified Kernels (here ICM kernels)
    N = len(X_train)
    K = np.zeros((N, N))
            
    for i in range(0, N):
        for j in range(0, N):
            K[i, j] = Kernel(X_train[i], T_train[i], X_train[j], T_train[j] )
            
    B = np.zeros((N, N))
    for i in range(0, N):
        B[i, i] = Noise_variance[T_train[i]]
         
    
    C = K + B
    C_inv = np.linalg.inv(C)
    alpha = np.dot(C_inv, Y_train)
    model = {
        "X_train" :  X_train,
        "T_train" :  T_train,
        "Y_train" :  Y_train,
        "Noise_variance" : Noise_variance,
        "C_inv"   :  C_inv,
        "alpha"   :  alpha,
    }
    
    return model



def mtgp_predict(X, T, model):
    
    n_train  = len(model["X_train"])
    n        = len(X)
    K        = np.zeros((n_train, n))
    
    for i in range(0, n_train):
        for j in range(0, n):
            K[i, j] = Kernel( model["X_train"][i], model["T_train"][i], X[j] , T[j] )
            
    y_pred = np.dot(K.T, model["alpha"] )
    return y_pred


from sklearn.metrics import mean_squared_error


def get_stats (y_true, y_predict):
    msr = mean_squared_error(y_true, y_predict)
    stats ={
        "msr" : msr
    }
    
    return stats
    

# Hyperparameters

In [6]:
# Temporary variables for convineance - remove after tuning

temp_22_vec =  np.zeros(22) + 1
temp_2x2    =  np.identity(2)
temp_2      =  [1e-5, 1e-5]

# Kernel Hyperparameters - Prior Hyperparameters

Sigma_u =  math.pow(10, -4.244)*(np.diag( np.zeros(22) + 1 ))                              # diagonal matrix of size = dimensions of X (current example: 21)
Omega2  =  np.array([[0.931, -0.617], [-0.617, 1.983 ]])                                   # Covariance between tasks - Symmetric matrix of size T X T 
C_term  =  1                                                                               # Proportionality Constant term for bias variance 


# Noise Variance - Likelihood Hyperparameters
Noise_variance = np.array([math.pow(10, -4.77 ) , math.pow(10, -5.184) ])                  # Noise variance - Vector of size T, number of tasks 


# Ranges for hyperparameter

Sigma_u_coeff  = (-4.244) +/- 0.3 ->log scale
'Noise_variance_coeff1': -4.775270918417985, 'Noise_variance_coeff2': -5.184813670817696 (log scale)

Omega:
{'target': -20.08701974698428, 'params': {'Omega11': 0.9313010568883545, 'Omega12': -0.617757091827809, 'Omega22': 1.9838373711533497}}




# Bayesian Optimization

In [7]:
def get_Negative_Loss (
    Omega11,
    Omega12,
    Omega22,
    
    Sigma_u_coeff,
    Noise_variance_coeff1,
    Noise_variance_coeff2
    
):
    
    global Omega2
    Omega2 = np.array([[Omega11, Omega12], [Omega12, Omega22]])
    
    global Sigma_u
    Sigma_u = math.pow(10,Sigma_u_coeff )*(np.diag( np.zeros(22) + 1 ))
    
    global Noise_variance
    Noise_variance = np.array([math.pow(10, Noise_variance_coeff1 ) , math.pow(10, Noise_variance_coeff2) ])
    
    model = mtgp_fit (X_train, T_train, Y_train, Noise_variance )
    predict = mtgp_predict(X_valid, T_valid, model)
    stats = get_stats(Y_valid, predict)
    return -stats['msr']


In [8]:
from bayes_opt import BayesianOptimization


pbounds = {
            "Omega11" : (0.9313010568883545-0.5, 0.9313010568883545+0.5),
            "Omega12" : (-0.617757091827809-0.5, -0.617757091827809+0.5),
            "Omega22" : (1.9838373711533497-0.5, 1.9838373711533497+0.5),
            "Sigma_u_coeff" : (-4.244-0.5, -4.244+0.5) ,
            "Noise_variance_coeff1" : (-4.775270918417985-0.5, -4.775270918417985+0.5) ,
            "Noise_variance_coeff2" : ( -5.184813670817696-0.5,  -5.184813670817696+0.5)
          }



optimizer = BayesianOptimization(
    f= get_Negative_Loss,
    pbounds=pbounds,
    random_state=1,
)



In [10]:
optimizer.maximize(
    init_points=100,
    n_iter=200,
)

|   iter    |  target   | Noise_... | Noise_... |  Omega11  |  Omega12  |  Omega22  | Sigma_... |
-------------------------------------------------------------------------------------------------
| [95m 3       [0m | [95m-23.38   [0m | [95m-5.089   [0m | [95m-5.339   [0m | [95m 0.8281  [0m | [95m-0.5789  [0m | [95m 1.903   [0m | [95m-4.059   [0m |
| [95m 4       [0m | [95m-19.81   [0m | [95m-5.071   [0m | [95m-4.807   [0m | [95m 0.4587  [0m | [95m-0.4473  [0m | [95m 1.901   [0m | [95m-4.185   [0m |
| [0m 5       [0m | [0m-29.38   [0m | [0m-5.135   [0m | [0m-5.487   [0m | [0m 1.232   [0m | [0m-0.1495  [0m | [0m 1.797   [0m | [0m-4.052   [0m |
| [0m 6       [0m | [0m-1.815e+0[0m | [0m-4.399   [0m | [0m-4.79    [0m | [0m 0.5163  [0m | [0m-1.079   [0m | [0m 1.654   [0m | [0m-3.866   [0m |
| [0m 7       [0m | [0m-20.43   [0m | [0m-5.177   [0m | [0m-5.264   [0m | [0m 1.389   [0m | [0m-0.5846  [0m | [0m 2.176   [0m 

| [0m 53      [0m | [0m-30.45   [0m | [0m-4.93    [0m | [0m-5.46    [0m | [0m 1.024   [0m | [0m-0.8055  [0m | [0m 2.4     [0m | [0m-3.834   [0m |
| [0m 54      [0m | [0m-21.19   [0m | [0m-5.018   [0m | [0m-5.574   [0m | [0m 0.6243  [0m | [0m-0.6182  [0m | [0m 2.212   [0m | [0m-4.536   [0m |
| [0m 55      [0m | [0m-23.04   [0m | [0m-5.027   [0m | [0m-4.833   [0m | [0m 0.8471  [0m | [0m-0.5011  [0m | [0m 1.718   [0m | [0m-4.642   [0m |
| [0m 56      [0m | [0m-21.11   [0m | [0m-4.759   [0m | [0m-5.208   [0m | [0m 0.584   [0m | [0m-0.496   [0m | [0m 2.028   [0m | [0m-4.09    [0m |
| [0m 57      [0m | [0m-23.42   [0m | [0m-5.131   [0m | [0m-4.933   [0m | [0m 0.6534  [0m | [0m-0.5984  [0m | [0m 2.269   [0m | [0m-4.722   [0m |
| [0m 58      [0m | [0m-32.31   [0m | [0m-4.951   [0m | [0m-4.812   [0m | [0m 1.276   [0m | [0m-0.5793  [0m | [0m 2.35    [0m | [0m-3.794   [0m |
| [0m 59      [0m | [0m-2

| [0m 104     [0m | [0m-19.94   [0m | [0m-4.584   [0m | [0m-4.687   [0m | [0m 0.6036  [0m | [0m-0.9806  [0m | [0m 2.416   [0m | [0m-4.047   [0m |
| [95m 105     [0m | [95m-19.81   [0m | [95m-5.071   [0m | [95m-4.807   [0m | [95m 0.4587  [0m | [95m-0.4473  [0m | [95m 1.901   [0m | [95m-4.185   [0m |
| [0m 106     [0m | [0m-19.81   [0m | [0m-5.071   [0m | [0m-4.807   [0m | [0m 0.4587  [0m | [0m-0.4472  [0m | [0m 1.901   [0m | [0m-4.185   [0m |
| [0m 107     [0m | [0m-19.94   [0m | [0m-4.584   [0m | [0m-4.687   [0m | [0m 0.6036  [0m | [0m-0.9806  [0m | [0m 2.416   [0m | [0m-4.047   [0m |
| [0m 108     [0m | [0m-19.94   [0m | [0m-4.584   [0m | [0m-4.687   [0m | [0m 0.6036  [0m | [0m-0.9806  [0m | [0m 2.416   [0m | [0m-4.047   [0m |
| [0m 109     [0m | [0m-19.81   [0m | [0m-5.071   [0m | [0m-4.807   [0m | [0m 0.4587  [0m | [0m-0.4473  [0m | [0m 1.901   [0m | [0m-4.185   [0m |
| [0m 110     [0m 

| [0m 155     [0m | [0m-20.21   [0m | [0m-4.703   [0m | [0m-4.952   [0m | [0m 0.9503  [0m | [0m-0.3468  [0m | [0m 2.053   [0m | [0m-4.278   [0m |
| [0m 156     [0m | [0m-19.81   [0m | [0m-5.071   [0m | [0m-4.807   [0m | [0m 0.4587  [0m | [0m-0.4473  [0m | [0m 1.901   [0m | [0m-4.185   [0m |
| [95m 157     [0m | [95m-19.81   [0m | [95m-5.071   [0m | [95m-4.807   [0m | [95m 0.4587  [0m | [95m-0.4473  [0m | [95m 1.901   [0m | [95m-4.185   [0m |
| [0m 158     [0m | [0m-20.21   [0m | [0m-4.703   [0m | [0m-4.952   [0m | [0m 0.9503  [0m | [0m-0.3469  [0m | [0m 2.053   [0m | [0m-4.278   [0m |
| [0m 159     [0m | [0m-20.21   [0m | [0m-4.703   [0m | [0m-4.952   [0m | [0m 0.9503  [0m | [0m-0.3468  [0m | [0m 2.053   [0m | [0m-4.278   [0m |
| [0m 160     [0m | [0m-20.21   [0m | [0m-4.703   [0m | [0m-4.952   [0m | [0m 0.9503  [0m | [0m-0.3468  [0m | [0m 2.053   [0m | [0m-4.278   [0m |
| [0m 161     [0m 

| [0m 206     [0m | [0m-20.08   [0m | [0m-4.791   [0m | [0m-4.826   [0m | [0m 1.262   [0m | [0m-0.4686  [0m | [0m 2.158   [0m | [0m-4.166   [0m |
| [0m 207     [0m | [0m-19.81   [0m | [0m-5.071   [0m | [0m-4.807   [0m | [0m 0.4586  [0m | [0m-0.4473  [0m | [0m 1.901   [0m | [0m-4.185   [0m |
| [0m 208     [0m | [0m-20.21   [0m | [0m-4.703   [0m | [0m-4.952   [0m | [0m 0.9503  [0m | [0m-0.3468  [0m | [0m 2.053   [0m | [0m-4.278   [0m |
| [0m 209     [0m | [0m-20.22   [0m | [0m-4.449   [0m | [0m-4.831   [0m | [0m 0.5301  [0m | [0m-0.4664  [0m | [0m 2.187   [0m | [0m-4.134   [0m |
| [0m 210     [0m | [0m-20.23   [0m | [0m-4.658   [0m | [0m-4.736   [0m | [0m 1.382   [0m | [0m-0.5611  [0m | [0m 2.399   [0m | [0m-4.102   [0m |
| [0m 211     [0m | [0m-20.34   [0m | [0m-5.115   [0m | [0m-4.939   [0m | [0m 0.4617  [0m | [0m-0.7512  [0m | [0m 2.346   [0m | [0m-4.051   [0m |
| [0m 212     [0m | [0m-2

| [0m 257     [0m | [0m-20.21   [0m | [0m-4.703   [0m | [0m-4.952   [0m | [0m 0.9503  [0m | [0m-0.3469  [0m | [0m 2.053   [0m | [0m-4.278   [0m |
| [0m 258     [0m | [0m-20.21   [0m | [0m-4.703   [0m | [0m-4.952   [0m | [0m 0.9503  [0m | [0m-0.3469  [0m | [0m 2.053   [0m | [0m-4.278   [0m |
| [0m 259     [0m | [0m-20.23   [0m | [0m-4.658   [0m | [0m-4.736   [0m | [0m 1.381   [0m | [0m-0.5612  [0m | [0m 2.399   [0m | [0m-4.102   [0m |
| [0m 260     [0m | [0m-20.08   [0m | [0m-4.791   [0m | [0m-4.826   [0m | [0m 1.262   [0m | [0m-0.4685  [0m | [0m 2.158   [0m | [0m-4.165   [0m |
| [0m 261     [0m | [0m-20.32   [0m | [0m-5.028   [0m | [0m-5.423   [0m | [0m 1.181   [0m | [0m-0.6608  [0m | [0m 1.541   [0m | [0m-4.236   [0m |
| [0m 262     [0m | [0m-19.81   [0m | [0m-5.071   [0m | [0m-4.807   [0m | [0m 0.4587  [0m | [0m-0.4473  [0m | [0m 1.901   [0m | [0m-4.185   [0m |
| [0m 263     [0m | [0m-2

In [None]:
print(optimizer.max)

In [None]:
model = mtgp_fit (X_train, T_train, Y_train, Noise_variance )
predict = mtgp_predict(X_valid, T_valid, model)
stats = get_stats(Y_valid, predict)
print(stats["msr"])

# Single Layer Neural Network

In [None]:
from sklearn.neural_network import MLPRegressor

num_hidd_units = 700

for rs in range(0, 1):
    clf = MLPRegressor(hidden_layer_sizes= (num_hidd_units, ), activation='logistic' ,random_state=rs,  solver='adam', max_iter=100000)
    clf.fit(X_train_org, Y_train_org)
    Y_pred_NN = clf.predict(X_valid_org)
    Y_pred_NN1 = Y_pred_NN.flatten()
    Y_valid_org1 = Y_valid_org.flatten()
    
    stats = get_stats(Y_pred_NN1, Y_valid_org1)
    print( -stats['msr'] )
    
    



In [None]:
math.pow(10, -0.7248)

In [None]:
diff = (Y_valid_org - np.mean(Y_valid_org, axis=0))
diff = diff.flatten()
d = np.dot(diff, diff)/len(diff)
20/d