In [1]:
import numpy as np
import sklearn
import os
import random
import math

from tqdm import tqdm
from scipy.io import loadmat

np.random.seed(1)


# Data Processing

In [2]:
full_train_data = loadmat('Data/sarcos_inv.mat')['sarcos_inv']
full_test_data = loadmat('Data/sarcos_inv_test.mat')['sarcos_inv_test']

In [3]:
def getData (full_train_data, full_test_data, num_train, num_validation, num_test):
    
    idx_train = np.random.randint(full_train_data.shape[0], size=num_train)
    idx_validation = np.random.randint(full_train_data.shape[0], size=num_validation)
    idx_test = np.random.randint(full_test_data.shape[0], size=num_test)
    
    train_data = full_train_data[idx_train, : ]
    validation_data = full_train_data[idx_validation, :]
    test_data = full_test_data[idx_test, : ]
    return (train_data, validation_data, test_data)

def Sep_X_and_Y(data, x_dim, y_dim):
    X = data[:, :x_dim]
    Y = data[:, x_dim: x_dim + y_dim ]
    return X, Y

def Unfold_Y(X, Y):
    num_tasks = Y.shape[1]
    X_new = np.vstack([X]*num_tasks)
    Y_new = np.ndarray.flatten(Y, 'F')
    T = np.arange(num_tasks)
    T_new = np.repeat(T, X.shape[0])
    return X_new, Y_new, T_new

def append_one(X):
    n = len(X)
    ones = np.zeros((n, 1)) + 1
    X_new = np.concatenate([ones, X], axis= 1)
    return X_new

def normalize_input(X):
    A = (X - np.mean(X, axis=0))
    B = A/np.std(A, axis=0)
    return B

def normalize_Y_matrix(Y):
    A = (Y - np.mean(Y, axis=0))
    return Y

In [4]:
(train_data, validation_data, test_data) = getData(full_train_data, full_test_data, 500, 100, 500)

X_train_org, Y_train_org = Sep_X_and_Y(train_data, 21, 2)
X_valid_org, Y_valid_org = Sep_X_and_Y(validation_data, 21, 2)
X_test_org , Y_test_org  = Sep_X_and_Y(test_data, 21, 2)


X_train, Y_train, T_train = Unfold_Y(X_train_org, Y_train_org)
X_valid, Y_valid, T_valid = Unfold_Y(X_valid_org, Y_valid_org)
X_test , Y_test , T_test  = Unfold_Y(X_test_org , Y_test_org )


X_train = append_one(X_train)
X_valid = append_one(X_valid)
X_test = append_one(X_test)

# Multitask Gaussian Process

In [5]:
def Kernel_input(x1, x2):  # K_input part of ICM kernel
    numerator    = 2*np.dot(np.dot(x1, Sigma_u), x2 )
    denominator1 = 1 + 2*np.dot(np.dot(x1, Sigma_u), x1 )
    denominator2 = 1 + 2*np.dot(np.dot(x2, Sigma_u), x2 )
    denominator  = math.sqrt(denominator1*denominator2) # See "Computing with infinite networks" for calculation of Expectation term 
    
    Expectation_term = (2/np.pi) * math.asin( numerator/denominator )
    similarity       = C_term +  Expectation_term  # See "Multitask Neural networks meet Multitask Gaussian Process" Paper for notation of C-term and Expectation term
    return similarity 


def Kernel_task (t1, t2):   # K_task part of the ICM Kernels
    return Omega2[t1, t2]


def Kernel(x1, t1, x2, t2): # ICM Kernel - product of input and task dependent components
    return Kernel_input(x1, x2)*Kernel_task(t1, t2)


def mtgp_fit (X_train, T_train, Y_train, Noise_variance ): # Simple MTGP implementation with specified Kernels (here ICM kernels)
    N = len(X_train)
    K = np.zeros((N, N))
            
    for i in range(0, N):
        for j in range(0, N):
            K[i, j] = Kernel(X_train[i], T_train[i], X_train[j], T_train[j] )
            
    B = np.zeros((N, N))
    for i in range(0, N):
        B[i, i] = Noise_variance[T_train[i]]
         
    
    C = K + B
    C_inv = np.linalg.inv(C)
    alpha = np.dot(C_inv, Y_train)
    model = {
        "X_train" :  X_train,
        "T_train" :  T_train,
        "Y_train" :  Y_train,
        "Noise_variance" : Noise_variance,
        "C_inv"   :  C_inv,
        "alpha"   :  alpha,
    }
    
    return model



def mtgp_predict(X, T, model):
    
    n_train  = len(model["X_train"])
    n        = len(X)
    K        = np.zeros((n_train, n))
    
    for i in range(0, n_train):
        for j in range(0, n):
            K[i, j] = Kernel( model["X_train"][i], model["T_train"][i], X[j] , T[j] )
            
    y_pred = np.dot(K.T, model["alpha"] )
    return y_pred


from sklearn.metrics import mean_squared_error


def get_stats (y_true, y_predict):
    msr = mean_squared_error(y_true, y_predict)
    stats ={
        "msr" : msr
    }
    
    return stats
    

# Hyperparameters

In [6]:
# Temporary variables for convineance - remove after tuning

temp_22_vec =  np.zeros(22) + 1
temp_2x2    =  np.identity(2)
temp_2      =  [1e-5, 1e-5]

# Kernel Hyperparameters - Prior Hyperparameters

Sigma_u =  np.diag( temp_22_vec ) # diagonal matrix of size = dimensions of X (current example: 21)
C_term  =  1             # Proportionality Constant term for bias variance 
Omega2  =  temp_2x2      # Covariance between tasks - Symmetric matrix of size T X T 


# Noise Variance - Likelihood Hyperparameters
Noise_variance = temp_2 # Noise variance - Vector of size T, number of tasks 


# Ranges for hyperparameter

Noise_variance = (1e-1, 1e-8) for both -> log scale
Sigma_u = (1e-6, 1e-2) -> log scale
C_term -> No changes, fix to be 1
Omega2 = -0.2 = (0, 2) for a11 and a22 and (-1.5, 1.5) for a21


# Bayesian Optimization

In [7]:
def get_Negative_Loss (
    Sigma_u1,
    Sigma_u2,
    Sigma_u3,
    Sigma_u4,
    Sigma_u5,
    Sigma_u6,
    Sigma_u7,
    Sigma_u8,
    Sigma_u9,
    Sigma_u10,
    Sigma_u11,
    Sigma_u12,
    Sigma_u13,
    Sigma_u14,
    Sigma_u15,
    Sigma_u16,
    Sigma_u17,
    Sigma_u18,
    Sigma_u19,
    Sigma_u20,
    Sigma_u21,
    Sigma_u22,
    
    Omega11, 
    Omega12, 
    Omega22,
    
    Noise_variance1,
    Noise_variance2 
):
    
    global Sigma_u
    Sigma_u = np.diag ( 
        np.array(
    [ 
     math.pow(10, Sigma_u1),
     math.pow(10, Sigma_u2),
     math.pow(10, Sigma_u3),
     math.pow(10, Sigma_u4),
     math.pow(10, Sigma_u5),
     math.pow(10, Sigma_u6),
     math.pow(10, Sigma_u7),
     math.pow(10, Sigma_u8),
     math.pow(10, Sigma_u9),
     math.pow(10, Sigma_u10),
     math.pow(10, Sigma_u11),
     math.pow(10, Sigma_u12),
     math.pow(10, Sigma_u13),
     math.pow(10, Sigma_u14),
     math.pow(10, Sigma_u15),
     math.pow(10, Sigma_u16),
     math.pow(10, Sigma_u17),
     math.pow(10, Sigma_u18),
     math.pow(10, Sigma_u19),
     math.pow(10, Sigma_u20),
     math.pow(10, Sigma_u21),
     math.pow(10, Sigma_u22)
    ] )
    )
        
    global Omega2
    Omega2 = np.array( [[Omega11, Omega12], [Omega12, Omega22]] )
    
    
    global Noise_variance
    Noise_variance = [math.pow(10, Noise_variance1) , math.pow(10, Noise_variance2)]
    
    
    
    model = mtgp_fit (X_train, T_train, Y_train, Noise_variance )
    predict = mtgp_predict(X_valid, T_valid, model)
    stats = get_stats(Y_valid, predict)
    return -stats['msr']


In [8]:
from bayes_opt import BayesianOptimization


pbounds = {
                "Sigma_u1" : (-6, -2),
                "Sigma_u2" : (-6, -2),
                "Sigma_u3" : (-6, -2),
                "Sigma_u4" : (-6, -2),
                "Sigma_u5" : (-6, -2),
                "Sigma_u6" : (-6, -2),
                "Sigma_u7" : (-6, -2),
                "Sigma_u8" : (-6, -2),
                "Sigma_u9" : (-6, -2),
                "Sigma_u10" : (-6, -2),
                "Sigma_u11" : (-6, -2),
                "Sigma_u12" : (-6, -2),
                "Sigma_u13" : (-6, -2),
                "Sigma_u14" : (-6, -2),
                "Sigma_u15" : (-6, -2),
                "Sigma_u16" : (-6, -2),
                "Sigma_u17" : (-6, -2),
                "Sigma_u18" : (-6, -2),
                "Sigma_u19" : (-6, -2),
                "Sigma_u20" : (-6, -2),
                "Sigma_u21" : (-6, -2),
                "Sigma_u22" : (-6, -2),

                "Omega11" : (0, 2) ,
                "Omega12" : (-1.5, 1.5), 
                "Omega22" : (0, 2) ,

                "Noise_variance1" : (-1, -8),
                "Noise_variance2" : (-1, -8) 
          }


optimizer = BayesianOptimization(
    f= get_Negative_Loss,
    pbounds=pbounds,
    random_state=1,
)



In [12]:
optimizer.maximize(
    init_points=10,
    n_iter=100,
)

|   iter    |  target   | Noise_... | Noise_... |  Omega11  |  Omega12  |  Omega22  | Sigma_u1  | Sigma_u10 | Sigma_u11 | Sigma_u12 | Sigma_u13 | Sigma_u14 | Sigma_u15 | Sigma_u16 | Sigma_u17 | Sigma_u18 | Sigma_u19 | Sigma_u2  | Sigma_u20 | Sigma_u21 | Sigma_u22 | Sigma_u3  | Sigma_u4  | Sigma_u5  | Sigma_u6  | Sigma_u7  | Sigma_u8  | Sigma_u9  |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
| [0m 111     [0m | [0m-424.4   [0m | [0m-2.298   [0m | [0m-7.532   [0m | [0m 1.872   [0m | [0m-0.6494  [0m | [0m 0.5493  [0m | [0m-3.794   [0m | [0m-5.259   [0m | [0m-4.232   [0m | [0m-5.259   [0m | [0m-2.21    [0m | [0m-4.025   [0m | [0m-2.144   [0m | [0m-3.42    [0m 

| [0m 124     [0m | [0m-157.4   [0m | [0m-1.0     [0m | [0m-1.0     [0m | [0m 0.123   [0m | [0m-0.08016 [0m | [0m 1.178   [0m | [0m-3.398   [0m | [0m-4.859   [0m | [0m-4.264   [0m | [0m-4.52    [0m | [0m-4.029   [0m | [0m-5.895   [0m | [0m-4.772   [0m | [0m-4.756   [0m | [0m-4.022   [0m | [0m-3.354   [0m | [0m-3.603   [0m | [0m-3.128   [0m | [0m-5.021   [0m | [0m-4.717   [0m | [0m-3.319   [0m | [0m-3.678   [0m | [0m-5.276   [0m | [0m-3.445   [0m | [0m-4.287   [0m | [0m-3.37    [0m | [0m-2.619   [0m | [0m-3.367   [0m |
| [0m 125     [0m | [0m-115.4   [0m | [0m-1.0     [0m | [0m-1.0     [0m | [0m 0.7967  [0m | [0m-0.8113  [0m | [0m 1.074   [0m | [0m-2.448   [0m | [0m-3.917   [0m | [0m-4.433   [0m | [0m-4.8     [0m | [0m-3.491   [0m | [0m-4.404   [0m | [0m-4.916   [0m | [0m-3.106   [0m | [0m-3.879   [0m | [0m-2.041   [0m | [0m-3.641   [0m | [0m-4.245   [0m | [0m-5.948   [0m | [0m-4.078   [0

| [0m 139     [0m | [0m-73.66   [0m | [0m-1.0     [0m | [0m-1.0     [0m | [0m 0.7449  [0m | [0m 0.3279  [0m | [0m 0.6237  [0m | [0m-4.403   [0m | [0m-4.246   [0m | [0m-3.863   [0m | [0m-2.709   [0m | [0m-4.3     [0m | [0m-4.893   [0m | [0m-3.562   [0m | [0m-3.238   [0m | [0m-3.803   [0m | [0m-5.018   [0m | [0m-4.085   [0m | [0m-4.389   [0m | [0m-2.496   [0m | [0m-4.39    [0m | [0m-3.522   [0m | [0m-3.632   [0m | [0m-4.045   [0m | [0m-2.856   [0m | [0m-3.739   [0m | [0m-3.914   [0m | [0m-4.639   [0m | [0m-3.493   [0m |
| [0m 140     [0m | [0m-74.14   [0m | [0m-1.0     [0m | [0m-1.0     [0m | [0m 1.538   [0m | [0m-0.4     [0m | [0m 0.3249  [0m | [0m-2.805   [0m | [0m-4.403   [0m | [0m-3.604   [0m | [0m-5.611   [0m | [0m-4.113   [0m | [0m-5.177   [0m | [0m-5.355   [0m | [0m-2.786   [0m | [0m-3.723   [0m | [0m-4.309   [0m | [0m-3.032   [0m | [0m-3.782   [0m | [0m-5.115   [0m | [0m-3.75    [0

| [0m 154     [0m | [0m-130.6   [0m | [0m-1.0     [0m | [0m-1.0     [0m | [0m 0.299   [0m | [0m-0.4722  [0m | [0m 1.152   [0m | [0m-3.418   [0m | [0m-4.941   [0m | [0m-3.826   [0m | [0m-3.118   [0m | [0m-4.152   [0m | [0m-4.008   [0m | [0m-5.151   [0m | [0m-4.057   [0m | [0m-3.142   [0m | [0m-2.939   [0m | [0m-4.761   [0m | [0m-4.403   [0m | [0m-4.546   [0m | [0m-2.727   [0m | [0m-2.56    [0m | [0m-3.326   [0m | [0m-3.832   [0m | [0m-3.016   [0m | [0m-4.203   [0m | [0m-3.362   [0m | [0m-3.769   [0m | [0m-3.859   [0m |
| [0m 155     [0m | [0m-63.2    [0m | [0m-1.0     [0m | [0m-1.0     [0m | [0m 1.111   [0m | [0m 0.3617  [0m | [0m 1.159   [0m | [0m-3.285   [0m | [0m-5.065   [0m | [0m-3.101   [0m | [0m-3.487   [0m | [0m-4.182   [0m | [0m-3.355   [0m | [0m-2.682   [0m | [0m-3.477   [0m | [0m-3.832   [0m | [0m-4.182   [0m | [0m-2.37    [0m | [0m-3.533   [0m | [0m-4.298   [0m | [0m-4.849   [0

| [0m 169     [0m | [0m-94.85   [0m | [0m-1.0     [0m | [0m-1.0     [0m | [0m 0.8368  [0m | [0m-0.4066  [0m | [0m 1.153   [0m | [0m-4.396   [0m | [0m-4.591   [0m | [0m-4.491   [0m | [0m-3.218   [0m | [0m-3.758   [0m | [0m-4.629   [0m | [0m-4.59    [0m | [0m-3.918   [0m | [0m-5.253   [0m | [0m-4.105   [0m | [0m-5.538   [0m | [0m-3.435   [0m | [0m-5.105   [0m | [0m-3.459   [0m | [0m-3.836   [0m | [0m-3.207   [0m | [0m-5.365   [0m | [0m-4.264   [0m | [0m-4.099   [0m | [0m-3.865   [0m | [0m-2.703   [0m | [0m-3.792   [0m |
| [0m 170     [0m | [0m-59.79   [0m | [0m-1.0     [0m | [0m-1.0     [0m | [0m 1.203   [0m | [0m 0.4222  [0m | [0m 1.708   [0m | [0m-3.807   [0m | [0m-3.489   [0m | [0m-3.201   [0m | [0m-5.291   [0m | [0m-5.255   [0m | [0m-5.316   [0m | [0m-4.058   [0m | [0m-2.946   [0m | [0m-3.58    [0m | [0m-4.768   [0m | [0m-4.866   [0m | [0m-3.465   [0m | [0m-5.553   [0m | [0m-3.731   [0

| [0m 184     [0m | [0m-102.2   [0m | [0m-1.0     [0m | [0m-1.0     [0m | [0m 0.6711  [0m | [0m-0.8543  [0m | [0m 1.243   [0m | [0m-4.74    [0m | [0m-5.289   [0m | [0m-4.83    [0m | [0m-4.839   [0m | [0m-5.053   [0m | [0m-3.609   [0m | [0m-3.891   [0m | [0m-2.846   [0m | [0m-2.755   [0m | [0m-4.978   [0m | [0m-4.449   [0m | [0m-5.092   [0m | [0m-4.982   [0m | [0m-4.625   [0m | [0m-2.843   [0m | [0m-4.373   [0m | [0m-4.507   [0m | [0m-4.193   [0m | [0m-5.027   [0m | [0m-2.432   [0m | [0m-3.854   [0m | [0m-2.43    [0m |
| [0m 185     [0m | [0m-84.53   [0m | [0m-1.0     [0m | [0m-1.0     [0m | [0m 1.077   [0m | [0m 0.6142  [0m | [0m 0.521   [0m | [0m-2.624   [0m | [0m-5.823   [0m | [0m-2.932   [0m | [0m-4.165   [0m | [0m-4.027   [0m | [0m-5.975   [0m | [0m-4.63    [0m | [0m-3.259   [0m | [0m-4.003   [0m | [0m-3.563   [0m | [0m-4.514   [0m | [0m-4.17    [0m | [0m-3.637   [0m | [0m-3.458   [0

| [0m 199     [0m | [0m-95.92   [0m | [0m-1.0     [0m | [0m-1.0     [0m | [0m 0.2515  [0m | [0m-0.01527 [0m | [0m 1.185   [0m | [0m-2.597   [0m | [0m-4.083   [0m | [0m-3.766   [0m | [0m-5.395   [0m | [0m-5.648   [0m | [0m-3.367   [0m | [0m-5.805   [0m | [0m-4.198   [0m | [0m-4.332   [0m | [0m-5.063   [0m | [0m-4.722   [0m | [0m-4.92    [0m | [0m-4.712   [0m | [0m-4.487   [0m | [0m-3.155   [0m | [0m-3.574   [0m | [0m-4.459   [0m | [0m-3.929   [0m | [0m-4.551   [0m | [0m-4.488   [0m | [0m-3.018   [0m | [0m-3.68    [0m |
| [0m 200     [0m | [0m-88.39   [0m | [0m-1.0     [0m | [0m-1.0     [0m | [0m 0.7194  [0m | [0m-0.436   [0m | [0m 1.113   [0m | [0m-2.865   [0m | [0m-4.794   [0m | [0m-4.475   [0m | [0m-4.688   [0m | [0m-3.707   [0m | [0m-4.823   [0m | [0m-4.4     [0m | [0m-3.941   [0m | [0m-4.049   [0m | [0m-3.867   [0m | [0m-4.8     [0m | [0m-3.804   [0m | [0m-5.979   [0m | [0m-2.593   [0

| [0m 214     [0m | [0m-442.9   [0m | [0m-1.0     [0m | [0m-1.0     [0m | [0m 0.1567  [0m | [0m-0.5082  [0m | [0m 1.295   [0m | [0m-2.359   [0m | [0m-5.673   [0m | [0m-3.882   [0m | [0m-4.447   [0m | [0m-4.702   [0m | [0m-5.429   [0m | [0m-3.705   [0m | [0m-4.869   [0m | [0m-2.85    [0m | [0m-3.067   [0m | [0m-4.731   [0m | [0m-5.186   [0m | [0m-4.548   [0m | [0m-3.89    [0m | [0m-3.961   [0m | [0m-4.569   [0m | [0m-4.807   [0m | [0m-3.556   [0m | [0m-4.268   [0m | [0m-2.8     [0m | [0m-3.565   [0m | [0m-3.658   [0m |
| [0m 215     [0m | [0m-69.65   [0m | [0m-1.0     [0m | [0m-1.0     [0m | [0m 1.727   [0m | [0m-0.5856  [0m | [0m 0.6578  [0m | [0m-4.137   [0m | [0m-3.718   [0m | [0m-2.215   [0m | [0m-5.288   [0m | [0m-4.325   [0m | [0m-4.562   [0m | [0m-5.298   [0m | [0m-3.403   [0m | [0m-3.217   [0m | [0m-4.484   [0m | [0m-4.288   [0m | [0m-4.464   [0m | [0m-3.544   [0m | [0m-2.3     [0

In [13]:
print(optimizer.max)      

{'target': -30.146467090950367, 'params': {'Noise_variance1': -3.9300461803486906, 'Noise_variance2': -5.784254301164108, 'Omega11': 0.7959812788158449, 'Omega12': 0.017226052352431243, 'Omega22': 0.3791034056678546, 'Sigma_u1': -2.1400443035563446, 'Sigma_u10': -4.823137247022981, 'Sigma_u11': -5.586161775534395, 'Sigma_u12': -5.422738399083808, 'Sigma_u13': -5.943630823230228, 'Sigma_u14': -3.136217202150422, 'Sigma_u15': -3.7420067190365036, 'Sigma_u16': -2.8216865952609913, 'Sigma_u17': -3.9716803068634543, 'Sigma_u18': -2.8327158419833847, 'Sigma_u19': -3.2169431429126423, 'Sigma_u2': -2.8886060925632986, 'Sigma_u20': -4.374068524091655, 'Sigma_u21': -3.4089174672500753, 'Sigma_u22': -5.280822809228102, 'Sigma_u3': -4.712720146448384, 'Sigma_u4': -5.30958151952083, 'Sigma_u5': -4.3654510999469505, 'Sigma_u6': -5.034325008137605, 'Sigma_u7': -4.372312023482568, 'Sigma_u8': -2.0991106302507516, 'Sigma_u9': -4.718722696711875}}


# Single Layer Neural Network

In [None]:
from sklearn.neural_network import MLPRegressor

num_hidd_units = 700

for rs in range(0, 1):
    clf = MLPRegressor(hidden_layer_sizes= (num_hidd_units, ), activation='logistic' ,random_state=rs,  solver='adam', max_iter=100000)
    clf.fit(X_train_org, Y_train_org)
    Y_pred_NN = clf.predict(X_valid_org)
    Y_pred_NN1 = Y_pred_NN.flatten()
    Y_valid_org1 = Y_valid_org.flatten()
    
    stats = get_stats(Y_pred_NN1, Y_valid_org1)
    print( -stats['msr'] )
    
    



In [None]:
math.pow(10, -0.7248)