In [1]:
import os

#현재 폴더 경로; 작업 폴더 기준
print(os.getcwd())
print(os.listdir(os.getcwd()))
# os.chdir("Desktop/iMac_Drive/코스웍/2022 고급기계학습주제 (김광인)/과제/CW2") # Change path

/Users/ming/Desktop/iMac_Drive/코스웍/2022 고급기계학습주제 (김광인)/과제/CW2
['.DS_Store', 'Untitled.ipynb', 'CW2.pdf', 'model', '.ipynb_checkpoints', 'CW2_GP.ipynb', 'data']


In [12]:
from model.gaussian_process import MOGPR, SR_MOGPR
from model.kernel import LinearKernel, GaussianKernel

In [13]:
import pandas as pd
import numpy as np
import time
from scipy.linalg import kron
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [47]:
# Fix seed
seed = 1
np.random.seed(seed)

## Data: SARCOS
SARCOS is a Robot arm dataset. It aims to predict 7 Torque columns given 7 Joint positions, 7 Velocities, 7 Accelerations.

In [86]:
train_data = pd.read_csv('./data/SARCOSTst.csv', header=None)
test_data = pd.read_csv('./data/SARCOSTrn.csv', header=None)

X_train, y_train = train_data.iloc[:,:-7], train_data.iloc[:,21:]
# X_train, X_valid, y_train, y_valid = train_test_split(train_data.iloc[:,:-7], train_data.iloc[:,21:], test_size=0.1, random_state=seed)
X_test, y_test = test_data.iloc[:,:-7], test_data.iloc[:,21:]

print("SARCOS data size")
print("Train", X_train.shape, y_train.shape)
# print("Valid", X_valid.shape, y_valid.shape)
print("Test", X_test.shape, y_test.shape)

SARCOS data size
Train (39484, 21) (39484, 7)
Test (5000, 21) (5000, 7)


In [43]:
seeds = [2, 3, 4]
for seed in seeds:
    np.random.seed(seed)
    sparse_model = SR_MOGPR(pX_train, py_train, GaussianKernel())

    start = time.time()

    # My Code
    sparse_model.predict(pX_test)

    end = time.time()
    print("Run time [s]: ",end-start)
    print("MSE: ", mean_squared_error(py_test, sparse_model._predictive_distribution["mean"]))

Run time [s]:  0.39763307571411133
MSE:  388.1928019917617
Run time [s]:  0.3868529796600342
MSE:  388.3816701014443
Run time [s]:  0.3899240493774414
MSE:  389.7590176387092


## Kernel
We use below linear and non-linear kernels:
- Linear inner product kernel
$$ k(x_1,x_2) = \sigma_0 + x_1 \cdot x_2 $$
- Non-linear Gaussian kernel
$$ k(x_1,x_2) = \exp(-{1 \over \sigma_k^2}\left|x_1-x_2\right|^2)$$

# 1. Comparison of Exact MOGPR and Sparse MOGPR
Here, we would see the time complexity of two algorithms. Obviously, __Sparse MOGPR is better.__<br>
Both use __partial datasets__ due to time and memory limitations<br>
- Linear kernel $\sigma_0 = 1.0$
- Gaussian kernel $\sigma_k = 1.0$
- noise $\sigma_k = 1.0$

In [16]:
pX_train = X_train[:2000]
py_train = y_train[:2000]
pX_test = X_test[:500]
py_test = y_test[:500]

print("partial SARCOS data size")
print("Train", pX_train.shape, py_train.shape)
print("Test", pX_test.shape, py_test.shape)

partial SARCOS data size
Train (2000, 21) (2000, 7)
Test (500, 21) (500, 7)


### Gaussian kernel

In [17]:
# Exact MOGPR
full_model = MOGPR(pX_train, py_train, GaussianKernel())

start = time.time()

# My Code
full_model.predict(pX_test)

end = time.time()
print("Run time [s]: ",end-start)
print("MSE: ", mean_squared_error(py_test, full_model._predictive_distribution["mean"]))

Run time [s]:  42.407217025756836
MSE:  391.52417473079106


In [18]:
# Sparse MOGPR
sparse_model = SR_MOGPR(pX_train, py_train, GaussianKernel())

start = time.time()

# My Code
sparse_model.predict(pX_test)

end = time.time()
print("Run time [s]: ",end-start)
print("MSE: ", mean_squared_error(py_test, sparse_model._predictive_distribution["mean"]))

Run time [s]:  0.3959929943084717
MSE:  390.1942064591819


### Linear kernel

In [19]:
# Exact MOGPR
full_model = MOGPR(pX_train, py_train, LinearKernel())

start = time.time()

# My Code
full_model.predict(pX_test)

end = time.time()
print("Run time [s]: ",end-start)
print("MSE: ", mean_squared_error(py_test, full_model._predictive_distribution["mean"]))

Run time [s]:  35.73332405090332
MSE:  700.6701421631735


In [20]:
# Sparse MOGPR
sparse_model = SR_MOGPR(pX_train, py_train, LinearKernel())

start = time.time()

# My Code
sparse_model.predict(pX_test)

end = time.time()
print("Run time [s]: ",end-start)
print("MSE: ", mean_squared_error(py_test, sparse_model._predictive_distribution["mean"]))

Run time [s]:  0.2842860221862793
MSE:  233617.3517069902


# 2. Hyperparameter Optimisation via Grid Search
Different seeds -> different inducing inputs.

In [76]:
from sklearn.model_selection import ParameterGrid
from collections import defaultdict
import json

### 1. Gaussian kernel

In [72]:
param_grid = {
    'sigma': [0.01], # Noise
    'sigma_k': [0.01, 0.1, 1.0, 10, 100],
}

grid = list(ParameterGrid(param_grid))
print("the number of experiments: ", len(grid))

the number of experiments:  5


In [84]:
grid

[{'sigma': 0.01, 'sigma_k': 0.01},
 {'sigma': 0.01, 'sigma_k': 0.1},
 {'sigma': 0.01, 'sigma_k': 1.0},
 {'sigma': 0.01, 'sigma_k': 10},
 {'sigma': 0.01, 'sigma_k': 100}]

In [None]:
runtime = defaultdict(list)
pred_dist = defaultdict(list)
mse = defaultdict(list)

for i, args in enumerate(grid):
    sigma_noise = args["sigma"]
    sigma_gaussian = args["sigma_k"]
    
    model = SR_MOGPR(X_train, y_train, GaussianKernel(sigma_k=sigma_gaussian), sigma=sigma_noise)
    
    # check time
    key = str(args)
    start = time.time()
    
    print(f'{i}th running..., ', end="")
    model.predict(X_test)
    
    end1 = time.time()
    runtime[key] = round(end1-start,3)
    print('time: ', round(end1-start,3), ", ", end="")
    
    # Save predictive distribution
    pred_dist[key] = model._predictive_distribution
    
    mse[key] = mean_squared_error(y_test, model._predictive_distribution["mean"])
    print('MSE: ', mse[key])
    
    with open("SR_MOGPR_runtime.json", "w") as json_file:
        json.dump(runtime, json_file)
    with open("SR_MOGPR_predictive_distribution.json", "w") as json_file:
        json.dump(pred_dict, json_file)
    with open("SR_MOGPR_mse.json", "w") as json_file:
        json.dump(mse, json_file)

0th running..., 

In [74]:
runtime

defaultdict(list,
            {"{'sigma': 0.01, 'sigma_k': 0.01}": 0.0,
             "{'sigma': 0.01, 'sigma_k': 0.1}": 0.0,
             "{'sigma': 0.01, 'sigma_k': 1.0}": 0.0,
             "{'sigma': 0.01, 'sigma_k': 10}": 0.0,
             "{'sigma': 0.01, 'sigma_k': 100}": 0.0})

### 2. Linear kernel

In [46]:
param_grid = {
    'sigma': [0.01, 0.1, 10], # Noise
}

grid = list(ParameterGrid(param_grid))
print("the number of experiments: ", len(grid))

the number of experiments:  3


In [37]:
start = time.time()

model = SR_MOGPR(X_train, y_train, GaussianKernel(sigma_k=0.5), sigma=0.5)
model.predict(X_test)

end = time.time()
print("Run time [s]: ",end-start)
print("MSE: ", mean_squared_error(y_test, model._predictive_distribution["mean"]))

Run time [s]:  766.880294084549
MSE:  387.5813290671355


In [24]:
param_grid = {
    'sigma': [0.1, 0.2, 0.5, 1.0, 1.2, 10], # Noise
    'sigma_k': [0.1, 0.2, 0.5, 1.0, 1.2, 10],
}

grid = list(ParameterGrid(param_grid))

In [31]:
print("the number of experiments: ", len(grid))
grid

the number of experiments:  36


[{'sigma': 0.1, 'sigma_k': 0.1},
 {'sigma': 0.1, 'sigma_k': 0.2},
 {'sigma': 0.1, 'sigma_k': 0.5},
 {'sigma': 0.1, 'sigma_k': 1.0},
 {'sigma': 0.1, 'sigma_k': 1.2},
 {'sigma': 0.1, 'sigma_k': 10},
 {'sigma': 0.2, 'sigma_k': 0.1},
 {'sigma': 0.2, 'sigma_k': 0.2},
 {'sigma': 0.2, 'sigma_k': 0.5},
 {'sigma': 0.2, 'sigma_k': 1.0},
 {'sigma': 0.2, 'sigma_k': 1.2},
 {'sigma': 0.2, 'sigma_k': 10},
 {'sigma': 0.5, 'sigma_k': 0.1},
 {'sigma': 0.5, 'sigma_k': 0.2},
 {'sigma': 0.5, 'sigma_k': 0.5},
 {'sigma': 0.5, 'sigma_k': 1.0},
 {'sigma': 0.5, 'sigma_k': 1.2},
 {'sigma': 0.5, 'sigma_k': 10},
 {'sigma': 1.0, 'sigma_k': 0.1},
 {'sigma': 1.0, 'sigma_k': 0.2},
 {'sigma': 1.0, 'sigma_k': 0.5},
 {'sigma': 1.0, 'sigma_k': 1.0},
 {'sigma': 1.0, 'sigma_k': 1.2},
 {'sigma': 1.0, 'sigma_k': 10},
 {'sigma': 1.2, 'sigma_k': 0.1},
 {'sigma': 1.2, 'sigma_k': 0.2},
 {'sigma': 1.2, 'sigma_k': 0.5},
 {'sigma': 1.2, 'sigma_k': 1.0},
 {'sigma': 1.2, 'sigma_k': 1.2},
 {'sigma': 1.2, 'sigma_k': 10},
 {'sigma': 10, 

In [33]:
train_time = []

for i, args in enumerate(grid):
    sigma_noise = args["sigma"]
    sigma_gaussian = args["sigma_k"]
    
    model = SR_MOGPR(X_train, y_train, GaussianKernel(sigma_k=sigma_gaussian), sigma=sigma_noise)
    
    # check time
    start = time.time()
    
    print(f'{i}th running...', end='')
    model.predict(X_train, y_train)
    
    end1 = time.time()
    train_time.append(end1-start)
    print( 'time: ', round(end1-start,1), end='')

#     y_pred = clf.predict(test_X10)
#     ac = accuracy_score(test_y10, y_pred)
#     print(' acc: ', ac)

#     end2 = time.time()
#     inf_time.append(end2-end1)
#     acc.append(ac)

# Rounding
# tt = [round(t,1) for t in train_time]
# it = [round(t,4) for t in inf_time]
# ac = [round(a,3) for a in acc]


0.1
0.1
0.1
0.1
0.1
0.1
0.2
0.2
0.2
0.2
0.2
0.2
0.5
0.5
0.5
0.5
0.5
0.5
1.0
1.0
1.0
1.0
1.0
1.0
1.2
1.2
1.2
1.2
1.2
1.2
10
10
10
10
10
10
