## First, train GP based on the full dataset

In [20]:
import os
import pickle

import matplotlib
import numpy as np
import pandas as pd
import scipy.io as sio
from matplotlib import pyplot as plt
from sklearn.manifold import SpectralEmbedding
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors

## Step 0: Split the data into train/test

In [19]:
# Directory that holds the simulation .mat files referenced in this notebook.
datapath_base = "/data/yutaro/IROS/"

In [2]:
# Absolute paths to the two simulation datasets. Only the "full" file is loaded in the
# cells shown here; the "partial" path is defined but not used in the visible cells.
datapath_big = "/data/yutaro/IROS/sim_data_full_v11_d4_m1.mat"
datapath_small = "/data/yutaro/IROS/sim_data_partial_v111_d4_m1.mat"

In [3]:
# Load the full dataset; loadmat returns a dict keyed by MATLAB variable name.
big = sio.loadmat(datapath_big)

In [4]:
# Size of the 'D' matrix: one row per sample, 10 columns.
big['D'].shape

(1616064, 10)

In [5]:
# Peek at the first rows; columns 0-5 are used as inputs and columns 6-9 as targets in the split below.
pd.DataFrame(big['D']).head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.426909,118.443377,16.0,16.0,0.0,0.0,0.418931,118.442513,16.030001,16.030001
1,0.418931,118.442513,16.030001,16.030001,1.0,1.0,0.404663,118.443087,16.030001,16.030001
2,0.404663,118.443087,16.030001,16.030001,1.0,1.0,0.396586,118.443348,16.060001,16.060001
3,0.396586,118.443348,16.060001,16.060001,1.0,1.0,0.391399,118.442141,16.060001,16.060001
4,0.391399,118.442141,16.060001,16.060001,1.0,1.0,0.384889,118.43922,16.090002,16.090002


In [6]:
# Columns 6 onward form the 4-column target block.
big['D'][:,6:].shape

(1616064, 4)

In [7]:
# Hold out 10% of the rows for testing (fixed seed for reproducibility).
# Columns 0-5 are the inputs, columns 6-9 the targets.
X_train, X_test, y_train, y_test = train_test_split(
    big['D'][:, :6],
    big['D'][:, 6:],
    test_size=0.1,
    random_state=42,
)

In [8]:
# Held-out test inputs (10% of the rows).
print(X_test.shape)

(161607, 6)


In [9]:
# Training inputs (the remaining 90% of the rows).
print(X_train.shape)

(1454457, 6)


## Step 1: Normalize the training data

In [10]:
def compute_normalization_parameters(data):
    """
    Find the per-feature minimum and maximum of a sample matrix.

    :param data: matrix with one sample per row [num_samples x num_features]
    :return: tuple (min, max) per feature, each a row matrix of dimension [1 x num_features]
    """
    # Reduce over the sample axis, then restore a leading axis so each result is a row matrix.
    lower, upper = (
        np.expand_dims(reduce(data, axis=0), 0)
        for reduce in (np.min, np.max)
    )
    return lower, upper

In [11]:
def normalize_data_per_row(data, min_param, max_param):
    """
    Min-max normalize a given matrix of data (samples must be organized per row).

    Each feature (column) is rescaled with the supplied parameters, so a value equal to
    min_param maps to 0 and a value equal to max_param maps to 1. Data normalized with
    parameters computed on another set may fall outside [0, 1].

    :param data: input data [num_samples x num_features]
    :param min_param: min (for each feature) for normalization [1 x num_features]
    :param max_param: max (for each feature) for normalization [1 x num_features]
    :return: normalized data, (data - min_param) / (max_param - min_param); a feature whose
             min and max coincide is only shifted by min_param (no division by zero)
    :raises AssertionError: if data is not 2D or the feature counts do not match
    """
    # sanity checks!
    assert len(data.shape) == 2, "Expected the input data to be a 2D matrix"
    assert data.shape[1] == min_param.shape[1], "Data - min_param size mismatch ({} vs {})".format(data.shape[1], min_param.shape[1])
    assert data.shape[1] == max_param.shape[1], "Data - max_param size mismatch ({} vs {})".format(data.shape[1], max_param.shape[1])

    value_range = max_param - min_param
    # A constant feature has zero range; dividing by it would produce NaN/inf, so fall back
    # to a unit denominator for those columns.
    safe_range = np.where(value_range == 0, 1, value_range)
    normalized_data = np.divide(data - min_param, safe_range)

    return normalized_data

In [12]:
# Compute the normalization parameters from the training inputs only.
min_param, max_param = compute_normalization_parameters(X_train)

In [13]:
# One maximum per input feature, stored as a single row.
max_param.shape

(1, 6)

In [14]:
# Rescale the training inputs with their own per-feature min/max.
X_train_normalized = normalize_data_per_row(X_train, min_param, max_param)

In [31]:
# Reuse the training min/max for the test inputs so both sets share one scaling.
X_test_normalized = normalize_data_per_row(X_test, min_param, max_param)

In [32]:
# NOTE(review): axis=1 reduces across the 6 features, giving one min/max per test sample
# (hence the long arrays printed below). Use axis=0 if a per-feature range check was intended.
print(np.min(X_test_normalized, axis=1))
print(np.max(X_test_normalized, axis=1))

[0.27291292 0.21652865 0.         ... 0.26044959 0.         0.32468289]
[1.         1.         0.74025489 ... 1.         0.76048428 1.        ]


## Step 2: Implement the transition model 

- Given the query point $(x, a)$, we want to find where $(x,a)$ will go next.
- The function $(x_{next}, a_{next}) = f((x,a))$ should be implemented as follows:
  - Step1: Find K1 (=1000) nearest points in training data. Let the closest pair to $(x,a)$ be $(x_h, a_h)$
  - Step2: A diffusion map is built from these 1000 points, which yields reduced-dimensional data (m=3 is used in the original paper)
  - Step3: The K2 (=100) points closest to $(x_h, a_h)$ in the diffusion-map space are found
  - Step4: Perform GP regression on these 100 points.

### Step2.0: Set parameters

In [15]:
m = 3      # dimension of the reduced (diffusion-map) space
k1 = 1000  # nearest neighbours retrieved from the training data per query
k2 = 100   # closest points kept in the reduced space for GP regression

### Step 2.1: Nearest Neighbours

### Memo:

```
%%time
nbrs = NearestNeighbors(n_neighbors=1000, algorithm='ball_tree').fit(big['D'])
print(nbrs)

NearestNeighbors(algorithm='ball_tree', leaf_size=30, metric='minkowski',
         metric_params=None, n_jobs=1, n_neighbors=1000, p=2, radius=1.0)
CPU times: user 7.56 s, sys: 151 ms, total: 7.71 s
Wall time: 7.68 s

NearestNeighbors(algorithm='ball_tree', leaf_size=30, metric='minkowski',
         metric_params=None, n_jobs=1, n_neighbors=1000, p=2, radius=1.0)
CPU times: user 14min 53s, sys: 2.17 s, total: 14min 55s
Wall time: 14min 53s
```

In [17]:
X_train_normalized.shape

(1454457, 6)

In [16]:
%%time
# Index the normalized training inputs in a ball tree; n_neighbors=k1 becomes the default
# number of neighbours returned by later kneighbors() calls. The recorded run took ~15 min.
nbrs = NearestNeighbors(n_neighbors=k1, algorithm='ball_tree').fit(X_train_normalized)
print(nbrs)

NearestNeighbors(algorithm='ball_tree', leaf_size=30, metric='minkowski',
         metric_params=None, n_jobs=1, n_neighbors=1000, p=2, radius=1.0)
CPU times: user 14min 53s, sys: 2.17 s, total: 14min 55s
Wall time: 14min 53s


In [22]:
# Save the fitted index to nbrs.pkl in the working directory
# (presumably to avoid repeating the slow fit).
with open('nbrs.pkl', 'wb') as f:
    pickle.dump(nbrs, f)

In [26]:
# Shape check: the neighbour index was fitted on these normalized training inputs.
X_train_normalized.shape

(1454457, 6)

In [27]:
# Shape of the raw (un-normalized) test inputs.
X_test.shape

(161607, 6)

In [28]:
# Query the k1 nearest training neighbours of the first test sample. The index was fitted on
# normalized inputs, so the query must be the normalized test sample as well; kneighbors
# expects a 2D array, hence the reshape to a single row.
distances, indices = nbrs.kneighbors(X_test_normalized[0,:].reshape(1,-1))

### Step2.2: Create Diffusion map

In [None]:
# Embed the k1 nearest training neighbours of the query into m dimensions.
# indices has shape (1, k1), so indices[0] selects the neighbour rows for the single query.
embedding = SpectralEmbedding(n_components=m)
X_neighbors = X_train_normalized[indices[0]]
X_transformed = embedding.fit_transform(X_neighbors)