There are 20 trials: X1, X2, ..., X20
keep the depth as 9

Each trial has sequence of points: X1 = {x11, x12, x13, ..., x15}

Algorithm:
    1. upload X1 with given depth for 9 different objects
    2. optimize hyperparameters for all of the models
    3. predict depth+1, record probability, get label
    4. re-upload X1 with new, remove anything far before depth+1

In [1]:
%matplotlib inline
import numpy as np
from matplotlib import pyplot as plt
from sklearn.metrics.pairwise import pairwise_kernels
from scipy.optimize import fmin_l_bfgs_b
from scipy.linalg import cholesky, cho_solve, solve_triangular
import pickle
import pandas as pd
from scipy.spatial.distance import pdist, squareform

### Upload data

In [2]:
data = pickle.load(open('../dataset.pkl', 'rb'))
df = pd.read_csv('../df.csv')

In [3]:
df.head()

Unnamed: 0,thumb_1,thumb_2,thumb_3,thumb_4,thumb_5,thumb_6,thumb_7,thumb_8,thumb_9,thumb_10,...,little_5,little_6,little_7,little_8,little_9,little_10,little_11,little_12,obj_name,trial
0,0.05098,0.05098,0.054902,0.047059,0.047059,0.047059,0.05098,0.047059,0.047059,0.05098,...,0.027451,0.031373,0.027451,0.031373,0.035294,0.031373,0.027451,0.039216,blue_bear,1
1,0.05098,0.05098,0.054902,0.047059,0.047059,0.05098,0.05098,0.05098,0.047059,0.047059,...,0.031373,0.027451,0.027451,0.027451,0.039216,0.027451,0.031373,0.043137,blue_bear,1
2,0.05098,0.054902,0.054902,0.05098,0.05098,0.047059,0.047059,0.05098,0.047059,0.05098,...,0.027451,0.031373,0.027451,0.027451,0.031373,0.035294,0.027451,0.039216,blue_bear,1
3,0.05098,0.054902,0.054902,0.047059,0.05098,0.05098,0.05098,0.05098,0.047059,0.05098,...,0.035294,0.031373,0.027451,0.031373,0.031373,0.035294,0.027451,0.043137,blue_bear,1
4,0.05098,0.054902,0.054902,0.05098,0.05098,0.05098,0.047059,0.05098,0.047059,0.047059,...,0.035294,0.031373,0.027451,0.027451,0.035294,0.027451,0.031373,0.039216,blue_bear,1


In [4]:
obj_names = list(df['obj_name'].unique())

In [5]:
obj_to_id = {
    'blue_bear':1,
    'med_coke':2,
    'book':3,
    'empty_coke':4,
    'lotion':5,
    'empty_vitamin_water':6,
    'med_vitamin_water':7,
    'full_vitamin_water':8,
    'monkey_toy':9
}

In [6]:
obj_names

['blue_bear',
 'med_coke',
 'book',
 'empty_coke',
 'lotion',
 'empty_vitamin_water',
 'med_vitamin_water',
 'full_vitamin_water',
 'monkey_toy']

### Kernels

In [7]:
def squared_exp_kernel(X, theta_):
    pairwise_dists = squareform(pdist(X, 'euclidean'))**2
    K = np.exp(-pairwise_dists/ (2 * theta_[0] ** 2))
    K_gradient = np.multiply(pairwise_dists/(theta_[0]**3), K)
    return K, np.expand_dims(K_gradient, axis=2)

def squared_exp_kernel_func(a, b, theta_):
    # theta includes number of parameters
    squared_dist = np.linalg.norm(a-b)**2
    k = np.exp(-squared_dist/(2*theta_[0]**2))
    return k

### Optimization functions

In [8]:
OPTIMIZATION_ITERATIONS = 10

In [143]:
class GP:
    def __init__(self, sigma_n_square, bounds):
        self.sigma_n_square = sigma_n_square
        self.bounds = bounds
    
    def set_data(self, X_, y_):
        self.X = X_.copy()
        self.y = y_.copy()
        
        
    def log_marginal_likelihood(self, theta_, *args):    
        #K, K_gradient = squared_exp_kernel(X, theta_=theta_)
        K, K_gradient = squared_exp_kernel(self.X, theta_=theta_)


        # do Cholesky decomposition
        L = cholesky(K + self.sigma_n_square*np.eye(len(self.X)), lower=True)
        #print(L)
        # solve for alpha
        alpha = cho_solve(  (L, True), self.y )
        #print(K_gradient.shape)

        tmp = np.einsum("ik,jk->ijk", alpha, alpha)  # k: output-dimension
        tmp -= cho_solve((L, True), np.eye(K.shape[0]))[:, :, np.newaxis]
        log_likelihood_gradient_dims = 0.5 * np.einsum("ijl,ijk->kl", tmp, K_gradient)
        log_likelihood_gradient = log_likelihood_gradient_dims.sum(-1)

        return -( -0.5*np.dot(self.y.T, alpha) - np.sum(np.log(L.diagonal())) - self.X.shape[0]/2*np.log(2*np.pi) ), -log_likelihood_gradient

        #log_likelihood_gradients = []
        #for dim in range(X.shape[1]):
        #    log_likelihood_gradient_dims = 0.5 * np.einsum("ijl,ijk->kl", tmp, K_gradient[dim])
        #    log_likelihood_gradient = log_likelihood_gradient_dims.sum(-1)
        #    log_likelihood_gradients.append(-log_likelihood_gradient)

        #return -( -0.5*np.dot(y.T, alpha) - np.sum(np.log(L.diagonal())) - X.shape[0]/2*np.log(2*np.pi) ), np.array(log_likelihood_gradients)
        
    def optimize_kernel_parameters(self, x0):
        theta_estimate, min_log_marginal_likelihood, info = fmin_l_bfgs_b(func=self.log_marginal_likelihood, x0=x0, args=(), bounds=bounds)
        # print('Optimization succeded with log marginal likelihood ', min_log_marginal_likelihood[0][0], ' in ', info['funcalls'],' iterations.')
        return  theta_estimate, min_log_marginal_likelihood, info
    
    def optimize_kernels(self, x0):
        log_likelihood_list = []
        theta_est_list  = []
        for i in range(OPTIMIZATION_ITERATIONS):
            #bounds = np.array(bounds)
            theta_initial = np.random.uniform(self.bounds[:, 0], self.bounds[:, 1])
            theta_estimate, min_log_marginal_likelihood, info = self.optimize_kernel_parameters(x0)
            log_likelihood_list.append(min_log_marginal_likelihood[0][0])
            theta_est_list.append(theta_estimate)
        return theta_estimate[np.argmin(log_likelihood_list)]
    
        # make prediction
    def predict(self, x_star, theta_):

        # create covariance matrix using kernel
        #K = pairwise_kernels(X, metric=kernel_func,  theta_=theta)
        K, _ = squared_exp_kernel(self.X, theta_=theta_)
        #print(K)

        # do Cholesky decomposition
        L = cholesky(K + self.sigma_n_square*np.eye(self.X.shape[0]), lower=True)

        # solve for alpha
        alpha = cho_solve(    (L, True),    self.y   )

        L_inv = solve_triangular(L.T, np.eye(L.shape[0]))
        K_inv = L_inv.dot(L_inv.T)

        k_star = pairwise_kernels(x_star, self.X, metric=squared_exp_kernel_func,  theta_= theta_)
        y_mean = k_star.dot(alpha) + np.mean(self.y)
        y_var = np.ones(x_star.shape[0]) - np.einsum("ij,ij->i", np.dot(k_star, K_inv), k_star)
        y_var[y_var < 0] = 0

        # log marginal likelihood
        # log_marg_likelihood = -0.5*np.dot(self.y.T, alpha) - np.sum(np.log(L.diagonal())) - self.X.shape[0]/2*np.log(2*np.pi)

        #print(-0.5*np.dot(y.T, alpha) - np.sum(np.log(L.diagonal())) - X.shape[0]/2*np.log(2*np.pi))
        return y_mean, np.sqrt(y_var) #, log_marg_likelihood
    
    def pdf(self, x_, mu_, sigma_):
        if sigma_[0] == 0:
            return  np.array([[0]])
        return  np.exp( - (x_ - mu_)**2 / (2 * sigma_**2) )* 1/ ( np.sqrt(2*np.pi) * sigma_)
    
    def predict_all(self, x_new, models_):
        list_of_pred = []
        #list_of_sigma = []
        for obj in models_:
            #print(obj,  models_[obj])
            y_pred, y_sigma = self.predict(theta_= models_[obj], x_star=X_temp)
            prob  = self.pdf(x_= y_pred, mu_= 1, sigma_=y_sigma)
            list_of_pred.append(prob[0])
        print(list_of_pred)
            #list_of_sigma.append(prob[0])
        return np.argmax(np.array(list_of_pred)) + 1

### Global parameters

In [168]:
LOCAL_BOUND = (1e-2,1e2)
SIGMA_N_SQUARE = 1e-10
DEPTH = 4
NUMBER_OF_HYPERPARAMETERS = 1
OPTIMIZATION_ITERATIONS = 10

### Framework

In [169]:
# initiliaze boundaries: # boundries = # type
bounds = []
for i in range(NUMBER_OF_HYPERPARAMETERS):
    bounds.append(LOCAL_BOUND)
x0 = np.ones(len(bounds))
bounds = np.array(bounds)

In [170]:
# initilize the models: assign initial theta values
models = {}
for obj in obj_names:
    models[obj] = x0

In [171]:
my_gp = GP(sigma_n_square=SIGMA_N_SQUARE, bounds=bounds)

# initilize the first training set
trial = 1
X = np.zeros([1,60])
y = np.ones([1,1])
for obj in obj_names:
    X_temp = data[obj][trial][0:DEPTH]
    y_temp = np.ones(X_temp.shape[0])*obj_to_id[obj]
    y_temp = y_temp.reshape(-1,1)
    X = np.vstack([X, X_temp])
    y = np.vstack([y, y_temp])
    
X = X[1:]
y = y[1:]
print(X.shape)

# train 9 models
for obj in obj_names:
    print(obj)
    y_train = y.copy()
    y_train[y == obj_to_id[obj]] = 1
    y_train[y != obj_to_id[obj]] = -1
    my_gp.set_data(X_= X, y_ = y_train)
    #theta_estimate, _, info = my_gp.optimize_kernel_parameters(models[obj])
    theta_estimate = my_gp.optimize_kernels(models[obj])
    models[obj] = np.array([theta_estimate])

(36, 60)
blue_bear
med_coke
book
empty_coke
lotion
empty_vitamin_water
med_vitamin_water
full_vitamin_water
monkey_toy


In [172]:
models

{'blue_bear': array([45.3350202]),
 'med_coke': array([4.43186673]),
 'book': array([68.34528685]),
 'empty_coke': array([1.]),
 'lotion': array([1.86009582]),
 'empty_vitamin_water': array([7.375]),
 'med_vitamin_water': array([35.4600811]),
 'full_vitamin_water': array([40.75000005]),
 'monkey_toy': array([1.])}

In [173]:
y_pred, y_sigma = my_gp.predict(theta_= models[obj], x_star=np.array([X[14, :]]))

In [174]:
y_pred, y_sigma

(array([[-1.55556022]]), array([0.]))

In [175]:
my_gp.pdf(x_= y_pred, mu_= 1, sigma_=y_sigma)

array([[0]])

In [176]:
y_pred = my_gp.predict_all(x_new=X[14], models_=models)

[array([ 0.,  0., nan, nan]), array([ 0.,  0., nan,  0.]), array([0]), array([ 0., nan, nan,  0.]), array([0]), array([0., 0., 0., 0.]), array([0]), array([0]), array([ 0., nan, nan,  0.])]


  return getattr(obj, method)(*args, **kwds)


ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [156]:
obj = 'med_coke'
for i in range(DEPTH, 4):
    X_temp = data[obj][trial][i:i+1]
    if X_temp.shape[0] == 0:
        break
    #y_temp = np.array([1])*obj_to_id[obj]
    #y_temp = y_temp.reshape(-1,1)
    
    # do prediction
    #y_pred, y_sigma = my_gp.predict(theta_= models[obj], x_star=X_temp)
    #print(y_pred, y_sigma)
    
    y_pred = my_gp.predict_all(x_new=X_temp, models_=models)
    print(y_pred)
    #X = np.vstack([X, X_temp])
    #y = np.vstack([y, y_temp])
    
    #X = X[1:]
    #y = y[1:]
    

[array([0.03390088]), array([0.]), array([0.]), array([0]), array([0.]), array([0.]), array([0.]), array([0.]), array([0.])]
1


In [152]:
my_gp.pdf(x_= y_pred, mu_= 1, sigma_=y_sigma)

array([0.42810323])

In [153]:
models

{'blue_bear': array([0.01]),
 'med_coke': array([0.50500718]),
 'book': array([1.04760671]),
 'empty_coke': array([100.]),
 'lotion': array([1.00000001]),
 'empty_vitamin_water': array([1.]),
 'med_vitamin_water': array([0.66248095]),
 'full_vitamin_water': array([1.]),
 'monkey_toy': array([1.22691006])}

In [135]:
for trial in range(1, 21):
    for i in range(20):
        for obj in obj_names:
            mask = (df.obj_name == obj) & (df.trial == trial)
            X = 
        

SyntaxError: invalid syntax (<ipython-input-135-3391f878e900>, line 5)