In [1]:
import sys
import os
import random
import logging
import time
from tqdm.notebook import tqdm
import pandas as pd

sys.path.append('../')

import numpy as np
import matplotlib.pyplot as plt
from IPython.display import clear_output
import time
from ocp import *
from costs import *
from ocp_utils import *
from env_creator import EnvCreator, generate_sdf_rep
from tensor_decomp import apply_tt
from visualization_utils import plot_traj_projections, plot_traj_and_obs_3d

import pybullet as p
import pybullet_data
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

from regression import NN_Regressor, GPy_Regressor
from regression import rbf
import numpy.matlib

%load_ext autoreload
%autoreload 2
np.set_printoptions(precision=4, suppress=True)

#### Load data

In [38]:
# exp_name = 'var_start_goal_3000samples'
# exp_name = 'start_goal_fixed_300samples_1'
# exp_name = 'mult_obs_var_start_goal_1000samples_1'
# exp_name = 'mult_obs_var_start_goal_1000samples_teguh'
exp_name = 'var_start_goal_1000samples_no_waypoint'
exp_name = 'mult_obs_var_start_goal_1000samples_teguh_noviapoint'

In [None]:
data = np.load('training_data/data_'+ exp_name +'.npy', allow_pickle=True)[()]

In [4]:
x_inputs = data['x_inputs']
x_outputs = data['x_outputs']
obs_set = data['obstacles']
pca = data['pca']

#### Visualize data in pybullet

In [5]:
p.connect(p.DIRECT)
p.setAdditionalSearchPath(pybullet_data.getDataPath())

In [6]:
idx = np.random.randint(len(data['x_inputs']))
x = x_inputs[idx]
y = x_outputs[idx].reshape(-1,3)
obstacles = obs_set[idx]
x0, x_target = x[-6:-3],x[-3:]
obj_id, init_id, target_id, border_id, obstacle_ids = init_pybullet(x0,x_target, obstacles)

## Try prediction

#### Load MDN model

In [7]:
save_path = os.path.join('/home/teguh/git/rli/iterative_lqr/notebooks/' + 'data/models/mdn_'+exp_name+'.h5')  

In [8]:
import pickle as pkl
import tensorflow_probability as tfp
tf.keras.backend.set_floatx('float64')
tfd = tfp.distributions
tfpl = tfp.layers

#### Setup the model (copy from the learning notebook)

n_comps = 10
D_out = 15
D_in = 606
n_comp_params_size = tfpl.IndependentNormal.params_size(event_shape=(D_out,))

params_size = tfpl.MixtureSameFamily.params_size(num_components=n_comps, component_params_size=n_comp_params_size)

mdn = Sequential([
    Dense(256, activation='relu', input_shape=(D_in,), kernel_regularizer = tf.keras.regularizers.l2(1e-2)),
    Dense(256, activation='relu', kernel_regularizer = tf.keras.regularizers.l2(1e-2)),
    Dense(params_size),
    tfpl.MixtureSameFamily(n_comps, tfpl.IndependentNormal(event_shape=(D_out,)))
])


#### Load the weights

mdn.load_weights(save_path)

#### Load NN

In [9]:
save_path_nn = os.path.join('/home/teguh/git/rli/iterative_lqr/notebooks/' + 'data/models/nn_'+exp_name+'.h5')  

In [10]:
nn = tf.keras.models.load_model(save_path_nn)

In [11]:
nn.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 256)               155392    
_________________________________________________________________
dense_1 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_2 (Dense)              (None, 15)                3855      
Total params: 225,039
Trainable params: 225,039
Non-trainable params: 0
_________________________________________________________________


#### Try prediction

In [27]:
model = mdn
mode = 'mdn'
n_sample = 10

#### Adapt the prediction using lqt

In [42]:
T = 100
lin_sys = create_double_integrator()
xs_init, us_init = create_lqt_init(lin_sys, y_traj, x0, x_target, T)

In [45]:
plot_traj(xs_init[:,:3], obj_id)

#### Create DDP problem

In [46]:
T = 100
n_iter = 10

x0 = np.concatenate([x0, np.zeros(3)])
x_target = np.concatenate([x_target, np.zeros(3)])

# Setup and solve problem 
prob, lin_sys = setup_ilqr(T, x0, x_target, obstacles)

#### Standard init

In [47]:
from ocp_utils import *

In [48]:
# xs_init, us_init = create_standard_init(lin_sys, x0, T)
xs_init, us_init = create_linear_init(x0, x_target, T)

#init using waypoint:
# x_waypoint = np.array([0,0,-1,0,0,0])
# xs_init, us_init = create_waypoint_init(x0, x_waypoint, x_target,  T)

In [49]:
plot_traj(xs_init, obj_id)

#### Solve DDP

In [50]:
xs, us, ddp = solve_ilqr(prob, xs_init, us_init, iterations=n_iter, th_grad = 1e-6, th_stop = 1e-6)
cost_std = ddp.cost
feas_std = ddp.isFeasible

print(cost_std, feas_std)

36.839194222144215 True


#### NN init

In [51]:
# xs_init, us_init = create_pred_init(y_traj, x0)
xs_init, us_init = create_lqt_init(lin_sys, y_traj, x0, x_target, T)

#### Solve DDP

In [52]:
xs, us, ddp = solve_ilqr(prob, xs_init, us_init, iterations=n_iter, th_grad = 1e-6, th_stop = 1e-6)
cost_nn = ddp.cost
feas_nn = ddp.isFeasible

print('Cost std:{}, cost nn: {}'.format(cost_std, cost_nn))

print('Feasible std:{}, Feasible nn: {}'.format(feas_std, feas_nn))

Cost std:36.839194222144215, cost nn: 1.4237797361718778
Feasible std:True, Feasible nn: True


In [20]:
plot_traj(xs_init, obj_id, dt=0.01)

#### Compare Initialization vs DDP solution

#### Divide data to train and test data

In [12]:
n_samples = len(x_inputs)
indices = np.arange(n_samples)
x_train, x_test, y_train, y_test, train_idx, test_idx = train_test_split(x_inputs, x_outputs, indices, random_state=3, test_size=0.3)

## Compare warmstarting performance in batches

In [16]:
T = 100
n_iter = 10 #number of ddp iterations
model = mdn
mode = 'mdn'
n_sample = 10
th_grad = 1e-6
th_stop = 1e-6

In [33]:
for n_iter in [50]:#, 10, 50]: 
    print('-------------------\n%d'%n_iter)
    cost_nn_set, cost_std_set, cost_mdn_set = [], [], []
    feas_nn_set, feas_std_set, feas_mdn_set = [], [], []
    iter_nn_set, iter_std_set = [], []
    
    for idx in range(len(x_test[:100])): #20,30):#
        #pick one test case
        x = x_test[idx]
        y = y_test[idx].reshape(-1,3)
        full_idx = test_idx[idx]
        obstacles = obs_set[full_idx]
        x0, x_target = x[-6:-3],x[-3:]
#         obj_id, init_id, target_id, border_id, obstacle_ids = init_pybullet(x0,x_target, obstacles)



        #### Create DDP problem
        x0 = np.concatenate([x0, np.zeros(3)])
        x_target = np.concatenate([x_target, np.zeros(3)])
        # Setup problem 
        prob, lin_sys = setup_ilqr(T, x0, x_target, obstacles)

#         #### Standard init
#         xs_init, us_init = create_standard_init(lin_sys, x0, T)
#         xs, us, ddp = solve_ilqr(prob, xs_init, us_init, iterations=n_iter, th_grad = th_grad, th_stop = th_stop)
#         cost_std = ddp.cost
#         feas_std = ddp.isFeasible
#         xs_init_std = xs_init.copy()
        
        #### Linear init
        xs_init, us_init = create_linear_init(x0, x_target, T)
        xs, us, ddp = solve_ilqr(prob, xs_init, us_init, iterations=n_iter, th_grad = th_grad, th_stop = th_stop)
        cost_std = ddp.cost
        feas_std = ddp.isFeasible
        iter_std = ddp.iter
        xs_init_std = xs_init.copy()

        #### NN init
        y_pred = nn.predict(x[None,:])
        y_traj = pca.inverse_transform(y_pred).reshape(-1,3)
        xs_init, us_init = create_lqt_init(lin_sys, y_traj, x0, x_target, T)
        xs, us, ddp = solve_ilqr(prob, xs_init, us_init, iterations=n_iter, th_grad = th_grad, th_stop = th_stop)
        cost_nn = ddp.cost
        feas_nn = ddp.isFeasible
        iter_nn = ddp.iter
        xs_init_nn = xs_init.copy()
        
        #### MDN init
        y_pred = mdn(x[None,:]).sample(n_sample)
        y_traj = pca.inverse_transform(y_pred).reshape(n_sample, -1, 3)
        y_traj = get_best_mdn_prediction(y_traj, x, obstacles)        
        xs_init, us_init = create_lqt_init(lin_sys, y_traj, x0, x_target, T)
        xs, us, ddp = solve_ilqr(prob, xs_init, us_init, iterations=n_iter, th_grad = th_grad, th_stop = th_stop)
        cost_mdn = ddp.cost
        feas_mdn = ddp.isFeasible
        xs_init_mdn = xs_init.copy()

        cost_nn_set.append(cost_nn)
        cost_mdn_set.append(cost_mdn)
        cost_std_set.append(cost_std) 
        
        feas_nn_set.append(feas_nn)
        feas_mdn_set.append(feas_mdn)
        feas_std_set.append(feas_std)
#         iter_nn_set.append(iter_nn)
#         iter_std_set.append(iter_std)
        print('Feasible STD: {}, feasible NN: {}, feasible MDN: {}'.format(np.sum(feas_std_set), np.sum(feas_nn_set), np.sum(feas_mdn_set)))
        print('Cost STD: {:.3f}, Cost NN: {:.3f}, Cost MDN: {:.3f}'.format(np.mean(cost_std_set), np.mean(cost_nn_set), np.mean(cost_mdn_set)))    #         print('Iter STD: {}, iter NN: {}'.format(np.mean(iter_std_set), np.mean(iter_nn_set)))

-------------------
50
Feasible STD: 1, feasible NN: 1, feasible MDN: 1
Cost STD: 0.732, Cost NN: 0.728, Cost MDN: 0.737
Feasible STD: 2, feasible NN: 2, feasible MDN: 2
Cost STD: 0.814, Cost NN: 0.815, Cost MDN: 0.825
Feasible STD: 3, feasible NN: 3, feasible MDN: 3
Cost STD: 1.027, Cost NN: 1.067, Cost MDN: 1.027
Feasible STD: 4, feasible NN: 4, feasible MDN: 4
Cost STD: 1.001, Cost NN: 1.030, Cost MDN: 0.997
Feasible STD: 5, feasible NN: 5, feasible MDN: 5
Cost STD: 0.959, Cost NN: 0.982, Cost MDN: 0.956
Feasible STD: 6, feasible NN: 6, feasible MDN: 6
Cost STD: 0.982, Cost NN: 1.001, Cost MDN: 0.997
Feasible STD: 7, feasible NN: 7, feasible MDN: 7
Cost STD: 0.946, Cost NN: 0.959, Cost MDN: 0.956
Feasible STD: 8, feasible NN: 8, feasible MDN: 8
Cost STD: 0.953, Cost NN: 0.965, Cost MDN: 0.962
Feasible STD: 9, feasible NN: 9, feasible MDN: 9
Cost STD: 1.012, Cost NN: 1.023, Cost MDN: 1.028
Feasible STD: 10, feasible NN: 10, feasible MDN: 10
Cost STD: 0.973, Cost NN: 0.982, Cost MDN: 

KeyboardInterrupt: 

In [40]:
cost_std_set = np.array(cost_std_set)[:100]
cost_nn_set = np.array(cost_nn_set)[:100]
cost_mdn_set = np.array(cost_mdn_set)[:100]

In [41]:
print('Feasible STD: {}, feasible NN: {}, feasible MDN: {}'.format(np.sum(feas_std_set), np.sum(feas_nn_set), np.sum(feas_mdn_set)))
print('Cost STD: {:.3f}, Cost NN: {:.3f}, Cost MDN: {:.3f}'.format(np.mean(cost_std_set), np.mean(cost_nn_set), np.mean(cost_mdn_set)))

Feasible STD: 160, feasible NN: 160, feasible MDN: 160
Cost STD: 1.299, Cost NN: 1.297, Cost MDN: 1.311


In [42]:
data = dict()
data['feas_std_set'] = feas_std_set
data['feas_nn_set'] = feas_nn_set
data['feas_mdn_set'] = feas_mdn_set
data['cost_std_set'] = cost_std_set
data['cost_nn_set'] = cost_nn_set
data['cost_mdn_set'] = cost_mdn_set
np.save('final_data/'+exp_name+'_warmstart_res_'+str(n_iter), data)