## Imitation Learning 

Learn the dynamics {A, B} by minimizing the imitation loss, which is defined as

\begin{equation*}
\text{Loss} = \mathbb{E}_{x_\text{init}} \left[ \|\tau(x_\text{init}; \theta) - \tau(x_\text{init}; \hat{\theta})\|_2^2 \right]
\end{equation*}

In [1]:
# Seed for the learner: it is passed to LqrNet when the network is built and
# is also used as the prefix of the loss log file name ("<train_seed>_losses.csv").
train_seed = 2

In [2]:
# The cost is already given, so back-propagation is done only partially
import chainer
import numpy as np
import sys
sys.path.append("../lqr")
from lqr_recursion import LqrRecursion
from differentiable_lqr import  DiffLqr, LqrNet
from chainer import functions as F
import chainer.computational_graph as c
from util import expand_time_batch
import os
'''
import matplotlib.pyplot as plt
'''

vecLib, which is a part of Accelerate, is known not to work correctly with Chainer.
We recommend using other BLAS libraries such as OpenBLAS.
For details of the issue, please see
https://docs.chainer.org/en/stable/tips.html#mnist-example-does-not-converge-in-cpu-mode-on-mac-os-x.

Please be aware that Mac OS X is not an officially supported OS.

  ''')  # NOQA


'\nimport matplotlib.pyplot as plt\n'

In [3]:
# Problem sizes and experiment settings shared by the cells below.
T = 5  # number of time steps handed to the LQR solver (presumably the horizon length)
n_state = 3  # state dimension: A is (n_state, n_state)
n_ctrl = 1  # control dimension: B is (n_state, n_ctrl)
n_sc = n_ctrl + n_state  # combined state + control dimension; sizes the cost terms Q and p
n_batch = 128  # number of initial states sampled per training iteration
dtype = np.float64  # NOTE(review): not referenced in the visible cells
expert_seed = 42
# Seed NumPy's global RNG so the randomly drawn expert parameters are reproducible.
np.random.seed(expert_seed)
alpha = 0.2  # scale of the Gaussian perturbation added to the identity when building A

In [4]:
# Ground-truth ("expert") LQR problem.  The entries are created in the order
# Q, p, A, B so that the draws from the seeded NumPy RNG stay in this sequence.
exp = {
    # Quadratic cost matrix (identity of size n_sc).
    'Q': chainer.Variable(np.eye(n_sc)),
    # Linear cost vector (the "little c" term).
    'p': chainer.Variable(np.random.randn(n_sc)),
    # Left part of the dynamics matrix F: identity plus alpha-scaled Gaussian noise.
    'A': chainer.Variable(np.eye(n_state) + alpha * np.random.randn(n_state, n_state)),
    # Right part of the dynamics matrix F: random (n_state, n_ctrl) matrix.
    'B': chainer.Variable(np.random.randn(n_state, n_ctrl)),
}

In [5]:
# Place the expert dynamics side by side: [A | B] has shape (n_state, n_state + n_ctrl).
exp_ab_cat = F.concat((exp['A'], exp['B']), axis=1)
# Expand over time and batch with the project helper.  T-1 is passed as the
# time count (presumably one transition per step between T states — confirm
# against util.expand_time_batch).
exp_large_f = expand_time_batch(exp_ab_cat, T-1, n_batch)

In [6]:
# Attach the time/batch-expanded quantities that the LQR solver is called with.
# Q is expanded separately because the result is also kept as `exp_C`.
exp_C = expand_time_batch(exp['Q'], T, n_batch)
exp.update(
    F=exp_large_f,  # expanded dynamics [A | B]
    f=None,         # no additional dynamics term is supplied
    C=exp_C,        # expanded quadratic cost
    c=expand_time_batch(exp['p'], T, n_batch),  # expanded linear cost
)

In [7]:
def get_loss(x_init):
    """Return the imitation loss between the expert and learner trajectories.

    The expert trajectory is computed by ``LqrRecursion`` using the true
    dynamics ``exp['F']``.  The learner trajectory comes from the module-level
    ``net``, which is called with the same initial states and cost terms but
    is not given ``exp['F']`` (presumably it uses its own learned dynamics).

    Relies on the module-level names ``exp``, ``net``, ``T``, ``n_state`` and
    ``n_ctrl`` being defined before the first call.

    Args:
        x_init: Batch of initial states.  The training loop passes a
            ``chainer.Variable`` of shape ``(n_batch, n_state)``.

    Returns:
        The sum of the mean squared control error and the mean squared state
        error, as returned by ``chainer.functions.mean``.
    """
    expert = LqrRecursion(x_init, exp['C'], exp['c'], exp['F'], exp['f'], T, n_state, n_ctrl)
    x_true, u_true = expert.solve_recursion()

    # Same costs and initial states as the expert, but no expert dynamics.
    x_pred, u_pred = net((x_init, exp['C'], exp['c'], exp['f']))

    control_error = F.mean((u_true - u_pred)**2)
    state_error = F.mean((x_true - x_pred)**2)
    return control_error + state_error

In [8]:
# Learner network, initialised from train_seed, and the optimizer that updates it.
net = LqrNet(T, n_batch, n_state, n_ctrl, train_seed)
opt = chainer.optimizers.RMSprop(lr=1e-2)
opt.setup(net)

# Per-iteration losses are logged to "<train_seed>_losses.csv".  The header is
# flushed immediately so the file is readable while training is still running.
fname = str(train_seed) + '_losses.csv'
loss_f = open(fname, 'w')
loss_f.write('im_loss,mse\n')
loss_f.flush()

In [9]:
# Imitation-learning loop: 200 optimizer steps, each on a fresh random batch of
# initial states.  Both losses are appended to the CSV log on every iteration
# and printed every `plot_interval` iterations.
plot_interval = 10
for i in range(200):
    # Sample a new batch of initial states.
    x_init = chainer.Variable(np.random.randn(n_batch, n_state))

    # One gradient step on the trajectory (imitation) loss.
    net.cleargrads()
    loss = get_loss(x_init)
    loss.backward()
    opt.update()

    # Diagnostic only: squared error between learned and expert A/B,
    # evaluated after the parameter update.
    model_loss = F.mean((net.A - exp['A'])**2) + F.mean((net.B - exp['B'])**2)

    loss_f.write('{},{}\n'.format(loss.data, model_loss.data))
    loss_f.flush()

    if i % plot_interval != 0:
        continue
    print("iteration", i,  "{0:04f}".format(loss.data), "dyanmics loss ", "{0:04f}".format(model_loss.data))

iteration 0 0.661925 dyanmics loss  4.774785
iteration 10 0.294314 dyanmics loss  4.722322
iteration 20 0.229643 dyanmics loss  4.779115
iteration 30 0.176507 dyanmics loss  4.801983
iteration 40 0.161523 dyanmics loss  4.819201
iteration 50 0.219494 dyanmics loss  4.833848
iteration 60 0.148188 dyanmics loss  4.845135
iteration 70 0.134515 dyanmics loss  4.835713
iteration 80 0.164870 dyanmics loss  4.823391
iteration 90 0.197836 dyanmics loss  4.798079
iteration 100 0.133565 dyanmics loss  4.785647
iteration 110 0.146801 dyanmics loss  4.759894
iteration 120 0.146065 dyanmics loss  4.724303
iteration 130 0.166107 dyanmics loss  4.678963
iteration 140 0.145697 dyanmics loss  4.630056
iteration 150 0.146848 dyanmics loss  4.585837
iteration 160 0.155886 dyanmics loss  4.535748
iteration 170 0.146954 dyanmics loss  4.473944
iteration 180 0.140506 dyanmics loss  4.407619
iteration 190 0.137744 dyanmics loss  4.346956
