In [10]:
import numpy as np
import torch
from torch import nn
import torch.autograd.functional as F
STATELEN = 10
ACTLEN = 10
STEP_SIZE = 4
#based on https://homes.cs.washington.edu/~todorov/papers/TassaIROS12.pdf
class Dynamics(nn.Module):

    def __init__(self, input_size, hidden_size, output_size):
        super(Dynamics, self).__init__()
        # self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, output_size),
            nn.LeakyReLU(0.1)
        )
    def forward(self, input_element):
        output = self.linear_relu_stack(input_element)
        return output
    
class reward(nn.Module):

    def __init__(self, input_size, hidden_size, output_size = 1):
        super(reward, self).__init__()
        # self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, output_size),
            nn.Tanh()
        )

    def forward(self, input_element):
        output = self.linear_relu_stack(input_element)
        return output
    
class ilqr:
    
    def __init__(self, ts, dyn, re, sl, al):
        """
        Args:
            ts: time step
            dyn: dynamic
            re: reward
            sl: state length
            al: action length
        """
        self.ts = ts
        self.dyn = dyn
        self.re = re
        self.sl = sl
        self.al = al
        self.b_s = 1
        
        self.S = torch.rand((self.ts, self.b_s, self.sl))
        self.A = torch.rand((self.ts, self.b_s, self.al))
        self.R = torch.empty((self.ts, self.b_s, 1))
        self.K_arr = torch.zeros(self.ts, self.b_s, self.al, self.sl)
        self.k_arr = torch.zeros(self.ts, self.b_s, 1, self.al)
        self.V_t = torch.zeros(self.b_s, self.sl, self.sl)
        self.v_t = torch.zeros(self.b_s, 1, self.sl)
        self.ifconv = 0
        
    def _forward(self):
        
        new_S = torch.rand((self.ts, self.b_s, self.sl))
        new_A = torch.rand((self.ts, self.b_s, self.al))
        s = self.S[0].clone().detach()

        i = 0
        while i < self.ts:
            new_S[i] = s
            new_A[i] = (torch.matmul(new_S[i] - self.S[i],torch.transpose((self.K_arr[i]),0,1)) + 
                        self.k_arr[i] + self.A[i]
                       )
            
            sa_in = torch.cat((new_S[i], new_A[i]),dim = 1)
            #sa_in shape = [1,state_size + action_size]

            s = self.dyn(sa_in)
            #state shape = [1,state_size]

            self.R[i] = self.re(sa_in)
            
            i = i + 1
            
        self.S = new_S
        self.A = new_A

    def _backward(self):
        
        C = torch.zeros(self.b_s, self.al + self.sl, self.al + self.sl)
        F = torch.zeros(self.b_s, self.sl, self.al + self.sl)
        c = torch.zeros(self.b_s, 1, self.al + self.sl)
        sa_in = torch.cat((self.S, self.A),dim = 2)
        i = self.ts -1
        while i > -1:
            j = 0
            while j < self.b_s:
                C[j] = F.hessian(self.re, sa_in[i][j])
                #shape = [state+action, state+action]
                #print(torch.sum(C[j]))
                F[j] = F.jacobian(self.dyn, sa_in[i][j])
                #shape = [state, state+action]
                #print(torch.sum(F[j]))
                c[j] = F.jacobian(self.re, sa_in[i][j])
                #shape = [1, state+action]
                #print(torch.sum(c[j]))
                j = j + 1
            
            transF_t = torch.transpose(F_t,1,2)
            Q = C + torch.matmul(torch.matmul(transF_t, V_t), F_t)
            #eq 5[c~e]
            q = c + torch.matmul(v_t, F_t)
            #eq 5[a~b]
            
            Q_pre1, Q_pre2 = torch.split(Q, [self.sl, self.al], dim = 1)
            Q_xx, Q_xu = torch.split(Q_pre1, [self.sl, self.al], dim = 2)
            Q_ux, Q_uu = torch.split(Q_pre2, [self.sl, self.al], dim = 2)
            
            Q_x, Q_u = torch.split(q, [self.sl, self.al], dim = 1)
            ## how to batched eye?
            try:
                invQuu = torch.linalg.inv(Q_uu - torch.eye(self.al)) #regularize term
                #eq [9]
            except:
                invQuu = torch.linalg.inv(Q_uu + torch.eye(self.al)*0.01)
                self.ifconv = 1

            K_t = -torch.matmul(invQuu, Q_ux)
            transK_t = torch.transpose(K_t, 0, 1)
            #K_t shape = [actlen, statelen]
            
            k_t = -torch.matmul(Q_u, invQuu)
            #k_t shape = [1,actlen]
            
            V_t = (Q_xx + torch.matmul(Q_xu, K_t) + 
                   torch.matmul(transK_t, Q_ux) +
                   torch.matmul(torch.matmul(transK_t, Q_uu), K_t)
                  )
            # eq 11c
            #V_t shape = [statelen, statelen]

            v_t = (Q_x + torch.matmul(k_t, Q_ux) + 
                   torch.matmul(Q_u, K_t) + 
                   torch.matmul(k_t, torch.matmul(Q_uu, K_t)) 
                  )
            # eq 11b
            #v_t shape = [1, statelen]
            
            self.K_arr[i] = K_t
            self.k_arr[i] = k_t
            i = i - 1
    
    def fit(self):
        
        i = 0
        while (self.ifconv != 1) and i < 100:
            i = i + 1
            self._forward()
            self._backward()
        
        return self.A
 
#for param in rew.parameters():
#    print(param)

In [2]:
my_Dyna = Dynamics(STATELEN + ACTLEN, STATELEN, STATELEN)
my_reward = reward(STATELEN + ACTLEN, STATELEN , 1)


myilqr = ilqr(4,my_Dyna,my_reward,10,10)

myilqr.fit()

tensor([[[-2.2658, -0.9684,  4.6181,  0.6727, -2.4023,  0.1251,  0.3206,
           1.6152,  0.2583, -1.9794]],

        [[-1.9366, -1.3025,  4.8335,  0.4203, -2.4425,  0.0961,  0.3086,
           2.1336,  0.5074, -2.2155]],

        [[-2.4339, -1.0471,  4.4494,  0.2337, -2.2584,  0.0066,  0.5277,
           2.1778,  0.5146, -2.4784]],

        [[-1.9440, -1.3290,  4.7818,  0.5538, -2.6536,  0.0823,  0.4539,
           2.0534,  0.3851, -2.2354]]], grad_fn=<CopySlices>)

In [2]:
import torch
S = torch.rand((3, 1, 4))

In [4]:
import numpy as np
np.shape(S.view(-1))

torch.Size([12])

In [9]:
np.shape(S.squeeze())

torch.Size([3, 4])

In [11]:
new_S = torch.rand((4, 1, 3))
new_A = torch.rand((4, 1, 5))

my_Dyna = Dynamics(STATELEN + ACTLEN, STATELEN, STATELEN)
my_reward = reward(STATELEN + ACTLEN, STATELEN , 1)

s = S[0].clone().detach()


i = 0
while i < self.ts:
    new_S[i] = s
    new_A[i] = (torch.matmul(new_S[i] - S[i],torch.transpose((K_arr[i]),0,1)) + 
         k_arr[i] + A[i]
        )
    sa_in = torch.cat((new_S[i], new_A[i]),dim = 1)
    #sa_in shape = [1,state_size + action_size]

    s = my_Dyna(sa_in)
    #state shape = [1,state_size]

    R[i] = my_reward(sa_in)

    i = i + 1

NameError: name 'self' is not defined

In [27]:
new_S = torch.rand((4, 2, 3))
new_A = torch.rand((4, 2, 5))
print(new_S)
print(new_A)
sa_in = torch.cat((new_S[0], new_A[0]),dim = 1)
print(np.shape(sa_in))

tensor([[[0.2503, 0.3700, 0.4463],
         [0.4160, 0.0699, 0.1047]],

        [[0.9365, 0.2305, 0.5194],
         [0.5007, 0.1840, 0.7685]],

        [[0.3118, 0.0037, 0.9661],
         [0.8170, 0.1456, 0.4000]],

        [[0.2563, 0.4554, 0.3776],
         [0.1245, 0.0504, 0.4079]]])
tensor([[[3.6793e-02, 1.1044e-01, 2.9308e-04, 4.6370e-01, 4.4860e-01],
         [3.6057e-01, 9.2463e-03, 5.8263e-01, 5.1973e-01, 1.9103e-01]],

        [[2.8883e-01, 3.3326e-01, 8.0322e-01, 9.9161e-01, 5.6124e-01],
         [2.3160e-01, 5.5209e-02, 6.0231e-02, 1.4384e-02, 5.6236e-01]],

        [[4.1703e-01, 1.2557e-01, 1.8472e-01, 7.6195e-01, 3.2007e-02],
         [2.8687e-01, 4.0119e-01, 6.5657e-02, 4.1373e-01, 3.4538e-01]],

        [[8.0777e-01, 2.9345e-01, 5.2441e-01, 6.9381e-01, 3.9671e-01],
         [9.2216e-01, 5.5927e-01, 8.9263e-01, 2.1028e-01, 6.0275e-01]]])
torch.Size([2, 8])


In [16]:
my_Dyna = Dynamics(8, 3, 3)
my_reward = reward(8, 3 , 1)

In [24]:
output = my_Dyna(torch.cat((new_S[0], new_A[0]),dim = 1))

In [25]:
np.shape(output)

torch.Size([2, 3])

In [26]:
import torch.autograd.functional as F
C_t = F.hessian(my_reward, sa_in)
print(np.shape(C_t))

RuntimeError: The Tensor returned by the function given to hessian should contain a single element

In [30]:
import torch.autograd.functional as F
def pow_adder_reducer(x, y):
   return (2 * x.pow(2) + 3 * y.pow(2)).sum()
inputs = torch.rand(2)
F.hessian(pow_adder_reducer, inputs)

TypeError: pow_adder_reducer() missing 1 required positional argument: 'y'