# <h1> Project 1

Gradient - based optimization and differentiable programming

Trajectory optimization of vehicle bicycle model

In [11]:
import numpy as np
import torch as t
import torch.nn as nn
from torch import optim
from torch.nn import utils
import matplotlib.pyplot as plt

In [2]:
class Dynamics(nn.Module):
    def __init__(self):
        super(Dynamics, self).__init__()
    @staticmethod
    def forward(state,action):
        beta = t.atan(t.tensor(lr)*t.tan(action[1])/l)
        x_dot = state[3]*t.cos(beta+state[2])
        y_dot = state[3]*t.sin(beta+state[2])
        psi_dot = state[3]*t.sin(beta)/t.tensor(lr)
        x_1 = state[0] + x_dot*frame
        y_1 = state[1] + y_dot*frame
        psi_1 = state[2] + psi_dot*frame
        v_1 = state[3] + action[0]*frame
        state = t.tensor([x_1,y_1,psi_1,v_1],requires_grad=True)
        return state

In [3]:
class Controller(nn.Module):

    def __init__(self, dim_input, dim_hidden, dim_output):
        """
        dim_input: # of system states
        dim_output: # of actions
        dim_hidden: up to you
        """
        super(Controller, self).__init__()
        self.network = nn.Sequential(
            nn.Linear(dim_input, dim_hidden),
            nn.Tanh(),
            nn.Linear(dim_hidden, dim_output),
            # You can add more layers here
            nn.Sigmoid()
        )

    def forward(self, state):
        action = self.network(state)

        return action

In [27]:
class Simulation(nn.Module):

    def __init__(self, controller, dynamics, T):
        super(Simulation, self).__init__()
        self.state = self.initialize_state()
        self.controller = controller
        self.dynamics = dynamics
        self.T = T
        self.action_trajectory = []
        self.state_trajectory = []

    def forward(self, state):
        self.action_trajectory = []
        self.state_trajectory = []
        for _ in range(T):
            action = self.controller.forward(state)
            state = self.dynamics.forward(state, action)
            self.action_trajectory.append(action)
            self.state_trajectory.append(state)
        return self.error(state)

    @staticmethod
    def initialize_state():
        state = [0., 0.,0,10]  # TODO: need batch of initial states
        return t.tensor(state, requires_grad=False).float()

    def error(self, state):
        return state[2]**2 + state[3]**2#+(state[0]-15)**2 + (state[1]-15)**2

In [17]:
class Optimize:
    def __init__(self, simulation):
        self.simulation = simulation
        self.parameters = simulation.controller.parameters()
        self.optimizer = optim.SGD(self.parameters, lr=0.01)

    def step(self):
        def closure():
            loss = self.simulation(self.simulation.state)
            self.optimizer.zero_grad()
            loss.backward()
            return loss
        self.optimizer.step(closure)
        return closure()

    def train(self, epochs):
        for epoch in range(epochs):
            loss = self.step()
            print('[%d] loss: %.3f' % (epoch + 1, loss))
            #self.visualize()

    def visualize(self):
        data = np.array([self.simulation.state_trajectory[i].detach().numpy() for i in range(self.simulation.T)])
        x = data[:, 0]
        y = data[:, 1]
        plt.plot(x, y)
        plt.show()

In [28]:
lf = 2
lr = 2
l = lf + lr
frame = 0.1
dim_input = 4
dim_output = 2
dim_hidden = 100
T = 100
d = Dynamics()  # define dynamics
c = Controller(dim_input, dim_hidden, dim_output)  # define controller
s = Simulation(c, d, T)  # define simulation
o = Optimize(s)  # define optimizer
o.train(40)  #

[1] loss: 776.725
[2] loss: 776.725
[3] loss: 776.725
[4] loss: 776.725
[5] loss: 776.725
[6] loss: 776.725
[7] loss: 776.725
[8] loss: 776.725
[9] loss: 776.725
[10] loss: 776.725
[11] loss: 776.725
[12] loss: 776.725
[13] loss: 776.725
[14] loss: 776.725
[15] loss: 776.725
[16] loss: 776.725
[17] loss: 776.725
[18] loss: 776.725
[19] loss: 776.725
[20] loss: 776.725
[21] loss: 776.725
[22] loss: 776.725
[23] loss: 776.725
[24] loss: 776.725
[25] loss: 776.725
[26] loss: 776.725
[27] loss: 776.725
[28] loss: 776.725
[29] loss: 776.725
[30] loss: 776.725
[31] loss: 776.725
[32] loss: 776.725
[33] loss: 776.725
[34] loss: 776.725
[35] loss: 776.725
[36] loss: 776.725
[37] loss: 776.725
[38] loss: 776.725
[39] loss: 776.725
[40] loss: 776.725


In [35]:
act = c.forward(t.tensor([0.,0.,3.14/2,10.]))
statex = d.forward(t.tensor([0.,0.,1.,0.]),act)

In [36]:
s.controller.parameters()
ox = optim.SGD(s.controller.parameters(),lr=0.01)
#s.forward(t.tensor([0.,0.,1.,0.]))

In [7]:
loss = s.forward(t.tensor([0.,0.,1.,0.]))
loss.backward()
loss

tensor(124.4523, grad_fn=<AddBackward0>)

In [6]:
s.forward(t.tensor([1.,0.,1.,0.]))

tensor(107.9737, grad_fn=<AddBackward0>)

In [42]:
s.state

tensor([0.0000, 0.0000, 1.5700, 0.0000])