# <h1> Project 1

Gradient - based optimization and differentiable programming

Pole - Cart Balancing Controller Design with Neural Network and Gradient Descent.

In [3]:
# overhead

import logging
import numpy as np
import torch as t
import torch.nn as nn
from torch import optim
import matplotlib.pyplot as plt

logger = logging.getLogger(__name__)

In [5]:
M = 0.5
m = 0.2
b = 0.1
I = 0.006
g = 9.8
l = 0.3
p = I*(M+m)+M*m*(l**2)

In [10]:
A = t.tensor([[0,1,0,0],[0,-(I+m*(l**2))*b/p,((m**2)*g*(l**2))/p,0],
              [0,0,0,1],[0,-(m*l*b)/p,m*g*l*(M+m)/p,0]]).float()
B = t.tensor([[0],[(I+m*(l**2))/p],
              [0],[m*l/p]]).float()
C = t.tensor([[1,0,0,0],
              [0,0,1,0]]).float()
D = t.tensor([[0],[0]]).float()

In [24]:
0.01*t.randn(size=[1])

tensor([0.0030])

In [25]:
class Dynamics(nn.Module):
    def __init__(self):
        super(Dynamics, self).__init__()
    @staticmethod
    def forward(state,action):
        dA = t.matmul(B,t.transpose(action,0,1))
        Ac = A-dA
        state = t.matmul(Ac,state)
        state = state + 0.01*t.randn(size=[1])
        return state

In [32]:
class Controller(nn.Module):

    def __init__(self, dim_input, dim_hidden, dim_output):
        """
        dim_input: # of system states
        dim_output: # of actions
        dim_hidden: up to you
        """
        super(Controller, self).__init__()
        self.network = nn.Sequential(
            nn.Linear(dim_input, dim_hidden),
            nn.ReLU(),
            #nn.Linear(dim_hidden, dim_hidden),
            #nn.Sigmoid(),
            #nn.Linear(dim_hidden, dim_hidden),
            #nn.ReLU(),
            nn.Linear(dim_hidden, dim_output),
            nn.Sigmoid()


            # You can add more layers here
            #nn.Sigmoid()
        )

    def forward(self, state):
        action = self.network(state)
        return action

In [33]:
class Simulation(nn.Module):

    def __init__(self, controller, dynamics, T):
        super(Simulation, self).__init__()
        self.state = self.initialize_state()
        self.controller = controller
        self.dynamics = dynamics
        self.T = T
        self.action_trajectory = []
        self.state_trajectory = []

    def forward(self, state):
        self.action_trajectory = []
        self.state_trajectory = []
        for _ in range(T):
            action = self.controller.forward(state)
            state = self.dynamics.forward(state, action)
            self.action_trajectory.append(action)
            self.state_trajectory.append(state)
        return self.error(state)

    @staticmethod
    def initialize_state():
        state = [1, 0.,0.4,0.]  # TODO: need batch of initial states
        return t.tensor(state,requires_grad=True).float()

    def error(self, state):
        return 0.1*state[0]**2 + 0.1*state[2]**0

In [34]:
class Optimize:
    def __init__(self, simulation):
        self.simulation = simulation
        self.parameters = simulation.controller.parameters()
        self.optimizer = optim.LBFGS(self.parameters, lr=0.01)

    def step(self):
        def closure():
            loss = self.simulation(self.simulation.state)
            self.optimizer.zero_grad()
            loss.backward()
            return loss
        self.optimizer.step(closure)
        return closure()

    def train(self, epochs):
        for epoch in range(epochs):
            loss = self.step()
            print('[%d] loss: %.3f' % (epoch + 1, loss))
            #self.visualize()

    def visualize(self):
        data = np.array([self.simulation.state_trajectory[i].detach().numpy() for i in range(self.simulation.T)])
        x = data[:, 0]
        y = data[:, 1]
        plt.plot(x, y)
        plt.show()

In [35]:
#frame = 0.2
dim_input = 4
dim_output = 1
dim_hidden = 10
T = 500
d = Dynamics()  # define dynamics
c = Controller(dim_input, dim_hidden, dim_output)  # define controller
s = Simulation(c, d, T)  # define simulation
o = Optimize(s)  # define optimizer
o.train(40)  #

[1] loss: 69332560021601386496.000
[2] loss: 66214995956112293888.000
[3] loss: 64326509968616325120.000
[4] loss: 63339412409664143360.000
[5] loss: 62826727729818238976.000
[6] loss: 62519933199343157248.000
[7] loss: 62311008397879672832.000
[8] loss: 62128911680133922816.000
[9] loss: 62022624090100072448.000
[10] loss: 61956618208061423616.000
[11] loss: 61914638854112935936.000
[12] loss: 61888725564069511168.000
[13] loss: 61871093795606495232.000
[14] loss: 61859680864910180352.000
[15] loss: 61852907873283080192.000
[16] loss: 61851636837841371136.000
[17] loss: 61848435059981287424.000
[18] loss: 61847067267516334080.000
[19] loss: 61846082105097846784.000
[20] loss: 61845611514121158656.000
[21] loss: 61845365223516536832.000
[22] loss: 61845365223516536832.000
[23] loss: 61845220087981670400.000
[24] loss: 61845184903609581568.000
[25] loss: 61844978195423559680.000
[26] loss: 61844956205191004160.000
[27] loss: 61844727506772426752.000
[28] loss: 61844727506772426752.000
[

In [36]:
c.forward(t.tensor([0,0.,0.2,0.]))

tensor([0.0082], grad_fn=<SigmoidBackward>)

In [37]:
s.action_trajectory

[tensor([0.0001], grad_fn=<SigmoidBackward>),
 tensor([9.9939e-05], grad_fn=<SigmoidBackward>),
 tensor([7.8300e-05], grad_fn=<SigmoidBackward>),
 tensor([6.1028e-05], grad_fn=<SigmoidBackward>),
 tensor([4.7284e-05], grad_fn=<SigmoidBackward>),
 tensor([3.6388e-05], grad_fn=<SigmoidBackward>),
 tensor([2.7760e-05], grad_fn=<SigmoidBackward>),
 tensor([2.0999e-05], grad_fn=<SigmoidBackward>),
 tensor([1.5737e-05], grad_fn=<SigmoidBackward>),
 tensor([1.1673e-05], grad_fn=<SigmoidBackward>),
 tensor([8.5631e-06], grad_fn=<SigmoidBackward>),
 tensor([6.2057e-06], grad_fn=<SigmoidBackward>),
 tensor([4.4386e-06], grad_fn=<SigmoidBackward>),
 tensor([3.1299e-06], grad_fn=<SigmoidBackward>),
 tensor([2.1736e-06], grad_fn=<SigmoidBackward>),
 tensor([1.4848e-06], grad_fn=<SigmoidBackward>),
 tensor([9.9661e-07], grad_fn=<SigmoidBackward>),
 tensor([6.5653e-07], grad_fn=<SigmoidBackward>),
 tensor([4.2405e-07], grad_fn=<SigmoidBackward>),
 tensor([2.6805e-07], grad_fn=<SigmoidBackward>),
 ten