In [3]:
## This file is an implememtation of an lipm enviroment
## Author : Avadesh Meduri
## Date : 22/02/2020

import numpy as np
import IPython
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from matplotlib.animation import FuncAnimation

In [4]:
'''
This is an inverted pendulum enviromment designed to use for RL. The action in this space is the step length
with respect to the current cop location. 9 possible actions are allowed including 0 step length. 
'''

class LipmEnv:
    def __init__(self, h, max_step_length, w):
        self.omega = np.sqrt(9.81/h)
        self.max_leg_length = 0.3
        self.dt = 0.001
        self.h = h
        assert len(w) == 3
        self.w = w
        assert (np.linalg.norm([max_step_length, self.h]) < self.max_leg_length)
        self.action_space = np.linspace(-max_step_length, max_step_length, 9) #discretize to 9 actions
        self.A = np.matrix([[1, self.dt], [(self.omega**2)*self.dt, 1]])
        self.B = np.matrix([0, -(self.omega**2)*self.dt])
        self.t = 0
                                     
    def integrate_lip_dynamics(self, x_t, u_t):
        ## integrates dynamics for one time step
        assert np.shape(x_t) == (2,)
        x_t_1 = np.matmul(self.A, np.transpose(x_t)) + np.matmul(self.B.transpose(), [u_t])
        return x_t_1

    def reset_env(self, x0, epi_time):
        ## initialises environment
        assert np.shape(x0) == (2,)
        self.t = 0
        self.sim_data = np.zeros((4, int(epi_time/self.dt)+1))
        assert (np.linalg.norm([x0[0], self.h]) < self.max_leg_length)
        self.sim_data[:,0][0:2] = x0
        self.sim_data[:,0][2] = 0
        self.sim_data[:,0][3] = self.h
        
        return self.sim_data[:,0][0:2]
        
    def step_env(self, u, step_time):
        ## integrates the simulation for the duration of one step time(actual step not time step)
        ## This way we can look at things as snap shots of steps not worrying about things in between
        for i in range(int(step_time/self.dt)):
            self.sim_data[:,self.t + 1][0:2] = self.integrate_lip_dynamics(self.sim_data[:,self.t][0:2],\
                                                       self.sim_data[:,self.t][2])
            self.sim_data[:,self.t + 1][2] = self.sim_data[:,self.t][2] #u0
            self.sim_data[:,self.t + 1][3] = self.sim_data[:,self.t][3] #h
            self.t += 1
        
        self.sim_data[:,self.t][2] += self.action_space[u] # new u
        
        ## modifying state that is returned is such that the origin is u0 instead of the global origin
        ## This ensures that the state x[0] is bounded by the maximum leg size while collecting data
        processed_state = self.sim_data[:,self.t][0:2].copy()
        processed_state[0] -= self.sim_data[:,self.t][2]
        
        return processed_state, self.compute_cost(), self.isdone()
        
    def isdone(self):
        ## checks if the kinematic constraint is violated and terminates episode
        current_leg_length = np.linalg.norm([self.sim_data[:,self.t][0] - self.sim_data[:,self.t][2], self.h])
        if current_leg_length > self.max_leg_length:
            return True
        else:
            return False
    
    def compute_cost(self):
        '''
        computes cost: 
            which is square of distance between u and current com + 
            velocity of Com + square of step length 
        '''
        cost = self.w[0]*abs(self.sim_data[:,self.t][0] - self.sim_data[:,self.t][2]) #|x0 - u0|
        cost += self.w[1]*abs(self.sim_data[:,self.t][1]) #|xd|
        cost += self.w[2]*abs(self.sim_data[:,self.t][2] - self.sim_data[:,self.t-1][2]) # |u|
        
        return cost
    
    def random_action(self):
        ## generates a random action to input to the simulation
        action = np.random.randint(len(self.action_space))
        return action
    
    def show_episode(self, freq, i_no):
        ## Input:
            ## Freq : frame rate (if freq = 5 one in 5 is shown)
            ## i_no : iteration number 
        sim_data = self.sim_data[:,::freq]

        fig = plt.figure()
        ax = plt.axes(xlim=(-2, 2), ylim=(0, sim_data[:,0][3] + 0.05))
        text_str = "iter - " + str(i_no)
        line, = ax.plot([], [], lw=3)
        def init():
            line.set_data([], [])
            return line,
        def animate(i):
            x = sim_data[:,i][0]
            y = sim_data[:,i][3]
            u = sim_data[:,i][2]
            line.set_data([u,x], [0,y])
            return line,
        props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
        ax.text(0.05, 0.95, text_str, transform=ax.transAxes, fontsize=15,
        verticalalignment='top', bbox=props)
        
        anim = FuncAnimation(fig, animate, init_func=init,
                                       frames=np.shape(sim_data)[1], interval=25, blit=True)

        plt.close(fig)
        plt.close(anim._fig)
        IPython.display.display_html(IPython.core.display.HTML(anim.to_html5_video()))
    

In [9]:
## env test 
run_test = False
if run_test:
    no_steps = 10 ## simulates 10 steps taken by the inverted pendulum
    step_time = 0.1 ## duration after which step is taken by pendulum
    env = LipmEnv(0.2, 0.22, [0,0,1])
    state = env.reset_env([0.2,0], no_steps*step_time)
    for t in range(no_steps):
        action = env.random_action()
        next_state, cost, done = env.step_env(4, step_time)
        print(state, action, cost, next_state, done)
        state = next_state

    env.show_episode(5, 1)


[0.2 0. ] 5 0.0 [0.25047467 1.06060246] True
[0.25047467 1.06060246] 8 0.0 [0.42835422 2.65654048] True
[0.42835422 2.65654048] 2 0.0 [0.82366973 5.59854814] True
[0.82366973 5.59854814] 4 0.0 [ 1.6368258  11.37940314] True
[ 1.6368258  11.37940314] 2 0.0 [ 3.28019456 22.93136842] True
[ 3.28019456 22.93136842] 8 0.0 [ 6.5872398  46.11354648] True
[ 6.5872398  46.11354648] 3 0.0 [13.23522282 92.68358979] True
[13.23522282 92.68358979] 2 0.0 [ 26.59587273 186.2610061 ] True
[ 26.59587273 186.2610061 ] 6 0.0 [ 53.44546254 374.30655808] True
[ 53.44546254 374.30655808] 0 0.0 [107.40160871 752.19349858] True
