<a href="https://colab.research.google.com/github/mattagnew/SIT796-Reinforcement-Learning/blob/main/Task_1_2C.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Task 1.2C 

In [None]:
%%capture
# INSTALL REQUIRED SYSTEM DEPENDENCIES

!apt-get install -y xvfb x11-utils 
!apt-get install x11-utils > /dev/null 2>&1
!pip install PyVirtualDisplay==2.0.* \
  PyOpenGL==3.1.* \
  PyOpenGL-accelerate==3.1.* \
  gym[box2d]==0.17.* 
!pip install pyglet

In [None]:
# IMPORT REQUIRED PACKAGES

import gym
import numpy as np
import base64
import io
import IPython
import time
from random import randint
from random import seed
from gym.wrappers import Monitor
from IPython import display
from pyvirtualdisplay import Display
from gym import spaces
from gym.utils import seeding

## Environment

---




In [None]:
# DEFINE ENVIRONMENT

class RocketLander(gym.Env):
  """A 2-D rocket landing environment for OpenAI gym.

  The state is three 2-vectors ``[displacement, velocity, acceleration]``
  (an array of shape ``(3, 2)``), where displacement is the rocket position
  relative to ``goal_position``.

  An action is a pair ``(booster, theta)``: ``booster`` multiplies the
  booster thrust (0 = off, 1 = on) and ``theta`` is the thrust direction in
  radians (``pi/2`` points straight up).

  An episode ends as soon as the rocket's vertical displacement from the
  goal is zero or negative.
  """

  metadata = {'render.modes': ['human', 'rgb_array'],
              'video.frames_per_second': 30
  }

  def __init__(self, steps_per_s=1, goal_position=0, goal_velocity=[0, 0]):
    """Set up the physical constants and the gym spaces.

    Args:
      steps_per_s: divisor applied to the booster and gravity accelerations.
      goal_position: accepted for API compatibility; currently ignored, the
        landing pad is always placed at ``[0, 0]``.
      goal_velocity: accepted for API compatibility; currently ignored.
    """
    super(RocketLander, self).__init__()

    self.seed()

    self.steps_per_s = steps_per_s

    # World limits: positions are used for sampling the start state and for
    # scaling the rendering; the velocity limits are informational only.
    self.min_position = -2000
    self.max_position = 2000
    self.min_vel = -9999
    self.max_vel = 9999

    self.goal_position = [0, 0]
    print(self.goal_position)

    self.booster = 19.6/steps_per_s
    self.threshold = 1

    self.gravity = -9.8/steps_per_s

    self.viewer = None
    # Populated by reset(); step() and render() require a prior reset().
    self.state = None

    # NOTE(review): step() actually consumes (booster, theta) pairs, so this
    # space does not describe the real action format. Left unchanged.
    self.action_space = spaces.Discrete(2)
    # Observations are [displacement, velocity, acceleration], each a 2-vector.
    self.observation_space = spaces.Box(
        -np.inf, np.inf, shape=(3, 2), dtype=np.float32
    )

  def seed(self, seed=None):
    """Seed the environment's random generator and return ``[seed]``."""
    self.np_random, seed = seeding.np_random(seed)
    return [seed]

  def step(self, action):
    """Advance the simulation by one step.

    Args:
      action: pair ``(booster, theta)``; see the class docstring.

    Returns:
      ``(observation, reward, done, info)`` where observation is a
      ``(3, 2)`` array ``[displacement, velocity, acceleration]``.
    """
    p = self.goal_position

    position = self.state[0]
    velocity = self.state[1]

    booster_on, theta = action
    acceleration_x = booster_on * self.booster * (np.cos(theta))
    acceleration_y = booster_on * self.booster * (np.sin(theta)) + self.gravity

    velocity_x = velocity[0] + acceleration_x
    velocity_y = velocity[1] + acceleration_y

    # The stored position is relative to the goal: convert to absolute
    # coordinates, integrate over one step, then convert back below.
    position_x = (position[0]+p[0]) + velocity[0] + 0.5*acceleration_x
    position_y = (position[1]+p[1]) + velocity[1] + 0.5*acceleration_y

    acceleration = [acceleration_x, acceleration_y]
    velocity = [velocity_x, velocity_y]

    d_x = position_x - p[0]
    d_y = position_y - p[1]
    d = [d_x, d_y]

    # The episode is over once the rocket is at or below the pad height.
    done = bool(d_y <= 0)

    # Penalise distance from the pad and speed, with an extra quadratic
    # penalty on vertical speed.
    reward = -(np.linalg.norm(d) + np.linalg.norm(velocity)) - velocity_y**2

    # Keep the state as an ndarray, the same type reset() produces.
    self.state = np.array([d, velocity, acceleration])
    return np.array(self.state), reward, done, {}

  def reset(self):
    """Sample a new start state and return it as a ``(3, 2)`` array.

    The rocket starts at rest, anywhere across the world horizontally and
    within ``2500 +/- max_position/4`` vertically, with gravity as its
    initial acceleration.
    """
    start_x = self.np_random.uniform(low=self.min_position,
                                     high=self.max_position)
    start_y = self.np_random.uniform(low=2500 - self.max_position/4,
                                     high=2500 + self.max_position/4)
    # Displacement is measured per axis: x against goal x, y against goal y.
    self.state = np.array([[start_x - self.goal_position[0],
                            start_y - self.goal_position[1]],
                           [0, 0],
                           [0, self.gravity]])

    return np.array(self.state)

  def render(self, mode='human'):
    """Draw the rocket and the landing pad.

    Args:
      mode: ``'human'`` or ``'rgb_array'``; with ``'rgb_array'`` the viewer
        is asked to return the frame as an RGB array.

    Returns:
      Whatever ``Viewer.render`` returns for the requested mode.
    """
    screen_width = 400
    screen_height = 400

    world_width = self.max_position - self.min_position
    scale = screen_width / world_width
    rocketwidth = 20
    rocketheight = 40

    if self.viewer is None:
      # Imported lazily so the class can be defined without a display.
      from gym.envs.classic_control import rendering
      self.viewer = rendering.Viewer(screen_width, screen_height)

      clearance = 0

      l, r, t, b = -rocketwidth / 2, rocketwidth / 2, rocketheight, 0
      rocket = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
      rocket.add_attr(rendering.Transform(translation=(0, clearance)))
      self.rockettrans = rendering.Transform()
      rocket.add_attr(self.rockettrans)
      self.viewer.add_geom(rocket)

      # Landing pad: a thin blue strip, one rocket-width wide, at the goal x.
      landingx1 = (self.goal_position[0] - self.min_position) * scale - rocketwidth/2
      landingx2 = (self.goal_position[0] - self.min_position) * scale + rocketwidth/2
      landingheight = 5
      landpad = rendering.FilledPolygon([(landingx1, 0), (landingx1, landingheight), (landingx2, landingheight), (landingx2, 0)])
      landpad.set_color(0, 0, 1)
      self.viewer.add_geom(landpad)

    # State holds goal-relative displacement; shift back to world
    # coordinates before scaling to screen pixels.
    pos = self.state[0]
    self.rockettrans.set_translation(
        (pos[0]+self.goal_position[0]-self.min_position) * scale, (pos[1]+self.goal_position[1]) * scale
    )

    return self.viewer.render(return_rgb_array=mode == 'rgb_array')

  def close(self):
    """Close the render window, if one was opened."""
    if self.viewer is not None:
      self.viewer.close()
      self.viewer = None

## Policy

In [352]:
# DEFINE POLICY
import random

def policy(obs, t):
    """Pick a booster command from the rocket's current observation.

    Args:
        obs: array-like whose row 0 is the displacement ``[d_x, d_y]`` from
            the landing pad and whose row 1 is the velocity ``[v_x, v_y]``.
            Any further rows are ignored.
        t: current time step. Unused by this policy; kept so existing
            callers do not need to change.

    Returns:
        ``[action, theta]`` where ``action`` is 0 (booster off) or
        1 (booster on) and ``theta`` is the thrust direction in radians
        (``pi/2`` is straight up).
    """
    obs = np.asarray(obs, dtype=float)
    velocity = obs[1]
    d_x, d_y = obs[0, 0], obs[0, 1]
    v_x, v_y = velocity[0], velocity[1]

    # Default command: booster off, thrust direction straight up.
    action = 0
    theta = np.pi / 2

    # The tolerated landing offset and speed both shrink as the rocket
    # descends; the speed limit never drops below 9.8.
    limit = d_y / 20
    v_lim = np.clip((limit / 2), 9.8, 9999)

    # Extrapolate the current velocity linearly to pad height to see where
    # the rocket would touch down. With v_y == 0 the division yields
    # inf/nan: an infinite offset counts as off course, while nan (rocket
    # completely at rest) compares False and requests no correction. The
    # errstate block keeps those results but silences the RuntimeWarnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        time_to_pad = -d_y / v_y
        r_x = d_x + v_x * time_to_pad
        off_course = abs(r_x) > limit

    # Direction exactly opposing the current velocity vector.
    theta_opp = np.arctan2(v_y, v_x) + np.pi

    # Bearing from the pad to the rocket, mirrored about the vertical axis:
    # the horizontal thrust component points back towards the pad.
    theta_r = np.pi - np.arctan2(d_y, d_x)

    # Predicted touchdown misses the pad by more than the limit: steer back.
    if off_course:
        action = 1
        theta = theta_r

    # Too fast: brake against the velocity vector (overrides steering).
    if np.linalg.norm(velocity) > v_lim:
        action = 1
        theta = theta_opp

    # If the rocket exceeds an altitude of 3000, cut the booster.
    if d_y > 3000:
        action = 0

    return [action, theta]

## Run

In [355]:
# RUN ENVIRONMENT

!rm ./vid/*.* # CLEAN UP THE VIDEO BEFORE STARTING

TIME_LIMIT = 1000 # SET TIME LIMIT
reset_flag = 0

d = Display()
d.start()

env = RocketLander()
env = Monitor(env,'./vid',force=True)

o = env.reset()
print(o)
#print(o)
for t in range(TIME_LIMIT):
    
    action = policy(o,t)            # CALL POLICY
    o, r, d, _ = env.step(action)   # ACTION FROM POLICY USED IN ENVIRONMENT
    print(o, r)

    if d and t<TIME_LIMIT-1:
        print("Task completed in", t, "time steps")
        reset_flag = 1
        break
else:
    print("Time limit exceeded. Try again.")

if reset_flag>0:
  env.reset()

[0, 0]




[[1797.71038823 2353.05051192]
 [   0.            0.        ]
 [   0.           -9.8       ]]
[[1797.71038823 2348.15051192]
 [   0.           -9.8       ]
 [   0.           -9.8       ]] -3063.1315761821634
[[1791.75305823 2341.23191406]
 [ -11.91465999   -4.03719571]
 [ -11.91465999    5.76280429]] -2977.0557228736825
[[ 1.77388245e+03  2.34007718e+03]
 [-2.38265511e+01  1.72772800e+00]
 [-1.19118911e+01  5.76492370e+00]] -2963.3039873306466
[[1744.13577101 2344.71464917]
 [ -35.66681226    7.54720993]
 [ -11.84026115    5.81948193]] -3015.6928423219183
[[1702.61991123 2355.22497827]
 [ -47.3649073    13.47344827]
 [ -11.69809505    5.92623834]] -3136.9779160334883
[[1649.51359785 2371.74048272]
 [ -58.84771944   19.55756063]
 [ -11.48281214    6.08411236]] -3333.4634506616026
[[1599.96573719 2383.30731096]
 [ -40.24800188    3.57609587]
 [  18.59971756  -15.98146476]] -2923.742722090901
[[1554.25548007 2390.1199766 ]
 [ -51.17251237   10.04923541]
 [ -10.92451049    6.47313955]] -30



[[ 648.06511838 2614.75914993]
 [ -53.58891027   27.13142537]
 [  -5.10368178    9.12385881]] -3490.053303693232
[[ 592.11862212 2646.50276686]
 [ -58.30408226   36.35580849]
 [  -4.71517199    9.22438312]] -4102.388291319801
[[ 542.13031892 2672.77322881]
 [ -41.67252414   16.18511542]
 [  16.63155811  -20.17069307]] -3033.8635860486743
[[ 498.50968817 2693.66276409]
 [ -45.56873736   25.59395512]
 [  -3.89621322    9.4088397 ]] -3446.718271805007
[[ 451.15757165 2723.99308568]
 [ -49.13549569   35.06668806]
 [  -3.56675833    9.47273294]] -4051.1394110695514
[[ 400.42077872 2763.82806455]
 [ -52.33809016   44.60326968]
 [  -3.20259448    9.53658162]] -4850.901130502395
[[ 3.46677544e+02  2.81323007e+03]
 [-5.51483788e+01  5.42007509e+01]
 [-2.81028861e+00  9.59748123e+00]] -5849.556136648376
[[ 298.51860232 2855.66149007]
 [ -41.16950508   30.66207956]
 [  13.97887369  -23.53867134]] -3862.718285629689
[[ 2.56330199e+02  2.89117046e+03]
 [-4.32073010e+01  4.03558577e+01]
 [-2.0377959

## Video

In [356]:
# OBSERVE VIDEO

# Embed every recorded video inline as a base64 data URI.
for f in env.videos:
    # f[0] is used as the path of the video file. Open it read-only and
    # close it deterministically instead of leaking a read-write handle.
    with io.open(f[0], 'rb') as video_file:
        video = video_file.read()
    encoded = base64.b64encode(video)

    display.display(display.HTML(data="""
        <video alt="test" controls>
        <source src="data:video/mp4;base64,{0}" type="video/mp4" />
        </video>
        """.format(encoded.decode('ascii'))))