<a href="https://colab.research.google.com/github/mattagnew/SIT796-Reinforcement-Learning/blob/main/Task_1_2C.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Task 1.2C 

In [1]:
%%capture
# INSTALL REQUIRED SYSTEM DEPENDENCIES

!apt-get install -y xvfb x11-utils 
!apt-get install x11-utils > /dev/null 2>&1
!pip install PyVirtualDisplay==2.0.* \
  PyOpenGL==3.1.* \
  PyOpenGL-accelerate==3.1.* \
  gym[box2d]==0.17.* 
!pip install pyglet

In [2]:
# IMPORT REQUIRED PACKAGES

import gym
import numpy as np
import base64
import io
import IPython
import time
from random import randint
from random import seed
from gym.wrappers import Monitor
from IPython import display
from pyvirtualdisplay import Display
from gym import spaces
from gym.utils import seeding

## Environment

---




In [10]:
# DEFINE ENVIRONMENT

class RocketLander(gym.Env):
  """A one-dimensional rocket landing environment for OpenAI gym.

  The state is the tuple ``(position, velocity, acceleration)``. Every
  episode starts at position 1000 with zero velocity. Two discrete actions
  are available:

    0 -- booster off (only gravity acts on the rocket)
    1 -- booster on  (booster thrust plus gravity)

  ``booster`` and ``gravity`` are stored as per-step velocity changes, i.e.
  already divided by ``steps_per_s``.

  Args:
    steps_per_s: Number of simulation steps per simulated second. Must be
      positive.
    goal_altitude: Position at which the episode is considered done.
    goal_velocity: Stored on the instance; not used by the dynamics or the
      reward in this class.

  Raises:
    ValueError: If ``steps_per_s`` is not positive.
  """

  metadata = {'render.modes': ['human', 'rgb_array'],
              'video.frames_per_second': 30
  }

  def __init__(self, steps_per_s=1, goal_altitude=0, goal_velocity=0):
    super(RocketLander, self).__init__()

    if steps_per_s <= 0:
      raise ValueError("steps_per_s must be positive, got %r" % (steps_per_s,))

    # Honour the constructor argument (it was previously hard-coded to 1).
    self.steps_per_s = steps_per_s

    self.min_position = 0
    self.max_position = 1500
    self.goal_altitude = goal_altitude
    self.goal_velocity = goal_velocity

    # Per-step velocity change contributed by the booster.
    self.booster = 19.6/steps_per_s
    self.threshold = 1

    # Per-step velocity change contributed by gravity (negative = downwards).
    self.gravity = -9.8/steps_per_s
    # Net per-step velocity change while the booster is firing.
    self.burn = self.booster + self.gravity

    self.low = np.array(
      [self.min_position, -9999, self.gravity], dtype=np.float32
    )

    self.high = np.array(
      [self.max_position, 9999, self.burn], dtype=np.float32
    )

    self.viewer = None
    # Populated by reset(); None means "no episode started yet".
    self.state = None

    self.action_space = spaces.Discrete(2)
    self.observation_space = spaces.Box(
        self.low, self.high, dtype=np.float32
    )

  def seed(self, seed=None):
    """Seed the environment's random number generator.

    Returns:
      A one-element list containing the seed that was actually used.
    """
    self.np_random, seed = seeding.np_random(seed)
    return [seed]

  def step(self, action):
    """Advance the simulation by one step.

    Args:
      action: 0 (booster off) or 1 (booster on).

    Returns:
      ``(observation, reward, done, info)`` where ``observation`` is an array
      of ``(position, velocity, acceleration)`` and ``info`` is an empty dict.
    """
    assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))
    assert self.state is not None, "Call reset() before calling step()"

    position, velocity, _ = self.state
    acceleration = action * self.booster + self.gravity
    velocity += acceleration
    # One step lasts 1/steps_per_s seconds, so the distance covered is the
    # velocity scaled by the step length (identical to the old behaviour for
    # the default steps_per_s=1).
    position += velocity / self.steps_per_s
    position = np.clip(position, self.min_position, self.max_position)
    done = bool(position == self.goal_altitude)

    # NOTE(review): this value grows with altitude and speed, which looks
    # like a cost rather than a reward for a landing task - confirm the
    # intended sign before training an agent against it.
    reward = (abs(position) + velocity**2)

    self.state = (position, velocity, acceleration)
    return np.array(self.state), reward, done, {}

  def reset(self):
    """Start a new episode at position 1000 with zero velocity.

    Returns:
      The initial observation ``[1000, 0, gravity]``.
    """
    self.state = np.array([1000, 0, self.gravity])
    return np.array(self.state)

  def render(self, mode='human'):
    """Draw the rocket at its current altitude.

    Args:
      mode: ``'human'`` or ``'rgb_array'``.

    Returns:
      Whatever the viewer's ``render`` returns for the requested mode, or
      ``None`` if no episode has been started yet.
    """
    if self.state is None:
      return None

    screen_width = 200
    screen_height = 400

    rocketwidth = 20
    rocketheight = 40

    # Altitude is drawn on the vertical axis, so it must be scaled by the
    # screen height (not the width). The rocket's own height is subtracted so
    # it stays fully visible at max_position.
    world_height = self.max_position - self.min_position
    scale = (screen_height - rocketheight) / world_height

    if self.viewer is None:
      # Imported lazily: the rendering module needs a display to import.
      from gym.envs.classic_control import rendering
      self.viewer = rendering.Viewer(screen_width, screen_height)

      l, r, t, b = -rocketwidth / 2, rocketwidth / 2, rocketheight, 0
      rocket = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
      self.rockettrans = rendering.Transform()
      rocket.add_attr(self.rockettrans)
      self.viewer.add_geom(rocket)

    pos = self.state[0]
    # Keep the rocket horizontally centred; only its height changes.
    self.rockettrans.set_translation(
        screen_width / 2, (pos - self.min_position) * scale
    )

    return self.viewer.render(return_rgb_array=mode == 'rgb_array')

  def close(self):
    """Release the rendering window, if one was created."""
    if self.viewer is not None:
      self.viewer.close()
      self.viewer = None

## Policy

In [11]:
# DEFINE POLICY

def policy(obs, t):
    """Choose an action for the current observation.

    Args:
        obs: Sequence of three values ``(position, velocity, acceleration)``.
        t: Index of the current time step (unused by the random policy).

    Returns:
        0 (don't activate booster) or 1 (activate booster), chosen uniformly
        at random.
    """
    # Unpacked so a state-based policy can replace the random one below.
    position, velocity, acceleration = obs

    # CURRENT POLICY : RANDOM ACTIONS
    # The actions are
    #    0      Don't activate booster
    #    1      Activate booster

    # The global RNG is deliberately not reseeded here: reseeding on every
    # call is unnecessary, and the previous seed source, time.clock(), was
    # removed in Python 3.8.

    # randint is inclusive at both ends, so this yields either 0 or 1.
    actions = randint(0, 1)

    return actions

## Run

In [12]:
# RUN ENVIRONMENT

!rm ./vid/*.* # CLEAN UP THE VIDEO BEFORE STARTING

TIME_LIMIT = 2000 # SET TIME LIMIT
reset_flag = 0

d = Display()
d.start()

env = RocketLander()
env = Monitor(env,'./vid',force=True)

o = env.reset()

for t in range(TIME_LIMIT):
    
    action = policy(o,t)            # CALL POLICY
    o, r, d, _ = env.step(action)   # ACTION FROM POLICY USED IN ENVIRONMENT
    print(o, r)

    if d and t<TIME_LIMIT-1:
        print("Task completed in", t, "time steps")
        reset_flag = 1
        break
else:
    print("Time limit exceeded. Try again.")

if reset_flag>0:
  env.reset()

  if sys.path[0] == '':


[990.2  -9.8  -9.8] 1086.24
[970.6 -19.6  -9.8] 1354.7600000000002
[960.8  -9.8   9.8] 1056.8400000000001
[960.8   0.    9.8] 960.8000000000001
[970.6   9.8   9.8] 1066.64
[970.6   0.   -9.8] 970.6
[980.4   9.8   9.8] 1076.44
[1000.    19.6    9.8] 1384.16
[1029.4   29.4    9.8] 1893.7600000000002
[1049.    19.6   -9.8] 1433.16
[1078.4   29.4    9.8] 1942.7600000000002
[1098.    19.6   -9.8] 1482.16
[1107.8    9.8   -9.8] 1203.84
[1107.8    0.    -9.8] 1107.8
[1098.    -9.8   -9.8] 1194.04
[1078.4  -19.6   -9.8] 1462.5600000000002
[1049.   -29.4   -9.8] 1913.3600000000001
[1029.4  -19.6    9.8] 1413.5600000000002
[1000.   -29.4   -9.8] 1864.3600000000001
[960.8 -39.2  -9.8] 2497.4400000000005
[931.4 -29.4   9.8] 1795.7600000000002
[892.2 -39.2  -9.8] 2428.84
[843.2 -49.   -9.8] 3244.2
[784.4 -58.8  -9.8] 4241.84
[735.4 -49.    9.8] 3136.4
[696.2 -39.2   9.8] 2232.84
[666.8 -29.4   9.8] 1531.1600000000003
[627.6 -39.2  -9.8] 2164.2400000000002
[578.6 -49.   -9.8] 2979.6
[539.4 -39.2   9

## Video

In [6]:
# OBSERVE VIDEO

# Each entry of env.videos is indexed at [0] to get the path of a recorded
# video file, which is embedded inline as a base64-encoded MP4.
for f in env.videos:
    # Read-only binary mode inside a context manager: the file is only read,
    # and the handle is closed as soon as its contents are in memory.
    with io.open(f[0], 'rb') as video_file:
        video = video_file.read()
    encoded = base64.b64encode(video)

    display.display(display.HTML(data="""
        <video alt="test" controls>
        <source src="data:video/mp4;base64,{0}" type="video/mp4" />
        </video>
        """.format(encoded.decode('ascii'))))