In [None]:
!pip install gym
!apt-get install python-opengl -y
!apt install xvfb -y

# Special gym environment
!pip install gym[atari]

# For rendering environment, you can use pyvirtualdisplay.
!pip install pyvirtualdisplay
!pip install piglet

# To activate virtual display 
# need to run a script once for training an agent as follows
from pyvirtualdisplay import Display
display = Display(visible=0, size=(1400, 900))
display.start()

# This code creates a virtual display to draw game images on. 
# If you are running locally, just ignore it
import os
if type(os.environ.get("DISPLAY")) is not str or len(os.environ.get("DISPLAY"))==0:
    !bash ../xvfb start
    %env DISPLAY=:1

#
# Import libraries
#
import gym
from gym import logger as gymlogger
from gym.wrappers import Monitor
gymlogger.set_level(40) # error only
import tensorflow as tf
import numpy as np
import random
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import math
import glob
import io
import base64
from IPython.display import HTML

from IPython import display as ipythondisplay

"""
Utility functions to enable video recording of gym environment and displaying it
To enable video, just do "env = wrap_env(env)"
"""

def show_video():
  """Embed one recorded episode from the ``video/`` directory in the notebook.

  Looks for ``video/*.mp4``. If at least one file is found, the first one in
  sorted filename order is base64-encoded and displayed as an inline HTML5
  ``<video>`` element. Otherwise the message "Could not find video" is printed.

  Returns:
    None. The result is the rich display (or the printed message).
  """
  # glob returns matches in arbitrary order; sort so the clip that gets
  # shown is the same on every run.
  mp4list = sorted(glob.glob('video/*.mp4'))
  if not mp4list:
    print("Could not find video")
    return

  # Read-only binary mode inside a context manager: the file is only read,
  # and the handle is released as soon as the bytes are in memory.
  with open(mp4list[0], 'rb') as video_file:
    encoded = base64.b64encode(video_file.read())

  ipythondisplay.display(HTML(data='''<video alt="test" autoplay 
                loop controls style="height: 400px;">
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
             </video>'''.format(encoded.decode('ascii'))))
    

def wrap_env(env):
  """Return ``env`` wrapped in a ``Monitor`` that writes into ``./video``.

  ``force=True`` is passed straight through to ``Monitor``.
  """
  return Monitor(env, './video', force=True)

Reading package lists... Done
Building dependency tree       
Reading state information... Done
python-opengl is already the newest version (3.1.0+dfsg-1).
0 upgraded, 0 newly installed, 0 to remove and 10 not upgraded.
Reading package lists... Done
Building dependency tree       
Reading state information... Done
xvfb is already the newest version (2:1.19.6-1ubuntu4.2).
0 upgraded, 0 newly installed, 0 to remove and 10 not upgraded.


In [None]:
# Install spinningup on CoLab
!git clone https://github.com/openai/spinningup.git
!cd spinningup
#!pip install -e . # this will incur error: File "setup.py" not found. Directory cannot be installed in editable mode: /content
!pip install -e spinningup

# OpenAI Gym

OpenAI Gym is a Python library that wraps many classical decision problems, including robot control, video games, and board games. We will use the environments it provides to test our algorithms on interesting decision problems.

## [Code Format]

All implementations in Spinning Up adhere to a standard template. They are split into two files: 
- An algorithm file, which contains the core logic of the algorithm. 
  - A class definition for an experience buffer object (for storing information from agent-environment interactions).
  - A single function which runs the algorithm. (core)
    - Logger setup
    - Random seed setting
    - Environment instantiation
    - Making placeholders for the computation graph
    - Building the actor-critic computation graph via the actor_critic function passed to the algorithm function as an argument
    - Instantiating the experience buffer
    - Building the computation graph for loss functions and diagnostics specific to the algorithm
    - Making training ops
    - Making the TF Session and initializing parameters
    - Setting up model saving through the logger
    - Defining functions needed for running the main loop of the algorithm (eg the core update function, get action function, and test agent function, depending on the algorithm)
    - Running the main loop of the algorithm:
      - Run the agent in the environment
      - Periodically update the parameters of the agent according to the main equations of the algorithm
      - Log key performance metrics and save agent
  - Some support for directly running the algorithm in Gym environments from the command line.

- A core file, which contains various utilities needed to run the algorithm.
  - Functions related to making and managing placeholders
  - Functions for building sections of computation graph relevant to the actor_critic method for a particular algorithm 
  - Any other useful functions
  - Implementations for an MLP actor-critic compatible with the algorithm, where both the policy and the value function(s) are represented by simple MLPs


## Example: LunarLander-v2

Train PPO in spinup and show resulting policy in a video. 

### Train PPO in spinup

In [None]:
# load packages
import gym
from spinup import ppo
import tensorflow as tf

# after training, load policy and show results in video
from spinup.utils.test_policy import load_policy, run_policy

# Training configuration.
# Factory that builds a fresh LunarLander-v2 environment each time it is called.
env_fn = lambda: gym.make('LunarLander-v2')

# Passed to ppo as ac_kwargs.
ac_kwargs = {'hidden_sizes': [64, 64], 'activation': tf.nn.relu}

# Passed to ppo as logger_kwargs.
logger_kwargs = {'output_dir': 'path/to/output_dir', 'exp_name': 'experiment_name'}




In [None]:
# Run PPO training with the environment factory and keyword-argument dicts
# defined in the previous cell.
ppo(
    env_fn=env_fn,
    ac_kwargs=ac_kwargs,
    steps_per_epoch=5000,
    epochs=250,
    logger_kwargs=logger_kwargs,
)

In [None]:
# Show policy
# Load from the directory the training run was configured to log to, instead
# of repeating the path literal (the two could silently drift apart).
_, get_action = load_policy(logger_kwargs['output_dir'])
env2 = gym.make('LunarLander-v2')
env3 = wrap_env(env2)
try:
  run_policy(env3, get_action, max_ep_len=500, num_episodes=10)
finally:
  # Close the wrapped environment even when the rollout raises, so it is not
  # left open for the rest of the session.
  env3.close()
show_video()

INFO:tensorflow:Restoring parameters from path/to/output_dir/simple_save/variables/variables
Using default action op.
[32;1mLogging data to /tmp/experiments/1552146854/progress.txt[0m
Episode 0 	 EpRet 285.614 	 EpLen 145
Episode 1 	 EpRet 290.872 	 EpLen 210
Episode 2 	 EpRet 286.897 	 EpLen 127
Episode 3 	 EpRet 307.363 	 EpLen 224
Episode 4 	 EpRet 287.512 	 EpLen 207
Episode 5 	 EpRet 286.895 	 EpLen 195
Episode 6 	 EpRet 291.545 	 EpLen 194
Episode 7 	 EpRet 287.070 	 EpLen 205
Episode 8 	 EpRet 278.756 	 EpLen 156
Episode 9 	 EpRet 296.767 	 EpLen 227
-------------------------------------
|    AverageEpRet |             290 |
|        StdEpRet |            7.28 |
|        MaxEpRet |             307 |
|        MinEpRet |             279 |
|           EpLen |             189 |
-------------------------------------
