<a href="https://colab.research.google.com/github/saishreddyk/Practical-RL/blob/main/Rendering_OpenAi_Gym_in_Colaboratory.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# What happens in the cell below
  - Installs the original gym dependencies
  - Installs the extra dependencies for the Atari games
  - Imports and helper functions
  - Utility functions `show_video()` and `wrap_env()`
### Usage
  ```
  env = wrap_env(gym.make("MsPacman-v0"))
  show_video()
  ```


In [None]:
#@title RUN THIS
# Setup cell: installs gym plus its system packages, imports the modules the
# notebook uses, and starts a virtual display. The video helpers defined after
# this code rely on the names imported here (glob, io, base64, HTML,
# ipythondisplay, Monitor).
# Remove " > /dev/null 2>&1" from a command to see what is going on under the hood.

print("installing gym")
!pip install 'gym[all]' pyvirtualdisplay > /dev/null 2>&1
# NOTE(review): this apt-get install runs before the `apt-get update` further
# down -- confirm the package index is fresh enough at this point.
!apt-get install -y xvfb python-opengl ffmpeg > /dev/null 2>&1
print("gym install successfully")
print("installing atari dependencies")
!apt-get update > /dev/null 2>&1
!apt-get install cmake > /dev/null 2>&1
# Unlike the other commands, this one is not redirected to /dev/null, so its
# output stays visible.
!pip install --upgrade setuptools 2>&1
!pip install ez_setup > /dev/null 2>&1
# NOTE(review): gym[atari] is unquoted here, while 'gym[all]' above is quoted --
# presumably the shell passes it through unchanged; confirm.
!pip install gym[atari] > /dev/null 2>&1
print("Atari games dependencies installed successfully")
print("Importing necessary modules")
import gym
from gym import logger as gymlogger
from gym.wrappers import Monitor
gymlogger.set_level(40) #error only
import tensorflow as tf
import numpy as np
import random
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import math
import glob
import io
import base64
from IPython.display import HTML

from IPython import display as ipythondisplay
from pyvirtualdisplay import Display
# Create and start a non-visible (visible=0) 1400x900 virtual display;
# presumably this gives env.render() something to draw to on a headless
# machine -- TODO confirm.
# NOTE(review): this rebinds the name `display` at notebook level; the helpers
# below reach IPython's display function through `ipythondisplay.display`.
display = Display(visible=0, size=(1400, 900))
display.start()
# Usage banner for the helpers defined below.
# NOTE(review): the banner text opens a double quote before `env = ...` that is
# never closed.
print("""
Utility functions to enable video recording of gym environment and displaying it
To enable video, just do "env = wrap_env(env)
"""
)
def show_video():
  """Embed a recorded episode video from ``video/`` in the cell output.

  Searches ``video/*.mp4`` relative to the current working directory, takes
  the first path returned by ``glob``, base64-encodes the file and displays
  it as an inline HTML ``<video>`` element. Prints "Could not find video"
  when no ``.mp4`` file is found.

  Returns:
    None. The video (or the message) is emitted as a side effect.
  """
  mp4list = glob.glob('video/*.mp4')
  if not mp4list:
    print("Could not find video")
    return

  # Open read-only ('rb'): the previous 'r+b' mode also demanded write
  # permission and failed on read-only recordings. The context manager
  # closes the handle as soon as the bytes are read instead of leaking it.
  with io.open(mp4list[0], 'rb') as video_file:
    encoded = base64.b64encode(video_file.read())

  # The payload is inlined as a data: URI, so no file has to be served.
  ipythondisplay.display(HTML(data='''<video alt="test" autoplay 
                loop controls style="height: 400px;">
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
             </video>'''.format(encoded.decode('ascii'))))
    

def wrap_env(env):
  """Return ``env`` wrapped in a ``Monitor`` that writes to ``./video``.

  ``Monitor`` is called with ``force=True``; presumably this lets it reuse a
  directory that already holds earlier recordings -- confirm against the gym
  documentation.
  """
  return Monitor(env, './video', force=True)

In [None]:
import gym
import numpy as np

# Create the FrozenLake 8x8 environment (wrapped so episodes are recorded)
# and an all-zero Q-table with one row per state and one column per action.
env = wrap_env(gym.make('FrozenLake8x8-v0'))
n_states = env.observation_space.n   # .n is the number of discrete states
n_actions = env.action_space.n       # .n is the number of discrete actions
Q = np.zeros((n_states, n_actions))
print(Q.shape)

In [None]:
# Q-learning hyperparameters
lr, gamma = 0.628, 0.9  # learning rate (alpha) and discount factor
epochs = 5000           # number of training episodes
rev_list = list()       # total reward of each episode, appended during training

In [None]:
from time import sleep

# Tabular Q-learning on `env`: run `epochs` episodes, updating the Q-table `Q`
# in place after every step and appending each episode's total reward to
# `rev_list`.
for i in range(epochs):
    print("Running episode {}".format(i))
    s = env.reset()  # every episode starts from a freshly reset environment
    rAll = 0         # total reward collected in this episode
    j = 0            # step counter within the episode
    d = False        # set from env.step(); True once the episode is over
    while not d:
        j += 1
        print("Running {} iteration of {} episode".format(j, i))
        env.render()
        # Greedy action over Q[s] plus Gaussian noise scaled by 1/(i+1):
        # the noise shrinks as episodes progress, so later episodes rely
        # more on the learned values.
        a = np.argmax(Q[s, :] + np.random.randn(1, env.action_space.n) * (1. / (i + 1)))
        # Step the environment: observation, reward, done flag, info dict.
        s1, r, d, _ = env.step(a)
        # Q-learning update. The bootstrap term must be the VALUE of the best
        # next action (np.max); the previous np.argmax fed the action's index
        # into the target as if it were a value.
        Q[s, a] = Q[s, a] + lr * (r + gamma * np.max(Q[s1, :]) - Q[s, a])
        rAll += r
        s = s1
    rev_list.append(rAll)
    env.render()

# Despite the label, the printed number is sum(rev_list)/epochs, i.e. the mean
# reward per episode.
print("Reward sum on all episodes "+ str(sum(rev_list)/epochs))
print("Final Values of Q-Table")
print(Q)


In [None]:
# Display a recorded episode inline, or print "Could not find video" if no
# .mp4 exists under video/.
show_video()

In [4]:
# Show the current working directory; the relative paths 'video/*.mp4' and
# './video' used by the helpers are resolved against it.
!pwd

/content
