## Basic Setup for Using OpenAI Gym with Colab

# 0. Connect Google Drive to Colab to import Python modules

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so files stored there can be reached
# from this Colab runtime (the following cell changes into a folder under it).
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Change into the course folder on the mounted Drive and list its contents.
# The path is specific to the author's Drive layout; adjust it to your own.
%cd /content/drive/MyDrive/Colab\ Notebooks/rl-master-old/day1
!ls

/content/drive/MyDrive/Colab Notebooks/rl-master-old/day1
setup.ipynb  tabular_mdp  torch_test.py  video


# 1. Introduction to Gym environment

## 1.1 Prerequisites

In [None]:
!pip install gym pyvirtualdisplay > /dev/null 2>&1
!apt-get install -y xvfb python-opengl ffmpeg > /dev/null 2>&1
!apt-get update > /dev/null 2>&1
!apt-get install cmake > /dev/null 2>&1
!pip install --upgrade setuptools 2>&1
!pip install ez_setup > /dev/null 2>&1



## 1.2 Functions to render Gym in Colab

Referenced from [here](https://colab.research.google.com/drive/1flu31ulJlgiRL1dnN2ir8wGh9p7Zij2t).

In [None]:
import gym
from gym.wrappers import Monitor
import glob
import io
import base64
from IPython.display import HTML
from pyvirtualdisplay import Display
from IPython import display as ipythondisplay

# Start a virtual (invisible) 1400x900 display via pyvirtualdisplay so that
# env.render() has a screen to draw to -- presumably needed because the Colab
# runtime has no physical display.
# NOTE(review): the name `display` shadows the `display` helper that IPython
# normally exposes in notebooks; the IPython module is imported as
# `ipythondisplay` above, which avoids the clash inside this notebook.
display = Display(visible=0, size=(1400, 900))
display.start()

"""
Utility functions to enable video recording of gym environment 
and displaying it.
To enable video, just do "env = wrap_env(env)"
"""
def show_video():
    """Embed a recorded episode video from ``video/`` in the notebook output.

    Looks for ``video/*.mp4`` relative to the current working directory. If at
    least one file exists, the first one in sorted filename order is read,
    base64-encoded and displayed inline as an HTML ``<video>`` element.
    Otherwise ``"Could not find video"`` is printed.

    Returns:
        None. The function only produces notebook/console output.
    """
    # glob returns matches in arbitrary (filesystem-dependent) order; sorting
    # makes the selected file deterministic when several recordings exist.
    mp4list = sorted(glob.glob('video/*.mp4'))
    if not mp4list:
        print("Could not find video")
        return

    # Read-only binary mode is enough here, and the context manager closes the
    # file handle promptly instead of leaking it until garbage collection.
    with open(mp4list[0], 'rb') as video_file:
        encoded = base64.b64encode(video_file.read())

    ipythondisplay.display(HTML(data='''<video alt="test" autoplay 
                  loop controls style="height: 400px;">
                  <source src="data:video/mp4;base64,{0}" type="video/mp4" />
              </video>'''.format(encoded.decode('ascii'))))
    

def wrap_env(env):
    """Return ``env`` wrapped in a gym ``Monitor`` that writes to ``./video``.

    ``force=True`` is passed through to ``Monitor`` (presumably so that an
    existing ``./video`` directory from an earlier run is reused/overwritten
    -- confirm against the gym documentation).

    Args:
        env: The gym environment to record.

    Returns:
        The ``Monitor``-wrapped environment.
    """
    return Monitor(env, './video', force=True)

Now we can render a Gym environment!

If you are running in a local Jupyter environment instead of Colab, set

```
colab = False
```

In [None]:
# When True, the cells below wrap each environment with wrap_env() to record
# video and call show_video() afterwards; set to False to skip both.
colab = True

In [None]:
import gym

# Roll out one episode of Acrobot-v1 with uniformly random actions.
env = gym.make("Acrobot-v1")
if colab:
    # Record the episode under ./video so it can be replayed inline below.
    env = wrap_env(env)

try:
    observation = env.reset()
    done = False

    # done is used to check terminal condition
    while not done:
        env.render()

        # Randomly sample action from environment
        action = env.action_space.sample()

        # Simulate 1 step
        observation, reward, done, info = env.step(action)
finally:
    # Close the environment even if a step raises, so the renderer (and the
    # recording wrapper, when enabled) is always released.
    env.close()

if colab:
    show_video()

In [None]:
# Box2d?
!pip3 install box2d-py
!pip3 install gym[Box_2D]



In [None]:
import gym

# Roll out up to 1000 random-action steps of a Box2D environment.
env = gym.make("BipedalWalker-v3")
# Alternative environment to try instead:
#env = gym.make("LunarLander-v2")
if colab:
    # Record the episode under ./video so it can be replayed inline below.
    env = wrap_env(env)

try:
    observation = env.reset()

    # Cap the rollout at 1000 steps in case the episode does not end earlier.
    for t in range(1000):
        env.render()

        # Randomly sample action from environment
        action = env.action_space.sample()

        # Simulate 1 step
        observation, reward, done, info = env.step(action)

        # done is used to check terminal condition
        if done:
            break
finally:
    # Close the environment even if a step raises, so the renderer (and the
    # recording wrapper, when enabled) is always released.
    env.close()

if colab:
    show_video()



# 2. Torch gradient calculation test

In [None]:
import torch

# Three independent 5x5 leaf tensors that track gradients.
x = torch.randn((5, 5), requires_grad=True)
y = torch.randn((5, 5), requires_grad=True)
z = torch.randn((5, 5), requires_grad=True)

# w = sum((x + y) * z). The element-wise product is reduced to a scalar so
# that backward() can be called without an explicit gradient argument.
v = (x + y) * z
w = torch.sum(v)

w.backward()

# dw / dx  = z
print(z)
print(x.grad)
print('')

# dw / dz  = x + y
print(x+y)
print(z.grad)

# Check the analytic gradients programmatically instead of comparing the
# printed tensors by eye.
assert torch.allclose(x.grad, z)
assert torch.allclose(y.grad, z)      # dw / dy = z as well
assert torch.allclose(z.grad, x + y)

tensor([[ 0.3941, -1.4770,  0.7659,  0.2390,  0.2213],
        [-0.2182,  0.7638, -0.2892,  0.7334, -0.0900],
        [ 0.8338,  0.0134, -0.4332,  0.8654, -0.6400],
        [-0.1142, -1.5778, -0.5357, -0.7042,  1.2282],
        [-1.5207,  1.8237, -0.3489, -1.2028,  1.7736]], requires_grad=True)
tensor([[ 0.3941, -1.4770,  0.7659,  0.2390,  0.2213],
        [-0.2182,  0.7638, -0.2892,  0.7334, -0.0900],
        [ 0.8338,  0.0134, -0.4332,  0.8654, -0.6400],
        [-0.1142, -1.5778, -0.5357, -0.7042,  1.2282],
        [-1.5207,  1.8237, -0.3489, -1.2028,  1.7736]])

tensor([[-8.6737e-01, -1.9747e+00,  2.9278e-04,  2.5176e+00,  1.2650e-01],
        [-5.9878e-01, -6.3168e-01,  1.3758e+00, -1.3433e+00,  5.1919e-01],
        [ 7.0360e-01,  3.6848e-01, -1.5111e-01, -3.4855e-02,  2.3357e+00],
        [-1.1199e+00, -2.2225e-01, -3.6831e-01,  3.3537e+00, -1.5967e+00],
        [ 1.1003e+00, -1.3478e+00,  5.5436e-01, -5.0382e-01, -5.4302e-01]],
       grad_fn=<AddBackward0>)
tensor([[-8.6737e-01