In [1]:
!apt-get install python-opengl -y
!apt install xvfb -y

Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following package was automatically installed and is no longer required:
  libnvidia-common-410
Use 'apt autoremove' to remove it.
Suggested packages:
  libgle3
The following NEW packages will be installed:
  python-opengl
0 upgraded, 1 newly installed, 0 to remove and 8 not upgraded.
Need to get 496 kB of archives.
After this operation, 5,416 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu bionic/universe amd64 python-opengl all 3.1.0+dfsg-1 [496 kB]
Fetched 496 kB in 1s (697 kB/s)
Selecting previously unselected package python-opengl.
(Reading database ... 130912 files and directories currently installed.)
Preparing to unpack .../python-opengl_3.1.0+dfsg-1_all.deb ...
Unpacking python-opengl (3.1.0+dfsg-1) ...
Setting up python-opengl (3.1.0+dfsg-1) ...
Reading package lists... Done
Building dependency tree       
Reading state information... Done
The f

In [2]:
!pip3 install pyvirtualdisplay
!pip3 install piglet

Collecting pyvirtualdisplay
  Downloading https://files.pythonhosted.org/packages/68/6b/4bc5678b5219edba7190ae45a7e3d02b03dc7d20ffae84047c7494b355c3/PyVirtualDisplay-0.2.3-py2.py3-none-any.whl
Collecting EasyProcess (from pyvirtualdisplay)
  Downloading https://files.pythonhosted.org/packages/fa/29/40040d1d64a224a5e44df9572794a66494618ffe5c77199214aeceedb8a7/EasyProcess-0.2.7-py2.py3-none-any.whl
Installing collected packages: EasyProcess, pyvirtualdisplay
Successfully installed EasyProcess-0.2.7 pyvirtualdisplay-0.2.3
Collecting piglet
[?25l  Downloading https://files.pythonhosted.org/packages/86/f6/ef278239ebe525466ea51a7dd9d6d3211d197ac4b4abc76e17cdd419f69c/piglet-0.4.4.tar.gz (52kB)
[K     |████████████████████████████████| 61kB 3.0MB/s 
[?25hCollecting Parsley (from piglet)
[?25l  Downloading https://files.pythonhosted.org/packages/2b/d6/4fed8d65e28a970e1c5cb33ce9c7e22e3de745e1b2ae37af051ef16aea3b/Parsley-1.3-py2.py3-none-any.whl (88kB)
[K     |███████████████████████████████

In [3]:
from pyvirtualdisplay import Display
# Start an off-screen (visible=0) virtual X display of 640x480 pixels.
# Presumably needed so the gym environment can be rendered for video
# recording on a machine without a physical screen.
display = Display(visible=0, size=(640, 480))
display.start()

xdpyinfo was not found, X start can not be checked! Please install xdpyinfo!


<Display cmd_param=['Xvfb', '-br', '-nolisten', 'tcp', '-screen', '0', '640x480x24', ':1001'] cmd=['Xvfb', '-br', '-nolisten', 'tcp', '-screen', '0', '640x480x24', ':1001'] oserror=None return_code=None stdout="None" stderr="None" timeout_happened=False>

In [4]:
# Install the TensorBoard logging package; presumably this is what provides
# the `tensorboardX` module imported below -- verify if that import fails.
!pip3 install tensorboard-pytorch

Collecting tensorboard-pytorch
[?25l  Downloading https://files.pythonhosted.org/packages/6e/d6/b8540153f69a8720b2f032fe8c7504ee66c8c0bce9103c272bd67c8e8c77/tensorboard_pytorch-0.7.1-py2.py3-none-any.whl (72kB)
[K     |████████████████████████████████| 81kB 3.9MB/s 
Installing collected packages: tensorboard-pytorch
Successfully installed tensorboard-pytorch-0.7.1


In [0]:
import gym
from collections import namedtuple
import numpy as np
from tensorboardX import SummaryWriter

import torch
import torch.nn as nn
import torch.optim as optim


# Hyperparameters for training.
HIDDEN_SIZE = 128  # width of the hidden layer passed to Net
BATCH_SIZE = 16  # number of complete episodes collected per training batch
PERCENTILE = 70  # episodes with reward below this percentile of the batch are dropped

In [0]:
class Net(nn.Module):
    """Small fully connected network: Linear -> ReLU -> Linear.

    Maps an observation vector of length ``obs_size`` to ``n_actions`` raw
    scores. No softmax is applied here; callers normalise the scores
    themselves when they need probabilities.
    """

    def __init__(self, obs_size, hidden_size, n_actions):
        super().__init__()
        layers = [
            nn.Linear(obs_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, n_actions),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw per-action scores for the batch of observations ``x``."""
        scores = self.net(x)
        return scores


In [0]:
# Lightweight records used while collecting experience:
#   EpisodeStep - the observation seen and the action taken at one step
#   Episode     - the summed reward of one finished episode plus its steps
EpisodeStep = namedtuple('EpisodeStep', ['observation', 'action'])
Episode = namedtuple('Episode', ['reward', 'steps'])

In [0]:
def iterate_batches(env, net, batch_size):
    """Endlessly play episodes with the current network and yield them in batches.

    At every step the action is sampled from the softmax of the network's
    output for the current observation. Each finished episode is stored as
    an ``Episode`` (summed reward plus its list of ``EpisodeStep`` records).
    Once ``batch_size`` episodes have accumulated, the list is yielded and a
    fresh one is started. The generator never terminates on its own.
    """
    softmax = nn.Softmax(dim=1)
    collected = []
    total_reward, steps = 0.0, []
    obs = env.reset()
    while True:
        # Wrap the observation in a batch of one for the network, then take
        # the single row of probabilities back out.
        probs = softmax(net(torch.FloatTensor([obs]))).data.numpy()[0]
        action = np.random.choice(len(probs), p=probs)
        next_obs, reward, is_done, _ = env.step(action)
        total_reward += reward
        # Store the observation the action was chosen from, not its result.
        steps.append(EpisodeStep(observation=obs, action=action))
        if not is_done:
            obs = next_obs
            continue
        # Episode finished: record it and start the next one.
        collected.append(Episode(reward=total_reward, steps=steps))
        total_reward, steps = 0.0, []
        obs = env.reset()
        if len(collected) == batch_size:
            yield collected
            collected = []

In [0]:
def filter_batch(batch, percentile):
    """Select the best episodes of a batch and flatten them into training tensors.

    The reward bound is the ``percentile``-th percentile of the episode
    rewards in ``batch``; episodes whose reward is below the bound are
    dropped. The remaining episodes' steps are concatenated in order.

    Returns a tuple ``(observations, actions, reward_bound, reward_mean)``
    where ``observations`` is a FloatTensor, ``actions`` a LongTensor,
    ``reward_bound`` the percentile value and ``reward_mean`` the mean reward
    over the whole (unfiltered) batch as a Python float.
    """
    rewards = [episode.reward for episode in batch]
    reward_bound = np.percentile(rewards, percentile)
    reward_mean = float(np.mean(rewards))

    # Written as a negated "<" so episodes exactly on the bound are kept.
    elite_steps = [
        step
        for episode in batch
        if not (episode.reward < reward_bound)
        for step in episode.steps
    ]

    train_obs_v = torch.FloatTensor([step.observation for step in elite_steps])
    train_act_v = torch.LongTensor([step.action for step in elite_steps])
    return train_obs_v, train_act_v, reward_bound, reward_mean

In [18]:
env = gym.make("CartPole-v0")
# The Monitor wrapper writes its recordings into the "mon" directory, which
# is where show_video() looks for .mp4 files.
env = gym.wrappers.Monitor(env, directory="mon", force=True)
obs_size = env.observation_space.shape[0]
n_actions = env.action_space.n

net = Net(obs_size, HIDDEN_SIZE, n_actions)
# CrossEntropyLoss takes the raw network scores, so no softmax is applied
# before computing the loss.
objective = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=net.parameters(), lr=0.01)
writer = SummaryWriter(comment="-cartpole")

# try/finally so the writer and the monitored environment are closed even
# when the cell is interrupted; closing the env lets the Monitor wrapper
# finish writing its recordings.
try:
    for iter_no, batch in enumerate(iterate_batches(env, net, BATCH_SIZE)):
        # Train the network to imitate the actions taken in the best episodes.
        obs_v, acts_v, reward_b, reward_m = filter_batch(batch, PERCENTILE)
        optimizer.zero_grad()
        action_scores_v = net(obs_v)
        loss_v = objective(action_scores_v, acts_v)
        loss_v.backward()
        optimizer.step()
        print("%d: loss=%.3f, reward_mean=%.1f, reward_bound=%.1f" % (
            iter_no, loss_v.item(), reward_m, reward_b))
        writer.add_scalar("loss", loss_v.item(), iter_no)
        writer.add_scalar("reward_bound", reward_b, iter_no)
        writer.add_scalar("reward_mean", reward_m, iter_no)
        # Stop once the mean reward over the batch exceeds 199.
        if reward_m > 199:
            print("Solved!")
            break
finally:
    writer.close()
    env.close()

0: loss=0.697, reward_mean=21.6, reward_bound=21.5
1: loss=0.678, reward_mean=22.0, reward_bound=23.5
2: loss=0.673, reward_mean=34.9, reward_bound=32.0
3: loss=0.652, reward_mean=31.7, reward_bound=34.5
4: loss=0.640, reward_mean=35.6, reward_bound=41.0
5: loss=0.651, reward_mean=40.4, reward_bound=44.5
6: loss=0.640, reward_mean=36.6, reward_bound=39.0
7: loss=0.625, reward_mean=39.6, reward_bound=36.0
8: loss=0.630, reward_mean=43.2, reward_bound=47.0
9: loss=0.628, reward_mean=38.2, reward_bound=42.5
10: loss=0.620, reward_mean=48.7, reward_bound=53.5
11: loss=0.619, reward_mean=39.8, reward_bound=42.0
12: loss=0.604, reward_mean=53.3, reward_bound=60.5
13: loss=0.610, reward_mean=67.5, reward_bound=88.5
14: loss=0.603, reward_mean=52.2, reward_bound=60.5
15: loss=0.609, reward_mean=59.3, reward_bound=67.0
16: loss=0.598, reward_mean=64.4, reward_bound=70.5
17: loss=0.600, reward_mean=64.8, reward_bound=72.5
18: loss=0.614, reward_mean=53.7, reward_bound=62.5
19: loss=0.581, reward

In [12]:
!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip

--2019-06-10 07:00:38--  https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
Resolving bin.equinox.io (bin.equinox.io)... 34.195.49.195, 52.72.145.109, 34.206.9.96, ...
Connecting to bin.equinox.io (bin.equinox.io)|34.195.49.195|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 16648024 (16M) [application/octet-stream]
Saving to: ‘ngrok-stable-linux-amd64.zip’


2019-06-10 07:00:39 (35.3 MB/s) - ‘ngrok-stable-linux-amd64.zip’ saved [16648024/16648024]



In [13]:
!unzip ngrok-stable-linux-amd64.zip

Archive:  ngrok-stable-linux-amd64.zip
  inflating: ngrok                   


In [0]:
# Launch TensorBoard in the background (trailing "&"), reading event files
# from LOG_DIR and listening on port 6006 on all interfaces (0.0.0.0).
# Presumably './runs' is where the SummaryWriter writes its logs -- confirm
# if no data shows up.
LOG_DIR = './runs'
get_ipython().system_raw(
    'tensorboard --logdir {} --host 0.0.0.0 --port 6006 &'
    .format(LOG_DIR)
)

In [0]:
# Start an ngrok HTTP tunnel to local port 6006 in the background.
# NOTE(review): this exposes whatever is served on port 6006 through a public URL.
get_ipython().system_raw('./ngrok http 6006 &')

In [16]:
# Query the local tunnels API on port 4040 and print the public URL of the
# first tunnel in the JSON response.
! curl -s http://localhost:4040/api/tunnels | python3 -c \
    "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"

https://41c1c593.ngrok.io


In [0]:
import glob
import io
import base64
from IPython.display import HTML

from IPython import display as ipythondisplay

"""
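Utility function for displaying a recorded video of a gym environment inline in the notebook.
Recording is enabled by wrapping the environment in gym.wrappers.Monitor with directory="mon"; no wrap_env helper is defined in this notebook.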
"""

def show_video(directory='mon'):
    """Embed a recorded video from ``directory`` in the notebook output.

    Looks for ``*.mp4`` files directly inside ``directory`` (default
    ``'mon'``) and renders the first one, in sorted filename order, as an
    inline HTML5 ``<video>`` element with the file contents base64-encoded
    into the markup. Prints "Could not find video" when there is no
    ``.mp4`` file.
    """
    # Sort so that the chosen file does not depend on directory listing order.
    mp4list = sorted(glob.glob('{}/*.mp4'.format(directory)))
    if not mp4list:
        print("Could not find video")
        return
    # Read-only binary mode; the context manager closes the handle.
    with open(mp4list[0], 'rb') as video_file:
        encoded = base64.b64encode(video_file.read())
    ipythondisplay.display(HTML(data='''<video alt="test" autoplay 
                loop controls style="height: 400px;">
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
             </video>'''.format(encoded.decode('ascii'))))

In [34]:
# Render a recorded .mp4 from the "mon" directory inline in the notebook.
show_video()