# Installing Packages


In [1]:
%%sh
# Set up everything the notebook needs. The clone and ngrok steps are guarded
# so the cell can be re-run without errors or overwrite prompts.

# gym, plus the tools used to render and record episodes without a display
pip install gym pyvirtualdisplay
apt-get install -y xvfb python-opengl ffmpeg
pip3 install box2d

# `git clone` fails when the target directory already exists, so only clone
# on the first run.
[ -d hillclimber ] || git clone https://github.com/katipber/hillclimber.git

# dash
pip install -q dash
pip install -q dash_core_components
pip install -q dash_html_components
pip install -q dash_table
pip install -q dash-bootstrap-components

# get ngrok (skipped when the binary has already been unpacked)
if [ ! -x ngrok ]; then
    curl -O https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
    # -o overwrites without prompting; there is no terminal to answer a prompt.
    unzip -o ngrok-stable-linux-amd64.zip
fi

Collecting pyvirtualdisplay
  Downloading https://files.pythonhosted.org/packages/d0/8a/643043cc70791367bee2d19eb20e00ed1a246ac48e5dbe57bbbcc8be40a9/PyVirtualDisplay-1.3.2-py2.py3-none-any.whl
Collecting EasyProcess
  Downloading https://files.pythonhosted.org/packages/48/3c/75573613641c90c6d094059ac28adb748560d99bd27ee6f80cce398f404e/EasyProcess-0.3-py2.py3-none-any.whl
Installing collected packages: EasyProcess, pyvirtualdisplay
Successfully installed EasyProcess-0.3 pyvirtualdisplay-1.3.2
Reading package lists...
Building dependency tree...
Reading state information...
ffmpeg is already the newest version (7:3.4.8-0ubuntu0.2).
The following package was automatically installed and is no longer required:
  libnvidia-common-440
Use 'apt autoremove' to remove it.
Suggested packages:
  libgle3
The following NEW packages will be installed:
  python-opengl xvfb
0 upgraded, 2 newly installed, 0 to remove and 35 not upgraded.
Need to get 1,280 kB of archives.
After this operation, 7,682 kB o

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100 13.1M  100 13.1M    0     0  20.8M      0 --:--:-- --:--:-- --:--:-- 20.8M


In [2]:
import sys
sys.path.append('./hillclimber')

import utils
import pandas as pd
from agent import HillClimber
from IPython.display import clear_output

# Selecting the Environment

List available environments.

In [3]:
# Print the table of supported environments (CODE, NAME and LINK columns).
print(utils.env_list())

CODE       NAME                 LINK
CP         CartPole-v1          https://gym.openai.com/envs/CartPole-v1/
MC         MountainCar-v0       https://gym.openai.com/envs/MountainCar-v0/
LL         LunarLander-v2       https://gym.openai.com/envs/LunarLander-v2/




* CODE: The code we will use to instantiate our environment.
* NAME: Name of the environment.
* LINK: Link to environment page for more details.

Select your environment by using its code.

In [4]:
# Environment code, taken from the CODE column of the environment list.
code = 'CP' # <-- Change this to your environment code.

# Look up the environment object for the chosen code.
env = utils.get_env(code)

# Create an Agent


In [5]:
# Create the hill-climbing agent from the environment's model; the environment
# code is passed as the agent's file_name.
agent = HillClimber(env.model, file_name=code)

# Watch Agent

In [6]:
# Play one recorded episode with the agent in test mode, then show the video.
agent.test = True
env.record = True

obs = env.reset()
score = 0
done = False

# Keep stepping with the agent's chosen action until the episode reports done.
while not done:
    action = agent.eval(obs)
    obs, reward, done, info = env.step(action)
    score += reward

agent.update_score(score)

# Switch recording off again before playing back the captured episode.
env.record = False
env.play_video()

  keepdims=keepdims)
  ret = ret.dtype.type(ret / rcount)


# "Train" Agent

In [7]:
# Train the agent: play episodes until its best mean score reaches the
# environment's target, showing the 10 most recent log rows as it goes.
agent.test = False
env.record = False

df = pd.DataFrame(columns=agent.log_header)

while True:
    obs = env.reset()

    # Play one full episode and accumulate its reward.
    score = 0
    while True:
        action = agent.eval(obs)
        obs, reward, done, info = env.step(action)
        score += reward
        if done:
            break

    log = agent.update_score(score)
    log.columns = agent.log_header
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # replacement. The empty seed frame is left out of the concat so the
    # numeric dtypes of the log (and the float formatting below) are kept.
    frames = [log] if df.empty else [df, log]
    df = pd.concat(frames, ignore_index=True).tail(10)

    clear_output(wait=True)
    with pd.option_context('display.float_format', '{:12.2f}'.format):
        print(df[['node_score', 'seed_mean', 'best_mean']].to_string(index=False))


    if df.best_mean.iloc[-1] >= env.target:
        print(f"\n\nAgent's best mean score is {df.best_mean.iloc[-1]:.2f}")
        break

  node_score    seed_mean    best_mean
      500.00       391.65       391.65
      500.00       391.65       391.65
      500.00       391.65       391.65
      500.00       391.65       391.65
      500.00       391.65       391.65
      500.00       391.65       391.65
      500.00       391.65       391.65
      500.00       391.65       391.65
      500.00       391.65       391.65
      181.00       500.00       500.00


Agent's best mean score is 500.00


# Test Agent

In [8]:
# Evaluate the agent in test mode over 100 episodes, showing the 10 most
# recent log rows as it goes, then report its mean score.
agent.test = True
env.record = False

df = pd.DataFrame(columns=agent.log_header)

for _ in range(100):
    obs = env.reset()

    # Play one full episode and accumulate its reward.
    score = 0
    while True:
        action = agent.eval(obs)
        obs, reward, done, info = env.step(action)
        score += reward
        if done:
            break

    log = agent.update_score(score)
    log.columns = agent.log_header
    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # replacement. The empty seed frame is left out of the concat so the
    # numeric dtypes of the log (and the float formatting below) are kept.
    frames = [log] if df.empty else [df, log]
    df = pd.concat(frames, ignore_index=True).tail(10)

    clear_output(wait=True)
    with pd.option_context('display.float_format', '{:12.2f}'.format):
        print(df[['node_score', 'seed_mean', 'best_mean']].to_string(index=False))



print(f"\n\nAgent's mean score in the last 100 games is {agent.best.mean:.2f}")
# '\T' is not a valid escape sequence and printed a literal backslash; the
# intended leading newline is '\n'.
print('\nTip: You can go back and watch your agent again to see the improvement.')

  node_score    seed_mean    best_mean
      500.00       479.81       479.81
      500.00       479.81       479.81
      500.00       479.81       479.81
      500.00       479.81       479.81
      500.00       479.81       479.81
      500.00       479.81       479.81
      500.00       479.81       479.81
      500.00       479.81       479.81
      500.00       479.81       479.81
      500.00       479.81       479.81


Agent's mean score in the last 100 games is 479.81

Hint: You can go back and watch your agent again to see the improvement.


# Save / Load Model

In [9]:
# Save the agent's model under the name 'my_model'.
agent.save_model('my_model')

# Uncomment to load the model named 'my_model' into the agent instead.
# agent.load_model('my_model')

# Visualization

Because ngrok's free plan limits the number of connections, the visualization does not work very well here. It is better to run 'graph.py' locally.

After executing the next two cells, open the link ending with 'ngrok.io'.

In [10]:
import json
import time
import urllib.request

# launch ngrok in the background, tunnelling to local port 8050
get_ipython().system_raw('./ngrok http 8050 &')

# get url with ngrok
# ngrok needs a moment to start, so querying its local API straight away can
# hit a refused connection or an empty tunnel list. Poll for up to ~10 seconds.
public_url = None
for _ in range(20):
    try:
        with urllib.request.urlopen('http://localhost:4040/api/tunnels') as response:
            tunnels = json.load(response)['tunnels']
        if tunnels:
            public_url = tunnels[0]['public_url']
            break
    except OSError:
        # URLError (a subclass of OSError) is raised while the API is not listening yet.
        pass
    time.sleep(0.5)

print(public_url or 'ngrok tunnel is not available yet; re-run this cell.')

http://cceaabf44f9c.ngrok.io


In [11]:
# Run graph.py, which starts the Dash app (served on port 8050).
! python graph.py

Dash is running on http://127.0.0.1:8050/

 in production, use a production WSGI server like gunicorn instead.

 * Serving Flask app "graph" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on


<!--  -->