# Installing Packages


In [None]:
%%sh
# gym
pip install gym pyvirtualdisplay
apt-get install -y xvfb python-opengl ffmpeg
pip3 install box2d
# Skip the clone when the checkout already exists so the cell can be re-run.
[ -d hillclimber ] || git clone https://github.com/katipber/hillclimber.git

# dash
pip install -q dash
pip install -q dash_core_components
pip install -q dash_html_components
pip install -q dash_table
pip install -q dash-bootstrap-components

# get ngrok
curl -O https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
# -o overwrites an existing binary without prompting (the cell has no interactive stdin).
unzip -o ngrok-stable-linux-amd64.zip

In [None]:
import sys
sys.path.append('./hillclimber')

import utils
import pandas as pd
from agent import HillClimber
from IPython.display import clear_output

# Selecting the Environment

List available environments.

In [None]:
# Print whatever the project's `utils.env_list()` helper returns; the
# markdown below describes its CODE / NAME / LINK columns.
print(utils.env_list())



* CODE: The code we will use to instantiate our environment.
* NAME: Name of the environment.
* LINK: Link to environment page for more details.

Select your environment by using its code.

In [None]:
code = 'CP' # <-- Change this to your environment code.

# Resolve the chosen code to an environment object via the project helper.
# `env` is reused by every later cell (reset/step/record/target).
env = utils.get_env(code)

# Create an Agent


In [None]:
# Build the agent from the environment's `model` attribute. The environment
# code is passed as `file_name` (presumably used to name the agent's
# files on disk — confirm in hillclimber/agent.py).
agent = HillClimber(env.model, file_name=code)

# Watch Agent

In [None]:
# Play one recorded episode with the agent's `test` flag switched on.
agent.test = True
env.record = True

score = 0
obs = env.reset()

# Step the environment until it reports the episode is finished,
# accumulating the per-step reward into the episode score.
done = False
while not done:
    action = agent.eval(obs)
    obs, reward, done, info = env.step(action)
    score += reward

agent.update_score(score)

# Stop recording before playing back the captured episode.
env.record = False
env.play_video()

# "Train" Agent

In [None]:
# Run episodes with the agent's `test` flag off until its best mean score
# reaches the environment's target, showing the last 10 log rows as it goes.
agent.test = False
env.record = False

df = pd.DataFrame(columns=agent.log_header)

while True:
    obs = env.reset()

    # Play a single episode to completion, summing the rewards.
    score = 0
    done = False
    while not done:
        action = agent.eval(obs)
        obs, reward, done, info = env.step(action)
        score += reward

    # NOTE(review): assumes update_score returns a DataFrame of log rows
    # (its columns are relabelled below) — confirm in hillclimber/agent.py.
    log = agent.update_score(score)
    log.columns = agent.log_header

    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # replacement. The empty seed frame is left out of the concat so it cannot
    # influence the resulting dtypes.
    frames = [log] if df.empty else [df, log]
    df = pd.concat(frames, ignore_index=True).tail(10)

    clear_output(wait=True)
    with pd.option_context('display.float_format', '{:12.2f}'.format):
        print(df[['node_score', 'seed_mean', 'best_mean']].to_string(index=False))

    # Stop once the most recent best mean meets the environment's target.
    if df.best_mean.iloc[-1] >= env.target:
        print(f"\n\nAgent's best mean score is {df.best_mean.iloc[-1]:.2f}")
        break

# Test Agent

In [None]:
# Play 100 episodes with the agent's `test` flag on and recording off,
# showing the last 10 log rows after each episode.
agent.test = True
env.record = False

df = pd.DataFrame(columns=agent.log_header)

for _ in range(100):
    obs = env.reset()

    # Play a single episode to completion, summing the rewards.
    score = 0
    done = False
    while not done:
        action = agent.eval(obs)
        obs, reward, done, info = env.step(action)
        score += reward

    # NOTE(review): assumes update_score returns a DataFrame of log rows
    # (its columns are relabelled below) — confirm in hillclimber/agent.py.
    log = agent.update_score(score)
    log.columns = agent.log_header

    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # replacement. The empty seed frame is left out of the concat so it cannot
    # influence the resulting dtypes.
    frames = [log] if df.empty else [df, log]
    df = pd.concat(frames, ignore_index=True).tail(10)

    clear_output(wait=True)
    with pd.option_context('display.float_format', '{:12.2f}'.format):
        print(df[['node_score', 'seed_mean', 'best_mean']].to_string(index=False))

print(f"\n\nAgent's mean score in the last 100 games is {agent.best.mean:.2f}")
print('Tip: You can go back and watch your agent again to see the improvement.')

# Save / Load Model

In [None]:
# Save the agent's model, passing 'my_model' as the name to save_model.
agent.save_model('my_model')

# Uncomment to load a model previously saved under the same name.
# agent.load_model('my_model')

# Visualization

Because ngrok's free plan limits the number of connections, the visualization does not work very well here. It is better to run 'graph.py' locally.

After executing the next two cells, open the link ending with 'ngrok.io'.

In [None]:
# launch ngrok
get_ipython().system_raw('./ngrok http 8050 &')

# get url with ngrok
! curl -s http://localhost:4040/api/tunnels | python3 -c \
    "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"

In [None]:
# Run the project's graph script as a shell command. The previous cell
# tunnels port 8050 (presumably where this app serves — confirm in
# graph.py).
! python hillclimber/graph.py

<!--  -->