### Model-Free Reinforcement Learning

In [1]:
import json
import os
import pprint
import sys
import tempfile
from subprocess import check_output

import gym
import numpy as np
import pandas as pd
from gym import wrappers
from IPython.display import HTML

#### Q-Learning

In [2]:
def action_selection(state, Q, episode, n_episodes):
    """Choose an action for ``state`` using an epsilon-greedy rule.

    The exploration probability is ``episode / n_episodes * 2`` clipped below
    at 0, so any non-positive ``episode`` value always yields the greedy action.

    Args:
        state: Row index into ``Q`` for the current state.
        Q: 2-D table of action values indexed as ``Q[state][action]``.
        episode: Schedule counter used to derive the exploration probability.
        n_episodes: Scale for the schedule (must be non-zero).

    Returns:
        A uniformly random action index with probability epsilon, otherwise
        the index of the largest entry in ``Q[state]``.
    """
    explore_prob = max(0, episode/n_episodes*2)
    # One uniform draw is always consumed, even when explore_prob is 0.
    should_explore = np.random.random() < explore_prob
    if not should_explore:
        return np.argmax(Q[state])
    # The number of actions is taken from the width of the first row of Q.
    return np.random.randint(len(Q[0]))

In [3]:
def q_learning(env, alpha = 0.9, gamma = 0.9, n_episodes = 10000):
    """Learn a tabular action-value function with Q-learning.

    Args:
        env: Environment with discrete spaces exposing ``observation_space.n``
            and ``action_space.n``, a ``reset()`` returning the initial state
            and a ``step(action)`` returning
            ``(next_state, reward, done, info)``.
        alpha: Step size applied to each temporal-difference error.
        gamma: Discount factor applied to the bootstrapped next-state value.
        n_episodes: Number of training episodes to run (default 10000).

    Returns:
        ``numpy.ndarray`` of shape ``(nS, nA)`` holding the learned values.
        Entries that are never visited keep their random initial value.
    """
    # Read the space sizes from the environment itself rather than from its
    # inner ``env.env``, so both wrapped and unwrapped environments work.
    nS = env.observation_space.n
    nA = env.action_space.n

    # Random initial values drawn from [0, 2).
    Q = np.random.random((nS, nA)) * 2.0

    # The counter runs from n_episodes//2 downwards; action_selection turns it
    # into an exploration rate that reaches 0 once the counter hits 0.
    for episode in range(n_episodes//2, -n_episodes//2, -1):
        state = env.reset()
        done = False
        while not done:
            action = action_selection(state, Q, episode, n_episodes)
            nstate, reward, done, _ = env.step(action)
            # Do not bootstrap from the next state when the episode has ended.
            td_target = reward + gamma * Q[nstate].max() * (not done)
            Q[state][action] += alpha * (td_target - Q[state][action])
            state = nstate
    return Q

In [4]:
# Directory that receives the Monitor's recordings and statistics.
mdir = tempfile.mkdtemp()

# Build FrozenLake and wrap it in a Monitor writing into ``mdir``.
env = wrappers.Monitor(gym.make('FrozenLake-v0'), mdir, force=True)

# Train on the monitored environment with the default hyper-parameters.
Q = q_learning(env)

[2017-04-02 00:40:36,000] Making new env: FrozenLake-v0
[2017-04-02 00:40:36,013] Starting new video recorder writing to /tmp/tmpww4igizw/openaigym.video.0.379.video000000.json
[2017-04-02 00:40:36,015] Starting new video recorder writing to /tmp/tmpww4igizw/openaigym.video.0.379.video000001.json
[2017-04-02 00:40:36,023] Starting new video recorder writing to /tmp/tmpww4igizw/openaigym.video.0.379.video000008.json
[2017-04-02 00:40:36,040] Starting new video recorder writing to /tmp/tmpww4igizw/openaigym.video.0.379.video000027.json
[2017-04-02 00:40:36,060] Starting new video recorder writing to /tmp/tmpww4igizw/openaigym.video.0.379.video000064.json
[2017-04-02 00:40:36,096] Starting new video recorder writing to /tmp/tmpww4igizw/openaigym.video.0.379.video000125.json
[2017-04-02 00:40:36,135] Starting new video recorder writing to /tmp/tmpww4igizw/openaigym.video.0.379.video000216.json
[2017-04-02 00:40:36,181] Starting new video recorder writing to /tmp/tmpww4igizw/openaigym.video

In [5]:
# Keep an evenly spaced sample of the recordings, from the first to the last.
n_videos = 5
all_videos = np.array(env.videos)
last_row = len(all_videos) - 1

idxs = np.linspace(0, last_row, n_videos).astype(int)
videos = all_videos[idxs, :]

In [6]:
# Upload each selected recording with the asciinema CLI and keep what it prints
# (line breaks stripped) as that recording's URL.
urls = []
for video_path in videos[:, 0]:
    out = check_output(["asciinema", "upload", video_path])
    out = out.decode("utf-8").replace('\n', '').replace('\r', '')
    urls.append([out])

# Attach the URLs as a third column. Only the first two columns are kept from
# ``videos`` so that re-running this cell replaces the URL column instead of
# appending another one (which would break the 3-way unpacking later on).
videos = np.concatenate((videos[:, :2], urls), axis=1)

In [7]:
# Embed template: {0} is the heading text, {1} the asciinema cast id.
# The heading is closed explicitly so the player script is not nested in <h2>.
html_tag = """
    <h2>{0}</h2>
    <script type="text/javascript" 
        src="https://asciinema.org/a/{1}.js" 
        id="asciicast-{1}" 
        async data-autoplay="true" data-size="big">
    </script>
    """

fragments = []
for _video_path, meta_path, url in videos:
    # The metadata file supplies the episode id shown in the heading.
    with open(meta_path) as data_file:
        meta = json.load(data_file)
    # The cast id is the last path segment of the upload URL.
    castid = url.split('/')[-1]
    fragments.append(html_tag.format('Episode ' + str(meta['episode_id']),
                                     castid))

strm = ''.join(fragments)
HTML(data=strm)

In [8]:
# Value of each state under the greedy policy: the largest entry in its Q row.
V = Q.max(axis=1)
V

array([ 0.02811639,  0.0095337 ,  0.01137065,  0.01803616,  0.156215  ,
        1.09083691,  0.01094503,  1.93693217,  0.16369259,  0.53101062,
        0.15193081,  1.65656898,  1.66693547,  0.39044957,  0.46077878,
        1.93756701])

In [9]:
# Greedy policy: for each state, the index of the action with the largest Q.
pi = Q.argmax(axis=1)
pi

array([1, 3, 1, 3, 0, 3, 2, 3, 3, 1, 0, 2, 0, 2, 1, 3])

In [10]:
# Close the monitored environment; the captured log shows the Monitor
# finishing writing its results to the temporary directory at this point.
env.close()

[2017-04-02 00:40:50,753] Finished writing results. You can upload them to the scoreboard via gym.upload('/tmp/tmpww4igizw')


In [11]:
# Read the scoreboard key from the OPENAI_GYM_API_KEY environment variable so
# a real key never has to be written into the notebook; the original
# placeholder is kept as the fallback when the variable is not set.
gym.upload(mdir, api_key=os.environ.get('OPENAI_GYM_API_KEY', '<YOUR OPENAI API KEY>'))

[2017-04-02 00:40:50,777] [FrozenLake-v0] Uploading 10000 episodes of training data
[2017-04-02 00:40:52,390] [FrozenLake-v0] Uploading videos of 19 training episodes (1810 bytes)
[2017-04-02 00:40:52,639] [FrozenLake-v0] Creating evaluation object from /tmp/tmpww4igizw with learning curve and training video
[2017-04-02 00:40:52,874] 
****************************************************
You successfully uploaded your evaluation on FrozenLake-v0 to
OpenAI Gym! You can find it at:

    https://gym.openai.com/evaluations/eval_hGg7u5NwS1a35elvMZC0Q

****************************************************
