In [2]:
import numpy as np
import gym
import ppaquette_gym_doom
from ppaquette_gym_doom.wrappers import SetResolution, ToDiscrete
from gym.wrappers import SkipWrapper
from gym import wrappers

# (see https://github.com/ppaquette/gym-doom/blob/master/ppaquette_gym_doom/doom_basic.py)
def create_env(seed=None, monitory_directory=None):
    env_spec = gym.spec('ppaquette/DoomBasic-v0')
    env_spec.id = 'DoomBasic-v0'
    env = env_spec.make()

    if monitory_directory is not None:
        env = wrappers.Monitor(
            env, directory=monitory_directory, force=True, mode='training',
            video_callable=lambda episode_id: episode_id % 100 == 0)

    if seed is not None:
        env.seed(seed)

    return SetResolution('200x150')(
        SkipWrapper(repeat_count=4)(
        ToDiscrete('minimal')(env)))

env = create_env()
WIDTH, HEIGHT = env.screen_width, env.screen_height

NOOP, SHOOT, RIGHT, LEFT = 0, 1, 2, 3

## Collecting experiences

In [3]:
from collections import namedtuple
import operator

SARE = namedtuple('SAR', ['state', 'action', 'reward', 'end'])


def generate_sares(env, agent, episode_count=100):
    reward = 0
    done = False

    for i in range(episode_count):
        observation = env.reset()
        while True:
            action = agent.act(observation, reward, done)
            new_observation, reward, done, _ = env.step(action)
            yield SARE(observation, action, reward, done)
            
            if done:
                break
            else:
                observation = new_observation

def episode_sares(env, agent, episode_count=100):
    sares = list(generate_sares(env, agent, episode_count))
    print('average reward per episode = {}'.format(
        sum(r for _, _, r, _ in sares) / float(sum(e for _, _, _, e in sares))))
    return sares

        
def to_experiences(sares, only_n_misses=100):
    experiences = [
        (previous_s, a, r, next_s, end)
        for (previous_s, a, r, end), (next_s, _, _, _) in zip(sares[:-1], sares[1:])
    ]

    # simplistic experience prioritization
    shuffled_exps = experiences if only_n_misses is None\
        else random.choices(experiences, k=only_n_misses) + [e for e in experiences if e[2] > 0]
    random.shuffle(shuffled_exps)

    prev_frames, actions, rewards, next_frames, is_ends = zip(*shuffled_exps)
    prev_frames = np.asarray(prev_frames)
    next_frames = np.asarray(next_frames)
    actions = np.asarray(actions)
    rewards = np.asarray(rewards)
    is_ends = np.asarray(is_ends)
    
    print('Training on {}/{} positive/total out of {} 1-step experiences with actions distribution {}'.format(
        np.sum(rewards>=0),
        len(rewards),
        len(experiences),
        np.bincount(actions)))
    
    return (prev_frames, next_frames, actions, rewards, is_ends)

## Deep Q-learning

In [4]:
import tensorflow as tf
from keras import backend as K

from keras.layers import Dense, Convolution2D, Flatten, Activation
from keras.models import Sequential
from keras.optimizers import Adam

sess = tf.InteractiveSession()
K.set_session(sess)

def create_q_model(conv1_weights=None, conv2_weights=None, dense1_weights=None, dense2_weights=None):
    model = Sequential()

    model.add(
        Convolution2D(nb_filter=2, nb_row=6, nb_col=6, subsample=(1, 1), border_mode='valid', weights=conv1_weights,
            input_shape=[HEIGHT, WIDTH, 3], dim_ordering='tf'))
    model.add(Activation('relu'))
    model.add(
        Convolution2D(nb_filter=4, nb_row=2, nb_col=2, subsample=(1, 1), weights=conv2_weights))
    model.add(Activation('relu'))
    model.add(Flatten())
    model.add(Dense(64, init='normal', weights=dense1_weights))
    model.add(Activation('relu'))
    model.add(Dense(4, init='normal', weights=dense2_weights))
    model.compile(loss='mse', optimizer=Adam())
    
    return model

acting_model = create_q_model()
target_model = create_q_model()

def copy_model(model):
    conv1_weights =  [w.eval() for w in model.layers[0].weights]
    conv2_weights = [w.eval() for w in model.layers[2].weights]
    dense1_weights = [w.eval() for w in model.layers[5].weights]
    dense2_weights = [w.eval() for w in model.layers[7].weights]
    return create_q_model(conv1_weights, conv2_weights, dense1_weights, dense2_weights)

acting_model.summary()

Using TensorFlow backend.


____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
convolution2d_1 (Convolution2D)  (None, 145, 195, 2)   218         convolution2d_input_1[0][0]      
____________________________________________________________________________________________________
activation_1 (Activation)        (None, 145, 195, 2)   0           convolution2d_1[0][0]            
____________________________________________________________________________________________________
convolution2d_2 (Convolution2D)  (None, 4, 194, 1)     2324        activation_1[0][0]               
____________________________________________________________________________________________________
activation_2 (Activation)        (None, 4, 194, 1)     0           convolution2d_2[0][0]            
___________________________________________________________________________________________

In [5]:
def sares_to_input_targets(model, sares, gamma = .99, reward_clip=5, only_n_misses=100):
    
    prev_frames, next_frames, actions, rewards, is_ends = to_experiences(sares, only_n_misses)
    
    n_samples = len(actions)
    clipped_rewards = np.clip(rewards, -np.inf, reward_clip)
    
    # Transcription of the Q-learning target formula
    targets = clipped_rewards + gamma * (1 - is_ends) * model.predict(next_frames).max(axis=1)

    target_action_rewards = model.predict(prev_frames)
    target_action_rewards[np.arange(n_samples), actions] = targets

    return prev_frames, target_action_rewards

## Training

In [6]:
import random

class EpsilonGreedyQAgent(object):
    def __init__(self, model, epsilon=.1):
        self.model = model
        self.epsilon = epsilon

    def act(self, observation, reward, done):
        if random.uniform(0, 1) <= self.epsilon:
            return random.choice([NOOP, SHOOT, LEFT, RIGHT])
        else:
            return self.model.predict(observation[np.newaxis])[0].argmax()


N_BATCHES = 200
N_BATCHED_EPISODES = 10
UPDATE_TARGET_EVERY_N_BACTHES = 2
MINI_BATCH_SIZE = 32
REWARD_CLIP = 5
ONLY_N_MISSES = 200

directory = 'tmp/DoomBasic_old_qlearning'
env = create_env(monitory_directory=directory)

for _ in range(N_BATCHES):
    for _ in range(UPDATE_TARGET_EVERY_N_BACTHES):
        sares = episode_sares(env, EpsilonGreedyQAgent(acting_model, epsilon=.1), N_BATCHED_EPISODES)
        if (sum(r for _, _, r, _ in sares) / N_BATCHED_EPISODES) > 80:
            break
        prev_frames, target_action_rewards = sares_to_input_targets(target_model, sares, reward_clip=REWARD_CLIP, only_n_misses=ONLY_N_MISSES)
        acting_model.fit(x=prev_frames, y=target_action_rewards, batch_size=MINI_BATCH_SIZE, nb_epoch=1, verbose=0)
    
    target_model = copy_model(acting_model)

# final greedy episodes
sares = episode_sares(env, EpsilonGreedyQAgent(acting_model, epsilon=0), episode_count=100)

env.close()
gym.upload(directory, api_key='sk_bNZUvCfkTfabQCoKoKbjFA')

[2017-03-12 11:51:38,977] Clearing 2 monitor files from previous run (because force=True was provided)
[2017-03-12 11:51:39,912] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video000000.mp4


average reward per episode = -242.7
Training on 3/203 positive/total out of 518 1-step experiences with actions distribution [172  19   5   7]
average reward per episode = -343.4
Training on 2/201 positive/total out of 696 1-step experiences with actions distribution [189   5   5   2]
average reward per episode = -313.4
Training on 2/201 positive/total out of 631 1-step experiences with actions distribution [ 47   7 133  14]
average reward per episode = -359.5
Training on 0/200 positive/total out of 699 1-step experiences with actions distribution [  5   5  38 152]
average reward per episode = -292.6
Training on 3/203 positive/total out of 630 1-step experiences with actions distribution [  6  10  73 114]
average reward per episode = -358.0
Training on 0/200 positive/total out of 699 1-step experiences with actions distribution [  5   6 173  16]
average reward per episode = -360.5
Training on 0/200 positive/total out of 699 1-step experiences with actions distribution [  6  10  35 149]

[2017-03-12 11:53:23,552] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video000100.mp4


average reward per episode = -229.6
Training on 4/204 positive/total out of 527 1-step experiences with actions distribution [  5   7  78 114]
average reward per episode = -300.8
Training on 2/202 positive/total out of 631 1-step experiences with actions distribution [  7   8  86 101]
average reward per episode = -360.5
Training on 0/200 positive/total out of 699 1-step experiences with actions distribution [102   5  38  55]
average reward per episode = -358.5
Training on 0/200 positive/total out of 699 1-step experiences with actions distribution [67  3 55 75]
average reward per episode = -332.4
Training on 1/201 positive/total out of 666 1-step experiences with actions distribution [158   3   7  33]
average reward per episode = -358.5
Training on 0/200 positive/total out of 699 1-step experiences with actions distribution [  3   5  86 106]
average reward per episode = -222.1
Training on 5/204 positive/total out of 506 1-step experiences with actions distribution [82 11 43 68]
average

[2017-03-12 11:55:05,654] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video000200.mp4


average reward per episode = -346.4
Training on 1/201 positive/total out of 689 1-step experiences with actions distribution [85  3 50 63]
average reward per episode = -268.7
Training on 2/202 positive/total out of 586 1-step experiences with actions distribution [140   6  33  23]
average reward per episode = -304.8
Training on 2/202 positive/total out of 632 1-step experiences with actions distribution [ 9 11 86 96]
average reward per episode = -329.4
Training on 1/201 positive/total out of 659 1-step experiences with actions distribution [99  8 40 54]
average reward per episode = -277.2
Training on 4/203 positive/total out of 596 1-step experiences with actions distribution [  8  14  75 106]
average reward per episode = -315.3
Training on 3/202 positive/total out of 655 1-step experiences with actions distribution [ 15   7 112  68]
average reward per episode = -290.3
Training on 2/202 positive/total out of 604 1-step experiences with actions distribution [145   8  18  31]
average rew

[2017-03-12 11:56:48,526] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video000300.mp4


average reward per episode = -275.2
Training on 3/203 positive/total out of 599 1-step experiences with actions distribution [  6   4  88 105]
average reward per episode = -297.8
Training on 2/202 positive/total out of 619 1-step experiences with actions distribution [  6   3  90 103]
average reward per episode = -291.3
Training on 4/202 positive/total out of 612 1-step experiences with actions distribution [126   6  38  32]
average reward per episode = -213.0
Training on 4/204 positive/total out of 514 1-step experiences with actions distribution [132   7  25  40]
average reward per episode = -300.7
Training on 4/203 positive/total out of 649 1-step experiences with actions distribution [  5   7  83 108]
average reward per episode = -315.3
Training on 2/202 positive/total out of 660 1-step experiences with actions distribution [10  4 92 96]
average reward per episode = -315.4
Training on 1/201 positive/total out of 639 1-step experiences with actions distribution [117   6  31  47]
ave

[2017-03-12 11:58:31,826] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video000400.mp4


average reward per episode = -295.8
Training on 2/202 positive/total out of 617 1-step experiences with actions distribution [111  11  41  39]
average reward per episode = -282.3
Training on 3/202 positive/total out of 593 1-step experiences with actions distribution [  8   8 105  81]
average reward per episode = -300.8
Training on 3/202 positive/total out of 626 1-step experiences with actions distribution [154   7  14  27]
average reward per episode = -285.2
Training on 4/203 positive/total out of 618 1-step experiences with actions distribution [81  7 54 61]
average reward per episode = -227.5
Training on 6/205 positive/total out of 542 1-step experiences with actions distribution [14  9 88 94]
average reward per episode = -288.3
Training on 2/202 positive/total out of 605 1-step experiences with actions distribution [75  8 45 74]
average reward per episode = -235.6
Training on 3/203 positive/total out of 543 1-step experiences with actions distribution [70  5 68 60]
average reward 

[2017-03-12 12:00:13,929] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video000500.mp4


average reward per episode = -267.4
Training on 6/205 positive/total out of 606 1-step experiences with actions distribution [ 11  24  69 101]
average reward per episode = -74.7
Training on 12/207 positive/total out of 281 1-step experiences with actions distribution [12 28 74 93]
average reward per episode = -317.3
Training on 1/201 positive/total out of 632 1-step experiences with actions distribution [112   9  34  46]
average reward per episode = -232.5
Training on 8/205 positive/total out of 548 1-step experiences with actions distribution [92 12 66 35]
average reward per episode = -193.5
Training on 9/205 positive/total out of 422 1-step experiences with actions distribution [12 46 60 87]
average reward per episode = -303.3
Training on 2/202 positive/total out of 612 1-step experiences with actions distribution [91 12 29 70]
average reward per episode = -137.2
Training on 11/207 positive/total out of 370 1-step experiences with actions distribution [33 62 58 54]
average reward per

[2017-03-12 12:01:53,973] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video000600.mp4


average reward per episode = -253.2
Training on 4/203 positive/total out of 506 1-step experiences with actions distribution [27 30 80 66]
average reward per episode = -238.7
Training on 4/203 positive/total out of 529 1-step experiences with actions distribution [104  11  40  48]
average reward per episode = -242.9
Training on 8/204 positive/total out of 503 1-step experiences with actions distribution [47 38 46 73]
average reward per episode = -244.7
Training on 7/204 positive/total out of 491 1-step experiences with actions distribution [20 64 69 51]
average reward per episode = -261.1
Training on 3/203 positive/total out of 549 1-step experiences with actions distribution [52 30 65 56]
average reward per episode = -346.4
Training on 4/201 positive/total out of 695 1-step experiences with actions distribution [161  13   7  20]
average reward per episode = -181.8
Training on 7/204 positive/total out of 429 1-step experiences with actions distribution [10 19 88 87]
average reward per 

[2017-03-12 12:03:33,679] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video000700.mp4


average reward per episode = -244.5
Training on 11/204 positive/total out of 504 1-step experiences with actions distribution [ 3 47 70 84]
average reward per episode = -339.8
Training on 2/202 positive/total out of 580 1-step experiences with actions distribution [15 78 55 54]
average reward per episode = -218.5
Training on 4/204 positive/total out of 469 1-step experiences with actions distribution [105  62  10  27]
average reward per episode = -260.2
Training on 3/203 positive/total out of 571 1-step experiences with actions distribution [67  3 73 60]
average reward per episode = -227.5
Training on 11/205 positive/total out of 495 1-step experiences with actions distribution [21 31 76 77]
average reward per episode = -148.2
Training on 6/206 positive/total out of 363 1-step experiences with actions distribution [105  47  13  41]
average reward per episode = -181.1
Training on 10/206 positive/total out of 408 1-step experiences with actions distribution [49 64 38 55]
average reward p

[2017-03-12 12:05:14,366] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video000800.mp4


average reward per episode = -156.0
Training on 8/206 positive/total out of 363 1-step experiences with actions distribution [ 58 113  21  14]
average reward per episode = -331.9
Training on 0/200 positive/total out of 661 1-step experiences with actions distribution [ 18   4 120  58]
average reward per episode = -360.5
Training on 0/200 positive/total out of 699 1-step experiences with actions distribution [13  5 84 98]
average reward per episode = -332.2
Training on 1/201 positive/total out of 661 1-step experiences with actions distribution [89 25 28 59]
average reward per episode = -321.8
Training on 1/201 positive/total out of 582 1-step experiences with actions distribution [97 63 20 21]
average reward per episode = -195.8
Training on 10/205 positive/total out of 459 1-step experiences with actions distribution [101  28  51  25]
average reward per episode = -161.9
Training on 7/204 positive/total out of 398 1-step experiences with actions distribution [ 16  26 101  61]
average re

[2017-03-12 12:06:58,579] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video000900.mp4


average reward per episode = -272.2
Training on 2/202 positive/total out of 545 1-step experiences with actions distribution [122  21  34  25]
average reward per episode = -239.6
Training on 5/203 positive/total out of 496 1-step experiences with actions distribution [14 43 88 58]
average reward per episode = -107.5
Training on 11/207 positive/total out of 323 1-step experiences with actions distribution [20 53 69 65]
average reward per episode = -248.6
Training on 6/204 positive/total out of 521 1-step experiences with actions distribution [30 36 48 90]
average reward per episode = -215.9
Training on 4/203 positive/total out of 474 1-step experiences with actions distribution [75 23 47 58]
average reward per episode = -144.4
Training on 7/205 positive/total out of 389 1-step experiences with actions distribution [103  26  56  20]
average reward per episode = -240.0
Training on 5/204 positive/total out of 534 1-step experiences with actions distribution [84 17 67 36]
average reward per

[2017-03-12 12:08:42,495] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video001000.mp4


average reward per episode = -253.5
Training on 5/204 positive/total out of 543 1-step experiences with actions distribution [52 29 77 46]
average reward per episode = -230.6
Training on 4/204 positive/total out of 510 1-step experiences with actions distribution [50 22 93 39]
average reward per episode = -269.7
Training on 2/202 positive/total out of 572 1-step experiences with actions distribution [75 12 65 50]
average reward per episode = -275.6
Training on 3/203 positive/total out of 599 1-step experiences with actions distribution [87 10 30 76]
average reward per episode = -207.6
Training on 9/206 positive/total out of 481 1-step experiences with actions distribution [111  40  31  24]
average reward per episode = -369.7
Training on 2/202 positive/total out of 631 1-step experiences with actions distribution [ 68 131   2   1]
average reward per episode = -197.6
Training on 5/204 positive/total out of 439 1-step experiences with actions distribution [ 26  20 154   4]
average reward 

[2017-03-12 12:10:34,014] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video001100.mp4


average reward per episode = -311.4
Training on 1/201 positive/total out of 631 1-step experiences with actions distribution [  5   4 192]
average reward per episode = -229.7
Training on 3/203 positive/total out of 513 1-step experiences with actions distribution [165   4   5  29]
average reward per episode = -339.4
Training on 0/200 positive/total out of 684 1-step experiences with actions distribution [ 12   2   4 182]
average reward per episode = -72.6
Training on 14/207 positive/total out of 240 1-step experiences with actions distribution [  4 119  54  30]
average reward per episode = -309.7
Training on 4/203 positive/total out of 554 1-step experiences with actions distribution [  6  59 132   6]
average reward per episode = -268.7
Training on 5/203 positive/total out of 585 1-step experiences with actions distribution [179  12   3   9]
average reward per episode = -268.7
Training on 3/202 positive/total out of 576 1-step experiences with actions distribution [  6  12  60 124]
ave

[2017-03-12 12:12:28,095] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video001200.mp4


average reward per episode = -248.0
Training on 5/203 positive/total out of 455 1-step experiences with actions distribution [  5  75   6 117]
average reward per episode = -207.5
Training on 8/206 positive/total out of 416 1-step experiences with actions distribution [ 12 180   9   5]
average reward per episode = -314.9
Training on 1/201 positive/total out of 636 1-step experiences with actions distribution [  7   6 176  12]
average reward per episode = -317.9
Training on 0/200 positive/total out of 635 1-step experiences with actions distribution [ 18   4   4 174]
average reward per episode = -215.6
Training on 4/203 positive/total out of 498 1-step experiences with actions distribution [179  11   7   6]
average reward per episode = -219.6
Training on 6/204 positive/total out of 442 1-step experiences with actions distribution [ 29  44 124   7]
average reward per episode = -152.9
Training on 11/206 positive/total out of 341 1-step experiences with actions distribution [  3  74   7 122

[2017-03-12 12:14:15,536] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video001300.mp4


average reward per episode = -298.1
Training on 3/203 positive/total out of 517 1-step experiences with actions distribution [ 76 110   9   8]
average reward per episode = -280.3
Training on 2/202 positive/total out of 589 1-step experiences with actions distribution [61  2 98 41]
average reward per episode = -359.5
Training on 0/200 positive/total out of 699 1-step experiences with actions distribution [  6   9   5 180]
average reward per episode = -177.5
Training on 10/207 positive/total out of 380 1-step experiences with actions distribution [ 20 171  12   4]
average reward per episode = -278.3
Training on 2/201 positive/total out of 585 1-step experiences with actions distribution [ 28   6 165   2]
average reward per episode = -346.9
Training on 1/201 positive/total out of 696 1-step experiences with actions distribution [188   6   1   6]
average reward per episode = -64.8
Training on 9/206 positive/total out of 250 1-step experiences with actions distribution [  0  28   4 174]
ave

[2017-03-12 12:16:09,160] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video001400.mp4


average reward per episode = -190.6
Training on 5/204 positive/total out of 382 1-step experiences with actions distribution [  6  87  11 100]
average reward per episode = -112.4
Training on 8/205 positive/total out of 306 1-step experiences with actions distribution [  6  44 149   6]
average reward per episode = -177.9
Training on 9/205 positive/total out of 387 1-step experiences with actions distribution [ 5 55 50 95]
average reward per episode = -139.0
Training on 6/205 positive/total out of 362 1-step experiences with actions distribution [177  17   4   7]
average reward per episode = -32.8
Training on 22/209 positive/total out of 195 1-step experiences with actions distribution [  4 140  15  50]
average reward per episode = -156.4
Training on 10/205 positive/total out of 352 1-step experiences with actions distribution [ 6 72 89 38]
average reward per episode = -172.0
Training on 4/204 positive/total out of 423 1-step experiences with actions distribution [  5  12 167  20]
averag

[2017-03-12 12:18:10,684] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video001500.mp4


average reward per episode = -316.9
Training on 2/201 positive/total out of 632 1-step experiences with actions distribution [  1   7 161  32]
average reward per episode = -270.3
Training on 1/201 positive/total out of 566 1-step experiences with actions distribution [  8   4   9 180]
average reward per episode = -244.7
Training on 3/203 positive/total out of 531 1-step experiences with actions distribution [181   9   5   8]
average reward per episode = -251.6
Training on 6/205 positive/total out of 465 1-step experiences with actions distribution [ 16 155  23  11]
average reward per episode = -196.1
Training on 4/204 positive/total out of 447 1-step experiences with actions distribution [  1  15 157  31]
average reward per episode = -358.5
Training on 0/200 positive/total out of 699 1-step experiences with actions distribution [  9   5   4 182]
average reward per episode = -244.6
Training on 8/203 positive/total out of 457 1-step experiences with actions distribution [137  57   5   4]

[2017-03-12 12:20:33,834] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video001600.mp4


average reward per episode = -329.3
Training on 2/202 positive/total out of 561 1-step experiences with actions distribution [143  52   6   1]
average reward per episode = -190.4
Training on 4/204 positive/total out of 385 1-step experiences with actions distribution [43 89 51 21]
average reward per episode = -238.6
Training on 4/203 positive/total out of 519 1-step experiences with actions distribution [  8  14 148  33]
average reward per episode = -271.8
Training on 3/202 positive/total out of 569 1-step experiences with actions distribution [  6   6  49 141]
average reward per episode = -307.8
Training on 2/202 positive/total out of 595 1-step experiences with actions distribution [163  23   5  11]
average reward per episode = -218.3
Training on 8/205 positive/total out of 427 1-step experiences with actions distribution [  5 164   9  27]
average reward per episode = -304.3
Training on 3/202 positive/total out of 609 1-step experiences with actions distribution [  4  19 159  20]
ave

[2017-03-12 12:23:06,379] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video001700.mp4


average reward per episode = -278.3
Training on 3/202 positive/total out of 567 1-step experiences with actions distribution [ 11  12 158  21]
average reward per episode = -227.7
Training on 3/202 positive/total out of 497 1-step experiences with actions distribution [ 27  15  19 141]
average reward per episode = -219.0
Training on 6/205 positive/total out of 442 1-step experiences with actions distribution [131  61   9   4]
average reward per episode = -131.1
Training on 9/205 positive/total out of 306 1-step experiences with actions distribution [ 4 88 19 94]
average reward per episode = -127.8
Training on 10/205 positive/total out of 329 1-step experiences with actions distribution [  2  42 113  48]
average reward per episode = -252.1
Training on 4/204 positive/total out of 558 1-step experiences with actions distribution [177  15  10   2]
average reward per episode = -229.0
Training on 4/204 positive/total out of 440 1-step experiences with actions distribution [  2  46  24 132]
av

[2017-03-12 12:25:35,959] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video001800.mp4


average reward per episode = -288.0
Training on 3/203 positive/total out of 511 1-step experiences with actions distribution [  4  70   3 126]
average reward per episode = -274.0
Training on 7/205 positive/total out of 522 1-step experiences with actions distribution [  4 147  25  29]
average reward per episode = -358.5
Training on 0/200 positive/total out of 699 1-step experiences with actions distribution [ 21   7 168   4]
average reward per episode = -320.3
Training on 1/201 positive/total out of 642 1-step experiences with actions distribution [107   6   4  84]
average reward per episode = -194.0
Training on 5/204 positive/total out of 454 1-step experiences with actions distribution [ 18   8   6 172]
average reward per episode = -133.3
Training on 12/206 positive/total out of 312 1-step experiences with actions distribution [95 73 28 10]
average reward per episode = -106.5
Training on 10/207 positive/total out of 281 1-step experiences with actions distribution [ 25 124  43  15]
a

[2017-03-12 12:27:38,168] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video001900.mp4


average reward per episode = -94.2
Training on 10/207 positive/total out of 300 1-step experiences with actions distribution [ 8 42 74 83]
average reward per episode = -179.4
Training on 9/206 positive/total out of 406 1-step experiences with actions distribution [ 7 73 48 78]
average reward per episode = -235.0
Training on 3/203 positive/total out of 442 1-step experiences with actions distribution [ 17  56 129   1]
average reward per episode = -307.3
Training on 1/201 positive/total out of 625 1-step experiences with actions distribution [188   6   3   4]
average reward per episode = -118.2
Training on 9/206 positive/total out of 300 1-step experiences with actions distribution [ 7 56 44 99]
average reward per episode = -185.4
Training on 10/205 positive/total out of 372 1-step experiences with actions distribution [  5 110  58  32]
average reward per episode = -339.9
Training on 2/201 positive/total out of 688 1-step experiences with actions distribution [  7   4 186   4]
average re

[2017-03-12 12:29:38,854] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video002000.mp4


average reward per episode = -40.7
Training on 8/207 positive/total out of 207 1-step experiences with actions distribution [61 43 58 45]
average reward per episode = -62.6
Training on 12/208 positive/total out of 298 1-step experiences with actions distribution [  4  18  72 114]
average reward per episode = -255.4
Training on 7/205 positive/total out of 491 1-step experiences with actions distribution [130  62   9   4]
average reward per episode = -90.7
Training on 13/207 positive/total out of 256 1-step experiences with actions distribution [ 10 125  61  11]
average reward per episode = -145.3
Training on 7/205 positive/total out of 374 1-step experiences with actions distribution [  7  33 106  59]
average reward per episode = -167.4
Training on 9/205 positive/total out of 385 1-step experiences with actions distribution [ 15  40  29 121]
average reward per episode = -278.7
Training on 5/203 positive/total out of 492 1-step experiences with actions distribution [106  87   4   6]
aver

[2017-03-12 12:31:33,096] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video002100.mp4


average reward per episode = -33.8
Training on 15/208 positive/total out of 216 1-step experiences with actions distribution [ 6 54 51 97]
average reward per episode = -106.3
Training on 10/206 positive/total out of 317 1-step experiences with actions distribution [  3  37 100  66]
average reward per episode = -179.4
Training on 7/205 positive/total out of 427 1-step experiences with actions distribution [168  22   3  12]
average reward per episode = -121.9
Training on 10/207 positive/total out of 294 1-step experiences with actions distribution [  3 173   3  28]
average reward per episode = -79.0
Training on 14/208 positive/total out of 290 1-step experiences with actions distribution [ 8 41 62 97]
average reward per episode = -335.4
Training on 2/201 positive/total out of 661 1-step experiences with actions distribution [171  14   9   7]
average reward per episode = -192.6
Training on 9/206 positive/total out of 421 1-step experiences with actions distribution [ 4 67 68 67]
average r

[2017-03-12 12:33:26,113] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video002200.mp4


average reward per episode = -278.3
Training on 1/201 positive/total out of 567 1-step experiences with actions distribution [  2  13 129  57]
average reward per episode = -185.6
Training on 6/204 positive/total out of 433 1-step experiences with actions distribution [  6  11  44 143]
average reward per episode = -137.2
Training on 5/205 positive/total out of 317 1-step experiences with actions distribution [95 97  6  7]
average reward per episode = -217.8
Training on 4/203 positive/total out of 453 1-step experiences with actions distribution [129  45  24   5]
average reward per episode = -14.2
Training on 21/209 positive/total out of 197 1-step experiences with actions distribution [ 1 58 83 67]
average reward per episode = -98.6
Training on 8/206 positive/total out of 269 1-step experiences with actions distribution [  7 120  24  55]
average reward per episode = -169.0
Training on 9/205 positive/total out of 371 1-step experiences with actions distribution [ 3 37 93 72]
average rewa

[2017-03-12 12:35:21,069] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video002300.mp4


average reward per episode = -44.3
Training on 16/208 positive/total out of 212 1-step experiences with actions distribution [ 8 56 84 60]
average reward per episode = -439.5
Training on 0/200 positive/total out of 699 1-step experiences with actions distribution [115  71   6   8]
average reward per episode = -189.9
Training on 12/205 positive/total out of 448 1-step experiences with actions distribution [ 7 22 92 84]
average reward per episode = -197.0
Training on 5/204 positive/total out of 438 1-step experiences with actions distribution [  9  20 106  69]
average reward per episode = -161.4
Training on 12/206 positive/total out of 360 1-step experiences with actions distribution [ 46 108  44   8]
average reward per episode = -209.1
Training on 6/204 positive/total out of 442 1-step experiences with actions distribution [85 33 53 33]
average reward per episode = -224.8
Training on 4/203 positive/total out of 447 1-step experiences with actions distribution [ 6 66 77 54]
average rewar

[2017-03-12 12:37:19,131] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video002400.mp4


average reward per episode = -163.1
Training on 5/204 positive/total out of 376 1-step experiences with actions distribution [ 41  36 105  22]
average reward per episode = -19.2
Training on 14/208 positive/total out of 193 1-step experiences with actions distribution [56 55 62 35]
average reward per episode = -146.0
Training on 7/206 positive/total out of 344 1-step experiences with actions distribution [  4  83 109  10]
average reward per episode = -234.7
Training on 4/203 positive/total out of 453 1-step experiences with actions distribution [ 5 62 54 82]
average reward per episode = -209.2
Training on 6/204 positive/total out of 445 1-step experiences with actions distribution [92 44 48 20]
average reward per episode = -266.2
Training on 4/202 positive/total out of 506 1-step experiences with actions distribution [  1  61 138   2]
average reward per episode = 28.8
Training on 19/208 positive/total out of 115 1-step experiences with actions distribution [ 10  50   5 143]
average rewa

[2017-03-12 12:39:15,076] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video002500.mp4


average reward per episode = -156.2
Training on 9/206 positive/total out of 354 1-step experiences with actions distribution [ 17 157  24   8]
average reward per episode = -106.0
Training on 6/205 positive/total out of 319 1-step experiences with actions distribution [167  13  25]
average reward per episode = -217.6
Training on 5/203 positive/total out of 427 1-step experiences with actions distribution [  5  51 108  39]
average reward per episode = -80.1
Training on 10/206 positive/total out of 256 1-step experiences with actions distribution [13 52 83 58]
average reward per episode = -163.1
Training on 10/206 positive/total out of 389 1-step experiences with actions distribution [95 67 28 16]
average reward per episode = -197.3
Training on 8/204 positive/total out of 448 1-step experiences with actions distribution [  2  21 152  29]
average reward per episode = -144.1
Training on 9/205 positive/total out of 339 1-step experiences with actions distribution [ 68 117  12   8]
average re

[2017-03-12 12:41:09,720] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video002600.mp4


average reward per episode = -228.4
Training on 6/204 positive/total out of 450 1-step experiences with actions distribution [ 30 136  10  28]
average reward per episode = -256.4
Training on 4/203 positive/total out of 558 1-step experiences with actions distribution [163  10   6  24]
average reward per episode = -138.1
Training on 8/206 positive/total out of 354 1-step experiences with actions distribution [ 5 62 67 72]
average reward per episode = -67.5
Training on 11/206 positive/total out of 238 1-step experiences with actions distribution [  9  43  19 135]
average reward per episode = -12.3
Training on 13/207 positive/total out of 171 1-step experiences with actions distribution [ 7 47 54 99]
average reward per episode = -48.7
Training on 9/207 positive/total out of 224 1-step experiences with actions distribution [57 67 46 37]
average reward per episode = -81.6
Training on 8/206 positive/total out of 265 1-step experiences with actions distribution [41 73 11 81]
average reward pe

[2017-03-12 12:43:04,400] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video002700.mp4


average reward per episode = -135.1
Training on 10/206 positive/total out of 314 1-step experiences with actions distribution [  5 148  37  16]
average reward per episode = -206.9
Training on 5/205 positive/total out of 414 1-step experiences with actions distribution [14 84 68 39]
average reward per episode = -211.6
Training on 8/205 positive/total out of 412 1-step experiences with actions distribution [ 59 114  27   5]
average reward per episode = -288.6
Training on 7/205 positive/total out of 512 1-step experiences with actions distribution [  7 170  25   3]
average reward per episode = -76.3
Training on 13/207 positive/total out of 284 1-step experiences with actions distribution [136  27  31  13]
average reward per episode = -83.0
Training on 9/207 positive/total out of 287 1-step experiences with actions distribution [  7  33 104  63]
average reward per episode = 23.6
Training on 21/209 positive/total out of 142 1-step experiences with actions distribution [14 51 70 74]
average 

[2017-03-12 12:44:59,546] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video002800.mp4


average reward per episode = 15.4
Training on 15/208 positive/total out of 131 1-step experiences with actions distribution [10 82 76 40]
average reward per episode = -146.8
Training on 8/207 positive/total out of 413 1-step experiences with actions distribution [63 44 56 44]
average reward per episode = -35.4
Training on 10/207 positive/total out of 203 1-step experiences with actions distribution [30 61 63 53]
average reward per episode = -102.8
Training on 12/207 positive/total out of 290 1-step experiences with actions distribution [31 81 53 42]
average reward per episode = -86.4
Training on 13/206 positive/total out of 291 1-step experiences with actions distribution [117  53  30   6]
average reward per episode = -118.5
Training on 10/205 positive/total out of 334 1-step experiences with actions distribution [116  33  41  15]
average reward per episode = -217.4
Training on 7/203 positive/total out of 471 1-step experiences with actions distribution [ 21  28 112  42]
average reward

[2017-03-12 12:46:52,802] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video002900.mp4


average reward per episode = -137.8
Training on 9/206 positive/total out of 373 1-step experiences with actions distribution [145  28  25   8]
average reward per episode = -195.2
Training on 5/204 positive/total out of 445 1-step experiences with actions distribution [  5  21 173   5]
average reward per episode = -122.4
Training on 9/206 positive/total out of 346 1-step experiences with actions distribution [138  28  11  29]
average reward per episode = -95.2
Training on 16/208 positive/total out of 300 1-step experiences with actions distribution [ 15 123  28  42]
average reward per episode = -7.9
Training on 16/208 positive/total out of 169 1-step experiences with actions distribution [  5  33  20 150]
average reward per episode = -9.4
Training on 15/208 positive/total out of 190 1-step experiences with actions distribution [120  33  23  32]
average reward per episode = -102.7
Training on 10/205 positive/total out of 307 1-step experiences with actions distribution [  4  25  58 118]


[2017-03-12 12:48:47,154] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video003000.mp4


average reward per episode = -147.6
Training on 9/205 positive/total out of 382 1-step experiences with actions distribution [50 25 79 51]
average reward per episode = -117.7
Training on 9/205 positive/total out of 331 1-step experiences with actions distribution [49 35 77 44]
average reward per episode = 46.5
Training on 27/209 positive/total out of 96 1-step experiences with actions distribution [41 97 31 40]
average reward per episode = -15.8
Training on 16/207 positive/total out of 185 1-step experiences with actions distribution [ 14  28  65 100]
average reward per episode = 11.6
Training on 19/208 positive/total out of 136 1-step experiences with actions distribution [35 75 51 47]
average reward per episode = -141.9
Training on 10/206 positive/total out of 371 1-step experiences with actions distribution [138  45  14   9]
average reward per episode = -28.5
Training on 19/207 positive/total out of 198 1-step experiences with actions distribution [  6  51  47 103]
average reward pe

[2017-03-12 12:50:36,676] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video003100.mp4


average reward per episode = -126.5
Training on 10/205 positive/total out of 341 1-step experiences with actions distribution [37 35 47 86]
average reward per episode = -182.4
Training on 8/205 positive/total out of 425 1-step experiences with actions distribution [88 49 41 27]
average reward per episode = -26.4
Training on 16/207 positive/total out of 197 1-step experiences with actions distribution [15 48 46 98]
average reward per episode = -143.8
Training on 7/204 positive/total out of 372 1-step experiences with actions distribution [  3  17  36 148]
average reward per episode = -5.9
Training on 20/208 positive/total out of 165 1-step experiences with actions distribution [  8 117  32  51]
average reward per episode = 21.0
Training on 30/209 positive/total out of 129 1-step experiences with actions distribution [ 32 146  18  13]
average reward per episode = 26.8
Training on 19/209 positive/total out of 133 1-step experiences with actions distribution [37 63 46 63]
average reward pe

[2017-03-12 12:52:31,091] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video003200.mp4


average reward per episode = -90.5
Training on 8/207 positive/total out of 303 1-step experiences with actions distribution [13 39 78 77]
average reward per episode = -108.9
Training on 7/205 positive/total out of 324 1-step experiences with actions distribution [165  20  17   3]
average reward per episode = 1.3
Training on 17/208 positive/total out of 169 1-step experiences with actions distribution [ 9 58 89 52]
average reward per episode = -64.8
Training on 12/207 positive/total out of 261 1-step experiences with actions distribution [  8  20  79 100]
average reward per episode = -75.8
Training on 16/207 positive/total out of 269 1-step experiences with actions distribution [  7  47  24 129]
average reward per episode = -20.9
Training on 20/208 positive/total out of 186 1-step experiences with actions distribution [  1  68  36 103]
average reward per episode = 8.1
Training on 22/208 positive/total out of 145 1-step experiences with actions distribution [17 74 51 66]
average reward p

[2017-03-12 12:54:27,663] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video003300.mp4


average reward per episode = 34.6
Training on 20/209 positive/total out of 112 1-step experiences with actions distribution [  5 133  40  31]
average reward per episode = -54.2
Training on 15/207 positive/total out of 239 1-step experiences with actions distribution [ 6 69 44 88]
average reward per episode = -114.3
Training on 10/206 positive/total out of 330 1-step experiences with actions distribution [  8  31  54 113]
average reward per episode = 3.2
Training on 25/208 positive/total out of 153 1-step experiences with actions distribution [36 95 24 53]
average reward per episode = -140.8
Training on 12/205 positive/total out of 369 1-step experiences with actions distribution [114  21  25  45]
average reward per episode = 17.6
Training on 19/209 positive/total out of 140 1-step experiences with actions distribution [  4 103  52  50]
average reward per episode = -61.6
Training on 8/206 positive/total out of 254 1-step experiences with actions distribution [60 24 49 73]
average reward

[2017-03-12 12:56:19,050] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video003400.mp4


average reward per episode = -50.6
Training on 17/208 positive/total out of 264 1-step experiences with actions distribution [96 43 35 34]
average reward per episode = -20.9
Training on 21/209 positive/total out of 196 1-step experiences with actions distribution [  6 153  20  30]
average reward per episode = 44.8
Training on 27/209 positive/total out of 94 1-step experiences with actions distribution [  2 137  42  28]
average reward per episode = -105.6
Training on 8/206 positive/total out of 318 1-step experiences with actions distribution [  6  22 115  63]
average reward per episode = -45.0
Training on 20/209 positive/total out of 229 1-step experiences with actions distribution [ 12 114  53  30]
average reward per episode = -78.1
Training on 11/207 positive/total out of 277 1-step experiences with actions distribution [130  37   6  34]
average reward per episode = 7.0
Training on 16/208 positive/total out of 143 1-step experiences with actions distribution [10 95 36 67]
average rew

[2017-03-12 12:58:11,680] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video003500.mp4


average reward per episode = -111.7
Training on 6/206 positive/total out of 345 1-step experiences with actions distribution [11 18 80 97]
average reward per episode = -107.7
Training on 10/205 positive/total out of 320 1-step experiences with actions distribution [  6  34  52 113]
average reward per episode = -90.1
Training on 10/206 positive/total out of 307 1-step experiences with actions distribution [155  34   6  11]
average reward per episode = 20.9
Training on 22/209 positive/total out of 130 1-step experiences with actions distribution [  7 169  12  21]
average reward per episode = -23.6
Training on 17/207 positive/total out of 191 1-step experiences with actions distribution [ 2 55 82 68]
average reward per episode = -12.2
Training on 18/208 positive/total out of 176 1-step experiences with actions distribution [ 23 118  18  49]
average reward per episode = -132.3
Training on 7/205 positive/total out of 362 1-step experiences with actions distribution [54 33 21 97]
average rew

[2017-03-12 13:00:08,898] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video003600.mp4


average reward per episode = -30.2
Training on 13/208 positive/total out of 222 1-step experiences with actions distribution [50 46 55 57]
average reward per episode = 7.8
Training on 19/208 positive/total out of 148 1-step experiences with actions distribution [100  82  13  13]
average reward per episode = -73.3
Training on 15/206 positive/total out of 255 1-step experiences with actions distribution [ 3 63 58 82]
average reward per episode = -62.8
Training on 10/206 positive/total out of 238 1-step experiences with actions distribution [ 4 45 69 88]
average reward per episode = -65.3
Training on 16/206 positive/total out of 244 1-step experiences with actions distribution [79 40 16 71]
average reward per episode = -46.6
Training on 14/207 positive/total out of 221 1-step experiences with actions distribution [25 85  3 94]
average reward per episode = -106.6
Training on 10/206 positive/total out of 314 1-step experiences with actions distribution [112  54  24  16]
average reward per e

[2017-03-12 13:02:01,489] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video003700.mp4


average reward per episode = -7.5
Training on 16/208 positive/total out of 161 1-step experiences with actions distribution [  5 118  31  54]
average reward per episode = -169.3
Training on 6/205 positive/total out of 393 1-step experiences with actions distribution [ 6 45 58 96]
average reward per episode = 45.3
Training on 30/209 positive/total out of 95 1-step experiences with actions distribution [  3 130  29  47]
average reward per episode = 5.8
Training on 26/208 positive/total out of 144 1-step experiences with actions distribution [  4 102  65  37]
average reward per episode = 0.9
Training on 17/209 positive/total out of 157 1-step experiences with actions distribution [ 6 98 57 48]
average reward per episode = 44.7
Training on 31/209 positive/total out of 93 1-step experiences with actions distribution [  6 148  24  31]
average reward per episode = 45.3
Training on 24/209 positive/total out of 100 1-step experiences with actions distribution [ 5 87 64 53]
average reward per ep

[2017-03-12 13:03:48,950] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video003800.mp4


average reward per episode = 17.6
Training on 15/208 positive/total out of 133 1-step experiences with actions distribution [  0  65  34 109]
average reward per episode = -143.6
Training on 5/204 positive/total out of 372 1-step experiences with actions distribution [  3  13  23 165]
average reward per episode = -78.2
Training on 11/206 positive/total out of 276 1-step experiences with actions distribution [ 7 35 72 92]
average reward per episode = -79.5
Training on 12/207 positive/total out of 263 1-step experiences with actions distribution [101  58  19  29]
average reward per episode = 14.1
Training on 26/208 positive/total out of 138 1-step experiences with actions distribution [ 2 78 31 97]
average reward per episode = -98.5
Training on 8/205 positive/total out of 304 1-step experiences with actions distribution [  6  23  61 115]
average reward per episode = -3.4
Training on 16/208 positive/total out of 160 1-step experiences with actions distribution [ 5 99 44 60]
average reward 

[2017-03-12 13:05:46,818] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video003900.mp4


average reward per episode = -119.5
Training on 13/206 positive/total out of 305 1-step experiences with actions distribution [ 7 70 43 86]
average reward per episode = -58.1
Training on 13/206 positive/total out of 241 1-step experiences with actions distribution [38 36 43 89]
average reward per episode = -106.8
Training on 11/206 positive/total out of 293 1-step experiences with actions distribution [ 7 85 53 61]
average reward per episode = -66.1
Training on 10/206 positive/total out of 241 1-step experiences with actions distribution [ 4 66 46 90]
average reward per episode = 13.4
Training on 19/208 positive/total out of 131 1-step experiences with actions distribution [33 93 31 51]
average reward per episode = -63.1
Training on 11/207 positive/total out of 238 1-step experiences with actions distribution [95 38 14 60]
average reward per episode = -66.4
Training on 12/207 positive/total out of 258 1-step experiences with actions distribution [  6  44  34 123]
average reward per epi

[2017-03-12 13:07:41,927] Starting new video recorder writing to /Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning/openaigym.video.0.36214.video004000.mp4
[2017-03-12 13:08:02,906] Finished writing results. You can upload them to the scoreboard via gym.upload('/Users/gui/Dev/rl-study/tmp/DoomBasic_old_qlearning')
[2017-03-12 13:08:02,913] [DoomBasic-v0] Uploading 4100 episodes of training data


average reward per episode = -17.81


[2017-03-12 13:08:13,533] [DoomBasic-v0] Uploading videos of 41 training episodes (10178788 bytes)
[2017-03-12 13:09:45,458] [DoomBasic-v0] Creating evaluation object from tmp/DoomBasic_old_qlearning with learning curve and training video
[2017-03-12 13:09:45,825] 
****************************************************
You successfully uploaded your evaluation on DoomBasic-v0 to
OpenAI Gym! You can find it at:

    https://gym.openai.com/evaluations/eval_hfhBF7xdSf2tRV1Jsv0MXg

****************************************************
