# Deep Q-Learning

In [1]:
%matplotlib inline
import gym
from gym.wrappers import Monitor
import itertools
import numpy as np
import os
import random
import sys
import tensorflow as tf
from collections import deque, namedtuple
import matplotlib.pyplot as plt
from utils import EpisodeStats

In [2]:
env = gym.envs.make('Breakout-v0')

[2017-11-04 16:42:30,135] Making new env: Breakout-v0


### Actions

In [3]:
print(env.env.get_action_meanings())

['NOOP', 'FIRE', 'RIGHT', 'LEFT']


In [4]:
VALID_ACTIONS = [0,1,2,3]

### Preprocessing Environment State

raw Atari 2600 frames: 210x160 pixel images with a 128-colour palette


**basic preprocessing step:**
* 4 last screen images
* resize to 84x84
* convert to grayscale (256 gray levels)

$256^{84x84x4} \approx 10^{67970}$ possible game states

In [5]:
class StateProcessor():
    def __init__(self):
        # build TF graph
        with tf.variable_scope('state_processor'):
            self.input_state = tf.placeholder(shape=[210, 160, 3], dtype=tf.uint8)
            self.output = tf.image.rgb_to_grayscale(self.input_state)
            self.output = tf.image.crop_to_bounding_box(
                self.output, 34, 0, 160, 160)
            self.output = tf.image.resize_images(
                self.output, [84, 84], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
            self.output = tf.squeeze(self.output)
    
    def process(self, sess, state):
        """
        Args:
            sess: TF session object
            state: [210, 160, 3] Atari RGB State
        Returns:
            processed [84,84,1] state
        """
        return sess.run(self.output, { self.input_state: state})

## Q-Network

**Network Architecture:**

|Layer|Input   |Filter size|Stride|Num filters|Activation|Output  |
|-----|--------|-----------|------|-----------|----------|--------|
|conv1|84x84x4 |8x8        |4     |32         |ReLU      |20x20x32|
|conv2|20x20x32|4x4        |3     |64         |ReLU      |9x9x64  |
|conv3|9x9x64  |3x3        |1     |64         |ReLU      |7x7x64  |
|fc4  |7x7x64  |           |      |512        |ReLU      |512     |
|fc5  |512     |           |      |18         |Linear    |18*     |

\* number of valid actions

**Optimizer RMSProp:**
* lr = 0.00025
* decay = 0.99
* no momentum
* epsilon = 1e-6

### Estimator

* Estimator is used for both Q-Network and the Target Network

In [6]:
class Estimator():
    def __init__(self, scope='estimator', summaries_dir=None):
        self.scope = scope
        # write Tensorboard summaries to disk
        self.summary_writer = None
        with tf.variable_scope(scope):
            self._build_model()
            if summaries_dir:
                summary_dir = os.path.join(summaries_dir, 'summaries_{}'.format(scope))
                if not os.path.exists(summary_dir):
                    os.makedirs(summary_dir)
                self.summary_writer = tf.summary.FileWriter(summary_dir)
    
    def _build_model(self):
        # input are 4 RGB frames w/ shape 160x160
        self.X = tf.placeholder(shape=[None, 84, 84, 4], dtype=tf.uint8, name='X')
        # TD-target value
        self.y = tf.placeholder(shape=[None], dtype=tf.float32, name='y')
        # int id for selected actions
        self.actions = tf.placeholder(shape=[None], dtype=tf.int32, name='actions')
        X = tf.to_float(self.X)/255.0
        batch_size = tf.shape(self.X)[0]
        # Network
        conv1 = tf.contrib.layers.conv2d(X, 32, 8, 4, activation_fn=tf.nn.relu)
        conv2 = tf.contrib.layers.conv2d(conv1, 64, 4, 3, activation_fn=tf.nn.relu)
        conv3 = tf.contrib.layers.conv2d(conv2, 64, 3, 1, activation_fn=tf.nn.relu)
        fc4 = tf.contrib.layers.fully_connected(
            tf.contrib.layers.flatten(conv3), 512)
        self.preds = tf.contrib.layers.fully_connected(fc4, len(VALID_ACTIONS))
        # predictions for chosen actions only
        gather_indices = tf.range(batch_size)*tf.shape(self.preds)[1]+self.actions
        self.action_preds = tf.gather(tf.reshape(self.preds, [-1]), gather_indices)
        # calculate the loss
        self.losses = tf.squared_difference(self.y, self.action_preds)
        self.loss = tf.reduce_mean(self.losses)
        # Optimizer
        self.optimizer = tf.train.RMSPropOptimizer(0.00025, decay=0.99, epsilon=1e-6)
        self.train_op = self.optimizer.minimize(
            self.loss, global_step=tf.contrib.framework.get_global_step())
        # summaries for Tensorboard
        self.summaries = tf.summary.merge([
            tf.summary.scalar('loss', self.loss),
            tf.summary.histogram('loss_hist', self.losses),
            tf.summary.histogram('q_values_hist', self.preds),
            tf.summary.scalar('max_q_value', tf.reduce_max(self.preds))
        ])
    
    def predict(self, sess, s):
        """
        Args:
            sess: TF session object
            s: state input of shape [batch_size, 4, 160, 160, 3]
        
        Returns:
            tensor of shape [batch_size, NUM_VALID_ACTIONS] containing estimated action values
        """
        return sess.run(self.preds, {self.X: s})
    
    def update(self, sess, s, a, y):
        """
        Updates estimator towards given targets (y)
        Args:
            sess: TF session object
            s: state input of shape [batch_size, 4, 160, 160, 3]
            a: chosen action of shape [batch_size]
            y: targets of shape [batch_size]
        Returns:
            calculated loss on the batch
        """
        feed_dict = {self.X: s, self.y: y, self.actions: a}
        summaries, global_step, _, loss = sess.run([
            self.summaries,
            tf.contrib.framework.get_global_step(),
            self.train_op,
            self.loss
        ], feed_dict)
        if self.summary_writer:
            self.summary_writer.add_summary(summaries, global_step)
        return loss

#### Test

In [7]:
tf.reset_default_graph()
global_step = tf.Variable(0, name='global_step', trainable=False)

e = Estimator(scope='test')
sp = StateProcessor()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # example observation batch
    observation = env.reset()
    
    observation_p = sp.process(sess, observation)
    observation = np.stack([observation_p]*4, axis=2)
    observations = np.array([observation]*2)
    # test prediction
    print(e.predict(sess, observations))
    # test training step
    y = np.array([10.0, 10.0])
    a = np.array([1, 3])
    print(e.update(sess, observations, a, y))
    

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


[[ 0.          0.04459491  0.0059645   0.00791901]
 [ 0.          0.04459491  0.0059645   0.00791901]]
99.4759


In [8]:
def copy_model_parameters(sess, estimator1, estimator2):
    """
    Copies model parameters from one estimator to another.
    Args:
        sess: TF session instance
        estimator1: Estimator to copy parameters from
        estimator2: Estimator to copy parameters to
    """
    e1_params = [t for t in tf.trainable_variables() if t.name.startswith(estimator1.scope)]
    e1_params = sorted(e1_params, key=lambda v: v.name)
    e2_params = [t for t in tf.trainable_variables() if t.name.startswith(estimator2.scope)]
    e2_params = sorted(e2_params, key=lambda v: v.name)
    
    update_ops = []
    for e1_v, e2_v in zip(e1_params, e2_params):
        op = e2_v.assign(e1_v)
        update_ops.append(op)
    
    sess.run(update_ops)

### $\epsilon$-greedy policy

In [9]:
def epsilon_greedy(estimator, nA):
    """
    Creates an epsilon-greedy policy based on a given Q-function approximator and epsilon.
    Args:
        estimator: An estimator that returns q values for a given state
        nA: Number of actions in the environment.
    Returns:
        A function that takes the (sess, observation, epsilon) as an argument and returns
        the probabilities for each action in the form of a numpy array of length nA.

    """
    def policy_fn(sess, observation, epsilon):
        A = np.ones(nA, dtype=float) * epsilon / nA
        q_values = estimator.predict(sess, np.expand_dims(observation, 0))[0]
        best_action = np.argmax(q_values)
        A[best_action] += (1.0 - epsilon)
        return A
    return policy_fn

### Q-Learning

In [10]:
def deep_q_learning(sess,
                    env,
                    q_estimator,
                    target_estimator,
                    state_processor,
                    n_episodes,
                    experiments_dir,
                    replay_mem_size=500000,
                    replay_mem_init_size=10000,
                    estimator_update_steps=10000,
                    discount_factor=0.99,
                    epsilon_start=1.0,
                    epsilon_end=0.1,
                    epsilon_decay_steps=500000,
                    batch_size=32,
                    record_video=50):
    """
    Q-Learning algorithm for TD control using Function Approximation
    Finds optimal greedy policy while following epsilon-greedy policy
    Args:
        sess: TF session object
        env: OpenAI env
        q_estimator: Estimator object used for the q values
        target_estimator: Estimator object used for targets
        state_processor: StateProcessor object
        n_episodes: Number of episodes to run
        experiments_dir: Dir to save TF summaries in
        replay_mem_size: Size of replay memory
        replay_mem_init_size: Number of random experiences to sample
                            when initializing the replay memory
        estimator_update_steps: Copy params from q_estimator to target_estimator every N steps
        discount_factor: Lambda time discount factor
        epsilon_start: Chance to sample a random action when taking action (decayed over time)
        epsilon_end: Final minimum value of epsilon after decay
        batch_size: Size of batches to sample from replay memory
        record_video: Record a video every N steps
    Returns:
        EpisodeStats object
    """
    Transition = namedtuple('Transition', ['state', 'action', 'reward', 'next_state', 'done'])
    
    # the replay memory
    replay_memory = []
    
    # keep track of useful stats
    stats = EpisodeStats(
        episode_lengths=np.zeros(n_episodes),
        episode_rewards=np.zeros(n_episodes))
    
    # create dirs for checkpoints and summaries
    checkpoint_dir = os.path.join(experiments_dir, 'checkpoints')
    checkpoint_path = os.path.join(checkpoint_dir, 'model')
    monitor_path = os.path.join(experiments_dir, 'monitor')
    
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    if not os.path.exists(monitor_path):
        os.makedirs(monitor_path)
    
    saver = tf.train.Saver()
    
    # load previous checkpoint if it exists
    latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_checkpoint:
        print('Loading model checkpoint {}...\n'.format(latest_checkpoint))
        saver.restore(sess, latest_checkpoint)
    
    # get current time step
    total_t = sess.run(tf.contrib.framework.get_global_step())
    
    # epsilon decay schedule
    epsilons = np.linspace(epsilon_start, epsilon_end, epsilon_decay_steps)
    
    policy = epsilon_greedy(q_estimator, len(VALID_ACTIONS))
    
    # populate replay memory with initial experience
    print('Populating replay memory...')
    state = env.reset()
    state = state_processor.process(sess, state)
    state = np.stack([state]*4, axis=2)
    for i in range(replay_mem_init_size):
        a_probs = policy(sess, state, epsilons[min(total_t, epsilon_decay_steps-1)])
        action = np.random.choice(np.arange(len(a_probs)), p=a_probs)
        next_state, reward, done, _ = env.step(VALID_ACTIONS[action])
        next_state = state_processor.process(sess, next_state)
        next_state = np.append(state[:,:,1:], np.expand_dims(next_state, 2), axis=2)
        replay_memory.append(Transition(state, action, reward, next_state, done))
        if done:
            state = env.reset()
            state = state_processor.process(sess, state)
            state = np.stack([state]*4, axis=2)
        else:
            state = next_state
    
    # record videos
    env = Monitor(env, directory=monitor_path,
                  resume=True, video_callable=lambda count: count%record_video == 0)
    
    for i_episode in range(n_episodes):
        # save current checkpoint
        saver.save(tf.get_default_session(), checkpoint_path)
        
        # reset the environment
        state = env.reset()
        state = state_processor.process(sess, state)
        state = np.stack([state]*4, axis=2)
        loss = None
        
        for t in itertools.count():
            # epsilon for this time step
            epsilon = epsilons[min(total_t, epsilon_decay_steps-1)]
            
            # add epsilon to Tensorboard
            episode_summary = tf.Summary()
            episode_summary.value.add(simple_value=epsilon, tag='epsilon')
            q_estimator.summary_writer.add_summary(episode_summary, total_t)
            
            # update target estimator
            if total_t%estimator_update_steps == 0:
                copy_model_parameters(sess, q_estimator, target_estimator)
                print('\n Copied model parameters to target network')
            
            # print step information
            print('\rStep {} ({}) @ Episode {}/{}, loss: {}'.format(
                t, total_t, i_episode+1, n_episodes, loss), end='')
            sys.stdout.flush()
            
            # take step in environment
            a_probs = policy(sess, state, epsilon)
            action = np.random.choice(np.arange(len(a_probs)), p=a_probs)
            next_state, reward, done, _ = env.step(VALID_ACTIONS[action])
            next_state = state_processor.process(sess, next_state)
            next_state = np.append(state[:,:,1:], np.expand_dims(next_state, 2), axis=2)
            
            # if replay memory is full, pop first element
            if len(replay_memory) == replay_mem_size:
                replay_memory.pop(0)
            
            # save transition to replay memory
            replay_memory.append(Transition(state, action, reward, next_state, done))
            
            # update stats
            stats.episode_lengths[i_episode] = t
            stats.episode_rewards[i_episode] += reward
            
            # sample minibatch from the replay memory
            samples = random.sample(replay_memory, batch_size)
            s_batch, a_batch, r_batch, next_s_batch, done_batch = map(np.array, zip(*samples))
            # calc q values and targets (Double DQN)
            q_values_next = q_estimator.predict(sess, next_s_batch)
            best_actions = np.argmax(q_values_next, axis=1)
            q_values_next_target = target_estimator.predict(sess, next_s_batch)
            targets_batch = r_batch+np.invert(done_batch).astype(np.float32)*\
                discount_factor*q_values_next_target[np.arange(batch_size), best_actions]
            # perform GD update
            s_batch = np.array(s_batch)
            loss = q_estimator.update(sess, s_batch, a_batch, targets_batch)
            
            if done:
                break
            
            state = next_state
            total_t += 1
            
        # add summaries to Tensorboard
        episode_summary = tf.Summary()
        episode_summary.value.add(
            simple_value=stats.episode_rewards[i_episode],
            node_name='episode_reward',
            tag='episode_reward')
        episode_summary.value.add(
            simple_value=stats.episode_lengths[i_episode],
            node_name='episode_length',
            tag='episode_length')
        q_estimator.summary_writer.add_summary(episode_summary, total_t)
        q_estimator.summary_writer.flush()

        yield total_t, EpisodeStats(
            episode_lengths=stats.episode_lengths[:i_episode+1],
            episode_rewards=stats.episode_rewards[:i_episode+1])
        
    env.monitor.close()
    return stats

In [None]:
tf.reset_default_graph()
experiments_dir = os.path.abspath('./experiments/{}'.format(env.spec.id))
# create global step variable
global_step = tf.Variable(0, name='global_step', trainable=False)
# estimators
q_estimator = Estimator(scope='q', summaries_dir=experiments_dir)
target_estimator = Estimator(scope='target_q')
# state processor
state_processor = StateProcessor()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for t, stats in deep_q_learning(sess,
                                    env,
                                    q_estimator=q_estimator,
                                    target_estimator=target_estimator,
                                    state_processor=state_processor,
                                    n_episodes=10000,
                                    experiments_dir=experiments_dir,
                                    replay_mem_size=500000,
                                    replay_mem_init_size=50000,
                                    estimator_update_steps=10000,
                                    discount_factor=0.99,
                                    epsilon_start=1.0,
                                    epsilon_end=0.1,
                                    epsilon_decay_steps=500000,
                                    batch_size=32):
        
        print('\nEpisode Reward: {}'.format(stats.episode_rewards[-1]))

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Populating replay memory...


[2017-11-04 16:47:00,222] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video000000.mp4



 Copied model parameters to target network
Step 176 (176) @ Episode 1/10000, loss: 0.0013382609467953444
Episode Reward: 0.0
Step 187 (363) @ Episode 2/10000, loss: 0.0006927750655449927
Episode Reward: 0.0
Step 164 (527) @ Episode 3/10000, loss: 0.0008092967327684164
Episode Reward: 0.0
Step 181 (708) @ Episode 4/10000, loss: 0.00071764533640816814
Episode Reward: 0.0
Step 172 (880) @ Episode 5/10000, loss: 0.00031979102641344076
Episode Reward: 0.0
Step 236 (1116) @ Episode 6/10000, loss: 0.00078760675387457017
Episode Reward: 1.0
Step 225 (1341) @ Episode 7/10000, loss: 0.00231808237731456766
Episode Reward: 1.0
Step 251 (1592) @ Episode 8/10000, loss: 6.0259255405981094e-05
Episode Reward: 1.0
Step 193 (1785) @ Episode 9/10000, loss: 0.00027064629830420017
Episode Reward: 0.0
Step 232 (2017) @ Episode 10/10000, loss: 0.03076225891709327737
Episode Reward: 1.0
Step 279 (2296) @ Episode 11/10000, loss: 0.00016380517627112567
Episode Reward: 2.0
Step 332 (2628) @ Episode 12/10000, lo

[2017-11-04 16:48:49,194] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video000050.mp4


Step 249 (11903) @ Episode 51/10000, loss: 0.00013567335554398596
Episode Reward: 1.0
Step 350 (12253) @ Episode 52/10000, loss: 6.96593924658373e-0556
Episode Reward: 3.0
Step 307 (12560) @ Episode 53/10000, loss: 0.03114337474107742399
Episode Reward: 2.0
Step 239 (12799) @ Episode 54/10000, loss: 6.866150943096727e-053
Episode Reward: 1.0
Step 166 (12965) @ Episode 55/10000, loss: 0.03075840696692466755
Episode Reward: 0.0
Step 179 (13144) @ Episode 56/10000, loss: 3.109467070316896e-055
Episode Reward: 0.0
Step 167 (13311) @ Episode 57/10000, loss: 7.009273394942284e-055
Episode Reward: 0.0
Step 391 (13702) @ Episode 58/10000, loss: 1.961134148587007e-055
Episode Reward: 4.0
Step 179 (13881) @ Episode 59/10000, loss: 5.750271157012321e-055
Episode Reward: 0.0
Step 189 (14070) @ Episode 60/10000, loss: 9.60688921622932e-0555
Episode Reward: 0.0
Step 267 (14337) @ Episode 61/10000, loss: 1.5190262274700217e-05
Episode Reward: 2.0
Step 270 (14607) @ Episode 62/10000, loss: 0.000132677

[2017-11-04 16:50:37,849] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video000100.mp4


Step 226 (23709) @ Episode 101/10000, loss: 2.5970868591684848e-05
Episode Reward: 1.0
Step 279 (23988) @ Episode 102/10000, loss: 4.6133493015076965e-05
Episode Reward: 2.0
Step 175 (24163) @ Episode 103/10000, loss: 0.00015031133079901338
Episode Reward: 0.0
Step 253 (24416) @ Episode 104/10000, loss: 1.0927294169960078e-05
Episode Reward: 2.0
Step 167 (24583) @ Episode 105/10000, loss: 2.9229169740574434e-05
Episode Reward: 0.0
Step 232 (24815) @ Episode 106/10000, loss: 4.669652844313532e-055
Episode Reward: 1.0
Step 181 (24996) @ Episode 107/10000, loss: 3.370702688698657e-055
Episode Reward: 0.0
Step 178 (25174) @ Episode 108/10000, loss: 2.5639035811764188e-05
Episode Reward: 0.0
Step 230 (25404) @ Episode 109/10000, loss: 0.03082400187849998505
Episode Reward: 1.0
Step 212 (25616) @ Episode 110/10000, loss: 0.03090857714414596605
Episode Reward: 1.0
Step 214 (25830) @ Episode 111/10000, loss: 0.02998815104365348855
Episode Reward: 1.0
Step 333 (26163) @ Episode 112/10000, loss:

[2017-11-04 16:52:30,872] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video000150.mp4


Step 243 (35890) @ Episode 151/10000, loss: 0.00016578909708186984
Episode Reward: 1.0
Step 305 (36195) @ Episode 152/10000, loss: 0.00011105967860203236
Episode Reward: 2.0
Step 307 (36502) @ Episode 153/10000, loss: 1.701402106846217e-055
Episode Reward: 2.0
Step 225 (36727) @ Episode 154/10000, loss: 8.883620466804132e-065
Episode Reward: 1.0
Step 276 (37003) @ Episode 155/10000, loss: 7.011285924818367e-055
Episode Reward: 2.0
Step 279 (37282) @ Episode 156/10000, loss: 9.618548210710287e-057
Episode Reward: 2.0
Step 232 (37514) @ Episode 157/10000, loss: 0.00011187343625351787
Episode Reward: 1.0
Step 164 (37678) @ Episode 158/10000, loss: 6.19989586994052e-0552
Episode Reward: 0.0
Step 280 (37958) @ Episode 159/10000, loss: 4.726889528683387e-055
Episode Reward: 2.0
Step 226 (38184) @ Episode 160/10000, loss: 7.698403351241723e-055
Episode Reward: 1.0
Step 165 (38349) @ Episode 161/10000, loss: 3.3534844988025725e-05
Episode Reward: 0.0
Step 169 (38518) @ Episode 162/10000, loss:

[2017-11-04 16:54:30,718] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video000200.mp4


Step 322 (49085) @ Episode 201/10000, loss: 0.00013777610729448497
Episode Reward: 3.0
Step 251 (49336) @ Episode 202/10000, loss: 0.00030435196822509179
Episode Reward: 2.0
Step 239 (49575) @ Episode 203/10000, loss: 0.00016286681056953967
Episode Reward: 1.0
Step 282 (49857) @ Episode 204/10000, loss: 2.8821254090871662e-05
Episode Reward: 2.0
Step 142 (49999) @ Episode 205/10000, loss: 0.00320332497358322145
 Copied model parameters to target network
Step 234 (50091) @ Episode 205/10000, loss: 0.00073502992745488883
Episode Reward: 1.0
Step 192 (50283) @ Episode 206/10000, loss: 0.00145963439717888833
Episode Reward: 0.0
Step 344 (50627) @ Episode 207/10000, loss: 0.00235614110715687273
Episode Reward: 3.0
Step 443 (51070) @ Episode 208/10000, loss: 0.00141975376754999163
Episode Reward: 5.0
Step 180 (51250) @ Episode 209/10000, loss: 0.00018924841424450278
Episode Reward: 0.0
Step 177 (51427) @ Episode 210/10000, loss: 0.00354386423714458946
Episode Reward: 0.0
Step 180 (51607) @ E

[2017-11-04 16:56:27,679] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video000250.mp4


Step 191 (61577) @ Episode 251/10000, loss: 0.00012438654084689915
Episode Reward: 0.0
Step 235 (61812) @ Episode 252/10000, loss: 0.00012234260793775325
Episode Reward: 1.0
Step 241 (62053) @ Episode 253/10000, loss: 0.00011586687469389295
Episode Reward: 2.0
Step 205 (62258) @ Episode 254/10000, loss: 0.00051947083557024642
Episode Reward: 1.0
Step 263 (62521) @ Episode 255/10000, loss: 0.00067020795540884144
Episode Reward: 2.0
Step 198 (62719) @ Episode 256/10000, loss: 0.00040016139973886315
Episode Reward: 0.0
Step 236 (62955) @ Episode 257/10000, loss: 5.8716155763249844e-05
Episode Reward: 1.0
Step 235 (63190) @ Episode 258/10000, loss: 0.00012927592615596954
Episode Reward: 1.0
Step 232 (63422) @ Episode 259/10000, loss: 0.00021012077922932804
Episode Reward: 1.0
Step 159 (63581) @ Episode 260/10000, loss: 0.00011865461419802159
Episode Reward: 0.0
Step 207 (63788) @ Episode 261/10000, loss: 7.542640378233045e-053
Episode Reward: 1.0
Step 167 (63955) @ Episode 262/10000, loss:

[2017-11-04 16:58:19,320] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video000300.mp4


Step 185 (73683) @ Episode 301/10000, loss: 0.02394571900367736887
Episode Reward: 0.0
Step 307 (73990) @ Episode 302/10000, loss: 6.006577314110473e-055
Episode Reward: 2.0
Step 167 (74157) @ Episode 303/10000, loss: 0.00020039545779582113
Episode Reward: 0.0
Step 194 (74351) @ Episode 304/10000, loss: 0.00018524186452850747
Episode Reward: 0.0
Step 166 (74517) @ Episode 305/10000, loss: 0.00046851555816829205
Episode Reward: 0.0
Step 184 (74701) @ Episode 306/10000, loss: 0.00028571000439114875
Episode Reward: 0.0
Step 176 (74877) @ Episode 307/10000, loss: 0.00011463381815701723
Episode Reward: 0.0
Step 279 (75156) @ Episode 308/10000, loss: 0.00044758760486729443
Episode Reward: 2.0
Step 357 (75513) @ Episode 309/10000, loss: 0.00348509638570249143
Episode Reward: 4.0
Step 214 (75727) @ Episode 310/10000, loss: 0.00026709609664976597
Episode Reward: 1.0
Step 170 (75897) @ Episode 311/10000, loss: 0.00013115261390339583
Episode Reward: 0.0
Step 228 (76125) @ Episode 312/10000, loss:

[2017-11-04 17:00:07,638] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video000350.mp4


Step 178 (85556) @ Episode 351/10000, loss: 0.00013161716924514621
Episode Reward: 0.0
Step 238 (85794) @ Episode 352/10000, loss: 0.00076188641833141457
Episode Reward: 1.0
Step 332 (86126) @ Episode 353/10000, loss: 0.00052852317458018662
Episode Reward: 3.0
Step 230 (86356) @ Episode 354/10000, loss: 0.00223943335004150875
Episode Reward: 1.0
Step 504 (86860) @ Episode 355/10000, loss: 0.00011955306399613619
Episode Reward: 5.0
Step 234 (87094) @ Episode 356/10000, loss: 8.570092904847115e-056
Episode Reward: 1.0
Step 171 (87265) @ Episode 357/10000, loss: 0.00032880617072805766
Episode Reward: 0.0
Step 249 (87514) @ Episode 358/10000, loss: 0.00290116947144269945
Episode Reward: 1.0
Step 325 (87839) @ Episode 359/10000, loss: 0.00095845269970595846
Episode Reward: 3.0
Step 190 (88029) @ Episode 360/10000, loss: 0.00093172461492940782
Episode Reward: 0.0
Step 210 (88239) @ Episode 361/10000, loss: 0.00179942732211202383
Episode Reward: 1.0
Step 349 (88588) @ Episode 362/10000, loss:

[2017-11-04 17:01:57,313] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video000400.mp4


Step 182 (97478) @ Episode 401/10000, loss: 0.00230901525355875525
Episode Reward: 0.0
Step 225 (97703) @ Episode 402/10000, loss: 0.00019027441157959402
Episode Reward: 1.0
Step 250 (97953) @ Episode 403/10000, loss: 0.00197931914590299137
Episode Reward: 1.0
Step 212 (98165) @ Episode 404/10000, loss: 0.00086792249931022521
Episode Reward: 1.0
Step 240 (98405) @ Episode 405/10000, loss: 0.00066849787253886463
Episode Reward: 1.0
Step 172 (98577) @ Episode 406/10000, loss: 0.00048637503641657537
Episode Reward: 0.0
Step 277 (98854) @ Episode 407/10000, loss: 0.00069855630863457924
Episode Reward: 2.0
Step 253 (99107) @ Episode 408/10000, loss: 0.00062006391817703844
Episode Reward: 1.0
Step 168 (99275) @ Episode 409/10000, loss: 0.00045058407704345884
Episode Reward: 0.0
Step 355 (99630) @ Episode 410/10000, loss: 0.00025065016234293586
Episode Reward: 3.0
Step 369 (99999) @ Episode 411/10000, loss: 0.00080551445716992028
 Copied model parameters to target network
Step 422 (100052) @ 

[2017-11-04 17:03:52,703] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video000450.mp4


Step 176 (109982) @ Episode 451/10000, loss: 0.00033036217791959643
Episode Reward: 0.0
Step 17 (109999) @ Episode 452/10000, loss: 7.928442937554792e-05
 Copied model parameters to target network
Step 183 (110165) @ Episode 452/10000, loss: 0.00175402476452291018
Episode Reward: 0.0
Step 229 (110394) @ Episode 453/10000, loss: 0.00185433519072830683
Episode Reward: 1.0
Step 277 (110671) @ Episode 454/10000, loss: 0.00036634848220273857
Episode Reward: 2.0
Step 344 (111015) @ Episode 455/10000, loss: 0.00196659052744507815
Episode Reward: 3.0
Step 183 (111198) @ Episode 456/10000, loss: 0.00071119295898824936
Episode Reward: 0.0
Step 172 (111370) @ Episode 457/10000, loss: 0.00287052523344755173
Episode Reward: 0.0
Step 244 (111614) @ Episode 458/10000, loss: 0.00744606787338852944
Episode Reward: 1.0
Step 209 (111823) @ Episode 459/10000, loss: 0.00195423024706542597
Episode Reward: 1.0
Step 275 (112098) @ Episode 460/10000, loss: 0.00059237744426354772
Episode Reward: 2.0
Step 561 (1

[2017-11-04 17:05:41,572] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video000500.mp4


Step 186 (121758) @ Episode 501/10000, loss: 0.00098320585675537598
Episode Reward: 0.0
Step 245 (122003) @ Episode 502/10000, loss: 0.00085095490794628864
Episode Reward: 1.0
Step 210 (122213) @ Episode 503/10000, loss: 0.00079279462806880474
Episode Reward: 1.0
Step 243 (122456) @ Episode 504/10000, loss: 0.00047976727364584804
Episode Reward: 2.0
Step 226 (122682) @ Episode 505/10000, loss: 0.00051688606617972257
Episode Reward: 1.0
Step 239 (122921) @ Episode 506/10000, loss: 0.00149187573697417974
Episode Reward: 1.0
Step 250 (123171) @ Episode 507/10000, loss: 0.00021232402650639415
Episode Reward: 1.0
Step 267 (123438) @ Episode 508/10000, loss: 0.00296107586473226553
Episode Reward: 2.0
Step 163 (123601) @ Episode 509/10000, loss: 0.00072440132498741156
Episode Reward: 0.0
Step 367 (123968) @ Episode 510/10000, loss: 0.00050437636673450475
Episode Reward: 3.0
Step 388 (124356) @ Episode 511/10000, loss: 0.00059635844081640245
Episode Reward: 4.0
Step 236 (124592) @ Episode 512/

[2017-11-04 17:07:36,736] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video000550.mp4


Step 518 (134511) @ Episode 551/10000, loss: 0.00043025446939282126
Episode Reward: 5.0
Step 177 (134688) @ Episode 552/10000, loss: 0.00047966698184609413
Episode Reward: 0.0
Step 252 (134940) @ Episode 553/10000, loss: 0.00611695367842912755
Episode Reward: 1.0
Step 209 (135149) @ Episode 554/10000, loss: 0.00090447289403527987
Episode Reward: 1.0
Step 343 (135492) @ Episode 555/10000, loss: 9.462649177294225e-056
Episode Reward: 3.0
Step 197 (135689) @ Episode 556/10000, loss: 0.00036697639734484255
Episode Reward: 0.0
Step 210 (135899) @ Episode 557/10000, loss: 0.00022550973517354578
Episode Reward: 1.0
Step 274 (136173) @ Episode 558/10000, loss: 4.278805135982111e-052
Episode Reward: 2.0
Step 226 (136399) @ Episode 559/10000, loss: 9.29120578803122e-0573
Episode Reward: 1.0
Step 237 (136636) @ Episode 560/10000, loss: 0.00095384009182453166
Episode Reward: 1.0
Step 178 (136814) @ Episode 561/10000, loss: 0.00067162630148231983
Episode Reward: 0.0
Step 262 (137076) @ Episode 562/

[2017-11-04 17:09:29,821] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video000600.mp4


Step 175 (146428) @ Episode 601/10000, loss: 0.00070066790794953727
Episode Reward: 0.0
Step 443 (146871) @ Episode 602/10000, loss: 0.00057326769456267368
Episode Reward: 5.0
Step 181 (147052) @ Episode 603/10000, loss: 0.00057493452914059167
Episode Reward: 0.0
Step 311 (147363) @ Episode 604/10000, loss: 0.00201682560145854954
Episode Reward: 2.0
Step 279 (147642) @ Episode 605/10000, loss: 0.00033299170900136234
Episode Reward: 2.0
Step 167 (147809) @ Episode 606/10000, loss: 0.00108478334732353692
Episode Reward: 0.0
Step 243 (148052) @ Episode 607/10000, loss: 0.01078216917812824253
Episode Reward: 2.0
Step 178 (148230) @ Episode 608/10000, loss: 0.00223748292773962355
Episode Reward: 0.0
Step 245 (148475) @ Episode 609/10000, loss: 0.00035347117227502167
Episode Reward: 1.0
Step 170 (148645) @ Episode 610/10000, loss: 0.00135524407960474574
Episode Reward: 0.0
Step 319 (148964) @ Episode 611/10000, loss: 0.00073563703335821634
Episode Reward: 3.0
Step 185 (149149) @ Episode 612/

[2017-11-04 17:11:23,253] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video000650.mp4


Step 166 (158534) @ Episode 651/10000, loss: 0.00083070702385157355
Episode Reward: 0.0
Step 286 (158820) @ Episode 652/10000, loss: 0.00022302397701423615
Episode Reward: 2.0
Step 194 (159014) @ Episode 653/10000, loss: 0.00013710325583815575
Episode Reward: 0.0
Step 173 (159187) @ Episode 654/10000, loss: 0.00027728971326723695
Episode Reward: 0.0
Step 362 (159549) @ Episode 655/10000, loss: 0.00236578867770731455
Episode Reward: 4.0
Step 263 (159812) @ Episode 656/10000, loss: 0.00083409360377117996
Episode Reward: 1.0
Step 187 (159999) @ Episode 657/10000, loss: 0.00048449329915456474
 Copied model parameters to target network
Step 387 (160199) @ Episode 657/10000, loss: 0.00082042271969839934
Episode Reward: 3.0
Step 201 (160400) @ Episode 658/10000, loss: 0.00063262390904128555
Episode Reward: 0.0
Step 273 (160673) @ Episode 659/10000, loss: 8.426474232692271e-054
Episode Reward: 2.0
Step 185 (160858) @ Episode 660/10000, loss: 0.00108398951124399959
Episode Reward: 0.0
Step 169 

[2017-11-04 17:13:19,126] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video000700.mp4


Step 307 (171172) @ Episode 701/10000, loss: 0.00092659093206748374
Episode Reward: 2.0
Step 183 (171355) @ Episode 702/10000, loss: 0.00036426621954888105
Episode Reward: 0.0
Step 168 (171523) @ Episode 703/10000, loss: 0.00028095167363062567
Episode Reward: 0.0
Step 237 (171760) @ Episode 704/10000, loss: 0.00027671884163282812
Episode Reward: 1.0
Step 172 (171932) @ Episode 705/10000, loss: 0.00042223138734698296
Episode Reward: 0.0
Step 471 (172403) @ Episode 706/10000, loss: 0.00353525392711162573
Episode Reward: 5.0
Step 171 (172574) @ Episode 707/10000, loss: 0.00082910357741639024
Episode Reward: 0.0
Step 238 (172812) @ Episode 708/10000, loss: 0.00096531974850222477
Episode Reward: 1.0
Step 416 (173228) @ Episode 709/10000, loss: 0.00569185195490717934
Episode Reward: 4.0
Step 345 (173573) @ Episode 710/10000, loss: 0.00062545685796067124
Episode Reward: 3.0
Step 241 (173814) @ Episode 711/10000, loss: 0.00128412863705307254
Episode Reward: 1.0
Step 202 (174016) @ Episode 712/

[2017-11-04 17:15:24,008] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video000750.mp4


Step 282 (184643) @ Episode 751/10000, loss: 0.00018807135347742587
Episode Reward: 2.0
Step 264 (184907) @ Episode 752/10000, loss: 0.00040583114605396986
Episode Reward: 2.0
Step 174 (185081) @ Episode 753/10000, loss: 0.00414607254788279565
Episode Reward: 0.0
Step 193 (185274) @ Episode 754/10000, loss: 0.00402726139873266273
Episode Reward: 0.0
Step 232 (185506) @ Episode 755/10000, loss: 0.00294055650010705494
Episode Reward: 1.0
Step 247 (185753) @ Episode 756/10000, loss: 0.00042608834337443113
Episode Reward: 1.0
Step 175 (185928) @ Episode 757/10000, loss: 0.00034242655965499583
Episode Reward: 0.0
Step 177 (186105) @ Episode 758/10000, loss: 0.00031987659167498355
Episode Reward: 0.0
Step 227 (186332) @ Episode 759/10000, loss: 0.00020812210277654233
Episode Reward: 1.0
Step 178 (186510) @ Episode 760/10000, loss: 0.00015124569472391158
Episode Reward: 0.0
Step 321 (186831) @ Episode 761/10000, loss: 0.00051307334797456864
Episode Reward: 3.0
Step 360 (187191) @ Episode 762/

[2017-11-04 17:17:13,625] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video000800.mp4


Step 297 (196528) @ Episode 801/10000, loss: 0.00021608901442959905
Episode Reward: 2.0
Step 227 (196755) @ Episode 802/10000, loss: 0.00015236574108712375
Episode Reward: 1.0
Step 193 (196948) @ Episode 803/10000, loss: 0.00062990887090563773
Episode Reward: 0.0
Step 160 (197108) @ Episode 804/10000, loss: 4.634944343706593e-055
Episode Reward: 0.0
Step 184 (197292) @ Episode 805/10000, loss: 0.00044270334183238447
Episode Reward: 0.0
Step 258 (197550) @ Episode 806/10000, loss: 0.00427728937938809433
Episode Reward: 1.0
Step 296 (197846) @ Episode 807/10000, loss: 0.00194705941248685122
Episode Reward: 2.0
Step 397 (198243) @ Episode 808/10000, loss: 0.00031899104942567647
Episode Reward: 4.0
Step 168 (198411) @ Episode 809/10000, loss: 0.00148686091415584094
Episode Reward: 0.0
Step 172 (198583) @ Episode 810/10000, loss: 0.00075528281740844253
Episode Reward: 0.0
Step 277 (198860) @ Episode 811/10000, loss: 0.00025037003797478974
Episode Reward: 2.0
Step 325 (199185) @ Episode 812/

[2017-11-04 17:19:15,439] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video000850.mp4


Step 220 (209579) @ Episode 851/10000, loss: 0.00076294247992336752
Episode Reward: 1.0
Step 175 (209754) @ Episode 852/10000, loss: 0.00039756472688168287
Episode Reward: 0.0
Step 242 (209996) @ Episode 853/10000, loss: 0.00022263766732066873
Episode Reward: 1.0
Step 3 (209999) @ Episode 854/10000, loss: 8.930838521337137e-05
 Copied model parameters to target network
Step 216 (210212) @ Episode 854/10000, loss: 0.00058767921291291715
Episode Reward: 1.0
Step 204 (210416) @ Episode 855/10000, loss: 0.00161744072102010254
Episode Reward: 0.0
Step 180 (210596) @ Episode 856/10000, loss: 0.00058398250257596376
Episode Reward: 0.0
Step 164 (210760) @ Episode 857/10000, loss: 9.09668451640755e-0534
Episode Reward: 0.0
Step 393 (211153) @ Episode 858/10000, loss: 0.00111379683949053296
Episode Reward: 4.0
Step 242 (211395) @ Episode 859/10000, loss: 0.00046947621740400794
Episode Reward: 1.0
Step 167 (211562) @ Episode 860/10000, loss: 0.00042423751438036565
Episode Reward: 0.0
Step 199 (21

[2017-11-04 17:21:01,978] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video000900.mp4


Step 164 (221103) @ Episode 901/10000, loss: 0.00017733842832967647
Episode Reward: 0.0
Step 169 (221272) @ Episode 902/10000, loss: 0.00054257432930171496
Episode Reward: 0.0
Step 169 (221441) @ Episode 903/10000, loss: 0.00021995211136527364
Episode Reward: 0.0
Step 174 (221615) @ Episode 904/10000, loss: 0.00036758324131369599
Episode Reward: 0.0
Step 270 (221885) @ Episode 905/10000, loss: 0.00093306700000539425
Episode Reward: 2.0
Step 236 (222121) @ Episode 906/10000, loss: 0.00085005251457914718
Episode Reward: 1.0
Step 240 (222361) @ Episode 907/10000, loss: 0.00039464957080781466
Episode Reward: 1.0
Step 165 (222526) @ Episode 908/10000, loss: 0.00020242558093741536
Episode Reward: 0.0
Step 294 (222820) @ Episode 909/10000, loss: 0.00043897141586057846
Episode Reward: 2.0
Step 177 (222997) @ Episode 910/10000, loss: 0.00050686392933130266
Episode Reward: 0.0
Step 401 (223398) @ Episode 911/10000, loss: 0.00013595377095043662
Episode Reward: 4.0
Step 170 (223568) @ Episode 912/

[2017-11-04 17:22:55,160] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video000950.mp4


Step 184 (233208) @ Episode 951/10000, loss: 0.00090099556837230927
Episode Reward: 0.0
Step 335 (233543) @ Episode 952/10000, loss: 0.00142958050128072532
Episode Reward: 3.0
Step 186 (233729) @ Episode 953/10000, loss: 0.00333799910731613645
Episode Reward: 0.0
Step 181 (233910) @ Episode 954/10000, loss: 0.00086406164336949594
Episode Reward: 0.0
Step 186 (234096) @ Episode 955/10000, loss: 0.00050535169430077087
Episode Reward: 0.0
Step 179 (234275) @ Episode 956/10000, loss: 0.00053766561904922134
Episode Reward: 0.0
Step 303 (234578) @ Episode 957/10000, loss: 0.00023939243692439054
Episode Reward: 2.0
Step 220 (234798) @ Episode 958/10000, loss: 0.00013519011554308236
Episode Reward: 1.0
Step 169 (234967) @ Episode 959/10000, loss: 0.00084991287440061574
Episode Reward: 0.0
Step 276 (235243) @ Episode 960/10000, loss: 0.00056871859123930344
Episode Reward: 2.0
Step 193 (235436) @ Episode 961/10000, loss: 0.00073785689892247325
Episode Reward: 0.0
Step 161 (235597) @ Episode 962/

[2017-11-04 17:24:48,255] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video001000.mp4


Step 213 (245513) @ Episode 1001/10000, loss: 0.00012105189671274275
Episode Reward: 0.0
Step 242 (245755) @ Episode 1002/10000, loss: 0.00071939080953598023
Episode Reward: 1.0
Step 317 (246072) @ Episode 1003/10000, loss: 0.00010782494791783392
Episode Reward: 2.0
Step 177 (246249) @ Episode 1004/10000, loss: 0.00085658911848440771
Episode Reward: 0.0
Step 254 (246503) @ Episode 1005/10000, loss: 0.00031361222499981525
Episode Reward: 2.0
Step 175 (246678) @ Episode 1006/10000, loss: 0.00018233961600344628
Episode Reward: 0.0
Step 266 (246944) @ Episode 1007/10000, loss: 0.00012048200005665421
Episode Reward: 1.0
Step 335 (247279) @ Episode 1008/10000, loss: 0.00202202051877975464
Episode Reward: 3.0
Step 281 (247560) @ Episode 1009/10000, loss: 0.00025720882695168257
Episode Reward: 2.0
Step 193 (247753) @ Episode 1010/10000, loss: 0.00087110779713839295
Episode Reward: 0.0
Step 356 (248109) @ Episode 1011/10000, loss: 0.00010800758900586516
Episode Reward: 3.0
Step 235 (248344) @ E

[2017-11-04 17:26:44,304] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video001050.mp4


Step 185 (258088) @ Episode 1051/10000, loss: 8.123601583065465e-056
Episode Reward: 0.0
Step 307 (258395) @ Episode 1052/10000, loss: 0.00060886412393301734
Episode Reward: 3.0
Step 403 (258798) @ Episode 1053/10000, loss: 0.00042382060200907293
Episode Reward: 4.0
Step 191 (258989) @ Episode 1054/10000, loss: 0.00019397535652387887
Episode Reward: 0.0
Step 338 (259327) @ Episode 1055/10000, loss: 0.00047299210564233364
Episode Reward: 3.0
Step 192 (259519) @ Episode 1056/10000, loss: 0.00011806437396444384
Episode Reward: 0.0
Step 194 (259713) @ Episode 1057/10000, loss: 0.00025073249707929791
Episode Reward: 0.0
Step 215 (259928) @ Episode 1058/10000, loss: 0.00028362561715766796
Episode Reward: 1.0
Step 71 (259999) @ Episode 1059/10000, loss: 0.00296598603017628245
 Copied model parameters to target network
Step 308 (260236) @ Episode 1059/10000, loss: 0.00098099978640675545
Episode Reward: 2.0
Step 253 (260489) @ Episode 1060/10000, loss: 0.00017817252955865115
Episode Reward: 1.0

[2017-11-04 17:28:40,551] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video001100.mp4


Step 231 (270714) @ Episode 1101/10000, loss: 0.00048899766989052354
Episode Reward: 1.0
Step 309 (271023) @ Episode 1102/10000, loss: 9.107861842494458e-057
Episode Reward: 2.0
Step 181 (271204) @ Episode 1103/10000, loss: 0.00020489323651418093
Episode Reward: 0.0
Step 428 (271632) @ Episode 1104/10000, loss: 0.00032761821057647467
Episode Reward: 4.0
Step 223 (271855) @ Episode 1105/10000, loss: 0.00127341691404581071
Episode Reward: 1.0
Step 207 (272062) @ Episode 1106/10000, loss: 0.00159010000061243776
Episode Reward: 1.0
Step 364 (272426) @ Episode 1107/10000, loss: 5.909327228437178e-052
Episode Reward: 4.0
Step 323 (272749) @ Episode 1108/10000, loss: 0.00026532268384471536
Episode Reward: 2.0
Step 195 (272944) @ Episode 1109/10000, loss: 0.00221257796511054046
Episode Reward: 0.0
Step 224 (273168) @ Episode 1110/10000, loss: 0.00041145147406496115
Episode Reward: 1.0
Step 282 (273450) @ Episode 1111/10000, loss: 0.00461653154343366683
Episode Reward: 2.0
Step 281 (273731) @ E

[2017-11-04 17:30:39,719] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video001150.mp4


Step 355 (283792) @ Episode 1151/10000, loss: 0.00063994515221565964
Episode Reward: 3.0
Step 272 (284064) @ Episode 1152/10000, loss: 0.00113154563587158926
Episode Reward: 2.0
Step 260 (284324) @ Episode 1153/10000, loss: 0.00041876704199239617
Episode Reward: 1.0
Step 327 (284651) @ Episode 1154/10000, loss: 0.00074967317050322899
Episode Reward: 3.0
Step 364 (285015) @ Episode 1155/10000, loss: 0.00169674132484942672
Episode Reward: 4.0
Step 211 (285226) @ Episode 1156/10000, loss: 0.00108727719634771352
Episode Reward: 1.0
Step 173 (285399) @ Episode 1157/10000, loss: 0.00020188037888146937
Episode Reward: 0.0
Step 343 (285742) @ Episode 1158/10000, loss: 0.00014491977344732732
Episode Reward: 3.0
Step 429 (286171) @ Episode 1159/10000, loss: 0.00025503218057565394
Episode Reward: 4.0
Step 284 (286455) @ Episode 1160/10000, loss: 0.00194067310076206925
Episode Reward: 2.0
Step 402 (286857) @ Episode 1161/10000, loss: 0.00016084618982858956
Episode Reward: 4.0
Step 379 (287236) @ E

[2017-11-04 17:32:49,340] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video001200.mp4


Step 235 (297656) @ Episode 1201/10000, loss: 0.00031521407072432346
Episode Reward: 1.0
Step 225 (297881) @ Episode 1202/10000, loss: 0.00044732572860084474
Episode Reward: 1.0
Step 222 (298103) @ Episode 1203/10000, loss: 0.00022781889128964394
Episode Reward: 0.0
Step 243 (298346) @ Episode 1204/10000, loss: 0.00211378559470176722
Episode Reward: 1.0
Step 237 (298583) @ Episode 1205/10000, loss: 0.00021093280520290136
Episode Reward: 1.0
Step 298 (298881) @ Episode 1206/10000, loss: 9.318927914137021e-053
Episode Reward: 2.0
Step 223 (299104) @ Episode 1207/10000, loss: 0.00068958057090640076
Episode Reward: 1.0
Step 340 (299444) @ Episode 1208/10000, loss: 0.00187641347292810686
Episode Reward: 3.0
Step 244 (299688) @ Episode 1209/10000, loss: 0.00046034634578973055
Episode Reward: 1.0
Step 309 (299997) @ Episode 1210/10000, loss: 0.00038762562326155603
Episode Reward: 2.0
Step 2 (299999) @ Episode 1211/10000, loss: 0.0003385043819434941
 Copied model parameters to target network
S

[2017-11-04 17:34:53,757] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video001250.mp4


Step 183 (311122) @ Episode 1251/10000, loss: 0.00100531789939850577
Episode Reward: 0.0
Step 272 (311394) @ Episode 1252/10000, loss: 0.00193608645349740987
Episode Reward: 2.0
Step 221 (311615) @ Episode 1253/10000, loss: 0.00039821868995204574
Episode Reward: 1.0
Step 319 (311934) @ Episode 1254/10000, loss: 0.00140383373945951462
Episode Reward: 2.0
Step 388 (312322) @ Episode 1255/10000, loss: 0.00215642852708697326
Episode Reward: 4.0
Step 430 (312752) @ Episode 1256/10000, loss: 0.00055556511506438265
Episode Reward: 5.0
Step 502 (313254) @ Episode 1257/10000, loss: 0.00058506045024842023
Episode Reward: 5.0
Step 348 (313602) @ Episode 1258/10000, loss: 0.00136604614090174444
Episode Reward: 3.0
Step 258 (313860) @ Episode 1259/10000, loss: 0.00024880334967747335
Episode Reward: 1.0
Step 212 (314072) @ Episode 1260/10000, loss: 0.00025177441420964897
Episode Reward: 0.0
Step 161 (314233) @ Episode 1261/10000, loss: 0.00023726522340439262
Episode Reward: 0.0
Step 298 (314531) @ E

[2017-11-04 17:37:05,398] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video001300.mp4


Step 220 (325475) @ Episode 1301/10000, loss: 0.00111790047958493234
Episode Reward: 0.0
Step 262 (325737) @ Episode 1302/10000, loss: 0.00020900524395983666
Episode Reward: 2.0
Step 212 (325949) @ Episode 1303/10000, loss: 0.00114345108158886433
Episode Reward: 1.0
Step 366 (326315) @ Episode 1304/10000, loss: 0.00076543173054233195
Episode Reward: 3.0
Step 314 (326629) @ Episode 1305/10000, loss: 0.00064041384030133496
Episode Reward: 2.0
Step 174 (326803) @ Episode 1306/10000, loss: 0.00110202503856271545
Episode Reward: 0.0
Step 198 (327001) @ Episode 1307/10000, loss: 0.00042333159944973886
Episode Reward: 0.0
Step 347 (327348) @ Episode 1308/10000, loss: 0.00092895410489290955
Episode Reward: 3.0
Step 208 (327556) @ Episode 1309/10000, loss: 0.00025470688706263953
Episode Reward: 1.0
Step 170 (327726) @ Episode 1310/10000, loss: 0.00045083274017088115
Episode Reward: 0.0
Step 208 (327934) @ Episode 1311/10000, loss: 0.00029093917692080148
Episode Reward: 0.0
Step 266 (328200) @ E

[2017-11-04 17:39:13,000] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video001350.mp4


Step 372 (339720) @ Episode 1351/10000, loss: 0.00062192021869122987
Episode Reward: 3.0
Step 279 (339999) @ Episode 1352/10000, loss: 0.00435321405529975987
 Copied model parameters to target network
Step 351 (340071) @ Episode 1352/10000, loss: 0.00784769933670759253
Episode Reward: 4.0
Step 429 (340500) @ Episode 1353/10000, loss: 0.00048759146011434495
Episode Reward: 5.0
Step 459 (340959) @ Episode 1354/10000, loss: 0.00305023021064698755
Episode Reward: 5.0
Step 356 (341315) @ Episode 1355/10000, loss: 0.00055770663311704994
Episode Reward: 3.0
Step 343 (341658) @ Episode 1356/10000, loss: 0.00164159806445240973
Episode Reward: 3.0
Step 457 (342115) @ Episode 1357/10000, loss: 0.00023026371491141617
Episode Reward: 5.0
Step 226 (342341) @ Episode 1358/10000, loss: 0.00095352064818143847
Episode Reward: 1.0
Step 345 (342686) @ Episode 1359/10000, loss: 0.00016612434410490096
Episode Reward: 3.0
Step 567 (343253) @ Episode 1360/10000, loss: 0.00068097439361736188
Episode Reward: 7.

[2017-11-04 17:41:51,083] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video001400.mp4


Step 350 (357366) @ Episode 1401/10000, loss: 0.00081971869803965095
Episode Reward: 3.0
Step 382 (357748) @ Episode 1402/10000, loss: 0.00030419169343076646
Episode Reward: 4.0
Step 320 (358068) @ Episode 1403/10000, loss: 0.00087926606647670272
Episode Reward: 3.0
Step 270 (358338) @ Episode 1404/10000, loss: 0.00078485545236617334
Episode Reward: 2.0
Step 477 (358815) @ Episode 1405/10000, loss: 0.00045872016926296055
Episode Reward: 5.0
Step 426 (359241) @ Episode 1406/10000, loss: 0.00055410107597708716
Episode Reward: 5.0
Step 421 (359662) @ Episode 1407/10000, loss: 0.00312338164076209073
Episode Reward: 5.0
Step 337 (359999) @ Episode 1408/10000, loss: 0.00056761724408715964
 Copied model parameters to target network
Step 412 (360074) @ Episode 1408/10000, loss: 0.00023987462918739766
Episode Reward: 4.0
Step 593 (360667) @ Episode 1409/10000, loss: 0.00192808744031935933
Episode Reward: 7.0
Step 500 (361167) @ Episode 1410/10000, loss: 0.00051603972679004074
Episode Reward: 6.

[2017-11-04 17:44:57,132] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video001450.mp4


Step 309 (378580) @ Episode 1451/10000, loss: 0.00037581671494990587
Episode Reward: 3.0
Step 674 (379254) @ Episode 1452/10000, loss: 0.00056173437042161823
Episode Reward: 11.0
Step 459 (379713) @ Episode 1453/10000, loss: 0.00523214275017380776
Episode Reward: 5.0
Step 286 (379999) @ Episode 1454/10000, loss: 0.00261398334987461575
 Copied model parameters to target network
Step 472 (380185) @ Episode 1454/10000, loss: 0.00125898921396583327
Episode Reward: 6.0
Step 449 (380634) @ Episode 1455/10000, loss: 0.00128634157590568074
Episode Reward: 5.0
Step 543 (381177) @ Episode 1456/10000, loss: 0.00064326159190386536
Episode Reward: 6.0
Step 370 (381547) @ Episode 1457/10000, loss: 0.00871228054165840125
Episode Reward: 4.0
Step 562 (382109) @ Episode 1458/10000, loss: 0.00065698754042387013
Episode Reward: 5.0
Step 480 (382589) @ Episode 1459/10000, loss: 0.00184887752402573824
Episode Reward: 6.0
Step 357 (382946) @ Episode 1460/10000, loss: 0.00098010711371898658
Episode Reward: 3

[2017-11-04 17:48:25,730] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video001500.mp4


Step 454 (402645) @ Episode 1501/10000, loss: 0.00044097995851188916
Episode Reward: 6.0
Step 545 (403190) @ Episode 1502/10000, loss: 0.00078764604404568677
Episode Reward: 6.0
Step 416 (403606) @ Episode 1503/10000, loss: 0.00225125811994075784
Episode Reward: 6.0
Step 664 (404270) @ Episode 1504/10000, loss: 0.00223504239693284034
Episode Reward: 8.0
Step 411 (404681) @ Episode 1505/10000, loss: 0.00252436846494674747
Episode Reward: 5.0
Step 508 (405189) @ Episode 1506/10000, loss: 0.00075944117270410066
Episode Reward: 6.0
Step 543 (405732) @ Episode 1507/10000, loss: 0.00048649945529177785
Episode Reward: 6.0
Step 612 (406344) @ Episode 1508/10000, loss: 0.00204989337362349036
Episode Reward: 8.0
Step 645 (406989) @ Episode 1509/10000, loss: 0.00045037042582407594
Episode Reward: 8.0
Step 607 (407596) @ Episode 1510/10000, loss: 0.00141619204077869654
Episode Reward: 8.0
Step 622 (408218) @ Episode 1511/10000, loss: 0.00382013013586401946
Episode Reward: 8.0
Step 389 (408607) @ E

[2017-11-04 17:52:26,627] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video001550.mp4


Step 313 (429999) @ Episode 1551/10000, loss: 0.00065637892112135894
 Copied model parameters to target network
Step 515 (430201) @ Episode 1551/10000, loss: 0.00785540975630283486
Episode Reward: 7.0
Step 543 (430744) @ Episode 1552/10000, loss: 0.01108171511441469225
Episode Reward: 7.0
Step 523 (431267) @ Episode 1553/10000, loss: 0.00101985794026404627
Episode Reward: 6.0
Step 528 (431795) @ Episode 1554/10000, loss: 0.00227865274064242843
Episode Reward: 6.0
Step 510 (432305) @ Episode 1555/10000, loss: 0.00083226716378703715
Episode Reward: 5.0
Step 622 (432927) @ Episode 1556/10000, loss: 0.01059809699654579243
Episode Reward: 9.0
Step 636 (433563) @ Episode 1557/10000, loss: 0.00259350379928946524
Episode Reward: 7.0
Step 724 (434287) @ Episode 1558/10000, loss: 0.00244317809119820674
Episode Reward: 10.0
Step 645 (434932) @ Episode 1559/10000, loss: 0.00096516311168670655
Episode Reward: 9.0
Step 655 (435587) @ Episode 1560/10000, loss: 0.00130599306430667644
Episode Reward: 1

[2017-11-04 17:57:58,722] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video001600.mp4


Step 792 (467924) @ Episode 1601/10000, loss: 0.00264182733371853835
Episode Reward: 10.0
Step 692 (468616) @ Episode 1602/10000, loss: 0.00290880491957068444
Episode Reward: 8.0
Step 862 (469478) @ Episode 1603/10000, loss: 0.00474187731742858924
Episode Reward: 18.0
Step 521 (469999) @ Episode 1604/10000, loss: 0.00317810382694005974
 Copied model parameters to target network
Step 869 (470347) @ Episode 1604/10000, loss: 0.00218910910189151766
Episode Reward: 15.0
Step 526 (470873) @ Episode 1605/10000, loss: 0.00218551768921315674
Episode Reward: 6.0
Step 874 (471747) @ Episode 1606/10000, loss: 0.00399695243686437634
Episode Reward: 16.0
Step 731 (472478) @ Episode 1607/10000, loss: 0.00049311073962599046
Episode Reward: 13.0
Step 728 (473206) @ Episode 1608/10000, loss: 0.00137550430372357375
Episode Reward: 11.0
Step 935 (474141) @ Episode 1609/10000, loss: 0.00180404330603778367
Episode Reward: 14.0
Step 780 (474921) @ Episode 1610/10000, loss: 0.00250393222086131575
Episode Rew

[2017-11-04 18:04:04,526] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video001650.mp4


Step 666 (508612) @ Episode 1651/10000, loss: 0.00326832034625113966
Episode Reward: 11.0
Step 625 (509237) @ Episode 1652/10000, loss: 0.00124973803758621224
Episode Reward: 10.0
Step 762 (509999) @ Episode 1653/10000, loss: 0.00124375673476606664
 Copied model parameters to target network
Step 795 (510032) @ Episode 1653/10000, loss: 0.0105529129505157475
Episode Reward: 17.0
Step 812 (510844) @ Episode 1654/10000, loss: 0.0023215520195662975
Episode Reward: 13.0
Step 782 (511626) @ Episode 1655/10000, loss: 0.00533791678026318555
Episode Reward: 13.0
Step 846 (512472) @ Episode 1656/10000, loss: 0.00391224399209022587
Episode Reward: 16.0
Step 647 (513119) @ Episode 1657/10000, loss: 0.0028300131671130657
Episode Reward: 10.0
Step 963 (514082) @ Episode 1658/10000, loss: 0.00496996054425835675
Episode Reward: 21.0
Step 596 (514678) @ Episode 1659/10000, loss: 0.00212489860132336655
Episode Reward: 13.0
Step 717 (515395) @ Episode 1660/10000, loss: 0.00101002142764627934
Episode Rewa

[2017-11-04 18:10:17,227] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video001700.mp4


Step 680 (549991) @ Episode 1701/10000, loss: 0.0357096269726753233
Episode Reward: 14.0
Step 8 (549999) @ Episode 1702/10000, loss: 0.0023735018912702817
 Copied model parameters to target network
Step 868 (550859) @ Episode 1702/10000, loss: 0.0132257081568241124
Episode Reward: 21.0
Step 967 (551826) @ Episode 1703/10000, loss: 0.0008348650881089274
Episode Reward: 20.0
Step 751 (552577) @ Episode 1704/10000, loss: 0.01071628369390964556
Episode Reward: 15.0
Step 968 (553545) @ Episode 1705/10000, loss: 0.00407147733494639447
Episode Reward: 17.0
Step 715 (554260) @ Episode 1706/10000, loss: 0.0011032656766474247
Episode Reward: 11.0
Step 890 (555150) @ Episode 1707/10000, loss: 0.00117790489457547666
Episode Reward: 14.0
Step 773 (555923) @ Episode 1708/10000, loss: 0.00083251821342855696
Episode Reward: 12.0
Step 769 (556692) @ Episode 1709/10000, loss: 0.00250006257556378847
Episode Reward: 13.0
Step 872 (557564) @ Episode 1710/10000, loss: 0.0020228973589837553
Episode Reward: 1

[2017-11-04 18:16:42,806] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video001750.mp4


Step 954 (593116) @ Episode 1751/10000, loss: 0.00127430888824164873
Episode Reward: 19.0
Step 807 (593923) @ Episode 1752/10000, loss: 0.0016359977889806032
Episode Reward: 14.0
Step 1108 (595031) @ Episode 1753/10000, loss: 0.0014475395437330008
Episode Reward: 19.0
Step 1442 (596473) @ Episode 1754/10000, loss: 0.0018244474194943905
Episode Reward: 33.0
Step 1346 (597819) @ Episode 1755/10000, loss: 0.00140199041925370723
Episode Reward: 30.0
Step 786 (598605) @ Episode 1756/10000, loss: 0.00092482578475028287
Episode Reward: 13.0
Step 1394 (599999) @ Episode 1757/10000, loss: 0.0010218662209808826
 Copied model parameters to target network
Step 1545 (600150) @ Episode 1757/10000, loss: 0.0115452697500586516
Episode Reward: 35.0
Step 807 (600957) @ Episode 1758/10000, loss: 0.0025508166290819645
Episode Reward: 18.0
Step 761 (601718) @ Episode 1759/10000, loss: 0.00144963059574365623
Episode Reward: 13.0
Step 1324 (603042) @ Episode 1760/10000, loss: 0.00115101970732212077
Episode R

[2017-11-04 18:23:49,656] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video001800.mp4


Step 339 (639999) @ Episode 1801/10000, loss: 0.00187475432176142935
 Copied model parameters to target network
Step 797 (640457) @ Episode 1801/10000, loss: 0.0017379745841026306
Episode Reward: 19.0
Step 728 (641185) @ Episode 1802/10000, loss: 0.00434535555541515355
Episode Reward: 12.0
Step 903 (642088) @ Episode 1803/10000, loss: 0.00216683326289057734
Episode Reward: 18.0
Step 785 (642873) @ Episode 1804/10000, loss: 0.00171170278917998084
Episode Reward: 13.0
Step 961 (643834) @ Episode 1805/10000, loss: 0.00081215251702815295
Episode Reward: 24.0
Step 921 (644755) @ Episode 1806/10000, loss: 0.00336827477440238744
Episode Reward: 15.0
Step 905 (645660) @ Episode 1807/10000, loss: 0.00162524171173572546
Episode Reward: 19.0
Step 1015 (646675) @ Episode 1808/10000, loss: 0.0037778252735733986
Episode Reward: 22.0
Step 914 (647589) @ Episode 1809/10000, loss: 0.00486887060105800646
Episode Reward: 20.0
Step 1418 (649007) @ Episode 1810/10000, loss: 0.00175373500678688293
Episode R

[2017-11-04 18:31:03,124] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video001850.mp4


Step 1053 (688973) @ Episode 1851/10000, loss: 0.0057131489738821989
Episode Reward: 21.0
Step 779 (689752) @ Episode 1852/10000, loss: 0.03285607323050499177
Episode Reward: 13.0
Step 247 (689999) @ Episode 1853/10000, loss: 0.00529812090098857965
 Copied model parameters to target network
Step 924 (690676) @ Episode 1853/10000, loss: 0.0015462700976058842
Episode Reward: 18.0
Step 895 (691571) @ Episode 1854/10000, loss: 0.00072713172994554044
Episode Reward: 16.0
Step 1116 (692687) @ Episode 1855/10000, loss: 0.0010943206725642085
Episode Reward: 19.0
Step 840 (693527) @ Episode 1856/10000, loss: 0.00471498258411884325
Episode Reward: 21.0
Step 1069 (694596) @ Episode 1857/10000, loss: 0.0007132332539185882
Episode Reward: 19.0
Step 663 (695259) @ Episode 1858/10000, loss: 0.00097669428214430815
Episode Reward: 8.0
Step 631 (695890) @ Episode 1859/10000, loss: 0.00856091547757387274
Episode Reward: 10.0
Step 869 (696759) @ Episode 1860/10000, loss: 0.00109121017158031465
Episode Rew

[2017-11-04 18:37:54,510] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video001900.mp4


Step 1021 (734752) @ Episode 1901/10000, loss: 0.0020886461716145277
Episode Reward: 21.0
Step 931 (735683) @ Episode 1902/10000, loss: 0.02078149840235713476
Episode Reward: 21.0
Step 1053 (736736) @ Episode 1903/10000, loss: 0.0013559279032051563
Episode Reward: 18.0
Step 998 (737734) @ Episode 1904/10000, loss: 0.0014682461041957145
Episode Reward: 22.0
Step 1201 (738935) @ Episode 1905/10000, loss: 0.0383612439036369373
Episode Reward: 26.0
Step 802 (739737) @ Episode 1906/10000, loss: 0.00192569650243967773
Episode Reward: 14.0
Step 262 (739999) @ Episode 1907/10000, loss: 0.0032346202060580254
 Copied model parameters to target network
Step 845 (740582) @ Episode 1907/10000, loss: 0.00171670189592987326
Episode Reward: 24.0
Step 816 (741398) @ Episode 1908/10000, loss: 0.00208701076917350344
Episode Reward: 18.0
Step 894 (742292) @ Episode 1909/10000, loss: 0.0021561481989920147
Episode Reward: 20.0
Step 643 (742935) @ Episode 1910/10000, loss: 0.0021820773836225279
Episode Rewar

[2017-11-04 18:44:59,679] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video001950.mp4


Step 910 (781889) @ Episode 1951/10000, loss: 0.0289425905793905268
Episode Reward: 15.0
Step 510 (782399) @ Episode 1952/10000, loss: 0.0028551616705954075
Episode Reward: 7.0
Step 700 (783099) @ Episode 1953/10000, loss: 0.0017428551800549034
Episode Reward: 12.0
Step 948 (784047) @ Episode 1954/10000, loss: 0.0021862282883375883
Episode Reward: 17.0
Step 1014 (785061) @ Episode 1955/10000, loss: 0.0206310693174600695
Episode Reward: 23.0
Step 1097 (786158) @ Episode 1956/10000, loss: 0.0037858304567635065
Episode Reward: 17.0
Step 646 (786804) @ Episode 1957/10000, loss: 0.00204508937895298377
Episode Reward: 10.0
Step 848 (787652) @ Episode 1958/10000, loss: 0.00495630968362093427
Episode Reward: 21.0
Step 974 (788626) @ Episode 1959/10000, loss: 0.00339075503870844845
Episode Reward: 20.0
Step 1151 (789777) @ Episode 1960/10000, loss: 0.0012372653000056744
Episode Reward: 27.0
Step 222 (789999) @ Episode 1961/10000, loss: 0.00145878433249890814
 Copied model parameters to target n

[2017-11-04 18:51:42,146] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video002000.mp4


Step 1170 (826612) @ Episode 2001/10000, loss: 0.0065606283023953447
Episode Reward: 21.0
Step 709 (827321) @ Episode 2002/10000, loss: 0.01189628336578607686
Episode Reward: 11.0
Step 645 (827966) @ Episode 2003/10000, loss: 0.00154683133587241174
Episode Reward: 13.0
Step 901 (828867) @ Episode 2004/10000, loss: 0.00255071325227618236
Episode Reward: 15.0
Step 667 (829534) @ Episode 2005/10000, loss: 0.00368109717965126047
Episode Reward: 13.0
Step 465 (829999) @ Episode 2006/10000, loss: 0.0133148021996021277
 Copied model parameters to target network
Step 845 (830379) @ Episode 2006/10000, loss: 0.0016447800444439054
Episode Reward: 13.0
Step 1252 (831631) @ Episode 2007/10000, loss: 0.0109896399080753336
Episode Reward: 24.0
Step 1110 (832741) @ Episode 2008/10000, loss: 0.0026279801968485117
Episode Reward: 21.0
Step 882 (833623) @ Episode 2009/10000, loss: 0.0016404251800850034
Episode Reward: 14.0
Step 762 (834385) @ Episode 2010/10000, loss: 0.0026231436058878935
Episode Rewar

[2017-11-04 18:58:11,688] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video002050.mp4


Step 911 (869756) @ Episode 2051/10000, loss: 0.0016671664780005813
Episode Reward: 17.0
Step 243 (869999) @ Episode 2052/10000, loss: 0.0018522131722420454
 Copied model parameters to target network
Step 735 (870491) @ Episode 2052/10000, loss: 0.0434286110103130347
Episode Reward: 14.0
Step 1023 (871514) @ Episode 2053/10000, loss: 0.0048179505392909057
Episode Reward: 19.0
Step 563 (872077) @ Episode 2054/10000, loss: 0.0108549576252698943
Episode Reward: 7.0
Step 864 (872941) @ Episode 2055/10000, loss: 0.0244668144732713757
Episode Reward: 16.0
Step 760 (873701) @ Episode 2056/10000, loss: 0.0108874775469303135
Episode Reward: 13.0
Step 693 (874394) @ Episode 2057/10000, loss: 0.0018978278385475278
Episode Reward: 14.0
Step 1422 (875816) @ Episode 2058/10000, loss: 0.0052253077737987044
Episode Reward: 31.0
Step 1019 (876835) @ Episode 2059/10000, loss: 0.0076760854572057727
Episode Reward: 31.0
Step 1003 (877838) @ Episode 2060/10000, loss: 0.0075743240304291255
Episode Reward: 2

[2017-11-04 19:05:18,642] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video002100.mp4


Step 1301 (917599) @ Episode 2101/10000, loss: 0.0035948022268712525
Episode Reward: 25.0
Step 1289 (918888) @ Episode 2102/10000, loss: 0.0087875062599778185
Episode Reward: 27.0
Step 1111 (919999) @ Episode 2103/10000, loss: 0.0055216373875737195
 Copied model parameters to target network
Step 1114 (920002) @ Episode 2103/10000, loss: 0.0019459335599094635
Episode Reward: 30.0
Step 901 (920903) @ Episode 2104/10000, loss: 0.0024203108623623858
Episode Reward: 22.0
Step 1079 (921982) @ Episode 2105/10000, loss: 0.0026529207825660706
Episode Reward: 24.0
Step 983 (922965) @ Episode 2106/10000, loss: 0.0034515094012022023
Episode Reward: 20.0
Step 1400 (924365) @ Episode 2107/10000, loss: 0.0053131161257624636
Episode Reward: 36.0
Step 1269 (925634) @ Episode 2108/10000, loss: 0.0064114518463611683
Episode Reward: 25.0
Step 1330 (926964) @ Episode 2109/10000, loss: 0.0064080641604959965
Episode Reward: 27.0
Step 764 (927728) @ Episode 2110/10000, loss: 0.0055478117428729572
Episode Rewa

[2017-11-04 19:12:52,082] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video002150.mp4


Step 1195 (968536) @ Episode 2151/10000, loss: 0.0031237627845257529
Episode Reward: 31.0
Step 678 (969214) @ Episode 2152/10000, loss: 0.0044282693415880235
Episode Reward: 10.0
Step 785 (969999) @ Episode 2153/10000, loss: 0.0089915283024311073
 Copied model parameters to target network
Step 1384 (970598) @ Episode 2153/10000, loss: 0.0126293236389756235
Episode Reward: 56.0
Step 793 (971391) @ Episode 2154/10000, loss: 0.0105248466134071354
Episode Reward: 13.0
Step 1332 (972723) @ Episode 2155/10000, loss: 0.0211042333394289316
Episode Reward: 31.0
Step 1060 (973783) @ Episode 2156/10000, loss: 0.0088960658758878795
Episode Reward: 21.0
Step 1252 (975035) @ Episode 2157/10000, loss: 0.0024682981893420224
Episode Reward: 35.0
Step 1214 (976249) @ Episode 2158/10000, loss: 0.0025715788360685117
Episode Reward: 19.0
Step 808 (977057) @ Episode 2159/10000, loss: 0.0070543047040700915
Episode Reward: 17.0
Step 1013 (978070) @ Episode 2160/10000, loss: 0.0304396916180849086
Episode Rewar

[2017-11-04 19:20:48,717] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video002200.mp4


Step 966 (1021244) @ Episode 2201/10000, loss: 0.0074922316707670696
Episode Reward: 17.0
Step 1255 (1022499) @ Episode 2202/10000, loss: 0.0048772427253425122
Episode Reward: 37.0
Step 1383 (1023882) @ Episode 2203/10000, loss: 0.0189033225178718578
Episode Reward: 27.0
Step 816 (1024698) @ Episode 2204/10000, loss: 0.0125466054305434235
Episode Reward: 18.0
Step 934 (1025632) @ Episode 2205/10000, loss: 0.00697492854669690155
Episode Reward: 16.0
Step 872 (1026504) @ Episode 2206/10000, loss: 0.0020131017081439495
Episode Reward: 21.0
Step 860 (1027364) @ Episode 2207/10000, loss: 0.0097432080656290054
Episode Reward: 21.0
Step 954 (1028318) @ Episode 2208/10000, loss: 0.0031764626037329435
Episode Reward: 21.0
Step 903 (1029221) @ Episode 2209/10000, loss: 0.0047123730182647705
Episode Reward: 16.0
Step 778 (1029999) @ Episode 2210/10000, loss: 0.0153043214231729545
 Copied model parameters to target network
Step 1024 (1030245) @ Episode 2210/10000, loss: 0.0281267631798982627
Episo

[2017-11-04 19:28:29,922] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video002250.mp4


Step 1189 (1072995) @ Episode 2251/10000, loss: 0.0058120060712099075
Episode Reward: 20.0
Step 998 (1073993) @ Episode 2252/10000, loss: 0.0049238940700888633
Episode Reward: 29.0
Step 1397 (1075390) @ Episode 2253/10000, loss: 0.0039123473688960075
Episode Reward: 33.0
Step 1282 (1076672) @ Episode 2254/10000, loss: 0.0047966754063963895
Episode Reward: 27.0
Step 813 (1077485) @ Episode 2255/10000, loss: 0.0031343123409897094
Episode Reward: 14.0
Step 1418 (1078903) @ Episode 2256/10000, loss: 0.0056126266717910774
Episode Reward: 33.0
Step 1096 (1079999) @ Episode 2257/10000, loss: 0.0046326047740876675
 Copied model parameters to target network
Step 1198 (1080101) @ Episode 2257/10000, loss: 0.0044673108495771885
Episode Reward: 22.0
Step 1305 (1081406) @ Episode 2258/10000, loss: 0.0129668321460485466
Episode Reward: 30.0
Step 1116 (1082522) @ Episode 2259/10000, loss: 0.0104641234502196317
Episode Reward: 28.0
Step 1608 (1084130) @ Episode 2260/10000, loss: 0.0021904555615037687


[2017-11-04 19:36:42,771] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video002300.mp4


Step 894 (1127769) @ Episode 2301/10000, loss: 0.0364310927689075585
Episode Reward: 21.0
Step 1385 (1129154) @ Episode 2302/10000, loss: 0.0057538971304893494
Episode Reward: 37.0
Step 845 (1129999) @ Episode 2303/10000, loss: 0.0134161533787846573
 Copied model parameters to target network
Step 921 (1130075) @ Episode 2303/10000, loss: 0.0039369710721075535
Episode Reward: 16.0
Step 1405 (1131480) @ Episode 2304/10000, loss: 0.0036340605001896623
Episode Reward: 33.0
Step 823 (1132303) @ Episode 2305/10000, loss: 0.0038216304965317253
Episode Reward: 14.0
Step 1217 (1133520) @ Episode 2306/10000, loss: 0.0141555778682231945
Episode Reward: 26.0
Step 494 (1134014) @ Episode 2307/10000, loss: 0.0100466217845678334
Episode Reward: 7.0
Step 1273 (1135287) @ Episode 2308/10000, loss: 0.0074746012687683105
Episode Reward: 29.0
Step 906 (1136193) @ Episode 2309/10000, loss: 0.0033567713107913733
Episode Reward: 16.0
Step 1095 (1137288) @ Episode 2310/10000, loss: 0.0091446368023753175
Episo

[2017-11-04 19:43:59,455] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video002350.mp4


Step 1052 (1177165) @ Episode 2351/10000, loss: 0.0023847396951168776
Episode Reward: 17.0
Step 1057 (1178222) @ Episode 2352/10000, loss: 0.0137875555083155634
Episode Reward: 26.0
Step 958 (1179180) @ Episode 2353/10000, loss: 0.0046438663266599183
Episode Reward: 23.0
Step 819 (1179999) @ Episode 2354/10000, loss: 0.0048787007108330735
 Copied model parameters to target network
Step 927 (1180107) @ Episode 2354/10000, loss: 0.0062933694571256642
Episode Reward: 25.0
Step 1226 (1181333) @ Episode 2355/10000, loss: 0.0053750923834741116
Episode Reward: 22.0
Step 1029 (1182362) @ Episode 2356/10000, loss: 0.0160477757453918465
Episode Reward: 24.0
Step 1054 (1183416) @ Episode 2357/10000, loss: 0.0345045067369937954
Episode Reward: 20.0
Step 687 (1184103) @ Episode 2358/10000, loss: 0.0691755264997482355
Episode Reward: 14.0
Step 970 (1185073) @ Episode 2359/10000, loss: 0.0033855959773063664
Episode Reward: 18.0
Step 1184 (1186257) @ Episode 2360/10000, loss: 0.0717117786407470727
Epi

[2017-11-04 19:51:06,303] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video002400.mp4


Step 605 (1223886) @ Episode 2401/10000, loss: 0.0039190752431750356
Episode Reward: 9.0
Step 1326 (1225212) @ Episode 2402/10000, loss: 0.0033926460891962056
Episode Reward: 31.0
Step 729 (1225941) @ Episode 2403/10000, loss: 0.0075327465310692797
Episode Reward: 12.0
Step 838 (1226779) @ Episode 2404/10000, loss: 0.0114368759095668835
Episode Reward: 17.0
Step 1358 (1228137) @ Episode 2405/10000, loss: 0.0055287242867052555
Episode Reward: 33.0
Step 750 (1228887) @ Episode 2406/10000, loss: 0.0150207234546542175
Episode Reward: 15.0
Step 480 (1229367) @ Episode 2407/10000, loss: 0.0124660115689039235
Episode Reward: 6.0
Step 632 (1229999) @ Episode 2408/10000, loss: 0.0109151694923639375
 Copied model parameters to target network
Step 758 (1230125) @ Episode 2408/10000, loss: 0.0730143189430236884
Episode Reward: 14.0
Step 857 (1230982) @ Episode 2409/10000, loss: 0.0145259033888578413
Episode Reward: 14.0
Step 590 (1231572) @ Episode 2410/10000, loss: 0.0063800960779190065
Episode R

[2017-11-04 19:57:36,752] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video002450.mp4


Step 1226 (1267824) @ Episode 2451/10000, loss: 0.0106939924880862247
Episode Reward: 34.0
Step 1381 (1269205) @ Episode 2452/10000, loss: 0.0109222438186407095
Episode Reward: 33.0
Step 704 (1269909) @ Episode 2453/10000, loss: 0.0040212962776422564
Episode Reward: 12.0
Step 90 (1269999) @ Episode 2454/10000, loss: 0.0077080102637410165
 Copied model parameters to target network
Step 618 (1270527) @ Episode 2454/10000, loss: 0.0139473341405391765
Episode Reward: 11.0
Step 1042 (1271569) @ Episode 2455/10000, loss: 0.0076092928647994995
Episode Reward: 20.0
Step 1195 (1272764) @ Episode 2456/10000, loss: 0.0115096587687730794
Episode Reward: 21.0
Step 1567 (1274331) @ Episode 2457/10000, loss: 0.0057895402424037465
Episode Reward: 34.0
Step 884 (1275215) @ Episode 2458/10000, loss: 0.0102277807891368877
Episode Reward: 22.0
Step 884 (1276099) @ Episode 2459/10000, loss: 0.0107862809672951763
Episode Reward: 16.0
Step 1275 (1277374) @ Episode 2460/10000, loss: 0.0026997651439160117
Epis

[2017-11-04 20:04:54,748] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video002500.mp4


Step 805 (1316043) @ Episode 2501/10000, loss: 0.0246911570429801944
Episode Reward: 17.0
Step 1377 (1317420) @ Episode 2502/10000, loss: 0.0045362901873886585
Episode Reward: 44.0
Step 1518 (1318938) @ Episode 2503/10000, loss: 0.0107721192762255674
Episode Reward: 43.0
Step 893 (1319831) @ Episode 2504/10000, loss: 0.0141349993646144875
Episode Reward: 21.0
Step 168 (1319999) @ Episode 2505/10000, loss: 0.0060716019943356516
 Copied model parameters to target network
Step 892 (1320723) @ Episode 2505/10000, loss: 0.0089004253968596467
Episode Reward: 20.0
Step 1267 (1321990) @ Episode 2506/10000, loss: 0.0128054479137063035
Episode Reward: 25.0
Step 1305 (1323295) @ Episode 2507/10000, loss: 0.1141275614500045875
Episode Reward: 37.0
Step 1651 (1324946) @ Episode 2508/10000, loss: 0.0057455725036561495
Episode Reward: 40.0
Step 846 (1325792) @ Episode 2509/10000, loss: 0.0067666624672710896
Episode Reward: 15.0
Step 876 (1326668) @ Episode 2510/10000, loss: 0.0068257870152592666
Epis

[2017-11-04 20:12:46,282] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video002550.mp4


Step 1162 (1369195) @ Episode 2551/10000, loss: 0.0197409689426422127
Episode Reward: 33.0
Step 804 (1369999) @ Episode 2552/10000, loss: 0.0084969298914074973
 Copied model parameters to target network
Step 1450 (1370645) @ Episode 2552/10000, loss: 0.0163201577961444854
Episode Reward: 43.0
Step 1625 (1372270) @ Episode 2553/10000, loss: 0.0038186910096555956
Episode Reward: 46.0
Step 1180 (1373450) @ Episode 2554/10000, loss: 0.0208310410380363465
Episode Reward: 27.0
Step 910 (1374360) @ Episode 2555/10000, loss: 0.0105513632297515877
Episode Reward: 15.0
Step 1203 (1375563) @ Episode 2556/10000, loss: 0.0040314863435924053
Episode Reward: 28.0
Step 1074 (1376637) @ Episode 2557/10000, loss: 0.0039543756283819675
Episode Reward: 21.0
Step 842 (1377479) @ Episode 2558/10000, loss: 0.0050622979179024785
Episode Reward: 18.0
Step 912 (1378391) @ Episode 2559/10000, loss: 0.0042117726989090446
Episode Reward: 17.0
Step 838 (1379229) @ Episode 2560/10000, loss: 0.0081407679244875948
Epi

[2017-11-04 20:20:18,614] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video002600.mp4


Step 1459 (1419943) @ Episode 2601/10000, loss: 0.0375502817332744667
Episode Reward: 36.0
Step 56 (1419999) @ Episode 2602/10000, loss: 0.0075926892459392555
 Copied model parameters to target network
Step 632 (1420575) @ Episode 2602/10000, loss: 0.0108084129169583327
Episode Reward: 9.0
Step 822 (1421397) @ Episode 2603/10000, loss: 0.0461382195353508744
Episode Reward: 15.0
Step 621 (1422018) @ Episode 2604/10000, loss: 0.0075518139638006694
Episode Reward: 10.0
Step 696 (1422714) @ Episode 2605/10000, loss: 0.0088661070913076423
Episode Reward: 11.0
Step 779 (1423493) @ Episode 2606/10000, loss: 0.0140488231554627425
Episode Reward: 14.0
Step 1112 (1424605) @ Episode 2607/10000, loss: 0.0124505739659070975
Episode Reward: 21.0
Step 1432 (1426037) @ Episode 2608/10000, loss: 0.0133586991578340536
Episode Reward: 33.0
Step 960 (1426997) @ Episode 2609/10000, loss: 0.0138920061290264136
Episode Reward: 15.0
Step 936 (1427933) @ Episode 2610/10000, loss: 0.0019269356271252036
Episode 

[2017-11-04 20:27:24,522] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video002650.mp4


Step 1135 (1466911) @ Episode 2651/10000, loss: 0.0070664295926690155
Episode Reward: 25.0
Step 1055 (1467966) @ Episode 2652/10000, loss: 0.2494766116142273477
Episode Reward: 28.0
Step 868 (1468834) @ Episode 2653/10000, loss: 0.0060186479240655945
Episode Reward: 20.0
Step 1038 (1469872) @ Episode 2654/10000, loss: 0.0156499966979026845
Episode Reward: 22.0
Step 127 (1469999) @ Episode 2655/10000, loss: 0.0054928120225667955
 Copied model parameters to target network
Step 541 (1470413) @ Episode 2655/10000, loss: 0.0154538433998823174
Episode Reward: 9.0
Step 992 (1471405) @ Episode 2656/10000, loss: 0.0105462921783328064
Episode Reward: 24.0
Step 1220 (1472625) @ Episode 2657/10000, loss: 0.0100831668823957443
Episode Reward: 25.0
Step 886 (1473511) @ Episode 2658/10000, loss: 0.0075574028305709364
Episode Reward: 22.0
Step 821 (1474332) @ Episode 2659/10000, loss: 0.0073693683370947843
Episode Reward: 13.0
Step 1014 (1475346) @ Episode 2660/10000, loss: 0.0196389667689800265
Episo

[2017-11-04 20:34:30,811] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video002700.mp4


Step 875 (1514321) @ Episode 2701/10000, loss: 0.0028097173199057588
Episode Reward: 13.0
Step 854 (1515175) @ Episode 2702/10000, loss: 0.0087194815278053285
Episode Reward: 12.0
Step 804 (1515979) @ Episode 2703/10000, loss: 0.0130918044596910483
Episode Reward: 19.0
Step 787 (1516766) @ Episode 2704/10000, loss: 0.0117772091180086142
Episode Reward: 13.0
Step 1399 (1518165) @ Episode 2705/10000, loss: 0.0084031205624341963
Episode Reward: 43.0
Step 1294 (1519459) @ Episode 2706/10000, loss: 0.0074804322794079786
Episode Reward: 29.0
Step 540 (1519999) @ Episode 2707/10000, loss: 0.0126155698671936995
 Copied model parameters to target network
Step 1008 (1520467) @ Episode 2707/10000, loss: 0.0165019650012254735
Episode Reward: 26.0
Step 850 (1521317) @ Episode 2708/10000, loss: 0.0058576110750436786
Episode Reward: 21.0
Step 1090 (1522407) @ Episode 2709/10000, loss: 0.0083439815789461144
Episode Reward: 35.0
Step 826 (1523233) @ Episode 2710/10000, loss: 0.0821950361132621862
Episo

[2017-11-04 20:41:16,027] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video002750.mp4


Step 1193 (1559384) @ Episode 2751/10000, loss: 0.0072018741630017762
Episode Reward: 20.0
Step 615 (1559999) @ Episode 2752/10000, loss: 0.0204139351844787642
 Copied model parameters to target network
Step 676 (1560060) @ Episode 2752/10000, loss: 0.0085146659985184678
Episode Reward: 11.0
Step 852 (1560912) @ Episode 2753/10000, loss: 0.0157315023243427285
Episode Reward: 14.0
Step 810 (1561722) @ Episode 2754/10000, loss: 0.0135010490193963055
Episode Reward: 13.0
Step 898 (1562620) @ Episode 2755/10000, loss: 0.0066626053303480155
Episode Reward: 21.0
Step 964 (1563584) @ Episode 2756/10000, loss: 0.0095291025936603555
Episode Reward: 20.0
Step 680 (1564264) @ Episode 2757/10000, loss: 0.0047726808115839966
Episode Reward: 12.0
Step 1226 (1565490) @ Episode 2758/10000, loss: 0.0290527045726776124
Episode Reward: 32.0
Step 790 (1566280) @ Episode 2759/10000, loss: 0.0103477416560053835
Episode Reward: 13.0
Step 796 (1567076) @ Episode 2760/10000, loss: 0.0067531075328588495
Episode

[2017-11-04 20:48:02,685] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video002800.mp4


Step 863 (1604067) @ Episode 2801/10000, loss: 0.0144864851608872415
Episode Reward: 14.0
Step 720 (1604787) @ Episode 2802/10000, loss: 0.0041745272465050226
Episode Reward: 16.0
Step 1078 (1605865) @ Episode 2803/10000, loss: 0.0305617302656173767
Episode Reward: 18.0
Step 693 (1606558) @ Episode 2804/10000, loss: 0.0191971436142921455
Episode Reward: 14.0
Step 679 (1607237) @ Episode 2805/10000, loss: 0.0209640543907880857
Episode Reward: 18.0
Step 714 (1607951) @ Episode 2806/10000, loss: 0.0171958971768617638
Episode Reward: 10.0
Step 1024 (1608975) @ Episode 2807/10000, loss: 0.0084794824942946435
Episode Reward: 24.0
Step 945 (1609920) @ Episode 2808/10000, loss: 0.0084272157400846485
Episode Reward: 25.0
Step 79 (1609999) @ Episode 2809/10000, loss: 0.0154870804399251945
 Copied model parameters to target network
Step 897 (1610817) @ Episode 2809/10000, loss: 0.0335934869945049334
Episode Reward: 23.0
Step 600 (1611417) @ Episode 2810/10000, loss: 0.0186586249619722377
Episode 

[2017-11-04 20:54:42,983] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video002850.mp4


Step 548 (1647842) @ Episode 2851/10000, loss: 0.0177977941930294044
Episode Reward: 9.0
Step 925 (1648767) @ Episode 2852/10000, loss: 0.0108764525502920155
Episode Reward: 21.0
Step 876 (1649643) @ Episode 2853/10000, loss: 0.0068866321817040445
Episode Reward: 14.0
Step 356 (1649999) @ Episode 2854/10000, loss: 0.0070807849988341334
 Copied model parameters to target network
Step 454 (1650097) @ Episode 2854/10000, loss: 0.0217068456113338474
Episode Reward: 8.0
Step 572 (1650669) @ Episode 2855/10000, loss: 0.0059720994904637343
Episode Reward: 11.0
Step 1302 (1651971) @ Episode 2856/10000, loss: 0.0340512394905090392
Episode Reward: 25.0
Step 1089 (1653060) @ Episode 2857/10000, loss: 0.0193376876413822175
Episode Reward: 25.0
Step 996 (1654056) @ Episode 2858/10000, loss: 0.0119326580315828327
Episode Reward: 18.0
Step 1250 (1655306) @ Episode 2859/10000, loss: 0.0065002380870282655
Episode Reward: 25.0
Step 1025 (1656331) @ Episode 2860/10000, loss: 0.0431950315833091746
Episode

[2017-11-04 21:01:16,182] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video002900.mp4


Step 868 (1691530) @ Episode 2901/10000, loss: 0.0078053716570138933
Episode Reward: 19.0
Step 745 (1692275) @ Episode 2902/10000, loss: 0.0046268505975604065
Episode Reward: 12.0
Step 974 (1693249) @ Episode 2903/10000, loss: 0.0050471508875489235
Episode Reward: 21.0
Step 870 (1694119) @ Episode 2904/10000, loss: 0.0062280292622745044
Episode Reward: 15.0
Step 1168 (1695287) @ Episode 2905/10000, loss: 0.0070625031366944314
Episode Reward: 25.0
Step 640 (1695927) @ Episode 2906/10000, loss: 0.0087744230404496225
Episode Reward: 9.0
Step 507 (1696434) @ Episode 2907/10000, loss: 0.0091300588101148685
Episode Reward: 8.0
Step 940 (1697374) @ Episode 2908/10000, loss: 0.0195672661066055325
Episode Reward: 20.0
Step 241 (1697615) @ Episode 2909/10000, loss: 0.0079507278278470048
Episode Reward: 2.0
Step 898 (1698513) @ Episode 2910/10000, loss: 0.0052260998636484156
Episode Reward: 15.0
Step 860 (1699373) @ Episode 2911/10000, loss: 0.0072829695418477066
Episode Reward: 19.0
Step 480 (16

[2017-11-04 21:07:54,496] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video002950.mp4


Step 920 (1735961) @ Episode 2951/10000, loss: 0.0065040448680520065
Episode Reward: 20.0
Step 940 (1736901) @ Episode 2952/10000, loss: 0.0068919835612177855
Episode Reward: 14.0
Step 1228 (1738129) @ Episode 2953/10000, loss: 0.0050592049956321725
Episode Reward: 26.0
Step 912 (1739041) @ Episode 2954/10000, loss: 0.0065119015052914625
Episode Reward: 14.0
Step 941 (1739982) @ Episode 2955/10000, loss: 0.0093725323677062995
Episode Reward: 17.0
Step 17 (1739999) @ Episode 2956/10000, loss: 0.0294118840247392655
 Copied model parameters to target network
Step 623 (1740605) @ Episode 2956/10000, loss: 0.0047012325376272264
Episode Reward: 9.0
Step 886 (1741491) @ Episode 2957/10000, loss: 0.0037385411560535435
Episode Reward: 15.0
Step 936 (1742427) @ Episode 2958/10000, loss: 0.0062839724123477946
Episode Reward: 20.0
Step 762 (1743189) @ Episode 2959/10000, loss: 0.0093524213880300527
Episode Reward: 12.0
Step 1342 (1744531) @ Episode 2960/10000, loss: 0.0135654918849468233
Episode R

[2017-11-04 21:14:38,550] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video003000.mp4


Step 416 (1779999) @ Episode 3001/10000, loss: 0.0048481570556759834
 Copied model parameters to target network
Step 536 (1780119) @ Episode 3001/10000, loss: 0.0034292074851691723
Episode Reward: 6.0
Step 885 (1781004) @ Episode 3002/10000, loss: 0.0048756292089819915
Episode Reward: 16.0
Step 1068 (1782072) @ Episode 3003/10000, loss: 0.0076806298457086096
Episode Reward: 20.0
Step 1000 (1783072) @ Episode 3004/10000, loss: 0.010624892078340054
Episode Reward: 25.0
Step 998 (1784070) @ Episode 3005/10000, loss: 0.0301970224827528215
Episode Reward: 22.0
Step 789 (1784859) @ Episode 3006/10000, loss: 0.0098440479487180717
Episode Reward: 19.0
Step 755 (1785614) @ Episode 3007/10000, loss: 0.0120356259867548945
Episode Reward: 15.0
Step 715 (1786329) @ Episode 3008/10000, loss: 0.0038687502965331078
Episode Reward: 15.0
Step 766 (1787095) @ Episode 3009/10000, loss: 0.0030133714899420745
Episode Reward: 12.0
Step 1060 (1788155) @ Episode 3010/10000, loss: 0.0035551064647734165
Episode 

[2017-11-04 21:21:11,300] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video003050.mp4


Step 708 (1823907) @ Episode 3051/10000, loss: 0.0056410273537039766
Episode Reward: 11.0
Step 531 (1824438) @ Episode 3052/10000, loss: 0.0129920681938529014
Episode Reward: 8.0
Step 872 (1825310) @ Episode 3053/10000, loss: 0.0072009107097983363
Episode Reward: 19.0
Step 841 (1826151) @ Episode 3054/10000, loss: 0.0029760203324258327
Episode Reward: 14.0
Step 935 (1827086) @ Episode 3055/10000, loss: 0.0055606961250305183
Episode Reward: 22.0
Step 836 (1827922) @ Episode 3056/10000, loss: 0.0075172092765569695
Episode Reward: 23.0
Step 1093 (1829015) @ Episode 3057/10000, loss: 0.0089164692908525475
Episode Reward: 26.0
Step 984 (1829999) @ Episode 3058/10000, loss: 0.0023370634298771626
 Copied model parameters to target network
Step 1177 (1830192) @ Episode 3058/10000, loss: 0.0029230425134301186
Episode Reward: 25.0
Step 928 (1831120) @ Episode 3059/10000, loss: 0.0019133910536766052
Episode Reward: 23.0
Step 996 (1832116) @ Episode 3060/10000, loss: 0.0035385852679610252
Episode 

[2017-11-04 21:27:42,239] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video003100.mp4


Step 927 (1867371) @ Episode 3101/10000, loss: 0.0072556696832180022
Episode Reward: 15.0
Step 726 (1868097) @ Episode 3102/10000, loss: 0.0086071928963065154
Episode Reward: 12.0
Step 688 (1868785) @ Episode 3103/10000, loss: 0.0137255312874913227
Episode Reward: 12.0
Step 904 (1869689) @ Episode 3104/10000, loss: 0.0088085513561964046
Episode Reward: 19.0
Step 310 (1869999) @ Episode 3105/10000, loss: 0.0041758017614483838
 Copied model parameters to target network
Step 675 (1870364) @ Episode 3105/10000, loss: 0.0029210783541202545
Episode Reward: 10.0
Step 972 (1871336) @ Episode 3106/10000, loss: 0.0045102741569280624
Episode Reward: 18.0
Step 1005 (1872341) @ Episode 3107/10000, loss: 0.0138813909143209465
Episode Reward: 23.0
Step 683 (1873024) @ Episode 3108/10000, loss: 0.0029499083757400513
Episode Reward: 9.0
Step 667 (1873691) @ Episode 3109/10000, loss: 0.0063094408251345165
Episode Reward: 9.0
Step 817 (1874508) @ Episode 3110/10000, loss: 0.0050728642381727695
Episode Re

[2017-11-04 21:34:08,866] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video003150.mp4


Step 572 (1909674) @ Episode 3151/10000, loss: 0.0083194598555564885
Episode Reward: 9.0
Step 325 (1909999) @ Episode 3152/10000, loss: 0.0036574043333530426
 Copied model parameters to target network
Step 1007 (1910681) @ Episode 3152/10000, loss: 0.0049212984740734128
Episode Reward: 22.0
Step 753 (1911434) @ Episode 3153/10000, loss: 0.0090847080573439675
Episode Reward: 13.0
Step 775 (1912209) @ Episode 3154/10000, loss: 0.0027395505458116533
Episode Reward: 12.0
Step 929 (1913138) @ Episode 3155/10000, loss: 0.0051909298636019237
Episode Reward: 16.0
Step 780 (1913918) @ Episode 3156/10000, loss: 0.0024678937625139953
Episode Reward: 13.0
Step 1096 (1915014) @ Episode 3157/10000, loss: 0.0032856571488082415
Episode Reward: 21.0
Step 1315 (1916329) @ Episode 3158/10000, loss: 0.0063203480094671251
Episode Reward: 28.0
Step 692 (1917021) @ Episode 3159/10000, loss: 0.0096722040325403215
Episode Reward: 12.0
Step 1090 (1918111) @ Episode 3160/10000, loss: 0.0018590614199638367
Episod

[2017-11-04 21:40:44,781] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video003200.mp4


Step 881 (1953350) @ Episode 3201/10000, loss: 0.0159671530127525336
Episode Reward: 15.0
Step 1196 (1954546) @ Episode 3202/10000, loss: 0.0038633942604064947
Episode Reward: 30.0
Step 869 (1955415) @ Episode 3203/10000, loss: 0.0072121964767575265
Episode Reward: 15.0
Step 764 (1956179) @ Episode 3204/10000, loss: 0.0154866296797990835
Episode Reward: 17.0
Step 805 (1956984) @ Episode 3205/10000, loss: 0.0068464321084320545
Episode Reward: 16.0
Step 1076 (1958060) @ Episode 3206/10000, loss: 0.0129385823383927356
Episode Reward: 25.0
Step 468 (1958528) @ Episode 3207/10000, loss: 0.0475919917225837745
Episode Reward: 9.0
Step 831 (1959359) @ Episode 3208/10000, loss: 0.0118349650874733927
Episode Reward: 25.0
Step 640 (1959999) @ Episode 3209/10000, loss: 0.0040689678862690926
 Copied model parameters to target network
Step 1496 (1960855) @ Episode 3209/10000, loss: 0.0214739739894866947
Episode Reward: 32.0
Step 687 (1961542) @ Episode 3210/10000, loss: 0.0102768540382385255
Episode

[2017-11-04 21:47:15,454] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video003250.mp4


Step 1069 (1996939) @ Episode 3251/10000, loss: 0.0074214264750480655
Episode Reward: 18.0
Step 576 (1997515) @ Episode 3252/10000, loss: 0.0216148104518651963
Episode Reward: 9.0
Step 981 (1998496) @ Episode 3253/10000, loss: 0.0062355571426451216
Episode Reward: 28.0
Step 1268 (1999764) @ Episode 3254/10000, loss: 0.0072502172552049165
Episode Reward: 27.0
Step 235 (1999999) @ Episode 3255/10000, loss: 0.0090202772989869127
 Copied model parameters to target network
Step 876 (2000640) @ Episode 3255/10000, loss: 0.0062454454600811005
Episode Reward: 19.0
Step 927 (2001567) @ Episode 3256/10000, loss: 0.0037592432927340275
Episode Reward: 16.0
Step 1023 (2002590) @ Episode 3257/10000, loss: 0.0076982146129012115
Episode Reward: 18.0
Step 871 (2003461) @ Episode 3258/10000, loss: 0.0056158509105443954
Episode Reward: 15.0
Step 942 (2004403) @ Episode 3259/10000, loss: 0.0038058252539485693
Episode Reward: 20.0
Step 1064 (2005467) @ Episode 3260/10000, loss: 0.0126179168000817387
Episod

[2017-11-04 21:53:53,959] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video003300.mp4


Step 321 (2039999) @ Episode 3301/10000, loss: 0.0029571116901934147
 Copied model parameters to target network
Step 678 (2040356) @ Episode 3301/10000, loss: 0.0061438763514161114
Episode Reward: 11.0
Step 934 (2041290) @ Episode 3302/10000, loss: 0.0214844327419996264
Episode Reward: 23.0
Step 669 (2041959) @ Episode 3303/10000, loss: 0.0027914545498788357
Episode Reward: 13.0
Step 937 (2042896) @ Episode 3304/10000, loss: 0.0088263824582099917
Episode Reward: 22.0
Step 1163 (2044059) @ Episode 3305/10000, loss: 0.0042083780281245716
Episode Reward: 28.0
Step 781 (2044840) @ Episode 3306/10000, loss: 0.0052173933945596224
Episode Reward: 27.0
Step 1017 (2045857) @ Episode 3307/10000, loss: 0.0071236896328628064
Episode Reward: 22.0
Step 1230 (2047087) @ Episode 3308/10000, loss: 0.0079220179468393335
Episode Reward: 25.0
Step 1074 (2048161) @ Episode 3309/10000, loss: 0.0095526576042175385
Episode Reward: 21.0
Step 1050 (2049211) @ Episode 3310/10000, loss: 0.0014210459776222706
Epis

[2017-11-04 22:01:17,175] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video003350.mp4


Step 830 (2089308) @ Episode 3351/10000, loss: 0.0104139540344476755
Episode Reward: 14.0
Step 691 (2089999) @ Episode 3352/10000, loss: 0.0062800124287605286
 Copied model parameters to target network
Step 813 (2090121) @ Episode 3352/10000, loss: 0.0092752315104007727
Episode Reward: 15.0
Step 1341 (2091462) @ Episode 3353/10000, loss: 0.0039705475792288784
Episode Reward: 35.0
Step 584 (2092046) @ Episode 3354/10000, loss: 0.0047994004562497146
Episode Reward: 9.0
Step 779 (2092825) @ Episode 3355/10000, loss: 0.0414960086345672645
Episode Reward: 14.0
Step 841 (2093666) @ Episode 3356/10000, loss: 0.0263900961726903935
Episode Reward: 12.0
Step 1238 (2094904) @ Episode 3357/10000, loss: 0.0036142652388662164
Episode Reward: 25.0
Step 700 (2095604) @ Episode 3358/10000, loss: 0.0047409208491444595
Episode Reward: 10.0
Step 872 (2096476) @ Episode 3359/10000, loss: 0.0045513920485973365
Episode Reward: 15.0
Step 773 (2097249) @ Episode 3360/10000, loss: 0.0136106405407190325
Episode 

[2017-11-04 22:07:58,906] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video003400.mp4


Step 1001 (2134076) @ Episode 3401/10000, loss: 0.005217570811510086
Episode Reward: 20.0
Step 925 (2135001) @ Episode 3402/10000, loss: 0.0036585964262485504
Episode Reward: 19.0
Step 1390 (2136391) @ Episode 3403/10000, loss: 0.0044708652421832085
Episode Reward: 33.0
Step 945 (2137336) @ Episode 3404/10000, loss: 0.0076701072975993164
Episode Reward: 19.0
Step 829 (2138165) @ Episode 3405/10000, loss: 0.0053251856006681925
Episode Reward: 13.0
Step 1267 (2139432) @ Episode 3406/10000, loss: 0.0085579985752701764
Episode Reward: 21.0
Step 567 (2139999) @ Episode 3407/10000, loss: 0.0077065704390406616
 Copied model parameters to target network
Step 670 (2140102) @ Episode 3407/10000, loss: 0.0038708637002855543
Episode Reward: 15.0
Step 1224 (2141326) @ Episode 3408/10000, loss: 0.0041999812237918384
Episode Reward: 25.0
Step 1231 (2142557) @ Episode 3409/10000, loss: 0.0056093242019414917
Episode Reward: 23.0
Step 722 (2143279) @ Episode 3410/10000, loss: 0.0087381843477487566
Episo

[2017-11-04 22:14:46,232] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video003450.mp4


Step 1309 (2179626) @ Episode 3451/10000, loss: 0.0038947830908000473
Episode Reward: 30.0
Step 373 (2179999) @ Episode 3452/10000, loss: 0.0095097860321402556
 Copied model parameters to target network
Step 1413 (2181039) @ Episode 3452/10000, loss: 0.0049258237704634677
Episode Reward: 37.0
Step 866 (2181905) @ Episode 3453/10000, loss: 0.0077695241197943692
Episode Reward: 18.0
Step 880 (2182785) @ Episode 3454/10000, loss: 0.0093947276473045353
Episode Reward: 15.0
Step 766 (2183551) @ Episode 3455/10000, loss: 0.0046170465648174295
Episode Reward: 10.0
Step 962 (2184513) @ Episode 3456/10000, loss: 0.0134949255734682084
Episode Reward: 16.0
Step 821 (2185334) @ Episode 3457/10000, loss: 0.0150997042655944825
Episode Reward: 13.0
Step 1115 (2186449) @ Episode 3458/10000, loss: 0.0079496446996927263
Episode Reward: 26.0
Step 796 (2187245) @ Episode 3459/10000, loss: 0.0057534137740731245
Episode Reward: 19.0
Step 668 (2187913) @ Episode 3460/10000, loss: 0.0060651563107967385
Episod

[2017-11-04 22:21:14,654] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video003500.mp4


Step 1027 (2221833) @ Episode 3501/10000, loss: 0.0041066044941544536
Episode Reward: 23.0
Step 786 (2222619) @ Episode 3502/10000, loss: 0.0062259496189653875
Episode Reward: 18.0
Step 1291 (2223910) @ Episode 3503/10000, loss: 0.0022298051044344975
Episode Reward: 20.0
Step 642 (2224552) @ Episode 3504/10000, loss: 0.0033983620814979076
Episode Reward: 9.0
Step 1214 (2225766) @ Episode 3505/10000, loss: 0.0055659632198512555
Episode Reward: 27.0
Step 1181 (2226947) @ Episode 3506/10000, loss: 0.0923325493931770355
Episode Reward: 30.0
Step 837 (2227784) @ Episode 3507/10000, loss: 0.0018772834446281195
Episode Reward: 22.0
Step 728 (2228512) @ Episode 3508/10000, loss: 0.0051669557578861718
Episode Reward: 14.0
Step 1101 (2229613) @ Episode 3509/10000, loss: 0.0048462059348821646
Episode Reward: 24.0
Step 386 (2229999) @ Episode 3510/10000, loss: 0.0079190377146005635
 Copied model parameters to target network
Step 677 (2230290) @ Episode 3510/10000, loss: 0.0033201735932379965
Episo

[2017-11-04 22:27:59,210] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video003550.mp4


Step 625 (2265905) @ Episode 3551/10000, loss: 0.0086180679500103715
Episode Reward: 8.0
Step 1013 (2266918) @ Episode 3552/10000, loss: 0.0082675646990537645
Episode Reward: 31.0
Step 768 (2267686) @ Episode 3553/10000, loss: 0.0062003834173083305
Episode Reward: 14.0
Step 798 (2268484) @ Episode 3554/10000, loss: 0.0146057633683085445
Episode Reward: 14.0
Step 510 (2268994) @ Episode 3555/10000, loss: 0.0034407763741910458
Episode Reward: 6.0
Step 1005 (2269999) @ Episode 3556/10000, loss: 0.011004636064171791
 Copied model parameters to target network
Step 1249 (2270243) @ Episode 3556/10000, loss: 0.0024376832880079746
Episode Reward: 27.0
Step 810 (2271053) @ Episode 3557/10000, loss: 0.0149131752550601965
Episode Reward: 18.0
Step 770 (2271823) @ Episode 3558/10000, loss: 0.0060185650363564496
Episode Reward: 15.0
Step 946 (2272769) @ Episode 3559/10000, loss: 0.0047074612230062485
Episode Reward: 21.0
Step 1053 (2273822) @ Episode 3560/10000, loss: 0.0046272603794932365
Episode 

[2017-11-04 22:34:57,044] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video003600.mp4


Step 969 (2312785) @ Episode 3601/10000, loss: 0.0039388593286275866
Episode Reward: 26.0
Step 622 (2313407) @ Episode 3602/10000, loss: 0.0129813170060515453
Episode Reward: 11.0
Step 788 (2314195) @ Episode 3603/10000, loss: 0.0034447226207703352
Episode Reward: 20.0
Step 564 (2314759) @ Episode 3604/10000, loss: 0.0054498147219419485
Episode Reward: 7.0
Step 1448 (2316207) @ Episode 3605/10000, loss: 0.0044047534465789795
Episode Reward: 24.0
Step 1194 (2317401) @ Episode 3606/10000, loss: 0.0036614201962947845
Episode Reward: 20.0
Step 609 (2318010) @ Episode 3607/10000, loss: 0.0111525012180209165
Episode Reward: 8.0
Step 838 (2318848) @ Episode 3608/10000, loss: 0.0030771275050938137
Episode Reward: 14.0
Step 941 (2319789) @ Episode 3609/10000, loss: 0.0053087119013071065
Episode Reward: 16.0
Step 210 (2319999) @ Episode 3610/10000, loss: 0.0087680574506521225
 Copied model parameters to target network
Step 950 (2320739) @ Episode 3610/10000, loss: 0.0159120075404644514
Episode R

[2017-11-04 22:41:42,652] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video003650.mp4


Step 1022 (2357585) @ Episode 3651/10000, loss: 0.0099806329235434534
Episode Reward: 18.0
Step 679 (2358264) @ Episode 3652/10000, loss: 0.0041180332191288477
Episode Reward: 23.0
Step 1522 (2359786) @ Episode 3653/10000, loss: 0.0055464478209614754
Episode Reward: 36.0
Step 213 (2359999) @ Episode 3654/10000, loss: 0.0027823164127767086
 Copied model parameters to target network
Step 1506 (2361292) @ Episode 3654/10000, loss: 0.0115285310894250875
Episode Reward: 32.0
Step 905 (2362197) @ Episode 3655/10000, loss: 0.0080298958346247675
Episode Reward: 14.0
Step 1199 (2363396) @ Episode 3656/10000, loss: 0.0045375693589448934
Episode Reward: 27.0
Step 1210 (2364606) @ Episode 3657/10000, loss: 0.0024529765360057354
Episode Reward: 25.0
Step 999 (2365605) @ Episode 3658/10000, loss: 0.0073962789028882983
Episode Reward: 20.0
Step 988 (2366593) @ Episode 3659/10000, loss: 0.0060328375548124315
Episode Reward: 20.0
Step 1139 (2367732) @ Episode 3660/10000, loss: 0.0029084472917020325
Epi

[2017-11-04 22:49:24,482] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video003700.mp4


Step 1095 (2409073) @ Episode 3701/10000, loss: 0.0041571687906980515
Episode Reward: 23.0
Step 926 (2409999) @ Episode 3702/10000, loss: 0.0096454815939068843
 Copied model parameters to target network
Step 963 (2410036) @ Episode 3702/10000, loss: 0.0028505166992545135
Episode Reward: 21.0
Step 1061 (2411097) @ Episode 3703/10000, loss: 0.0050529381260275844
Episode Reward: 20.0
Step 724 (2411821) @ Episode 3704/10000, loss: 0.0090387053787708286
Episode Reward: 25.0
Step 1495 (2413316) @ Episode 3705/10000, loss: 0.0029056514613330364
Episode Reward: 40.0
Step 1312 (2414628) @ Episode 3706/10000, loss: 0.0053353845141828062
Episode Reward: 27.0
Step 1289 (2415917) @ Episode 3707/10000, loss: 0.0095447525382041936
Episode Reward: 30.0
Step 995 (2416912) @ Episode 3708/10000, loss: 0.0031645623967051506
Episode Reward: 21.0
Step 872 (2417784) @ Episode 3709/10000, loss: 0.0052982205525040632
Episode Reward: 19.0
Step 975 (2418759) @ Episode 3710/10000, loss: 0.0025360337458550935
Epis

[2017-11-04 22:57:07,461] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video003750.mp4


Step 768 (2459999) @ Episode 3751/10000, loss: 0.0049056624993681917
 Copied model parameters to target network
Step 991 (2460222) @ Episode 3751/10000, loss: 0.0463016815483570115
Episode Reward: 24.0
Step 1321 (2461543) @ Episode 3752/10000, loss: 0.0168913155794143684
Episode Reward: 27.0
Step 921 (2462464) @ Episode 3753/10000, loss: 0.0036743560340255595
Episode Reward: 18.0
Step 1387 (2463851) @ Episode 3754/10000, loss: 0.0082584088668227236
Episode Reward: 36.0
Step 1141 (2464992) @ Episode 3755/10000, loss: 0.0116741368547081955
Episode Reward: 25.0
Step 1080 (2466072) @ Episode 3756/10000, loss: 0.0099801728501915935
Episode Reward: 29.0
Step 924 (2466996) @ Episode 3757/10000, loss: 0.0080991275608539586
Episode Reward: 16.0
Step 1239 (2468235) @ Episode 3758/10000, loss: 0.0073043415322899825
Episode Reward: 26.0
Step 1406 (2469641) @ Episode 3759/10000, loss: 0.0087204333394765855
Episode Reward: 30.0
Step 358 (2469999) @ Episode 3760/10000, loss: 0.0073854662477970125
 Co

[2017-11-04 23:04:32,288] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video003800.mp4


Step 845 (2509020) @ Episode 3801/10000, loss: 0.0056404932402074345
Episode Reward: 18.0
Step 861 (2509881) @ Episode 3802/10000, loss: 0.0068033938296139246
Episode Reward: 14.0
Step 118 (2509999) @ Episode 3803/10000, loss: 0.0048647476360201836
 Copied model parameters to target network
Step 973 (2510854) @ Episode 3803/10000, loss: 0.0056703700684010985
Episode Reward: 21.0
Step 1105 (2511959) @ Episode 3804/10000, loss: 0.0082243783399462785
Episode Reward: 22.0
Step 1317 (2513276) @ Episode 3805/10000, loss: 0.0025155090261250734
Episode Reward: 26.0
Step 1124 (2514400) @ Episode 3806/10000, loss: 0.0090188272297382355
Episode Reward: 22.0
Step 1126 (2515526) @ Episode 3807/10000, loss: 0.0295910220593214046
Episode Reward: 19.0
Step 1140 (2516666) @ Episode 3808/10000, loss: 0.0110694672912359247
Episode Reward: 28.0
Step 1023 (2517689) @ Episode 3809/10000, loss: 0.0034080811310559515
Episode Reward: 20.0
Step 446 (2518135) @ Episode 3810/10000, loss: 0.0054033193737268455
Epi

[2017-11-04 23:11:48,607] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video003850.mp4


Step 1018 (2557358) @ Episode 3851/10000, loss: 0.0055718235671520237
Episode Reward: 29.0
Step 969 (2558327) @ Episode 3852/10000, loss: 0.0233659110963344573
Episode Reward: 24.0
Step 1199 (2559526) @ Episode 3853/10000, loss: 0.0421963632106781157
Episode Reward: 26.0
Step 473 (2559999) @ Episode 3854/10000, loss: 0.0028666029684245586
 Copied model parameters to target network
Step 725 (2560251) @ Episode 3854/10000, loss: 0.0045820972882211213
Episode Reward: 11.0
Step 895 (2561146) @ Episode 3855/10000, loss: 0.0083382036536932913
Episode Reward: 23.0
Step 545 (2561691) @ Episode 3856/10000, loss: 0.0059192078188061716
Episode Reward: 8.0
Step 730 (2562421) @ Episode 3857/10000, loss: 0.0062800943851470955
Episode Reward: 12.0
Step 852 (2563273) @ Episode 3858/10000, loss: 0.0030073397792875767
Episode Reward: 11.0
Step 1206 (2564479) @ Episode 3859/10000, loss: 0.0150855826213955887
Episode Reward: 17.0
Step 1578 (2566057) @ Episode 3860/10000, loss: 0.0085098128765821463
Episod

[2017-11-04 23:18:57,233] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video003900.mp4


Step 1352 (2604708) @ Episode 3901/10000, loss: 0.0079751303419470795
Episode Reward: 33.0
Step 1035 (2605743) @ Episode 3902/10000, loss: 0.0020810174755752087
Episode Reward: 24.0
Step 1230 (2606973) @ Episode 3903/10000, loss: 0.0057910326868295676
Episode Reward: 31.0
Step 1689 (2608662) @ Episode 3904/10000, loss: 0.0754794627428054814
Episode Reward: 47.0
Step 773 (2609435) @ Episode 3905/10000, loss: 0.0163189768791198736
Episode Reward: 17.0
Step 564 (2609999) @ Episode 3906/10000, loss: 0.0028109543491154913
 Copied model parameters to target network
Step 977 (2610412) @ Episode 3906/10000, loss: 0.0124367196112871173
Episode Reward: 19.0
Step 1111 (2611523) @ Episode 3907/10000, loss: 0.0082467384636402134
Episode Reward: 22.0
Step 747 (2612270) @ Episode 3908/10000, loss: 0.0051909266039729124
Episode Reward: 19.0
Step 592 (2612862) @ Episode 3909/10000, loss: 0.0080769546329975137
Episode Reward: 10.0
Step 1004 (2613866) @ Episode 3910/10000, loss: 0.0092839645221829415
Epi

[2017-11-04 23:26:23,967] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video003950.mp4


Step 936 (2653356) @ Episode 3951/10000, loss: 0.0047600725665688515
Episode Reward: 16.0
Step 1133 (2654489) @ Episode 3952/10000, loss: 0.0059916656464338375
Episode Reward: 29.0
Step 1157 (2655646) @ Episode 3953/10000, loss: 0.0041461884975433357
Episode Reward: 30.0
Step 1276 (2656922) @ Episode 3954/10000, loss: 0.0039778500795364385
Episode Reward: 29.0
Step 1222 (2658144) @ Episode 3955/10000, loss: 0.0065138922072947025
Episode Reward: 18.0
Step 711 (2658855) @ Episode 3956/10000, loss: 0.0066333203576505187
Episode Reward: 14.0
Step 938 (2659793) @ Episode 3957/10000, loss: 0.0085784466937184336
Episode Reward: 15.0
Step 206 (2659999) @ Episode 3958/10000, loss: 0.0049524749629199505
 Copied model parameters to target network
Step 843 (2660636) @ Episode 3958/10000, loss: 0.0096740871667861945
Episode Reward: 14.0
Step 690 (2661326) @ Episode 3959/10000, loss: 0.0074196169152855878
Episode Reward: 13.0
Step 868 (2662194) @ Episode 3960/10000, loss: 0.0044904039241373544
Episo

[2017-11-04 23:34:13,690] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video004000.mp4


Step 713 (2705152) @ Episode 4001/10000, loss: 0.0119478106498718265
Episode Reward: 15.0
Step 765 (2705917) @ Episode 4002/10000, loss: 0.0128281544893980033
Episode Reward: 14.0
Step 770 (2706687) @ Episode 4003/10000, loss: 0.0229922924190759665
Episode Reward: 15.0
Step 1284 (2707971) @ Episode 4004/10000, loss: 0.0075572468340396886
Episode Reward: 25.0
Step 1317 (2709288) @ Episode 4005/10000, loss: 0.0075698643922805793
Episode Reward: 26.0
Step 711 (2709999) @ Episode 4006/10000, loss: 0.0253122970461845486
 Copied model parameters to target network
Step 728 (2710016) @ Episode 4006/10000, loss: 0.0029948907904326916
Episode Reward: 12.0
Step 1011 (2711027) @ Episode 4007/10000, loss: 0.0093925446271896362
Episode Reward: 20.0
Step 1125 (2712152) @ Episode 4008/10000, loss: 0.0039376378990709785
Episode Reward: 29.0
Step 992 (2713144) @ Episode 4009/10000, loss: 0.0211144350469112493
Episode Reward: 14.0
Step 753 (2713897) @ Episode 4010/10000, loss: 0.0049317963421344766
Episo

[2017-11-04 23:40:46,113] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video004050.mp4


Step 676 (2748130) @ Episode 4051/10000, loss: 0.0028472696430981165
Episode Reward: 11.0
Step 961 (2749091) @ Episode 4052/10000, loss: 0.0070286481641232976
Episode Reward: 19.0
Step 852 (2749943) @ Episode 4053/10000, loss: 0.0049339169636368754
Episode Reward: 14.0
Step 56 (2749999) @ Episode 4054/10000, loss: 0.0195538457483053248
 Copied model parameters to target network
Step 1059 (2751002) @ Episode 4054/10000, loss: 0.0047551658935844905
Episode Reward: 21.0
Step 998 (2752000) @ Episode 4055/10000, loss: 0.0056117251515388497
Episode Reward: 18.0
Step 562 (2752562) @ Episode 4056/10000, loss: 0.0113815851509571085
Episode Reward: 9.0
Step 1334 (2753896) @ Episode 4057/10000, loss: 0.0047502936795353896
Episode Reward: 33.0
Step 818 (2754714) @ Episode 4058/10000, loss: 0.0096859242767095577
Episode Reward: 13.0
Step 942 (2755656) @ Episode 4059/10000, loss: 0.0086049120873212815
Episode Reward: 19.0
Step 1207 (2756863) @ Episode 4060/10000, loss: 0.0081306742504239085
Episode 

[2017-11-04 23:48:06,609] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video004100.mp4


Step 912 (2797035) @ Episode 4101/10000, loss: 0.0106944208964705475
Episode Reward: 18.0
Step 747 (2797782) @ Episode 4102/10000, loss: 0.0072958124801516535
Episode Reward: 18.0
Step 697 (2798479) @ Episode 4103/10000, loss: 0.0054861176759004596
Episode Reward: 14.0
Step 683 (2799162) @ Episode 4104/10000, loss: 0.0038774483837187296
Episode Reward: 14.0
Step 837 (2799999) @ Episode 4105/10000, loss: 0.0046816682443022737
 Copied model parameters to target network
Step 840 (2800002) @ Episode 4105/10000, loss: 0.0037915417924523354
Episode Reward: 15.0
Step 586 (2800588) @ Episode 4106/10000, loss: 0.0027743065729737284
Episode Reward: 9.0
Step 482 (2801070) @ Episode 4107/10000, loss: 0.0046471073292195815
Episode Reward: 7.0
Step 1296 (2802366) @ Episode 4108/10000, loss: 0.0107557065784931186
Episode Reward: 28.0
Step 986 (2803352) @ Episode 4109/10000, loss: 0.0087998555973172194
Episode Reward: 14.0
Step 538 (2803890) @ Episode 4110/10000, loss: 0.0050619696266949185
Episode Re

[2017-11-04 23:54:26,224] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video004150.mp4


Step 868 (2838547) @ Episode 4151/10000, loss: 0.0055150827392935755
Episode Reward: 16.0
Step 812 (2839359) @ Episode 4152/10000, loss: 0.0114527456462383275
Episode Reward: 15.0
Step 640 (2839999) @ Episode 4153/10000, loss: 0.0049302112311124832
 Copied model parameters to target network
Step 737 (2840096) @ Episode 4153/10000, loss: 0.0057383487001061445
Episode Reward: 12.0
Step 827 (2840923) @ Episode 4154/10000, loss: 0.0057196905836462975
Episode Reward: 17.0
Step 694 (2841617) @ Episode 4155/10000, loss: 0.0463741347193717968
Episode Reward: 11.0
Step 992 (2842609) @ Episode 4156/10000, loss: 0.0135514363646507264
Episode Reward: 21.0
Step 982 (2843591) @ Episode 4157/10000, loss: 0.0083414614200592045
Episode Reward: 21.0
Step 880 (2844471) @ Episode 4158/10000, loss: 0.0203036479651927955
Episode Reward: 20.0
Step 508 (2844979) @ Episode 4159/10000, loss: 0.0043074721470475273
Episode Reward: 8.0
Step 1151 (2846130) @ Episode 4160/10000, loss: 0.0145377349108457575
Episode R

[2017-11-05 00:01:05,352] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video004200.mp4


Step 947 (2882813) @ Episode 4201/10000, loss: 0.0066718780435621742
Episode Reward: 16.0
Step 961 (2883774) @ Episode 4202/10000, loss: 0.0077313417568802836
Episode Reward: 22.0
Step 677 (2884451) @ Episode 4203/10000, loss: 0.1214024946093559317
Episode Reward: 10.0
Step 698 (2885149) @ Episode 4204/10000, loss: 0.0046627791598439224
Episode Reward: 13.0
Step 907 (2886056) @ Episode 4205/10000, loss: 0.0032351464033126836
Episode Reward: 22.0
Step 752 (2886808) @ Episode 4206/10000, loss: 0.0094775678589940077
Episode Reward: 13.0
Step 633 (2887441) @ Episode 4207/10000, loss: 0.0056731244549155235
Episode Reward: 11.0
Step 555 (2887996) @ Episode 4208/10000, loss: 0.0037733013741672042
Episode Reward: 9.0
Step 552 (2888548) @ Episode 4209/10000, loss: 0.0109168495982885365
Episode Reward: 7.0
Step 1287 (2889835) @ Episode 4210/10000, loss: 0.0039848382584750655
Episode Reward: 29.0
Step 164 (2889999) @ Episode 4211/10000, loss: 0.0043803281150758274
 Copied model parameters to targ

[2017-11-05 00:08:19,499] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video004250.mp4


Step 184 (2929999) @ Episode 4251/10000, loss: 0.0327764749526977547
 Copied model parameters to target network
Step 1468 (2931283) @ Episode 4251/10000, loss: 0.0865284353494644268
Episode Reward: 43.0
Step 829 (2932112) @ Episode 4252/10000, loss: 0.0109792277216911328
Episode Reward: 14.0
Step 732 (2932844) @ Episode 4253/10000, loss: 0.0136492094025015836
Episode Reward: 15.0
Step 576 (2933420) @ Episode 4254/10000, loss: 0.0025953124277293685
Episode Reward: 8.0
Step 1219 (2934639) @ Episode 4255/10000, loss: 0.0026163749862462282
Episode Reward: 35.0
Step 1259 (2935898) @ Episode 4256/10000, loss: 0.0039999126456677915
Episode Reward: 24.0
Step 1201 (2937099) @ Episode 4257/10000, loss: 0.0045733395963907245
Episode Reward: 27.0
Step 907 (2938006) @ Episode 4258/10000, loss: 0.0071564549580216415
Episode Reward: 23.0
Step 1051 (2939057) @ Episode 4259/10000, loss: 0.0060557280667126183
Episode Reward: 24.0
Step 942 (2939999) @ Episode 4260/10000, loss: 0.0063729314133524895
 Copi

[2017-11-05 00:15:14,676] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video004300.mp4


Step 662 (2975920) @ Episode 4301/10000, loss: 0.0069519216194748888
Episode Reward: 10.0
Step 1027 (2976947) @ Episode 4302/10000, loss: 0.0046449052169919015
Episode Reward: 20.0
Step 622 (2977569) @ Episode 4303/10000, loss: 0.0081717781722545625
Episode Reward: 12.0
Step 832 (2978401) @ Episode 4304/10000, loss: 0.1644851416349411565
Episode Reward: 22.0
Step 919 (2979320) @ Episode 4305/10000, loss: 0.0041511491872370245
Episode Reward: 20.0
Step 679 (2979999) @ Episode 4306/10000, loss: 0.0101522821933031082
 Copied model parameters to target network
Step 1035 (2980355) @ Episode 4306/10000, loss: 0.0071360412985086443
Episode Reward: 21.0
Step 757 (2981112) @ Episode 4307/10000, loss: 0.0052183927036821843
Episode Reward: 11.0
Step 838 (2981950) @ Episode 4308/10000, loss: 0.0031731347553431988
Episode Reward: 12.0
Step 685 (2982635) @ Episode 4309/10000, loss: 0.0064557278528809554
Episode Reward: 12.0
Step 1425 (2984060) @ Episode 4310/10000, loss: 0.0059472341090440755
Episod

[2017-11-05 00:22:05,437] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video004350.mp4


Step 632 (3020940) @ Episode 4351/10000, loss: 0.0071354615502059462
Episode Reward: 9.0
Step 543 (3021483) @ Episode 4352/10000, loss: 0.0047277957201004037
Episode Reward: 8.0
Step 1239 (3022722) @ Episode 4353/10000, loss: 0.0081134065985679635
Episode Reward: 27.0
Step 927 (3023649) @ Episode 4354/10000, loss: 0.0112232407554984146
Episode Reward: 17.0
Step 1128 (3024777) @ Episode 4355/10000, loss: 0.0109114469960331926
Episode Reward: 19.0
Step 1384 (3026161) @ Episode 4356/10000, loss: 0.0037200555671006443
Episode Reward: 26.0
Step 654 (3026815) @ Episode 4357/10000, loss: 0.0242052879184484485
Episode Reward: 14.0
Step 793 (3027608) @ Episode 4358/10000, loss: 0.0092623345553874974
Episode Reward: 14.0
Step 915 (3028523) @ Episode 4359/10000, loss: 0.0038855210877954965
Episode Reward: 16.0
Step 895 (3029418) @ Episode 4360/10000, loss: 0.0126545606181025526
Episode Reward: 18.0
Step 581 (3029999) @ Episode 4361/10000, loss: 0.0031660175882279873
 Copied model parameters to ta

[2017-11-05 00:29:18,959] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video004400.mp4


Step 886 (3068312) @ Episode 4401/10000, loss: 0.0072049824520945556
Episode Reward: 14.0
Step 1075 (3069387) @ Episode 4402/10000, loss: 0.0062614353373646744
Episode Reward: 22.0
Step 447 (3069834) @ Episode 4403/10000, loss: 0.0053820968605577953
Episode Reward: 6.0
Step 165 (3069999) @ Episode 4404/10000, loss: 0.0164051372557878548
 Copied model parameters to target network
Step 1191 (3071025) @ Episode 4404/10000, loss: 0.0107612740248441785
Episode Reward: 28.0
Step 922 (3071947) @ Episode 4405/10000, loss: 0.0027933889068663124
Episode Reward: 16.0
Step 895 (3072842) @ Episode 4406/10000, loss: 0.0097898617386817936
Episode Reward: 18.0
Step 987 (3073829) @ Episode 4407/10000, loss: 0.0036593698896467686
Episode Reward: 25.0
Step 1203 (3075032) @ Episode 4408/10000, loss: 0.0039973277598619463
Episode Reward: 22.0
Step 856 (3075888) @ Episode 4409/10000, loss: 0.0137908235192298896
Episode Reward: 14.0
Step 1078 (3076966) @ Episode 4410/10000, loss: 0.0028469446115195755
Episod

[2017-11-05 00:36:19,087] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video004450.mp4


Step 668 (3113866) @ Episode 4451/10000, loss: 0.0050742039456963547
Episode Reward: 11.0
Step 557 (3114423) @ Episode 4452/10000, loss: 0.0036111830268055285
Episode Reward: 8.0
Step 710 (3115133) @ Episode 4453/10000, loss: 0.0032271030358970165
Episode Reward: 18.0
Step 602 (3115735) @ Episode 4454/10000, loss: 0.0369162298738956453
Episode Reward: 11.0
Step 871 (3116606) @ Episode 4455/10000, loss: 0.0101516554132103922
Episode Reward: 14.0
Step 466 (3117072) @ Episode 4456/10000, loss: 0.0181560255587101047
Episode Reward: 7.0
Step 1045 (3118117) @ Episode 4457/10000, loss: 0.0031378250569105155
Episode Reward: 21.0
Step 718 (3118835) @ Episode 4458/10000, loss: 0.0054811863228678788
Episode Reward: 23.0
Step 791 (3119626) @ Episode 4459/10000, loss: 0.0086852144449949267
Episode Reward: 18.0
Step 373 (3119999) @ Episode 4460/10000, loss: 0.0105005847290158275
 Copied model parameters to target network
Step 696 (3120322) @ Episode 4460/10000, loss: 0.0015097993891686201
Episode Re

[2017-11-05 00:42:48,039] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video004500.mp4


Step 890 (3156184) @ Episode 4501/10000, loss: 0.0037877373397350315
Episode Reward: 15.0
Step 1428 (3157612) @ Episode 4502/10000, loss: 0.0085489936172962194
Episode Reward: 30.0
Step 805 (3158417) @ Episode 4503/10000, loss: 0.0065747769549489025
Episode Reward: 16.0
Step 1106 (3159523) @ Episode 4504/10000, loss: 0.0037599888164550066
Episode Reward: 18.0
Step 476 (3159999) @ Episode 4505/10000, loss: 0.0065967985428869725
 Copied model parameters to target network
Step 586 (3160109) @ Episode 4505/10000, loss: 0.0047121183015406136
Episode Reward: 9.0
Step 714 (3160823) @ Episode 4506/10000, loss: 0.0037704727146774533
Episode Reward: 10.0
Step 755 (3161578) @ Episode 4507/10000, loss: 0.0072699873708188534
Episode Reward: 12.0
Step 1090 (3162668) @ Episode 4508/10000, loss: 0.0035210489295423036
Episode Reward: 24.0
Step 755 (3163423) @ Episode 4509/10000, loss: 0.0029100179672241215
Episode Reward: 12.0
Step 945 (3164368) @ Episode 4510/10000, loss: 0.0102842543274164285
Episode

[2017-11-05 00:49:21,896] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video004550.mp4


Step 746 (3198815) @ Episode 4551/10000, loss: 0.0040406500920653345
Episode Reward: 10.0
Step 757 (3199572) @ Episode 4552/10000, loss: 0.0017781867645680904
Episode Reward: 11.0
Step 427 (3199999) @ Episode 4553/10000, loss: 0.0046573672443628313
 Copied model parameters to target network
Step 845 (3200417) @ Episode 4553/10000, loss: 0.0048586772754788446
Episode Reward: 24.0
Step 772 (3201189) @ Episode 4554/10000, loss: 0.0040827877819538127
Episode Reward: 13.0
Step 800 (3201989) @ Episode 4555/10000, loss: 0.0057769492268562323
Episode Reward: 10.0
Step 889 (3202878) @ Episode 4556/10000, loss: 0.0045190542005002524
Episode Reward: 14.0
Step 1031 (3203909) @ Episode 4557/10000, loss: 0.0045210518874228045
Episode Reward: 21.0
Step 1055 (3204964) @ Episode 4558/10000, loss: 0.0047396896407008173
Episode Reward: 20.0
Step 682 (3205646) @ Episode 4559/10000, loss: 0.0071044210344552994
Episode Reward: 12.0
Step 917 (3206563) @ Episode 4560/10000, loss: 0.0074238115921616554
Episode

[2017-11-05 00:56:12,667] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video004600.mp4


Step 546 (3243241) @ Episode 4601/10000, loss: 0.0026261648163199425
Episode Reward: 8.0
Step 864 (3244105) @ Episode 4602/10000, loss: 0.1121790930628776664
Episode Reward: 11.0
Step 600 (3244705) @ Episode 4603/10000, loss: 0.0077686472795903685
Episode Reward: 7.0
Step 970 (3245675) @ Episode 4604/10000, loss: 0.0082212733104825025
Episode Reward: 15.0
Step 685 (3246360) @ Episode 4605/10000, loss: 0.0041526891291141516
Episode Reward: 11.0
Step 559 (3246919) @ Episode 4606/10000, loss: 0.0032960220705717872
Episode Reward: 8.0
Step 1107 (3248026) @ Episode 4607/10000, loss: 0.0045260712504386933
Episode Reward: 24.0
Step 787 (3248813) @ Episode 4608/10000, loss: 0.0076571637764573178
Episode Reward: 13.0
Step 764 (3249577) @ Episode 4609/10000, loss: 0.0035341051407158375
Episode Reward: 12.0
Step 422 (3249999) @ Episode 4610/10000, loss: 0.0029641110450029373
 Copied model parameters to target network
Step 1232 (3250809) @ Episode 4610/10000, loss: 0.0038998313248157514
Episode Re

[2017-11-05 01:03:01,510] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video004650.mp4


Step 917 (3288227) @ Episode 4651/10000, loss: 0.0126978028565645227
Episode Reward: 15.0
Step 690 (3288917) @ Episode 4652/10000, loss: 0.0057542305439710624
Episode Reward: 12.0
Step 1082 (3289999) @ Episode 4653/10000, loss: 0.0047280164435505872
 Copied model parameters to target network
Step 1329 (3290246) @ Episode 4653/10000, loss: 0.0043719066306948664
Episode Reward: 27.0
Step 852 (3291098) @ Episode 4654/10000, loss: 0.0035820775665342808
Episode Reward: 19.0
Step 1033 (3292131) @ Episode 4655/10000, loss: 0.0098064821213483815
Episode Reward: 19.0
Step 869 (3293000) @ Episode 4656/10000, loss: 0.0105135701596736987
Episode Reward: 17.0
Step 741 (3293741) @ Episode 4657/10000, loss: 0.0053770220838487153
Episode Reward: 12.0
Step 432 (3294173) @ Episode 4658/10000, loss: 0.0049831909127533445
Episode Reward: 5.0
Step 1066 (3295239) @ Episode 4659/10000, loss: 0.0015734690241515636
Episode Reward: 23.0
Step 846 (3296085) @ Episode 4660/10000, loss: 0.0049627870321273865
Episod

[2017-11-05 01:09:42,552] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video004700.mp4


Step 944 (3331996) @ Episode 4701/10000, loss: 0.0856983438134193424
Episode Reward: 20.0
Step 1211 (3333207) @ Episode 4702/10000, loss: 0.0078174285590648655
Episode Reward: 26.0
Step 853 (3334060) @ Episode 4703/10000, loss: 0.0043853898532688626
Episode Reward: 18.0
Step 1405 (3335465) @ Episode 4704/10000, loss: 0.0059275575913488865
Episode Reward: 31.0
Step 602 (3336067) @ Episode 4705/10000, loss: 0.0092843966558575635
Episode Reward: 10.0
Step 748 (3336815) @ Episode 4706/10000, loss: 0.0208736117929220224
Episode Reward: 12.0
Step 659 (3337474) @ Episode 4707/10000, loss: 0.0054217232391238216
Episode Reward: 11.0
Step 923 (3338397) @ Episode 4708/10000, loss: 0.0064464099705219274
Episode Reward: 19.0
Step 830 (3339227) @ Episode 4709/10000, loss: 0.0966412425041198763
Episode Reward: 20.0
Step 772 (3339999) @ Episode 4710/10000, loss: 0.0058375522494316133
 Copied model parameters to target network
Step 1465 (3340692) @ Episode 4710/10000, loss: 0.0084984879940748214
Episod

[2017-11-05 01:16:41,912] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video004750.mp4


Step 714 (3377572) @ Episode 4751/10000, loss: 0.0136211225762963365
Episode Reward: 11.0
Step 1244 (3378816) @ Episode 4752/10000, loss: 0.0053493352606892595
Episode Reward: 30.0
Step 611 (3379427) @ Episode 4753/10000, loss: 0.0096414070576429375
Episode Reward: 17.0
Step 572 (3379999) @ Episode 4754/10000, loss: 0.0036446489393711095
 Copied model parameters to target network
Step 1154 (3380581) @ Episode 4754/10000, loss: 0.0032158941030502326
Episode Reward: 28.0
Step 1125 (3381706) @ Episode 4755/10000, loss: 0.0094891907647252084
Episode Reward: 26.0
Step 1067 (3382773) @ Episode 4756/10000, loss: 0.0045582824386656284
Episode Reward: 33.0
Step 803 (3383576) @ Episode 4757/10000, loss: 0.0040214699693024163
Episode Reward: 18.0
Step 785 (3384361) @ Episode 4758/10000, loss: 0.1109403818845748944
Episode Reward: 14.0
Step 1010 (3385371) @ Episode 4759/10000, loss: 0.0047047901898622513
Episode Reward: 23.0
Step 732 (3386103) @ Episode 4760/10000, loss: 0.0033607003279030323
Epis

[2017-11-05 01:23:12,233] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video004800.mp4


Step 758 (3419953) @ Episode 4801/10000, loss: 0.0079093929380178455
Episode Reward: 12.0
Step 46 (3419999) @ Episode 4802/10000, loss: 0.0058533409610390667
 Copied model parameters to target network
Step 935 (3420888) @ Episode 4802/10000, loss: 0.0057163243182003545
Episode Reward: 19.0
Step 855 (3421743) @ Episode 4803/10000, loss: 0.0076935868710279465
Episode Reward: 17.0
Step 1046 (3422789) @ Episode 4804/10000, loss: 0.0062126349657773975
Episode Reward: 18.0
Step 824 (3423613) @ Episode 4805/10000, loss: 0.0325613692402839666
Episode Reward: 14.0
Step 1095 (3424708) @ Episode 4806/10000, loss: 0.0039358944632112985
Episode Reward: 18.0
Step 1231 (3425939) @ Episode 4807/10000, loss: 0.0055104284547269343
Episode Reward: 33.0
Step 873 (3426812) @ Episode 4808/10000, loss: 0.0086929593235254295
Episode Reward: 25.0
Step 770 (3427582) @ Episode 4809/10000, loss: 0.0057324441149830825
Episode Reward: 22.0
Step 877 (3428459) @ Episode 4810/10000, loss: 0.0055967690423130995
Episode

[2017-11-05 01:29:51,408] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video004850.mp4


Step 1530 (3464108) @ Episode 4851/10000, loss: 0.0027279183268547063
Episode Reward: 38.0
Step 962 (3465070) @ Episode 4852/10000, loss: 0.0040634805336594586
Episode Reward: 27.0
Step 815 (3465885) @ Episode 4853/10000, loss: 0.0026025734841823586
Episode Reward: 18.0
Step 920 (3466805) @ Episode 4854/10000, loss: 0.0074241757392883354
Episode Reward: 14.0
Step 1449 (3468254) @ Episode 4855/10000, loss: 0.0065182615071535116
Episode Reward: 30.0
Step 1036 (3469290) @ Episode 4856/10000, loss: 0.0087145371362566956
Episode Reward: 21.0
Step 709 (3469999) @ Episode 4857/10000, loss: 0.0032121108379215325
 Copied model parameters to target network
Step 1101 (3470391) @ Episode 4857/10000, loss: 0.0049596750177443038
Episode Reward: 26.0
Step 663 (3471054) @ Episode 4858/10000, loss: 0.0045052464120090015
Episode Reward: 13.0
Step 638 (3471692) @ Episode 4859/10000, loss: 0.0132223386317491535
Episode Reward: 10.0
Step 907 (3472599) @ Episode 4860/10000, loss: 0.0034774558153003454
Episo

[2017-11-05 01:37:18,614] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video004900.mp4


Step 940 (3512155) @ Episode 4901/10000, loss: 0.0124096889048814774
Episode Reward: 14.0
Step 645 (3512800) @ Episode 4902/10000, loss: 0.0067613767459988593
Episode Reward: 13.0
Step 972 (3513772) @ Episode 4903/10000, loss: 0.0037252353504300117
Episode Reward: 18.0
Step 1242 (3515014) @ Episode 4904/10000, loss: 0.0098483096808195117
Episode Reward: 30.0
Step 960 (3515974) @ Episode 4905/10000, loss: 0.0030966354534029967
Episode Reward: 23.0
Step 1132 (3517106) @ Episode 4906/10000, loss: 0.0038012354634702206
Episode Reward: 25.0
Step 858 (3517964) @ Episode 4907/10000, loss: 0.0032436405308544636
Episode Reward: 22.0
Step 1128 (3519092) @ Episode 4908/10000, loss: 0.0027447282336652286
Episode Reward: 27.0
Step 639 (3519731) @ Episode 4909/10000, loss: 0.0064612627029418945
Episode Reward: 10.0
Step 268 (3519999) @ Episode 4910/10000, loss: 0.0110574569553136834
 Copied model parameters to target network
Step 1214 (3520945) @ Episode 4910/10000, loss: 0.0055650305002927785
Episo

[2017-11-05 01:44:44,030] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video004950.mp4


Step 1411 (3561587) @ Episode 4951/10000, loss: 0.0037722894921898844
Episode Reward: 32.0
Step 1003 (3562590) @ Episode 4952/10000, loss: 0.011667391285300255
Episode Reward: 19.0
Step 1008 (3563598) @ Episode 4953/10000, loss: 0.0052613853476941586
Episode Reward: 21.0
Step 1617 (3565215) @ Episode 4954/10000, loss: 0.0041079991497099473
Episode Reward: 42.0
Step 1600 (3566815) @ Episode 4955/10000, loss: 0.0024711419828236103
Episode Reward: 46.0
Step 1101 (3567916) @ Episode 4956/10000, loss: 0.0104466043412685423
Episode Reward: 27.0
Step 678 (3568594) @ Episode 4957/10000, loss: 0.0142748905345797545
Episode Reward: 12.0
Step 1045 (3569639) @ Episode 4958/10000, loss: 0.0047679007984697825
Episode Reward: 26.0
Step 360 (3569999) @ Episode 4959/10000, loss: 0.0264683868736028673
 Copied model parameters to target network
Step 959 (3570598) @ Episode 4959/10000, loss: 0.0039914809167385154
Episode Reward: 18.0
Step 920 (3571518) @ Episode 4960/10000, loss: 0.0122329909354448325
Epi

[2017-11-05 01:52:12,786] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video005000.mp4


Step 754 (3609751) @ Episode 5001/10000, loss: 0.0074268230237066754
Episode Reward: 16.0
Step 248 (3609999) @ Episode 5002/10000, loss: 0.0042750081047415736
 Copied model parameters to target network
Step 899 (3610650) @ Episode 5002/10000, loss: 0.0070833940990269186
Episode Reward: 19.0
Step 920 (3611570) @ Episode 5003/10000, loss: 0.0074823815375566484
Episode Reward: 15.0
Step 872 (3612442) @ Episode 5004/10000, loss: 0.0032549514435231686
Episode Reward: 17.0
Step 1314 (3613756) @ Episode 5005/10000, loss: 0.0056465510278940265
Episode Reward: 28.0
Step 816 (3614572) @ Episode 5006/10000, loss: 0.0096681490540504464
Episode Reward: 14.0
Step 1026 (3615598) @ Episode 5007/10000, loss: 0.0034931940026581287
Episode Reward: 19.0
Step 959 (3616557) @ Episode 5008/10000, loss: 0.0058498829603195195
Episode Reward: 18.0
Step 1161 (3617718) @ Episode 5009/10000, loss: 0.0041315993294119835
Episode Reward: 31.0
Step 685 (3618403) @ Episode 5010/10000, loss: 0.0075337509624660015
Episod

[2017-11-05 01:59:32,588] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video005050.mp4


Step 1241 (3658001) @ Episode 5051/10000, loss: 0.0030776574276387693
Episode Reward: 27.0
Step 407 (3658408) @ Episode 5052/10000, loss: 0.0064678983762860325
Episode Reward: 3.0
Step 1056 (3659464) @ Episode 5053/10000, loss: 0.0048323660157620915
Episode Reward: 19.0
Step 535 (3659999) @ Episode 5054/10000, loss: 0.0046084020286798487
 Copied model parameters to target network
Step 936 (3660400) @ Episode 5054/10000, loss: 0.0263595040887594222
Episode Reward: 16.0
Step 860 (3661260) @ Episode 5055/10000, loss: 0.0059431772679090516
Episode Reward: 17.0
Step 1330 (3662590) @ Episode 5056/10000, loss: 0.0076811565086245546
Episode Reward: 25.0
Step 866 (3663456) @ Episode 5057/10000, loss: 0.0103743467479944234
Episode Reward: 20.0
Step 901 (3664357) @ Episode 5058/10000, loss: 0.0083541981875896455
Episode Reward: 19.0
Step 1096 (3665453) @ Episode 5059/10000, loss: 0.0124233737587928775
Episode Reward: 30.0
Step 945 (3666398) @ Episode 5060/10000, loss: 0.0126612372696399694
Episod

[2017-11-05 02:06:53,963] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video005100.mp4


Step 758 (3705859) @ Episode 5101/10000, loss: 0.0057040196843445373
Episode Reward: 13.0
Step 1171 (3707030) @ Episode 5102/10000, loss: 0.0042838240042328835
Episode Reward: 22.0
Step 1137 (3708167) @ Episode 5103/10000, loss: 0.0062057031318545345
Episode Reward: 33.0
Step 944 (3709111) @ Episode 5104/10000, loss: 0.0068149613216519367
Episode Reward: 21.0
Step 888 (3709999) @ Episode 5105/10000, loss: 0.0049479734152555475
 Copied model parameters to target network
Step 1134 (3710245) @ Episode 5105/10000, loss: 0.0065275849774479875
Episode Reward: 24.0
Step 780 (3711025) @ Episode 5106/10000, loss: 0.0065417485311627394
Episode Reward: 13.0
Step 991 (3712016) @ Episode 5107/10000, loss: 0.0031167806591838632
Episode Reward: 19.0
Step 921 (3712937) @ Episode 5108/10000, loss: 0.0145417284220457085
Episode Reward: 19.0
Step 921 (3713858) @ Episode 5109/10000, loss: 0.0058174808509647856
Episode Reward: 15.0
Step 825 (3714683) @ Episode 5110/10000, loss: 0.0046426095068454743
Episod

[2017-11-05 02:14:02,094] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video005150.mp4


Step 862 (3752885) @ Episode 5151/10000, loss: 0.0095980446785688476
Episode Reward: 22.0
Step 1076 (3753961) @ Episode 5152/10000, loss: 0.0042181639000773437
Episode Reward: 34.0
Step 1446 (3755407) @ Episode 5153/10000, loss: 0.0031025847420096397
Episode Reward: 35.0
Step 515 (3755922) @ Episode 5154/10000, loss: 0.0166304614394903255
Episode Reward: 8.0
Step 1009 (3756931) @ Episode 5155/10000, loss: 0.0080030225217342387
Episode Reward: 25.0
Step 1213 (3758144) @ Episode 5156/10000, loss: 0.0036768619902431965
Episode Reward: 32.0
Step 891 (3759035) @ Episode 5157/10000, loss: 0.0325465723872184755
Episode Reward: 19.0
Step 828 (3759863) @ Episode 5158/10000, loss: 0.0107528679072856964
Episode Reward: 17.0
Step 136 (3759999) @ Episode 5159/10000, loss: 0.0023369155824184418
 Copied model parameters to target network
Step 1060 (3760923) @ Episode 5159/10000, loss: 0.0034258496016263963
Episode Reward: 23.0
Step 438 (3761361) @ Episode 5160/10000, loss: 0.0053683705627918244
Episo

[2017-11-05 02:21:02,428] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video005200.mp4


Step 1035 (3798691) @ Episode 5201/10000, loss: 0.0024959535803645855
Episode Reward: 19.0
Step 888 (3799579) @ Episode 5202/10000, loss: 0.0053439657203853136
Episode Reward: 19.0
Step 420 (3799999) @ Episode 5203/10000, loss: 0.0030149640515446663
 Copied model parameters to target network
Step 968 (3800547) @ Episode 5203/10000, loss: 0.0226519517600536354
Episode Reward: 25.0
Step 870 (3801417) @ Episode 5204/10000, loss: 0.0104493508115410822
Episode Reward: 17.0
Step 755 (3802172) @ Episode 5205/10000, loss: 0.0059305555187165745
Episode Reward: 11.0
Step 907 (3803079) @ Episode 5206/10000, loss: 0.0031339928973466164
Episode Reward: 23.0
Step 902 (3803981) @ Episode 5207/10000, loss: 0.0090850237756967544
Episode Reward: 16.0
Step 409 (3804390) @ Episode 5208/10000, loss: 0.0064667426049709325
Episode Reward: 5.0
Step 932 (3805322) @ Episode 5209/10000, loss: 0.0030733775347471237
Episode Reward: 28.0
Step 753 (3806075) @ Episode 5210/10000, loss: 0.0066201025620102887
Episode R

[2017-11-05 02:28:11,341] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video005250.mp4


Step 704 (3845445) @ Episode 5251/10000, loss: 0.0070788888260722165
Episode Reward: 14.0
Step 514 (3845959) @ Episode 5252/10000, loss: 0.0046291979961097244
Episode Reward: 10.0
Step 631 (3846590) @ Episode 5253/10000, loss: 0.0051542762666940694
Episode Reward: 10.0
Step 955 (3847545) @ Episode 5254/10000, loss: 0.0073549253866076475
Episode Reward: 14.0
Step 1111 (3848656) @ Episode 5255/10000, loss: 0.0207237266004085545
Episode Reward: 21.0
Step 475 (3849131) @ Episode 5256/10000, loss: 0.0082999160513281824
Episode Reward: 7.0
Step 868 (3849999) @ Episode 5257/10000, loss: 0.0077492133714258673
 Copied model parameters to target network
Step 1116 (3850247) @ Episode 5257/10000, loss: 0.0194765180349349988
Episode Reward: 22.0
Step 909 (3851156) @ Episode 5258/10000, loss: 0.0122771225869655615
Episode Reward: 19.0
Step 1353 (3852509) @ Episode 5259/10000, loss: 0.0071510178968310363
Episode Reward: 49.0
Step 526 (3853035) @ Episode 5260/10000, loss: 0.0022637136280536657
Episode

[2017-11-05 02:34:45,977] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video005300.mp4


Step 1365 (3888812) @ Episode 5301/10000, loss: 0.0118780145421624184
Episode Reward: 44.0
Step 1006 (3889818) @ Episode 5302/10000, loss: 0.0100554572418332175
Episode Reward: 21.0
Step 181 (3889999) @ Episode 5303/10000, loss: 0.0035717007704079158
 Copied model parameters to target network
Step 628 (3890446) @ Episode 5303/10000, loss: 0.0030445882584899664
Episode Reward: 16.0
Step 913 (3891359) @ Episode 5304/10000, loss: 0.0056231245398521426
Episode Reward: 18.0
Step 796 (3892155) @ Episode 5305/10000, loss: 0.0114003419876098635
Episode Reward: 14.0
Step 1144 (3893299) @ Episode 5306/10000, loss: 0.0071263662539422515
Episode Reward: 30.0
Step 1711 (3895010) @ Episode 5307/10000, loss: 0.0157013460993766858
Episode Reward: 35.0
Step 1046 (3896056) @ Episode 5308/10000, loss: 0.0064951376989483836
Episode Reward: 22.0
Step 859 (3896915) @ Episode 5309/10000, loss: 0.0023048743605613718
Episode Reward: 14.0
Step 646 (3897561) @ Episode 5310/10000, loss: 0.0029691169038414955
Epis

[2017-11-05 02:41:37,606] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video005350.mp4


Step 729 (3933002) @ Episode 5351/10000, loss: 0.0050270697101950645
Episode Reward: 12.0
Step 1116 (3934118) @ Episode 5352/10000, loss: 0.0120861176401376725
Episode Reward: 20.0
Step 871 (3934989) @ Episode 5353/10000, loss: 0.0082487398758530625
Episode Reward: 22.0
Step 815 (3935804) @ Episode 5354/10000, loss: 0.0044122235849499737
Episode Reward: 15.0
Step 874 (3936678) @ Episode 5355/10000, loss: 0.0041626105085015337
Episode Reward: 15.0
Step 673 (3937351) @ Episode 5356/10000, loss: 0.0064616678282618525
Episode Reward: 12.0
Step 970 (3938321) @ Episode 5357/10000, loss: 0.0062870783731341362
Episode Reward: 15.0
Step 1158 (3939479) @ Episode 5358/10000, loss: 0.0076029477640986445
Episode Reward: 21.0
Step 520 (3939999) @ Episode 5359/10000, loss: 0.0066256234422326093
 Copied model parameters to target network
Step 1116 (3940595) @ Episode 5359/10000, loss: 0.0022290660999715334
Episode Reward: 21.0
Step 988 (3941583) @ Episode 5360/10000, loss: 0.0031833469402045013
Episod

[2017-11-05 02:48:24,320] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video005400.mp4


Step 930 (3977163) @ Episode 5401/10000, loss: 0.0065893647260963927
Episode Reward: 20.0
Step 824 (3977987) @ Episode 5402/10000, loss: 0.0099755162373185165
Episode Reward: 14.0
Step 953 (3978940) @ Episode 5403/10000, loss: 0.0020123920403420925
Episode Reward: 16.0
Step 987 (3979927) @ Episode 5404/10000, loss: 0.0076381498947739638
Episode Reward: 24.0
Step 72 (3979999) @ Episode 5405/10000, loss: 0.0078515652567148256
 Copied model parameters to target network
Step 889 (3980816) @ Episode 5405/10000, loss: 0.0155056230723857882
Episode Reward: 26.0
Step 790 (3981606) @ Episode 5406/10000, loss: 0.0045356038026511675
Episode Reward: 12.0
Step 1423 (3983029) @ Episode 5407/10000, loss: 0.0093366093933582393
Episode Reward: 36.0
Step 968 (3983997) @ Episode 5408/10000, loss: 0.0134377731010317875
Episode Reward: 20.0
Step 480 (3984477) @ Episode 5409/10000, loss: 0.0022824869956821203
Episode Reward: 7.0
Step 680 (3985157) @ Episode 5410/10000, loss: 0.0029110063333064318
Episode Re

[2017-11-05 02:54:53,337] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video005450.mp4


Step 644 (4018976) @ Episode 5451/10000, loss: 0.0032406440004706383
Episode Reward: 9.0
Step 543 (4019519) @ Episode 5452/10000, loss: 0.0062524504028260714
Episode Reward: 9.0
Step 480 (4019999) @ Episode 5453/10000, loss: 0.0487636066973209435
 Copied model parameters to target network
Step 705 (4020224) @ Episode 5453/10000, loss: 0.0016233341302722692
Episode Reward: 14.0
Step 829 (4021053) @ Episode 5454/10000, loss: 0.0026154723018407825
Episode Reward: 15.0
Step 916 (4021969) @ Episode 5455/10000, loss: 0.0019842712208628654
Episode Reward: 21.0
Step 617 (4022586) @ Episode 5456/10000, loss: 0.0091768288984894755
Episode Reward: 10.0
Step 759 (4023345) @ Episode 5457/10000, loss: 0.0020847404375672343
Episode Reward: 25.0
Step 956 (4024301) @ Episode 5458/10000, loss: 0.0034558004699647427
Episode Reward: 21.0
Step 520 (4024821) @ Episode 5459/10000, loss: 0.0056828921660780917
Episode Reward: 8.0
Step 1226 (4026047) @ Episode 5460/10000, loss: 0.0071272784844040877
Episode Rew

[2017-11-05 03:01:06,189] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video005500.mp4


Step 1027 (4059474) @ Episode 5501/10000, loss: 0.0024860822595655923
Episode Reward: 15.0
Step 525 (4059999) @ Episode 5502/10000, loss: 0.0057395990006625653
 Copied model parameters to target network
Step 870 (4060344) @ Episode 5502/10000, loss: 0.0100746713578701026
Episode Reward: 13.0
Step 792 (4061136) @ Episode 5503/10000, loss: 0.0042018312960863113
Episode Reward: 20.0
Step 1065 (4062201) @ Episode 5504/10000, loss: 0.0038977819494903088
Episode Reward: 18.0
Step 513 (4062714) @ Episode 5505/10000, loss: 0.0281266383826732647
Episode Reward: 8.0
Step 1048 (4063762) @ Episode 5506/10000, loss: 0.0039689084514975555
Episode Reward: 21.0
Step 827 (4064589) @ Episode 5507/10000, loss: 0.0096416752785444268
Episode Reward: 13.0
Step 682 (4065271) @ Episode 5508/10000, loss: 0.0084910197183489895
Episode Reward: 11.0
Step 988 (4066259) @ Episode 5509/10000, loss: 0.0042981775477528575
Episode Reward: 22.0
Step 954 (4067213) @ Episode 5510/10000, loss: 0.0086957998573780065
Episode

[2017-11-05 03:08:10,775] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video005550.mp4


Step 701 (4105078) @ Episode 5551/10000, loss: 0.0047802333720028416
Episode Reward: 10.0
Step 1088 (4106166) @ Episode 5552/10000, loss: 0.0196563787758350374
Episode Reward: 21.0
Step 971 (4107137) @ Episode 5553/10000, loss: 0.0063399667851626875
Episode Reward: 13.0
Step 1477 (4108614) @ Episode 5554/10000, loss: 0.0054037082009017472
Episode Reward: 40.0
Step 803 (4109417) @ Episode 5555/10000, loss: 0.0034585283137857914
Episode Reward: 21.0
Step 582 (4109999) @ Episode 5556/10000, loss: 0.0069510312750935555
 Copied model parameters to target network
Step 610 (4110027) @ Episode 5556/10000, loss: 0.0038312366232275963
Episode Reward: 11.0
Step 1347 (4111374) @ Episode 5557/10000, loss: 0.0038143261335790157
Episode Reward: 32.0
Step 827 (4112201) @ Episode 5558/10000, loss: 0.0023335828445851803
Episode Reward: 13.0
Step 881 (4113082) @ Episode 5559/10000, loss: 0.0020925442222505813
Episode Reward: 14.0
Step 852 (4113934) @ Episode 5560/10000, loss: 0.0056953160092234613
Episod

[2017-11-05 03:15:20,223] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video005600.mp4


Step 1264 (4152305) @ Episode 5601/10000, loss: 0.0034763126168400058
Episode Reward: 29.0
Step 906 (4153211) @ Episode 5602/10000, loss: 0.0022200874518603086
Episode Reward: 21.0
Step 726 (4153937) @ Episode 5603/10000, loss: 0.0061139604076743134
Episode Reward: 22.0
Step 1163 (4155100) @ Episode 5604/10000, loss: 0.0026024293620139367
Episode Reward: 27.0
Step 985 (4156085) @ Episode 5605/10000, loss: 0.0124650932848453525
Episode Reward: 20.0
Step 636 (4156721) @ Episode 5606/10000, loss: 0.0146703645586967474
Episode Reward: 9.0
Step 1079 (4157800) @ Episode 5607/10000, loss: 0.0048609375953674325
Episode Reward: 17.0
Step 1224 (4159024) @ Episode 5608/10000, loss: 0.0048779547214508065
Episode Reward: 32.0
Step 737 (4159761) @ Episode 5609/10000, loss: 0.0040205791592597967
Episode Reward: 14.0
Step 238 (4159999) @ Episode 5610/10000, loss: 0.0069912713952362545
 Copied model parameters to target network
Step 1028 (4160789) @ Episode 5610/10000, loss: 0.0016390988603234293
Episo

[2017-11-05 03:22:09,048] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video005650.mp4


Step 503 (4195889) @ Episode 5651/10000, loss: 0.0028422756586223843
Episode Reward: 7.0
Step 904 (4196793) @ Episode 5652/10000, loss: 0.0035269483923912054
Episode Reward: 15.0
Step 739 (4197532) @ Episode 5653/10000, loss: 0.0039132819510996347
Episode Reward: 15.0
Step 541 (4198073) @ Episode 5654/10000, loss: 0.0061548482626676561
Episode Reward: 8.0
Step 736 (4198809) @ Episode 5655/10000, loss: 0.0105708371847867978
Episode Reward: 11.0
Step 899 (4199708) @ Episode 5656/10000, loss: 0.0063527021557092673
Episode Reward: 25.0
Step 291 (4199999) @ Episode 5657/10000, loss: 0.0154483662918210037
 Copied model parameters to target network
Step 547 (4200255) @ Episode 5657/10000, loss: 0.0085570532828569415
Episode Reward: 8.0
Step 713 (4200968) @ Episode 5658/10000, loss: 0.0061973859556019317
Episode Reward: 11.0
Step 755 (4201723) @ Episode 5659/10000, loss: 0.0032051925081759695
Episode Reward: 19.0
Step 1007 (4202730) @ Episode 5660/10000, loss: 0.0110683459788560875
Episode Rew

[2017-11-05 03:28:51,509] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video005700.mp4


Step 794 (4239951) @ Episode 5701/10000, loss: 0.0051662405021488676
Episode Reward: 13.0
Step 48 (4239999) @ Episode 5702/10000, loss: 0.0016687448369339108
 Copied model parameters to target network
Step 1150 (4241101) @ Episode 5702/10000, loss: 0.0027681840583682067
Episode Reward: 22.0
Step 1039 (4242140) @ Episode 5703/10000, loss: 0.0065710339695215225
Episode Reward: 18.0
Step 578 (4242718) @ Episode 5704/10000, loss: 0.0032450943253934383
Episode Reward: 15.0
Step 763 (4243481) @ Episode 5705/10000, loss: 0.0036213423591107137
Episode Reward: 14.0
Step 831 (4244312) @ Episode 5706/10000, loss: 0.0062287114560604095
Episode Reward: 25.0
Step 727 (4245039) @ Episode 5707/10000, loss: 0.0027101321611553438
Episode Reward: 18.0
Step 853 (4245892) @ Episode 5708/10000, loss: 0.0296152140945196154
Episode Reward: 14.0
Step 977 (4246869) @ Episode 5709/10000, loss: 0.0106819979846477545
Episode Reward: 16.0
Step 895 (4247764) @ Episode 5710/10000, loss: 0.0029625352472066885
Episode 

[2017-11-05 03:35:36,075] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video005750.mp4


Step 1053 (4283697) @ Episode 5751/10000, loss: 0.0030819347593933344
Episode Reward: 19.0
Step 764 (4284461) @ Episode 5752/10000, loss: 0.0044634300284087665
Episode Reward: 11.0
Step 672 (4285133) @ Episode 5753/10000, loss: 0.0037746315356343985
Episode Reward: 11.0
Step 785 (4285918) @ Episode 5754/10000, loss: 0.0031932401470839977
Episode Reward: 12.0
Step 819 (4286737) @ Episode 5755/10000, loss: 0.0108043905347585685
Episode Reward: 14.0
Step 1319 (4288056) @ Episode 5756/10000, loss: 0.0054584811441600325
Episode Reward: 30.0
Step 854 (4288910) @ Episode 5757/10000, loss: 0.0041023064404726037
Episode Reward: 14.0
Step 1073 (4289983) @ Episode 5758/10000, loss: 0.0047768666408956055
Episode Reward: 18.0
Step 16 (4289999) @ Episode 5759/10000, loss: 0.0227162577211856845
 Copied model parameters to target network
Step 990 (4290973) @ Episode 5759/10000, loss: 0.0137524325400590955
Episode Reward: 20.0
Step 678 (4291651) @ Episode 5760/10000, loss: 0.0094189923256635672
Episode

[2017-11-05 03:42:17,832] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video005800.mp4


Step 1141 (4327622) @ Episode 5801/10000, loss: 0.0042762970551848416
Episode Reward: 23.0
Step 737 (4328359) @ Episode 5802/10000, loss: 0.0084539018571376883
Episode Reward: 19.0
Step 873 (4329232) @ Episode 5803/10000, loss: 0.0024492659140378237
Episode Reward: 19.0
Step 494 (4329726) @ Episode 5804/10000, loss: 0.0029681001324206595
Episode Reward: 8.0
Step 273 (4329999) @ Episode 5805/10000, loss: 0.0101613383740186694
 Copied model parameters to target network
Step 700 (4330426) @ Episode 5805/10000, loss: 0.0050623989664018153
Episode Reward: 15.0
Step 1244 (4331670) @ Episode 5806/10000, loss: 0.0032519116066396236
Episode Reward: 22.0
Step 969 (4332639) @ Episode 5807/10000, loss: 0.0045110429637134075
Episode Reward: 17.0
Step 943 (4333582) @ Episode 5808/10000, loss: 0.0095222406089305887
Episode Reward: 16.0
Step 754 (4334336) @ Episode 5809/10000, loss: 0.0051195588894188475
Episode Reward: 23.0
Step 1081 (4335417) @ Episode 5810/10000, loss: 0.0089020244777202626
Episode

[2017-11-05 03:48:56,121] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video005850.mp4


Step 425 (4369999) @ Episode 5851/10000, loss: 0.0027004950679838657
 Copied model parameters to target network
Step 809 (4370383) @ Episode 5851/10000, loss: 0.0027767745777964593
Episode Reward: 14.0
Step 672 (4371055) @ Episode 5852/10000, loss: 0.0046452535316348087
Episode Reward: 10.0
Step 793 (4371848) @ Episode 5853/10000, loss: 0.0044393590651452546
Episode Reward: 11.0
Step 748 (4372596) @ Episode 5854/10000, loss: 0.0041932915337383753
Episode Reward: 15.0
Step 441 (4373037) @ Episode 5855/10000, loss: 0.0017767850076779723
Episode Reward: 6.0
Step 888 (4373925) @ Episode 5856/10000, loss: 0.0026651420630514624
Episode Reward: 21.0
Step 1252 (4375177) @ Episode 5857/10000, loss: 0.0062931482680141935
Episode Reward: 24.0
Step 859 (4376036) @ Episode 5858/10000, loss: 0.0123382704332470957
Episode Reward: 19.0
Step 856 (4376892) @ Episode 5859/10000, loss: 0.0064119184389710435
Episode Reward: 14.0
Step 1039 (4377931) @ Episode 5860/10000, loss: 0.0081430049613118173
Episode 

[2017-11-05 03:55:21,739] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video005900.mp4


Step 1372 (4412608) @ Episode 5901/10000, loss: 0.0021523036994040012
Episode Reward: 26.0
Step 661 (4413269) @ Episode 5902/10000, loss: 0.0327635332942009695
Episode Reward: 11.0
Step 681 (4413950) @ Episode 5903/10000, loss: 0.0082849422469735157
Episode Reward: 10.0
Step 771 (4414721) @ Episode 5904/10000, loss: 0.0048556001856923105
Episode Reward: 13.0
Step 769 (4415490) @ Episode 5905/10000, loss: 0.0065260585397481923
Episode Reward: 7.0
Step 1000 (4416490) @ Episode 5906/10000, loss: 0.006686042994260788
Episode Reward: 27.0
Step 714 (4417204) @ Episode 5907/10000, loss: 0.0077493996359407993
Episode Reward: 12.0
Step 611 (4417815) @ Episode 5908/10000, loss: 0.0088733285665512085
Episode Reward: 9.0
Step 642 (4418457) @ Episode 5909/10000, loss: 0.0028315954841673374
Episode Reward: 9.0
Step 1379 (4419836) @ Episode 5910/10000, loss: 0.0027068902272731066
Episode Reward: 38.0
Step 163 (4419999) @ Episode 5911/10000, loss: 0.0022413739934563637
 Copied model parameters to targ

[2017-11-05 04:01:35,493] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video005950.mp4


Step 1221 (4452309) @ Episode 5951/10000, loss: 0.0077172918245196344
Episode Reward: 24.0
Step 1011 (4453320) @ Episode 5952/10000, loss: 0.003451008815318346
Episode Reward: 16.0
Step 606 (4453926) @ Episode 5953/10000, loss: 0.0024454421363770964
Episode Reward: 9.0
Step 361 (4454287) @ Episode 5954/10000, loss: 0.0044007399119436745
Episode Reward: 4.0
Step 765 (4455052) @ Episode 5955/10000, loss: 0.0091975601390004167
Episode Reward: 13.0
Step 1069 (4456121) @ Episode 5956/10000, loss: 0.0059202248230576515
Episode Reward: 23.0
Step 760 (4456881) @ Episode 5957/10000, loss: 0.0314738936722278618
Episode Reward: 11.0
Step 595 (4457476) @ Episode 5958/10000, loss: 0.0027264661621302366
Episode Reward: 9.0
Step 341 (4457817) @ Episode 5959/10000, loss: 0.0061722858808934692
Episode Reward: 4.0
Step 547 (4458364) @ Episode 5960/10000, loss: 0.0247786827385425577
Episode Reward: 7.0
Step 983 (4459347) @ Episode 5961/10000, loss: 0.0097335828468203545
Episode Reward: 21.0
Step 652 (445

[2017-11-05 04:07:34,046] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video006000.mp4


Step 624 (4489999) @ Episode 6001/10000, loss: 0.0078948531299829485
 Copied model parameters to target network
Step 786 (4490161) @ Episode 6001/10000, loss: 0.0076874764636158943
Episode Reward: 13.0
Step 722 (4490883) @ Episode 6002/10000, loss: 0.0142990574240684515
Episode Reward: 11.0
Step 653 (4491536) @ Episode 6003/10000, loss: 0.0052170236594974995
Episode Reward: 10.0
Step 723 (4492259) @ Episode 6004/10000, loss: 0.0031346010509878397
Episode Reward: 18.0
Step 978 (4493237) @ Episode 6005/10000, loss: 0.0138843972235918056
Episode Reward: 16.0
Step 1375 (4494612) @ Episode 6006/10000, loss: 0.0037544330116361387
Episode Reward: 35.0
Step 598 (4495210) @ Episode 6007/10000, loss: 0.0044150203466415405
Episode Reward: 7.0
Step 787 (4495997) @ Episode 6008/10000, loss: 0.0061939489096403125
Episode Reward: 13.0
Step 1069 (4497066) @ Episode 6009/10000, loss: 0.0058909957297146327
Episode Reward: 28.0
Step 833 (4497899) @ Episode 6010/10000, loss: 0.0018806355074048042
Episode 

[2017-11-05 04:13:46,978] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video006050.mp4


Step 747 (4529981) @ Episode 6051/10000, loss: 0.0038968874141573906
Episode Reward: 12.0
Step 18 (4529999) @ Episode 6052/10000, loss: 0.0036361217498779297
 Copied model parameters to target network
Step 725 (4530706) @ Episode 6052/10000, loss: 0.0069418456405401233
Episode Reward: 13.0
Step 1007 (4531713) @ Episode 6053/10000, loss: 0.0059380382299423226
Episode Reward: 22.0
Step 1187 (4532900) @ Episode 6054/10000, loss: 0.0014809858985245228
Episode Reward: 21.0
Step 1080 (4533980) @ Episode 6055/10000, loss: 0.0497961491346359256
Episode Reward: 27.0
Step 782 (4534762) @ Episode 6056/10000, loss: 0.0016024187207221985
Episode Reward: 10.0
Step 1152 (4535914) @ Episode 6057/10000, loss: 0.0025288872420787816
Episode Reward: 25.0
Step 576 (4536490) @ Episode 6058/10000, loss: 0.0107068652287125594
Episode Reward: 8.0
Step 642 (4537132) @ Episode 6059/10000, loss: 0.0719313323497772225
Episode Reward: 8.0
Step 870 (4538002) @ Episode 6060/10000, loss: 0.0047819144092500216
Episode 

[2017-11-05 04:20:18,889] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video006100.mp4


Step 1268 (4572790) @ Episode 6101/10000, loss: 0.0041715460829436784
Episode Reward: 25.0
Step 1158 (4573948) @ Episode 6102/10000, loss: 0.0049985423684120186
Episode Reward: 25.0
Step 742 (4574690) @ Episode 6103/10000, loss: 0.0043556308373808867
Episode Reward: 14.0
Step 1063 (4575753) @ Episode 6104/10000, loss: 0.0038592820055782795
Episode Reward: 25.0
Step 834 (4576587) @ Episode 6105/10000, loss: 0.0031248102895915512
Episode Reward: 17.0
Step 999 (4577586) @ Episode 6106/10000, loss: 0.0137825729325413737
Episode Reward: 24.0
Step 839 (4578425) @ Episode 6107/10000, loss: 0.0016821622848510742
Episode Reward: 20.0
Step 738 (4579163) @ Episode 6108/10000, loss: 0.0180502962321043125
Episode Reward: 15.0
Step 836 (4579999) @ Episode 6109/10000, loss: 0.0019426983781158924
 Copied model parameters to target network
Step 990 (4580153) @ Episode 6109/10000, loss: 0.0039978032000362874
Episode Reward: 16.0
Step 683 (4580836) @ Episode 6110/10000, loss: 0.0030635264702141285
Episod

[2017-11-05 04:26:42,732] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video006150.mp4


Step 530 (4613607) @ Episode 6151/10000, loss: 0.0220679379999637687
Episode Reward: 7.0
Step 602 (4614209) @ Episode 6152/10000, loss: 0.0040291142649948626
Episode Reward: 10.0
Step 652 (4614861) @ Episode 6153/10000, loss: 0.0067495983093976974
Episode Reward: 9.0
Step 990 (4615851) @ Episode 6154/10000, loss: 0.0037001278251409533
Episode Reward: 16.0
Step 596 (4616447) @ Episode 6155/10000, loss: 0.0047777472063899045
Episode Reward: 9.0
Step 1339 (4617786) @ Episode 6156/10000, loss: 0.0083820438012480746
Episode Reward: 25.0
Step 842 (4618628) @ Episode 6157/10000, loss: 0.0045895967632532127
Episode Reward: 14.0
Step 1048 (4619676) @ Episode 6158/10000, loss: 0.0036920690909028053
Episode Reward: 18.0
Step 323 (4619999) @ Episode 6159/10000, loss: 0.0031639658845961094
 Copied model parameters to target network
Step 613 (4620289) @ Episode 6159/10000, loss: 0.0074501396156847487
Episode Reward: 11.0
Step 753 (4621042) @ Episode 6160/10000, loss: 0.0012311269529163837
Episode Re

[2017-11-05 04:32:28,523] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video006200.mp4


Step 46 (4649999) @ Episode 6201/10000, loss: 0.0038530249148607254
 Copied model parameters to target network
Step 727 (4650680) @ Episode 6201/10000, loss: 0.0032851099967956543
Episode Reward: 12.0
Step 596 (4651276) @ Episode 6202/10000, loss: 0.0060063060373067864
Episode Reward: 10.0
Step 797 (4652073) @ Episode 6203/10000, loss: 0.0316084958612918858
Episode Reward: 14.0
Step 761 (4652834) @ Episode 6204/10000, loss: 0.0063690315000712876
Episode Reward: 12.0
Step 833 (4653667) @ Episode 6205/10000, loss: 0.0045795654878020295
Episode Reward: 13.0
Step 728 (4654395) @ Episode 6206/10000, loss: 0.0729524120688438494
Episode Reward: 11.0
Step 787 (4655182) @ Episode 6207/10000, loss: 0.0043224189430475235
Episode Reward: 14.0
Step 834 (4656016) @ Episode 6208/10000, loss: 0.0054448144510388373
Episode Reward: 14.0
Step 985 (4657001) @ Episode 6209/10000, loss: 0.0021177716553211218
Episode Reward: 23.0
Step 817 (4657818) @ Episode 6210/10000, loss: 0.0031198286451399326
Episode Re

[2017-11-05 04:39:00,175] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video006250.mp4


Step 531 (4692651) @ Episode 6251/10000, loss: 0.0029376782476902013
Episode Reward: 8.0
Step 834 (4693485) @ Episode 6252/10000, loss: 0.0031538389157503843
Episode Reward: 12.0
Step 731 (4694216) @ Episode 6253/10000, loss: 0.0039861043915152556
Episode Reward: 13.0
Step 767 (4694983) @ Episode 6254/10000, loss: 0.0102213080972433093
Episode Reward: 16.0
Step 1252 (4696235) @ Episode 6255/10000, loss: 0.0516946241259574946
Episode Reward: 36.0
Step 1061 (4697296) @ Episode 6256/10000, loss: 0.0035679247230291367
Episode Reward: 21.0
Step 537 (4697833) @ Episode 6257/10000, loss: 0.0036625692155212164
Episode Reward: 12.0
Step 862 (4698695) @ Episode 6258/10000, loss: 0.0074261627160012723
Episode Reward: 20.0
Step 809 (4699504) @ Episode 6259/10000, loss: 0.0028418062720447782
Episode Reward: 16.0
Step 495 (4699999) @ Episode 6260/10000, loss: 0.0022277052048593768
 Copied model parameters to target network
Step 773 (4700277) @ Episode 6260/10000, loss: 0.00149122672155499465
Episode

[2017-11-05 04:46:07,480] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video006300.mp4


Step 952 (4739191) @ Episode 6301/10000, loss: 0.0084370970726013186
Episode Reward: 24.0
Step 808 (4739999) @ Episode 6302/10000, loss: 0.0104281762614846235
 Copied model parameters to target network
Step 852 (4740043) @ Episode 6302/10000, loss: 0.0353936403989791933
Episode Reward: 15.0
Step 1171 (4741214) @ Episode 6303/10000, loss: 0.0050273644737899342
Episode Reward: 23.0
Step 896 (4742110) @ Episode 6304/10000, loss: 0.0046166502870619366
Episode Reward: 15.0
Step 506 (4742616) @ Episode 6305/10000, loss: 0.0050928471609950066
Episode Reward: 8.0
Step 701 (4743317) @ Episode 6306/10000, loss: 0.0031382914166897535
Episode Reward: 10.0
Step 977 (4744294) @ Episode 6307/10000, loss: 0.0041863396763801575
Episode Reward: 17.0
Step 1543 (4745837) @ Episode 6308/10000, loss: 0.0094598177820444123
Episode Reward: 44.0
Step 1213 (4747050) @ Episode 6309/10000, loss: 0.0043594036251306537
Episode Reward: 23.0
Step 629 (4747679) @ Episode 6310/10000, loss: 0.0103684579953551383
Episode

[2017-11-05 04:53:01,289] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video006350.mp4


Step 989 (4783874) @ Episode 6351/10000, loss: 0.0026713246479630476
Episode Reward: 16.0
Step 575 (4784449) @ Episode 6352/10000, loss: 0.0025533312000334263
Episode Reward: 8.0
Step 625 (4785074) @ Episode 6353/10000, loss: 0.0059831128455698492
Episode Reward: 12.0
Step 986 (4786060) @ Episode 6354/10000, loss: 0.0037235170602798467
Episode Reward: 21.0
Step 740 (4786800) @ Episode 6355/10000, loss: 0.0026642056182026863
Episode Reward: 12.0
Step 733 (4787533) @ Episode 6356/10000, loss: 0.0024207131937146187
Episode Reward: 15.0
Step 698 (4788231) @ Episode 6357/10000, loss: 0.0041735135018825537
Episode Reward: 10.0
Step 941 (4789172) @ Episode 6358/10000, loss: 0.0025611480232328176
Episode Reward: 15.0
Step 743 (4789915) @ Episode 6359/10000, loss: 0.0077260374091565617
Episode Reward: 13.0
Step 84 (4789999) @ Episode 6360/10000, loss: 0.0028354139067232615
 Copied model parameters to target network
Step 833 (4790748) @ Episode 6360/10000, loss: 0.0042287260293960575
Episode Rew

[2017-11-05 04:59:54,780] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video006400.mp4


Step 627 (4827734) @ Episode 6401/10000, loss: 0.0028536200989037756
Episode Reward: 10.0
Step 657 (4828391) @ Episode 6402/10000, loss: 0.0084884529933333445
Episode Reward: 12.0
Step 511 (4828902) @ Episode 6403/10000, loss: 0.0045642647892236714
Episode Reward: 9.0
Step 1097 (4829999) @ Episode 6404/10000, loss: 0.0034320494160056114
 Copied model parameters to target network
Step 1165 (4830067) @ Episode 6404/10000, loss: 0.0023854579776525497
Episode Reward: 20.0
Step 835 (4830902) @ Episode 6405/10000, loss: 0.0078465994447469713
Episode Reward: 18.0
Step 539 (4831441) @ Episode 6406/10000, loss: 0.0034645940177142625
Episode Reward: 9.0
Step 820 (4832261) @ Episode 6407/10000, loss: 0.0041771209798753268
Episode Reward: 13.0
Step 1105 (4833366) @ Episode 6408/10000, loss: 0.0376568362116813668
Episode Reward: 21.0
Step 822 (4834188) @ Episode 6409/10000, loss: 0.0037077877204865217
Episode Reward: 14.0
Step 1272 (4835460) @ Episode 6410/10000, loss: 0.0057123382575809956
Episode

[2017-11-05 05:06:36,059] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video006450.mp4


Step 43 (4869999) @ Episode 6451/10000, loss: 0.0068185776472091675
 Copied model parameters to target network
Step 1019 (4870975) @ Episode 6451/10000, loss: 0.0078439153730869383
Episode Reward: 15.0
Step 799 (4871774) @ Episode 6452/10000, loss: 0.0019894866272807123
Episode Reward: 13.0
Step 1032 (4872806) @ Episode 6453/10000, loss: 0.0062997862696647643
Episode Reward: 22.0
Step 846 (4873652) @ Episode 6454/10000, loss: 0.0059120338410139086
Episode Reward: 15.0
Step 903 (4874555) @ Episode 6455/10000, loss: 0.0102542396634817124
Episode Reward: 18.0
Step 1156 (4875711) @ Episode 6456/10000, loss: 0.0113740218803286556
Episode Reward: 37.0
Step 1373 (4877084) @ Episode 6457/10000, loss: 0.0047718901187181476
Episode Reward: 27.0
Step 1365 (4878449) @ Episode 6458/10000, loss: 0.0040550976991653446
Episode Reward: 32.0
Step 1013 (4879462) @ Episode 6459/10000, loss: 0.0097802830860018735
Episode Reward: 21.0
Step 537 (4879999) @ Episode 6460/10000, loss: 0.0044177654199302265
 Cop

[2017-11-05 05:13:47,258] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video006500.mp4


Step 846 (4917835) @ Episode 6501/10000, loss: 0.0051556257531046874
Episode Reward: 14.0
Step 1047 (4918882) @ Episode 6502/10000, loss: 0.0046037836000323296
Episode Reward: 30.0
Step 860 (4919742) @ Episode 6503/10000, loss: 0.0062346332706511024
Episode Reward: 19.0
Step 257 (4919999) @ Episode 6504/10000, loss: 0.0037398035638034344
 Copied model parameters to target network
Step 930 (4920672) @ Episode 6504/10000, loss: 0.0040428307838737965
Episode Reward: 18.0
Step 555 (4921227) @ Episode 6505/10000, loss: 0.0050399773754179483
Episode Reward: 10.0
Step 671 (4921898) @ Episode 6506/10000, loss: 0.0027191764675080776
Episode Reward: 11.0
Step 826 (4922724) @ Episode 6507/10000, loss: 0.0012729689478874207
Episode Reward: 14.0
Step 689 (4923413) @ Episode 6508/10000, loss: 0.0059607606381177917
Episode Reward: 14.0
Step 776 (4924189) @ Episode 6509/10000, loss: 0.0538878813385963444
Episode Reward: 16.0
Step 634 (4924823) @ Episode 6510/10000, loss: 0.0124981235712766656
Episode 

[2017-11-05 05:20:53,493] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video006550.mp4


Step 704 (4963457) @ Episode 6551/10000, loss: 0.0025346660986542787
Episode Reward: 9.0
Step 791 (4964248) @ Episode 6552/10000, loss: 0.0044804154895246034
Episode Reward: 17.0
Step 1163 (4965411) @ Episode 6553/10000, loss: 0.0074043897911906245
Episode Reward: 31.0
Step 843 (4966254) @ Episode 6554/10000, loss: 0.0030854349024593834
Episode Reward: 13.0
Step 992 (4967246) @ Episode 6555/10000, loss: 0.0077103972434997564
Episode Reward: 25.0
Step 982 (4968228) @ Episode 6556/10000, loss: 0.0289040282368659975
Episode Reward: 24.0
Step 670 (4968898) @ Episode 6557/10000, loss: 0.0034120318014174775
Episode Reward: 10.0
Step 896 (4969794) @ Episode 6558/10000, loss: 0.0082492269575595865
Episode Reward: 14.0
Step 205 (4969999) @ Episode 6559/10000, loss: 0.0029199745040386915
 Copied model parameters to target network
Step 952 (4970746) @ Episode 6559/10000, loss: 0.0040784021839499472
Episode Reward: 16.0
Step 795 (4971541) @ Episode 6560/10000, loss: 0.0037511312402784824
Episode R

[2017-11-05 05:27:54,513] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video006600.mp4


Step 774 (5009332) @ Episode 6601/10000, loss: 0.0042763380333781244
Episode Reward: 12.0
Step 667 (5009999) @ Episode 6602/10000, loss: 0.0066118640825152453
 Copied model parameters to target network
Step 697 (5010029) @ Episode 6602/10000, loss: 0.0046388097107410438
Episode Reward: 14.0
Step 1357 (5011386) @ Episode 6603/10000, loss: 0.0050793243572115996
Episode Reward: 27.0
Step 734 (5012120) @ Episode 6604/10000, loss: 0.0045321956276893626
Episode Reward: 11.0
Step 688 (5012808) @ Episode 6605/10000, loss: 0.0049323523417115217
Episode Reward: 12.0
Step 733 (5013541) @ Episode 6606/10000, loss: 0.0050476230680942535
Episode Reward: 12.0
Step 772 (5014313) @ Episode 6607/10000, loss: 0.0158958565443754213
Episode Reward: 14.0
Step 827 (5015140) @ Episode 6608/10000, loss: 0.0040190150029957294
Episode Reward: 17.0
Step 904 (5016044) @ Episode 6609/10000, loss: 0.0039346190169453623
Episode Reward: 20.0
Step 1265 (5017309) @ Episode 6610/10000, loss: 0.0070399316027760513
Episode

[2017-11-05 05:34:48,835] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video006650.mp4


Step 459 (5053646) @ Episode 6651/10000, loss: 0.0026075358036905527
Episode Reward: 6.0
Step 813 (5054459) @ Episode 6652/10000, loss: 0.0038557639345526695
Episode Reward: 17.0
Step 638 (5055097) @ Episode 6653/10000, loss: 0.0073566418141126634
Episode Reward: 11.0
Step 670 (5055767) @ Episode 6654/10000, loss: 0.0050345081835985186
Episode Reward: 12.0
Step 755 (5056522) @ Episode 6655/10000, loss: 0.0372287519276142114
Episode Reward: 11.0
Step 962 (5057484) @ Episode 6656/10000, loss: 0.0057064993306994444
Episode Reward: 18.0
Step 1207 (5058691) @ Episode 6657/10000, loss: 0.0041238274425268173
Episode Reward: 23.0
Step 855 (5059546) @ Episode 6658/10000, loss: 0.0103635331615805633
Episode Reward: 12.0
Step 453 (5059999) @ Episode 6659/10000, loss: 0.0040712417103350167
 Copied model parameters to target network
Step 947 (5060493) @ Episode 6659/10000, loss: 0.0054202117025852246
Episode Reward: 15.0
Step 946 (5061439) @ Episode 6660/10000, loss: 0.0034436429850757124
Episode R

[2017-11-05 05:41:53,972] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video006700.mp4


Step 886 (5099646) @ Episode 6701/10000, loss: 0.0057514491491019737
Episode Reward: 20.0
Step 353 (5099999) @ Episode 6702/10000, loss: 0.0026257815770804887
 Copied model parameters to target network
Step 612 (5100258) @ Episode 6702/10000, loss: 0.0015516572166234255
Episode Reward: 10.0
Step 1045 (5101303) @ Episode 6703/10000, loss: 0.0253635756671428685
Episode Reward: 22.0
Step 764 (5102067) @ Episode 6704/10000, loss: 0.0088949399068951615
Episode Reward: 15.0
Step 500 (5102567) @ Episode 6705/10000, loss: 0.0053277476690709595
Episode Reward: 6.0
Step 803 (5103370) @ Episode 6706/10000, loss: 0.0030696182511746883
Episode Reward: 12.0
Step 786 (5104156) @ Episode 6707/10000, loss: 0.0079931560903787614
Episode Reward: 12.0
Step 993 (5105149) @ Episode 6708/10000, loss: 0.0062973564490675935
Episode Reward: 17.0
Step 1001 (5106150) @ Episode 6709/10000, loss: 0.0020837755873799324
Episode Reward: 18.0
Step 735 (5106885) @ Episode 6710/10000, loss: 0.0036275614984333515
Episode 

[2017-11-05 05:48:44,484] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video006750.mp4


Step 778 (5143997) @ Episode 6751/10000, loss: 0.0037734895013272762
Episode Reward: 11.0
Step 856 (5144853) @ Episode 6752/10000, loss: 0.0020059996750205755
Episode Reward: 15.0
Step 918 (5145771) @ Episode 6753/10000, loss: 0.0015948519576340914
Episode Reward: 16.0
Step 1303 (5147074) @ Episode 6754/10000, loss: 0.0038237390108406544
Episode Reward: 27.0
Step 786 (5147860) @ Episode 6755/10000, loss: 0.0034068087115883827
Episode Reward: 18.0
Step 691 (5148551) @ Episode 6756/10000, loss: 0.0142030036076903344
Episode Reward: 10.0
Step 869 (5149420) @ Episode 6757/10000, loss: 0.0071246419101953515
Episode Reward: 18.0
Step 579 (5149999) @ Episode 6758/10000, loss: 0.0036669289693236355
 Copied model parameters to target network
Step 1056 (5150476) @ Episode 6758/10000, loss: 0.0059570050798356538
Episode Reward: 20.0
Step 594 (5151070) @ Episode 6759/10000, loss: 0.0029358649626374245
Episode Reward: 8.0
Step 1131 (5152201) @ Episode 6760/10000, loss: 0.0065351435914635666
Episode

[2017-11-05 05:55:13,505] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video006800.mp4


Step 540 (5185631) @ Episode 6801/10000, loss: 0.0033821933902800083
Episode Reward: 8.0
Step 1076 (5186707) @ Episode 6802/10000, loss: 0.0032194997183978558
Episode Reward: 20.0
Step 558 (5187265) @ Episode 6803/10000, loss: 0.0180459320545196537
Episode Reward: 7.0
Step 499 (5187764) @ Episode 6804/10000, loss: 0.0042143524624407294
Episode Reward: 7.0
Step 844 (5188608) @ Episode 6805/10000, loss: 0.0064387256279587755
Episode Reward: 13.0
Step 909 (5189517) @ Episode 6806/10000, loss: 0.0035070038866251707
Episode Reward: 15.0
Step 482 (5189999) @ Episode 6807/10000, loss: 0.0076874122023582463
 Copied model parameters to target network
Step 1277 (5190794) @ Episode 6807/10000, loss: 0.0046650478616356855
Episode Reward: 24.0
Step 736 (5191530) @ Episode 6808/10000, loss: 0.0046825278550386433
Episode Reward: 10.0
Step 1041 (5192571) @ Episode 6809/10000, loss: 0.0034130332060158253
Episode Reward: 25.0
Step 1118 (5193689) @ Episode 6810/10000, loss: 0.0042375652119517335
Episode 

[2017-11-05 06:01:26,795] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video006850.mp4


Step 895 (5225848) @ Episode 6851/10000, loss: 0.0040213381871581087
Episode Reward: 17.0
Step 796 (5226644) @ Episode 6852/10000, loss: 0.0028550922870635986
Episode Reward: 12.0
Step 979 (5227623) @ Episode 6853/10000, loss: 0.0034352403599768877
Episode Reward: 18.0
Step 1236 (5228859) @ Episode 6854/10000, loss: 0.0043490491807460785
Episode Reward: 29.0
Step 975 (5229834) @ Episode 6855/10000, loss: 0.0052539017051458365
Episode Reward: 15.0
Step 165 (5229999) @ Episode 6856/10000, loss: 0.0029815272428095344
 Copied model parameters to target network
Step 790 (5230624) @ Episode 6856/10000, loss: 0.0079952748492360126
Episode Reward: 17.0
Step 1065 (5231689) @ Episode 6857/10000, loss: 0.0054529444314539436
Episode Reward: 30.0
Step 714 (5232403) @ Episode 6858/10000, loss: 0.0033072535879909992
Episode Reward: 11.0
Step 1299 (5233702) @ Episode 6859/10000, loss: 0.0050214100629091265
Episode Reward: 27.0
Step 911 (5234613) @ Episode 6860/10000, loss: 0.0073795109055936346
Episod

[2017-11-05 06:07:56,887] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video006900.mp4


Step 620 (5267613) @ Episode 6901/10000, loss: 0.0047889091074466705
Episode Reward: 10.0
Step 1040 (5268653) @ Episode 6902/10000, loss: 0.0020902724936604536
Episode Reward: 23.0
Step 823 (5269476) @ Episode 6903/10000, loss: 0.0126417949795722962
Episode Reward: 13.0
Step 523 (5269999) @ Episode 6904/10000, loss: 0.0063942549750208855
 Copied model parameters to target network
Step 1222 (5270698) @ Episode 6904/10000, loss: 0.0100363865494728098
Episode Reward: 25.0
Step 866 (5271564) @ Episode 6905/10000, loss: 0.0076184361241757875
Episode Reward: 11.0
Step 625 (5272189) @ Episode 6906/10000, loss: 0.0033220860641449694
Episode Reward: 13.0
Step 822 (5273011) @ Episode 6907/10000, loss: 0.0026135887019336224
Episode Reward: 14.0
Step 956 (5273967) @ Episode 6908/10000, loss: 0.0095803327858448037
Episode Reward: 19.0
Step 872 (5274839) @ Episode 6909/10000, loss: 0.0037152078002691276
Episode Reward: 15.0
Step 559 (5275398) @ Episode 6910/10000, loss: 0.0040345210582017946
Episode

[2017-11-05 06:14:40,637] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video006950.mp4


Step 640 (5310871) @ Episode 6951/10000, loss: 0.0092276409268379217
Episode Reward: 12.0
Step 611 (5311482) @ Episode 6952/10000, loss: 0.0055978372693061838
Episode Reward: 7.0
Step 1172 (5312654) @ Episode 6953/10000, loss: 0.0046242764219641685
Episode Reward: 22.0
Step 953 (5313607) @ Episode 6954/10000, loss: 0.0085681369528174484
Episode Reward: 18.0
Step 825 (5314432) @ Episode 6955/10000, loss: 0.0075516593642532825
Episode Reward: 14.0
Step 907 (5315339) @ Episode 6956/10000, loss: 0.0037501719780266285
Episode Reward: 14.0
Step 1012 (5316351) @ Episode 6957/10000, loss: 0.0020886093843728304
Episode Reward: 17.0
Step 1455 (5317806) @ Episode 6958/10000, loss: 0.0079819224774837565
Episode Reward: 42.0
Step 937 (5318743) @ Episode 6959/10000, loss: 0.0023933229967951775
Episode Reward: 17.0
Step 588 (5319331) @ Episode 6960/10000, loss: 0.0047860224731266574
Episode Reward: 9.0
Step 668 (5319999) @ Episode 6961/10000, loss: 0.0025435574352741247
 Copied model parameters to ta

[2017-11-05 06:21:22,196] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video007000.mp4


Step 781 (5353927) @ Episode 7001/10000, loss: 0.0033581175375729896
Episode Reward: 13.0
Step 1079 (5355006) @ Episode 7002/10000, loss: 0.0048177521675825127
Episode Reward: 22.0
Step 1181 (5356187) @ Episode 7003/10000, loss: 0.0038357130251824856
Episode Reward: 28.0
Step 811 (5356998) @ Episode 7004/10000, loss: 0.0055075711570680147
Episode Reward: 13.0
Step 1028 (5358026) @ Episode 7005/10000, loss: 0.0019800979644060135
Episode Reward: 24.0
Step 723 (5358749) @ Episode 7006/10000, loss: 0.0018718475475907326
Episode Reward: 12.0
Step 1044 (5359793) @ Episode 7007/10000, loss: 0.0186732560396194464
Episode Reward: 17.0
Step 206 (5359999) @ Episode 7008/10000, loss: 0.0047338414005935198
 Copied model parameters to target network
Step 633 (5360426) @ Episode 7008/10000, loss: 0.0122022172436118136
Episode Reward: 10.0
Step 988 (5361414) @ Episode 7009/10000, loss: 0.0057009868323802952
Episode Reward: 19.0
Step 796 (5362210) @ Episode 7010/10000, loss: 0.0052544586360454565
Episo

[2017-11-05 06:27:58,743] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video007050.mp4


Step 894 (5396937) @ Episode 7051/10000, loss: 0.0051170266233384616
Episode Reward: 15.0
Step 988 (5397925) @ Episode 7052/10000, loss: 0.0110495220869779595
Episode Reward: 20.0
Step 1318 (5399243) @ Episode 7053/10000, loss: 0.0040999995544552885
Episode Reward: 27.0
Step 756 (5399999) @ Episode 7054/10000, loss: 0.0038269751239567995
 Copied model parameters to target network
Step 820 (5400063) @ Episode 7054/10000, loss: 0.0030157133005559444
Episode Reward: 12.0
Step 574 (5400637) @ Episode 7055/10000, loss: 0.0039664749056100845
Episode Reward: 12.0
Step 988 (5401625) @ Episode 7056/10000, loss: 0.0024034893140196867
Episode Reward: 18.0
Step 1133 (5402758) @ Episode 7057/10000, loss: 0.0071276254020631317
Episode Reward: 18.0
Step 811 (5403569) @ Episode 7058/10000, loss: 0.0114691788330674174
Episode Reward: 13.0
Step 872 (5404441) @ Episode 7059/10000, loss: 0.0050522172823548325
Episode Reward: 22.0
Step 903 (5405344) @ Episode 7060/10000, loss: 0.0061415415257215593
Episode

[2017-11-05 06:34:42,098] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video007100.mp4


Step 762 (5439930) @ Episode 7101/10000, loss: 0.0052792923524975787
Episode Reward: 11.0
Step 69 (5439999) @ Episode 7102/10000, loss: 0.0304508507251739595
 Copied model parameters to target network
Step 735 (5440665) @ Episode 7102/10000, loss: 0.0020101978443562984
Episode Reward: 10.0
Step 680 (5441345) @ Episode 7103/10000, loss: 0.0151329357177019124
Episode Reward: 10.0
Step 1004 (5442349) @ Episode 7104/10000, loss: 0.0021239905618131167
Episode Reward: 18.0
Step 890 (5443239) @ Episode 7105/10000, loss: 0.0066666416823863985
Episode Reward: 18.0
Step 947 (5444186) @ Episode 7106/10000, loss: 0.0053057717159390455
Episode Reward: 16.0
Step 1001 (5445187) @ Episode 7107/10000, loss: 0.017050439491868028
Episode Reward: 22.0
Step 559 (5445746) @ Episode 7108/10000, loss: 0.0025291470810770996
Episode Reward: 8.0
Step 731 (5446477) @ Episode 7109/10000, loss: 0.0091553330421447757
Episode Reward: 11.0
Step 1072 (5447549) @ Episode 7110/10000, loss: 0.0048448583111166954
Episode R

[2017-11-05 06:41:16,280] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video007150.mp4


Step 680 (5482372) @ Episode 7151/10000, loss: 0.0072004036046564587
Episode Reward: 11.0
Step 532 (5482904) @ Episode 7152/10000, loss: 0.0078037371858954437
Episode Reward: 7.0
Step 906 (5483810) @ Episode 7153/10000, loss: 0.0031553697772324085
Episode Reward: 16.0
Step 934 (5484744) @ Episode 7154/10000, loss: 0.0025711925700306892
Episode Reward: 16.0
Step 671 (5485415) @ Episode 7155/10000, loss: 0.0069000259973108773
Episode Reward: 14.0
Step 784 (5486199) @ Episode 7156/10000, loss: 0.0090103223919868477
Episode Reward: 14.0
Step 1124 (5487323) @ Episode 7157/10000, loss: 0.0017242168541997671
Episode Reward: 29.0
Step 944 (5488267) @ Episode 7158/10000, loss: 0.0020720963366329673
Episode Reward: 17.0
Step 632 (5488899) @ Episode 7159/10000, loss: 0.0037349865306168795
Episode Reward: 8.0
Step 744 (5489643) @ Episode 7160/10000, loss: 0.0027668471448123455
Episode Reward: 22.0
Step 356 (5489999) @ Episode 7161/10000, loss: 0.0035992227494716644
 Copied model parameters to targ

[2017-11-05 06:47:19,183] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video007200.mp4


Step 837 (5521145) @ Episode 7201/10000, loss: 0.0038500828668475157
Episode Reward: 15.0
Step 737 (5521882) @ Episode 7202/10000, loss: 0.0049822642467916015
Episode Reward: 13.0
Step 1043 (5522925) @ Episode 7203/10000, loss: 0.0054310685954988514
Episode Reward: 21.0
Step 1047 (5523972) @ Episode 7204/10000, loss: 0.0022901748307049274
Episode Reward: 18.0
Step 596 (5524568) @ Episode 7205/10000, loss: 0.0051640304736793046
Episode Reward: 9.0
Step 645 (5525213) @ Episode 7206/10000, loss: 0.0062821288593113423
Episode Reward: 10.0
Step 538 (5525751) @ Episode 7207/10000, loss: 0.0026204069145023823
Episode Reward: 11.0
Step 674 (5526425) @ Episode 7208/10000, loss: 0.0040480815805494785
Episode Reward: 8.0
Step 1013 (5527438) @ Episode 7209/10000, loss: 0.0036789672449231148
Episode Reward: 21.0
Step 777 (5528215) @ Episode 7210/10000, loss: 0.0083849895745515826
Episode Reward: 12.0
Step 702 (5528917) @ Episode 7211/10000, loss: 0.0059696901589632034
Episode Reward: 17.0
Step 1082

[2017-11-05 06:53:50,053] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video007250.mp4


Step 566 (5562781) @ Episode 7251/10000, loss: 0.0044771311804652215
Episode Reward: 7.0
Step 543 (5563324) @ Episode 7252/10000, loss: 0.0182219352573156365
Episode Reward: 9.0
Step 472 (5563796) @ Episode 7253/10000, loss: 0.0024522303137928247
Episode Reward: 6.0
Step 683 (5564479) @ Episode 7254/10000, loss: 0.0047799199819564825
Episode Reward: 14.0
Step 688 (5565167) @ Episode 7255/10000, loss: 0.0076950732618570334
Episode Reward: 13.0
Step 960 (5566127) @ Episode 7256/10000, loss: 0.0167338736355304727
Episode Reward: 18.0
Step 524 (5566651) @ Episode 7257/10000, loss: 0.0088339466601610184
Episode Reward: 8.0
Step 815 (5567466) @ Episode 7258/10000, loss: 0.0040452331304550174
Episode Reward: 18.0
Step 1008 (5568474) @ Episode 7259/10000, loss: 0.0035015796311199665
Episode Reward: 21.0
Step 695 (5569169) @ Episode 7260/10000, loss: 0.0040400680154562964
Episode Reward: 11.0
Step 830 (5569999) @ Episode 7261/10000, loss: 0.0027945877518504865
 Copied model parameters to target

[2017-11-05 07:00:17,327] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video007300.mp4


Step 834 (5604573) @ Episode 7301/10000, loss: 0.0087030930444598245
Episode Reward: 13.0
Step 843 (5605416) @ Episode 7302/10000, loss: 0.0093988012522459033
Episode Reward: 13.0
Step 1006 (5606422) @ Episode 7303/10000, loss: 0.0035792742855846887
Episode Reward: 17.0
Step 869 (5607291) @ Episode 7304/10000, loss: 0.0050961249507963665
Episode Reward: 16.0
Step 1169 (5608460) @ Episode 7305/10000, loss: 0.0064322473481297496
Episode Reward: 24.0
Step 678 (5609138) @ Episode 7306/10000, loss: 0.0377358645200729474
Episode Reward: 12.0
Step 779 (5609917) @ Episode 7307/10000, loss: 0.0300350319594144827
Episode Reward: 17.0
Step 82 (5609999) @ Episode 7308/10000, loss: 0.0049439403228461745
 Copied model parameters to target network
Step 489 (5610406) @ Episode 7308/10000, loss: 0.0058795558288693438
Episode Reward: 7.0
Step 883 (5611289) @ Episode 7309/10000, loss: 0.0028347519692033536
Episode Reward: 12.0
Step 416 (5611705) @ Episode 7310/10000, loss: 0.0034800022840499885
Episode R

[2017-11-05 07:06:41,659] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video007350.mp4


Step 982 (5645670) @ Episode 7351/10000, loss: 0.0053735482506453994
Episode Reward: 18.0
Step 923 (5646593) @ Episode 7352/10000, loss: 0.0035940362140536315
Episode Reward: 17.0
Step 912 (5647505) @ Episode 7353/10000, loss: 0.0111738853156566627
Episode Reward: 17.0
Step 864 (5648369) @ Episode 7354/10000, loss: 0.0031601497903466225
Episode Reward: 21.0
Step 924 (5649293) @ Episode 7355/10000, loss: 0.0043861246667802338
Episode Reward: 20.0
Step 687 (5649980) @ Episode 7356/10000, loss: 0.0088764606043696432
Episode Reward: 11.0
Step 19 (5649999) @ Episode 7357/10000, loss: 0.0022424831986427307
 Copied model parameters to target network
Step 1224 (5651204) @ Episode 7357/10000, loss: 0.0032438430935144424
Episode Reward: 20.0
Step 808 (5652012) @ Episode 7358/10000, loss: 0.0024366839788854125
Episode Reward: 16.0
Step 826 (5652838) @ Episode 7359/10000, loss: 0.0100372759625315677
Episode Reward: 17.0
Step 632 (5653470) @ Episode 7360/10000, loss: 0.0045714140869677075
Episode R

[2017-11-05 07:13:09,396] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video007400.mp4


Step 932 (5687314) @ Episode 7401/10000, loss: 0.0022261831909418106
Episode Reward: 20.0
Step 897 (5688211) @ Episode 7402/10000, loss: 0.0083639519289135932
Episode Reward: 16.0
Step 838 (5689049) @ Episode 7403/10000, loss: 0.0019728355109691626
Episode Reward: 11.0
Step 950 (5689999) @ Episode 7404/10000, loss: 0.0066472408361732967
 Copied model parameters to target network
Step 996 (5690045) @ Episode 7404/10000, loss: 0.0048113451339304455
Episode Reward: 18.0
Step 1243 (5691288) @ Episode 7405/10000, loss: 0.0084589282050728815
Episode Reward: 29.0
Step 1076 (5692364) @ Episode 7406/10000, loss: 0.0043458458967506886
Episode Reward: 25.0
Step 807 (5693171) @ Episode 7407/10000, loss: 0.0046449061483144767
Episode Reward: 14.0
Step 500 (5693671) @ Episode 7408/10000, loss: 0.0058927368372678763
Episode Reward: 8.0
Step 662 (5694333) @ Episode 7409/10000, loss: 0.0025543239898979664
Episode Reward: 10.0
Step 515 (5694848) @ Episode 7410/10000, loss: 0.0038253362290561215
Episode 

[2017-11-05 07:19:39,699] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video007450.mp4


Step 1313 (5729145) @ Episode 7451/10000, loss: 0.0051146904006600385
Episode Reward: 30.0
Step 774 (5729919) @ Episode 7452/10000, loss: 0.0059483041986823085
Episode Reward: 12.0
Step 80 (5729999) @ Episode 7453/10000, loss: 0.0072272815741598616
 Copied model parameters to target network
Step 1126 (5731045) @ Episode 7453/10000, loss: 0.0039284047670662468
Episode Reward: 23.0
Step 555 (5731600) @ Episode 7454/10000, loss: 0.0063470299355685714
Episode Reward: 6.0
Step 1010 (5732610) @ Episode 7455/10000, loss: 0.0040839645080268386
Episode Reward: 27.0
Step 1219 (5733829) @ Episode 7456/10000, loss: 0.0023523268755525357
Episode Reward: 23.0
Step 686 (5734515) @ Episode 7457/10000, loss: 0.0054696802981197836
Episode Reward: 11.0
Step 594 (5735109) @ Episode 7458/10000, loss: 0.0061226692050695427
Episode Reward: 11.0
Step 898 (5736007) @ Episode 7459/10000, loss: 0.0331779345870018338
Episode Reward: 25.0
Step 869 (5736876) @ Episode 7460/10000, loss: 0.0072283865883946422
Episode

[2017-11-05 07:26:01,962] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video007500.mp4


Step 1019 (5769607) @ Episode 7501/10000, loss: 0.0046892808750271823
Episode Reward: 20.0
Step 392 (5769999) @ Episode 7502/10000, loss: 0.0034863557666540146
 Copied model parameters to target network
Step 664 (5770271) @ Episode 7502/10000, loss: 0.0026967464946210384
Episode Reward: 10.0
Step 807 (5771078) @ Episode 7503/10000, loss: 0.0023947355803102255
Episode Reward: 11.0
Step 806 (5771884) @ Episode 7504/10000, loss: 0.0013587318826466799
Episode Reward: 10.0
Step 778 (5772662) @ Episode 7505/10000, loss: 0.0508582219481468235
Episode Reward: 13.0
Step 486 (5773148) @ Episode 7506/10000, loss: 0.0060704532079398631
Episode Reward: 6.0
Step 358 (5773506) @ Episode 7507/10000, loss: 0.0067616486921906477
Episode Reward: 4.0
Step 703 (5774209) @ Episode 7508/10000, loss: 0.0017005590489134192
Episode Reward: 13.0
Step 681 (5774890) @ Episode 7509/10000, loss: 0.0019127056002616882
Episode Reward: 10.0
Step 954 (5775844) @ Episode 7510/10000, loss: 0.0061638206243515015
Episode Re

[2017-11-05 07:32:08,792] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video007550.mp4


Step 777 (5807859) @ Episode 7551/10000, loss: 0.0032040150836110115
Episode Reward: 17.0
Step 720 (5808579) @ Episode 7552/10000, loss: 0.0043004094623029235
Episode Reward: 11.0
Step 1077 (5809656) @ Episode 7553/10000, loss: 0.0037694275379180916
Episode Reward: 26.0
Step 343 (5809999) @ Episode 7554/10000, loss: 0.0059930505231022835
 Copied model parameters to target network
Step 1211 (5810867) @ Episode 7554/10000, loss: 0.0092821745201945326
Episode Reward: 23.0
Step 828 (5811695) @ Episode 7555/10000, loss: 0.0063787726685404783
Episode Reward: 13.0
Step 643 (5812338) @ Episode 7556/10000, loss: 0.0023476213682442904
Episode Reward: 10.0
Step 693 (5813031) @ Episode 7557/10000, loss: 0.0029415199533104897
Episode Reward: 11.0
Step 836 (5813867) @ Episode 7558/10000, loss: 0.0035319929011166096
Episode Reward: 14.0
Step 1022 (5814889) @ Episode 7559/10000, loss: 0.0041244719177484515
Episode Reward: 19.0
Step 807 (5815696) @ Episode 7560/10000, loss: 0.0039928611367940924
Episod

[2017-11-05 07:38:18,918] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video007600.mp4


Step 864 (5847269) @ Episode 7601/10000, loss: 0.0052321352995932185
Episode Reward: 21.0
Step 727 (5847996) @ Episode 7602/10000, loss: 0.0021679294295608997
Episode Reward: 16.0
Step 1297 (5849293) @ Episode 7603/10000, loss: 0.0208398811519145975
Episode Reward: 24.0
Step 706 (5849999) @ Episode 7604/10000, loss: 0.0025543947704136373
 Copied model parameters to target network
Step 763 (5850056) @ Episode 7604/10000, loss: 0.0026838919147849083
Episode Reward: 16.0
Step 521 (5850577) @ Episode 7605/10000, loss: 0.0057405214756727223
Episode Reward: 7.0
Step 969 (5851546) @ Episode 7606/10000, loss: 0.0045050787739455733
Episode Reward: 19.0
Step 501 (5852047) @ Episode 7607/10000, loss: 0.0204371921718120577
Episode Reward: 10.0
Step 697 (5852744) @ Episode 7608/10000, loss: 0.0052739186212420467
Episode Reward: 13.0
Step 992 (5853736) @ Episode 7609/10000, loss: 0.0029210061766207226
Episode Reward: 21.0
Step 431 (5854167) @ Episode 7610/10000, loss: 0.0063213170506060125
Episode R

[2017-11-05 07:44:47,630] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video007650.mp4


Step 579 (5888198) @ Episode 7651/10000, loss: 0.0038013362791389227
Episode Reward: 8.0
Step 1016 (5889214) @ Episode 7652/10000, loss: 0.0037613175809383392
Episode Reward: 20.0
Step 445 (5889659) @ Episode 7653/10000, loss: 0.0027771899476647377
Episode Reward: 5.0
Step 340 (5889999) @ Episode 7654/10000, loss: 0.0039238194003701213
 Copied model parameters to target network
Step 709 (5890368) @ Episode 7654/10000, loss: 0.0083632422611117365
Episode Reward: 10.0
Step 1343 (5891711) @ Episode 7655/10000, loss: 0.0033751549199223523
Episode Reward: 29.0
Step 850 (5892561) @ Episode 7656/10000, loss: 0.0027355377096682787
Episode Reward: 13.0
Step 618 (5893179) @ Episode 7657/10000, loss: 0.0052708587609231474
Episode Reward: 10.0
Step 1015 (5894194) @ Episode 7658/10000, loss: 0.0052810176275670538
Episode Reward: 18.0
Step 659 (5894853) @ Episode 7659/10000, loss: 0.0051826392300426962
Episode Reward: 11.0
Step 679 (5895532) @ Episode 7660/10000, loss: 0.0041043274104595186
Episode 

[2017-11-05 07:51:09,185] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video007700.mp4


Step 694 (5928926) @ Episode 7701/10000, loss: 0.0036260969936847687
Episode Reward: 11.0
Step 723 (5929649) @ Episode 7702/10000, loss: 0.0063933688215911395
Episode Reward: 16.0
Step 350 (5929999) @ Episode 7703/10000, loss: 0.0038437421899288893
 Copied model parameters to target network
Step 744 (5930393) @ Episode 7703/10000, loss: 0.0058387573808431625
Episode Reward: 13.0
Step 670 (5931063) @ Episode 7704/10000, loss: 0.0027750486042350536
Episode Reward: 11.0
Step 880 (5931943) @ Episode 7705/10000, loss: 0.0063465246930718424
Episode Reward: 13.0
Step 742 (5932685) @ Episode 7706/10000, loss: 0.0015851957723498344
Episode Reward: 12.0
Step 900 (5933585) @ Episode 7707/10000, loss: 0.0027558323927223682
Episode Reward: 14.0
Step 872 (5934457) @ Episode 7708/10000, loss: 0.0013969622086733582
Episode Reward: 15.0
Step 906 (5935363) @ Episode 7709/10000, loss: 0.0048848711885511875
Episode Reward: 16.0
Step 857 (5936220) @ Episode 7710/10000, loss: 0.0020647421479225168
Episode R

[2017-11-05 07:57:30,942] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video007750.mp4


Step 1002 (5969717) @ Episode 7751/10000, loss: 0.0058283866383135326
Episode Reward: 21.0
Step 282 (5969999) @ Episode 7752/10000, loss: 0.0084582408890128144
 Copied model parameters to target network
Step 723 (5970440) @ Episode 7752/10000, loss: 0.0031319726258516315
Episode Reward: 10.0
Step 837 (5971277) @ Episode 7753/10000, loss: 0.0045013711787760268
Episode Reward: 18.0
Step 1158 (5972435) @ Episode 7754/10000, loss: 0.0056147798895835885
Episode Reward: 21.0
Step 743 (5973178) @ Episode 7755/10000, loss: 0.0098569234833121312
Episode Reward: 15.0
Step 414 (5973592) @ Episode 7756/10000, loss: 0.0036683934740722182
Episode Reward: 5.0
Step 884 (5974476) @ Episode 7757/10000, loss: 0.0112845506519079295
Episode Reward: 17.0
Step 642 (5975118) @ Episode 7758/10000, loss: 0.0076657678000628958
Episode Reward: 12.0
Step 528 (5975646) @ Episode 7759/10000, loss: 0.0057709896937012677
Episode Reward: 7.0
Step 1067 (5976713) @ Episode 7760/10000, loss: 0.0038169622421264658
Episode 

[2017-11-05 08:03:58,473] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video007800.mp4


Step 660 (6010788) @ Episode 7801/10000, loss: 0.0092030568048357965
Episode Reward: 10.0
Step 476 (6011264) @ Episode 7802/10000, loss: 0.0021792780607938766
Episode Reward: 7.0
Step 682 (6011946) @ Episode 7803/10000, loss: 0.0020076476503163576
Episode Reward: 9.0
Step 770 (6012716) @ Episode 7804/10000, loss: 0.0021175772417336702
Episode Reward: 12.0
Step 872 (6013588) @ Episode 7805/10000, loss: 0.0061263563111424456
Episode Reward: 13.0
Step 591 (6014179) @ Episode 7806/10000, loss: 0.0039644287899136547
Episode Reward: 7.0
Step 756 (6014935) @ Episode 7807/10000, loss: 0.0037340302951633937
Episode Reward: 13.0
Step 544 (6015479) @ Episode 7808/10000, loss: 0.0035524733830243353
Episode Reward: 11.0
Step 936 (6016415) @ Episode 7809/10000, loss: 0.0057548312470316895
Episode Reward: 17.0
Step 718 (6017133) @ Episode 7810/10000, loss: 0.0055836532264947897
Episode Reward: 17.0
Step 594 (6017727) @ Episode 7811/10000, loss: 0.0072447331622242935
Episode Reward: 9.0
Step 593 (6018

[2017-11-05 08:10:08,175] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video007850.mp4


Step 735 (6049999) @ Episode 7851/10000, loss: 0.0037188543938100345
 Copied model parameters to target network
Step 764 (6050028) @ Episode 7851/10000, loss: 0.0032301787286996846
Episode Reward: 19.0
Step 944 (6050972) @ Episode 7852/10000, loss: 0.0089424727484583856
Episode Reward: 18.0
Step 721 (6051693) @ Episode 7853/10000, loss: 0.0071255965158343315
Episode Reward: 11.0
Step 672 (6052365) @ Episode 7854/10000, loss: 0.0070200762711465366
Episode Reward: 11.0
Step 585 (6052950) @ Episode 7855/10000, loss: 0.0016822674078866847
Episode Reward: 16.0
Step 546 (6053496) @ Episode 7856/10000, loss: 0.0072374716401100165
Episode Reward: 9.0
Step 666 (6054162) @ Episode 7857/10000, loss: 0.0016396819846704602
Episode Reward: 9.0
Step 735 (6054897) @ Episode 7858/10000, loss: 0.0052401730790734292
Episode Reward: 11.0
Step 820 (6055717) @ Episode 7859/10000, loss: 0.0041356659494340425
Episode Reward: 13.0
Step 807 (6056524) @ Episode 7860/10000, loss: 0.0014246281934902072
Episode Rew

[2017-11-05 08:16:08,793] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video007900.mp4


Step 599 (6088083) @ Episode 7901/10000, loss: 0.0018604559591040015
Episode Reward: 8.0
Step 658 (6088741) @ Episode 7902/10000, loss: 0.0023104115389287476
Episode Reward: 10.0
Step 991 (6089732) @ Episode 7903/10000, loss: 0.0043145199306309226
Episode Reward: 20.0
Step 267 (6089999) @ Episode 7904/10000, loss: 0.0109316213056445123
 Copied model parameters to target network
Step 847 (6090579) @ Episode 7904/10000, loss: 0.0073933387175202377
Episode Reward: 18.0
Step 1034 (6091613) @ Episode 7905/10000, loss: 0.0020725654903799295
Episode Reward: 21.0
Step 839 (6092452) @ Episode 7906/10000, loss: 0.0054557546973228455
Episode Reward: 13.0
Step 754 (6093206) @ Episode 7907/10000, loss: 0.0228614564985036855
Episode Reward: 15.0
Step 591 (6093797) @ Episode 7908/10000, loss: 0.0022288479376584298
Episode Reward: 7.0
Step 666 (6094463) @ Episode 7909/10000, loss: 0.0040056770667433748
Episode Reward: 13.0
Step 767 (6095230) @ Episode 7910/10000, loss: 0.0082174343988299373
Episode Re

[2017-11-05 08:22:32,336] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video007950.mp4


Step 836 (6129136) @ Episode 7951/10000, loss: 0.0116313882172107776
Episode Reward: 18.0
Step 740 (6129876) @ Episode 7952/10000, loss: 0.0071949055418372154
Episode Reward: 11.0
Step 123 (6129999) @ Episode 7953/10000, loss: 0.0118092615157365895
 Copied model parameters to target network
Step 764 (6130640) @ Episode 7953/10000, loss: 0.0050926711410284042
Episode Reward: 17.0
Step 599 (6131239) @ Episode 7954/10000, loss: 0.0025667408481240273
Episode Reward: 13.0
Step 855 (6132094) @ Episode 7955/10000, loss: 0.0103839654475450525
Episode Reward: 15.0
Step 1011 (6133105) @ Episode 7956/10000, loss: 0.0107214218005537998
Episode Reward: 21.0
Step 740 (6133845) @ Episode 7957/10000, loss: 0.0103705264627933569
Episode Reward: 10.0
Step 493 (6134338) @ Episode 7958/10000, loss: 0.0046659051440656185
Episode Reward: 8.0
Step 823 (6135161) @ Episode 7959/10000, loss: 0.0035426761023700237
Episode Reward: 14.0
Step 725 (6135886) @ Episode 7960/10000, loss: 0.0021995718125253916
Episode R

[2017-11-05 08:29:03,111] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video008000.mp4


Step 256 (6169999) @ Episode 8001/10000, loss: 0.0031227488070726395
 Copied model parameters to target network
Step 687 (6170430) @ Episode 8001/10000, loss: 0.0023622021544724703
Episode Reward: 13.0
Step 848 (6171278) @ Episode 8002/10000, loss: 0.0075707342475652695
Episode Reward: 16.0
Step 817 (6172095) @ Episode 8003/10000, loss: 0.0035010357387363915
Episode Reward: 11.0
Step 802 (6172897) @ Episode 8004/10000, loss: 0.0024077603593468666
Episode Reward: 11.0
Step 1120 (6174017) @ Episode 8005/10000, loss: 0.0076529602520167835
Episode Reward: 17.0
Step 965 (6174982) @ Episode 8006/10000, loss: 0.0442792810499668185
Episode Reward: 19.0
Step 800 (6175782) @ Episode 8007/10000, loss: 0.0277003087103366852
Episode Reward: 11.0
Step 761 (6176543) @ Episode 8008/10000, loss: 0.0070260707288980485
Episode Reward: 10.0
Step 491 (6177034) @ Episode 8009/10000, loss: 0.0054056160151958466
Episode Reward: 6.0
Step 746 (6177780) @ Episode 8010/10000, loss: 0.0022906525991857056
Episode R

[2017-11-05 08:35:20,838] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video008050.mp4


Step 225 (6209999) @ Episode 8051/10000, loss: 0.0019209617748856544
 Copied model parameters to target network
Step 532 (6210306) @ Episode 8051/10000, loss: 0.0111553696915507322
Episode Reward: 7.0
Step 1135 (6211441) @ Episode 8052/10000, loss: 0.0045624002814292918
Episode Reward: 18.0
Step 706 (6212147) @ Episode 8053/10000, loss: 0.0050193476490676467
Episode Reward: 9.0
Step 582 (6212729) @ Episode 8054/10000, loss: 0.0043258303776383463
Episode Reward: 8.0
Step 962 (6213691) @ Episode 8055/10000, loss: 0.0078104343265295035
Episode Reward: 20.0
Step 796 (6214487) @ Episode 8056/10000, loss: 0.0048897638916969346
Episode Reward: 26.0
Step 866 (6215353) @ Episode 8057/10000, loss: 0.0080569889396429067
Episode Reward: 17.0
Step 787 (6216140) @ Episode 8058/10000, loss: 0.0391631647944450427
Episode Reward: 12.0
Step 656 (6216796) @ Episode 8059/10000, loss: 0.0071643684059381485
Episode Reward: 10.0
Step 1035 (6217831) @ Episode 8060/10000, loss: 0.0040986626408994255
Episode Re

[2017-11-05 08:42:04,131] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video008100.mp4


Step 860 (6253312) @ Episode 8101/10000, loss: 0.0052424804307520395
Episode Reward: 12.0
Step 583 (6253895) @ Episode 8102/10000, loss: 0.0047844676300883293
Episode Reward: 12.0
Step 763 (6254658) @ Episode 8103/10000, loss: 0.0026594770606607243
Episode Reward: 14.0
Step 1047 (6255705) @ Episode 8104/10000, loss: 0.0041062384843826293
Episode Reward: 18.0
Step 781 (6256486) @ Episode 8105/10000, loss: 0.0078311329707503326
Episode Reward: 11.0
Step 1264 (6257750) @ Episode 8106/10000, loss: 0.0044493596069514756
Episode Reward: 25.0
Step 935 (6258685) @ Episode 8107/10000, loss: 0.0067126899957656866
Episode Reward: 17.0
Step 1034 (6259719) @ Episode 8108/10000, loss: 0.0023985386360436681
Episode Reward: 17.0
Step 280 (6259999) @ Episode 8109/10000, loss: 0.0039563239552080633
 Copied model parameters to target network
Step 443 (6260162) @ Episode 8109/10000, loss: 0.0069935745559632787
Episode Reward: 7.0
Step 630 (6260792) @ Episode 8110/10000, loss: 0.0055156713351607325
Episode

[2017-11-05 08:48:40,656] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video008150.mp4


Step 910 (6295317) @ Episode 8151/10000, loss: 0.0073548643849790194
Episode Reward: 18.0
Step 815 (6296132) @ Episode 8152/10000, loss: 0.0021416035015136003
Episode Reward: 12.0
Step 848 (6296980) @ Episode 8153/10000, loss: 0.0079712867736816437
Episode Reward: 15.0
Step 661 (6297641) @ Episode 8154/10000, loss: 0.0019042438361793756
Episode Reward: 10.0
Step 1134 (6298775) @ Episode 8155/10000, loss: 0.0108580328524112783
Episode Reward: 27.0
Step 1170 (6299945) @ Episode 8156/10000, loss: 0.0020515059586614373
Episode Reward: 26.0
Step 54 (6299999) @ Episode 8157/10000, loss: 0.0054547684267163284
 Copied model parameters to target network
Step 875 (6300820) @ Episode 8157/10000, loss: 0.0016975096659734845
Episode Reward: 18.0
Step 836 (6301656) @ Episode 8158/10000, loss: 0.0058202715590596234
Episode Reward: 14.0
Step 561 (6302217) @ Episode 8159/10000, loss: 0.0017203465104103088
Episode Reward: 7.0
Step 934 (6303151) @ Episode 8160/10000, loss: 0.0027443449944257736
Episode R

[2017-11-05 08:55:12,546] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video008200.mp4


Step 934 (6337208) @ Episode 8201/10000, loss: 0.0034996806643903255
Episode Reward: 20.0
Step 1136 (6338344) @ Episode 8202/10000, loss: 0.0053048417903482918
Episode Reward: 23.0
Step 909 (6339253) @ Episode 8203/10000, loss: 0.0086394976824522027
Episode Reward: 19.0
Step 705 (6339958) @ Episode 8204/10000, loss: 0.0030094212852418423
Episode Reward: 11.0
Step 41 (6339999) @ Episode 8205/10000, loss: 0.0037744201254099607
 Copied model parameters to target network
Step 643 (6340601) @ Episode 8205/10000, loss: 0.0052365954034030446
Episode Reward: 11.0
Step 773 (6341374) @ Episode 8206/10000, loss: 0.0444676317274570474
Episode Reward: 11.0
Step 580 (6341954) @ Episode 8207/10000, loss: 0.0083939014002680786
Episode Reward: 9.0
Step 685 (6342639) @ Episode 8208/10000, loss: 0.0019976529292762284
Episode Reward: 10.0
Step 648 (6343287) @ Episode 8209/10000, loss: 0.0027920512948185205
Episode Reward: 10.0
Step 456 (6343743) @ Episode 8210/10000, loss: 0.0021038164850324392
Episode Re

[2017-11-05 09:01:26,901] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video008250.mp4


Step 834 (6376729) @ Episode 8251/10000, loss: 0.0128618646413087843
Episode Reward: 14.0
Step 889 (6377618) @ Episode 8252/10000, loss: 0.0045402529649436473
Episode Reward: 14.0
Step 842 (6378460) @ Episode 8253/10000, loss: 0.0050842976197600365
Episode Reward: 18.0
Step 737 (6379197) @ Episode 8254/10000, loss: 0.0030984154436737335
Episode Reward: 14.0
Step 593 (6379790) @ Episode 8255/10000, loss: 0.0051497453823685656
Episode Reward: 13.0
Step 209 (6379999) @ Episode 8256/10000, loss: 0.0040699057281017393
 Copied model parameters to target network
Step 881 (6380671) @ Episode 8256/10000, loss: 0.0026703849434852687
Episode Reward: 22.0
Step 641 (6381312) @ Episode 8257/10000, loss: 0.0073895249515771874
Episode Reward: 11.0
Step 944 (6382256) @ Episode 8258/10000, loss: 0.0044690105132758622
Episode Reward: 20.0
Step 478 (6382734) @ Episode 8259/10000, loss: 0.0076592275872826587
Episode Reward: 7.0
Step 1172 (6383906) @ Episode 8260/10000, loss: 0.0117784468457102784
Episode R

[2017-11-05 09:08:21,990] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video008300.mp4


Step 1117 (6421662) @ Episode 8301/10000, loss: 0.0045604407787323045
Episode Reward: 21.0
Step 733 (6422395) @ Episode 8302/10000, loss: 0.0018018826376646757
Episode Reward: 11.0
Step 770 (6423165) @ Episode 8303/10000, loss: 0.0017766456585377455
Episode Reward: 15.0
Step 640 (6423805) @ Episode 8304/10000, loss: 0.0147628551349043853
Episode Reward: 9.0
Step 791 (6424596) @ Episode 8305/10000, loss: 0.0076724067330360413
Episode Reward: 16.0
Step 1551 (6426147) @ Episode 8306/10000, loss: 0.0030287809204310185
Episode Reward: 34.0
Step 775 (6426922) @ Episode 8307/10000, loss: 0.0044774869456887245
Episode Reward: 12.0
Step 852 (6427774) @ Episode 8308/10000, loss: 0.0073535898700356485
Episode Reward: 13.0
Step 914 (6428688) @ Episode 8309/10000, loss: 0.0091256862506270447
Episode Reward: 14.0
Step 815 (6429503) @ Episode 8310/10000, loss: 0.0031372611410915853
Episode Reward: 15.0
Step 496 (6429999) @ Episode 8311/10000, loss: 0.0033933129161596394
 Copied model parameters to ta

[2017-11-05 09:15:03,432] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video008350.mp4


Step 923 (6464259) @ Episode 8351/10000, loss: 0.0066905752755701548
Episode Reward: 14.0
Step 638 (6464897) @ Episode 8352/10000, loss: 0.0039010774344205856
Episode Reward: 18.0
Step 777 (6465674) @ Episode 8353/10000, loss: 0.0025072975549846888
Episode Reward: 12.0
Step 805 (6466479) @ Episode 8354/10000, loss: 0.0050370828248560435
Episode Reward: 14.0
Step 638 (6467117) @ Episode 8355/10000, loss: 0.0026954400818794966
Episode Reward: 10.0
Step 1027 (6468144) @ Episode 8356/10000, loss: 0.0018772617913782597
Episode Reward: 20.0
Step 883 (6469027) @ Episode 8357/10000, loss: 0.0395727492868900394
Episode Reward: 15.0
Step 972 (6469999) @ Episode 8358/10000, loss: 0.0059431903064250955
 Copied model parameters to target network
Step 992 (6470019) @ Episode 8358/10000, loss: 0.0071728252805769446
Episode Reward: 24.0
Step 754 (6470773) @ Episode 8359/10000, loss: 0.0340932123363018046
Episode Reward: 12.0
Step 912 (6471685) @ Episode 8360/10000, loss: 0.0031431084498763084
Episode 

[2017-11-05 09:22:12,912] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video008400.mp4


Step 666 (6509768) @ Episode 8401/10000, loss: 0.0073794159106910235
Episode Reward: 12.0
Step 231 (6509999) @ Episode 8402/10000, loss: 0.0028963922522962093
 Copied model parameters to target network
Step 630 (6510398) @ Episode 8402/10000, loss: 0.0014938854146748781
Episode Reward: 9.0
Step 1231 (6511629) @ Episode 8403/10000, loss: 0.0039234384894371035
Episode Reward: 26.0
Step 776 (6512405) @ Episode 8404/10000, loss: 0.0043806242756545543
Episode Reward: 16.0
Step 962 (6513367) @ Episode 8405/10000, loss: 0.0032904986292123795
Episode Reward: 17.0
Step 967 (6514334) @ Episode 8406/10000, loss: 0.0033162857871502646
Episode Reward: 20.0
Step 1202 (6515536) @ Episode 8407/10000, loss: 0.0015840985579416156
Episode Reward: 22.0
Step 917 (6516453) @ Episode 8408/10000, loss: 0.0031903812196105726
Episode Reward: 14.0
Step 742 (6517195) @ Episode 8409/10000, loss: 0.0021319836378097534
Episode Reward: 11.0
Step 1007 (6518202) @ Episode 8410/10000, loss: 0.0054600196890532975
Episode

[2017-11-05 09:29:10,937] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video008450.mp4


Step 797 (6554634) @ Episode 8451/10000, loss: 0.0034703738056123257
Episode Reward: 12.0
Step 1082 (6555716) @ Episode 8452/10000, loss: 0.0025338458362966776
Episode Reward: 28.0
Step 783 (6556499) @ Episode 8453/10000, loss: 0.0040515232831239765
Episode Reward: 13.0
Step 813 (6557312) @ Episode 8454/10000, loss: 0.0062633613124489787
Episode Reward: 21.0
Step 894 (6558206) @ Episode 8455/10000, loss: 0.0048744129016995437
Episode Reward: 17.0
Step 762 (6558968) @ Episode 8456/10000, loss: 0.0381305925548076645
Episode Reward: 16.0
Step 1031 (6559999) @ Episode 8457/10000, loss: 0.0030575008131563663
 Copied model parameters to target network
Step 1144 (6560112) @ Episode 8457/10000, loss: 0.0232162252068519695
Episode Reward: 24.0
Step 848 (6560960) @ Episode 8458/10000, loss: 0.0022613196633756167
Episode Reward: 19.0
Step 912 (6561872) @ Episode 8459/10000, loss: 0.0034331576898694045
Episode Reward: 15.0
Step 887 (6562759) @ Episode 8460/10000, loss: 0.0057905400171875955
Episod

[2017-11-05 09:35:32,115] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video008500.mp4


Step 732 (6594951) @ Episode 8501/10000, loss: 0.0031373538076877594
Episode Reward: 11.0
Step 1144 (6596095) @ Episode 8502/10000, loss: 0.0025509889237582684
Episode Reward: 24.0
Step 471 (6596566) @ Episode 8503/10000, loss: 0.0027292380109429365
Episode Reward: 6.0
Step 653 (6597219) @ Episode 8504/10000, loss: 0.0021360213868319996
Episode Reward: 10.0
Step 746 (6597965) @ Episode 8505/10000, loss: 0.0026479812804609537
Episode Reward: 11.0
Step 598 (6598563) @ Episode 8506/10000, loss: 0.0038890657015144825
Episode Reward: 8.0
Step 912 (6599475) @ Episode 8507/10000, loss: 0.0953869894146919326
Episode Reward: 15.0
Step 524 (6599999) @ Episode 8508/10000, loss: 0.0040047909133136277
 Copied model parameters to target network
Step 935 (6600410) @ Episode 8508/10000, loss: 0.0037663802504539496
Episode Reward: 16.0
Step 630 (6601040) @ Episode 8509/10000, loss: 0.0022881920449435715
Episode Reward: 11.0
Step 642 (6601682) @ Episode 8510/10000, loss: 0.0123326051980257036
Episode Re

[2017-11-05 09:41:56,468] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video008550.mp4


Step 857 (6635496) @ Episode 8551/10000, loss: 0.0250106323510408485
Episode Reward: 22.0
Step 517 (6636013) @ Episode 8552/10000, loss: 0.0026669565122574568
Episode Reward: 8.0
Step 960 (6636973) @ Episode 8553/10000, loss: 0.0133950654417276382
Episode Reward: 19.0
Step 664 (6637637) @ Episode 8554/10000, loss: 0.0335354469716548914
Episode Reward: 10.0
Step 549 (6638186) @ Episode 8555/10000, loss: 0.0122375702485442166
Episode Reward: 8.0
Step 949 (6639135) @ Episode 8556/10000, loss: 0.0097055006772279746
Episode Reward: 19.0
Step 864 (6639999) @ Episode 8557/10000, loss: 0.0071316547691822056
 Copied model parameters to target network
Step 937 (6640072) @ Episode 8557/10000, loss: 0.0048715127632021975
Episode Reward: 19.0
Step 1133 (6641205) @ Episode 8558/10000, loss: 0.0042739850468933584
Episode Reward: 33.0
Step 813 (6642018) @ Episode 8559/10000, loss: 0.0036263684742152695
Episode Reward: 12.0
Step 666 (6642684) @ Episode 8560/10000, loss: 0.0038334566634148365
Episode Re

[2017-11-05 09:48:23,601] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video008600.mp4


Step 951 (6676598) @ Episode 8601/10000, loss: 0.0035199425183236643
Episode Reward: 15.0
Step 699 (6677297) @ Episode 8602/10000, loss: 0.0050020609050989154
Episode Reward: 14.0
Step 866 (6678163) @ Episode 8603/10000, loss: 0.0062651205807924273
Episode Reward: 15.0
Step 943 (6679106) @ Episode 8604/10000, loss: 0.0051416032947599895
Episode Reward: 15.0
Step 893 (6679999) @ Episode 8605/10000, loss: 0.0115736899897456172
 Copied model parameters to target network
Step 1062 (6680168) @ Episode 8605/10000, loss: 0.0031417279969900846
Episode Reward: 22.0
Step 1088 (6681256) @ Episode 8606/10000, loss: 0.0066554653458297257
Episode Reward: 17.0
Step 875 (6682131) @ Episode 8607/10000, loss: 0.0037984582595527175
Episode Reward: 13.0
Step 544 (6682675) @ Episode 8608/10000, loss: 0.0056136325001716616
Episode Reward: 7.0
Step 721 (6683396) @ Episode 8609/10000, loss: 0.0033213144633919514
Episode Reward: 11.0
Step 603 (6683999) @ Episode 8610/10000, loss: 0.0086193578317761425
Episode 

[2017-11-05 09:54:55,773] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video008650.mp4


Step 775 (6718013) @ Episode 8651/10000, loss: 0.0688816234469413805
Episode Reward: 13.0
Step 1101 (6719114) @ Episode 8652/10000, loss: 0.0077992146834731175
Episode Reward: 18.0
Step 733 (6719847) @ Episode 8653/10000, loss: 0.0032043880783021453
Episode Reward: 11.0
Step 152 (6719999) @ Episode 8654/10000, loss: 0.0031908154487609863
 Copied model parameters to target network
Step 1121 (6720968) @ Episode 8654/10000, loss: 0.0075191175565123563
Episode Reward: 29.0
Step 839 (6721807) @ Episode 8655/10000, loss: 0.0099180135875940325
Episode Reward: 14.0
Step 929 (6722736) @ Episode 8656/10000, loss: 0.0044146319851279266
Episode Reward: 15.0
Step 939 (6723675) @ Episode 8657/10000, loss: 0.0078165009617805485
Episode Reward: 20.0
Step 833 (6724508) @ Episode 8658/10000, loss: 0.0109657496213912965
Episode Reward: 15.0
Step 529 (6725037) @ Episode 8659/10000, loss: 0.0037170327268540866
Episode Reward: 8.0
Step 900 (6725937) @ Episode 8660/10000, loss: 0.0112964212894439742
Episode 

[2017-11-05 10:01:13,388] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video008700.mp4


Step 311 (6757647) @ Episode 8701/10000, loss: 0.0033066580072045326
Episode Reward: 3.0
Step 862 (6758509) @ Episode 8702/10000, loss: 0.0464316271245479686
Episode Reward: 14.0
Step 956 (6759465) @ Episode 8703/10000, loss: 0.0062826913781464134
Episode Reward: 16.0
Step 534 (6759999) @ Episode 8704/10000, loss: 0.0023920228704810143
 Copied model parameters to target network
Step 707 (6760172) @ Episode 8704/10000, loss: 0.0070788208395242695
Episode Reward: 15.0
Step 1066 (6761238) @ Episode 8705/10000, loss: 0.0028316820971667767
Episode Reward: 18.0
Step 605 (6761843) @ Episode 8706/10000, loss: 0.0021459180861711556
Episode Reward: 10.0
Step 736 (6762579) @ Episode 8707/10000, loss: 0.0074316360987722874
Episode Reward: 11.0
Step 841 (6763420) @ Episode 8708/10000, loss: 0.0082784481346607276
Episode Reward: 14.0
Step 457 (6763877) @ Episode 8709/10000, loss: 0.0099923685193061833
Episode Reward: 5.0
Step 686 (6764563) @ Episode 8710/10000, loss: 0.0401785783469677435
Episode Re

[2017-11-05 10:07:59,706] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video008750.mp4


Step 704 (6800812) @ Episode 8751/10000, loss: 0.0040105152875185015
Episode Reward: 12.0
Step 1224 (6802036) @ Episode 8752/10000, loss: 0.0030635572038590913
Episode Reward: 26.0
Step 677 (6802713) @ Episode 8753/10000, loss: 0.0062320576980710035
Episode Reward: 10.0
Step 1194 (6803907) @ Episode 8754/10000, loss: 0.0026095041539520025
Episode Reward: 28.0
Step 549 (6804456) @ Episode 8755/10000, loss: 0.0032550951000303032
Episode Reward: 7.0
Step 922 (6805378) @ Episode 8756/10000, loss: 0.0210239794105291376
Episode Reward: 14.0
Step 845 (6806223) @ Episode 8757/10000, loss: 0.0076720486395061027
Episode Reward: 17.0
Step 632 (6806855) @ Episode 8758/10000, loss: 0.0037727812305092817
Episode Reward: 10.0
Step 1174 (6808029) @ Episode 8759/10000, loss: 0.0059339171275496484
Episode Reward: 25.0
Step 929 (6808958) @ Episode 8760/10000, loss: 0.0018465227913111448
Episode Reward: 14.0
Step 678 (6809636) @ Episode 8761/10000, loss: 0.0062027294188737875
Episode Reward: 8.0
Step 363 

[2017-11-05 10:14:33,278] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video008800.mp4


Step 691 (6842343) @ Episode 8801/10000, loss: 0.0037407034542411566
Episode Reward: 11.0
Step 854 (6843197) @ Episode 8802/10000, loss: 0.0059966035187244415
Episode Reward: 14.0
Step 541 (6843738) @ Episode 8803/10000, loss: 0.0057846056297421455
Episode Reward: 8.0
Step 1294 (6845032) @ Episode 8804/10000, loss: 0.0047765150666236887
Episode Reward: 24.0
Step 1200 (6846232) @ Episode 8805/10000, loss: 0.0060789650306105614
Episode Reward: 20.0
Step 665 (6846897) @ Episode 8806/10000, loss: 0.0096635008230805415
Episode Reward: 10.0
Step 939 (6847836) @ Episode 8807/10000, loss: 0.0090805171057581965
Episode Reward: 15.0
Step 744 (6848580) @ Episode 8808/10000, loss: 0.0048956647515296946
Episode Reward: 11.0
Step 767 (6849347) @ Episode 8809/10000, loss: 0.0047883391380310065
Episode Reward: 15.0
Step 652 (6849999) @ Episode 8810/10000, loss: 0.0131097547709941867
 Copied model parameters to target network
Step 758 (6850105) @ Episode 8810/10000, loss: 0.0054589710198342855
Episode 

[2017-11-05 10:21:01,174] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video008850.mp4


Step 580 (6882738) @ Episode 8851/10000, loss: 0.0036378807853907347
Episode Reward: 9.0
Step 1113 (6883851) @ Episode 8852/10000, loss: 0.0030317546334117655
Episode Reward: 25.0
Step 1012 (6884863) @ Episode 8853/10000, loss: 0.0059122331440448765
Episode Reward: 21.0
Step 1212 (6886075) @ Episode 8854/10000, loss: 0.0069749541580677035
Episode Reward: 19.0
Step 681 (6886756) @ Episode 8855/10000, loss: 0.0095963124185800558
Episode Reward: 9.0
Step 808 (6887564) @ Episode 8856/10000, loss: 0.0044650896452367317
Episode Reward: 11.0
Step 1023 (6888587) @ Episode 8857/10000, loss: 0.0040461653843522073
Episode Reward: 17.0
Step 869 (6889456) @ Episode 8858/10000, loss: 0.0019844858907163143
Episode Reward: 15.0
Step 543 (6889999) @ Episode 8859/10000, loss: 0.0038031945005059242
 Copied model parameters to target network
Step 970 (6890426) @ Episode 8859/10000, loss: 0.0025201193057000637
Episode Reward: 24.0
Step 699 (6891125) @ Episode 8860/10000, loss: 0.0048189950175583367
Episode

[2017-11-05 10:27:34,721] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video008900.mp4


Step 371 (6924165) @ Episode 8901/10000, loss: 0.0099784471094608347
Episode Reward: 4.0
Step 681 (6924846) @ Episode 8902/10000, loss: 0.0021880231797695166
Episode Reward: 11.0
Step 613 (6925459) @ Episode 8903/10000, loss: 0.0029902525711804638
Episode Reward: 9.0
Step 991 (6926450) @ Episode 8904/10000, loss: 0.0017112388741225004
Episode Reward: 21.0
Step 848 (6927298) @ Episode 8905/10000, loss: 0.0062711113132536414
Episode Reward: 15.0
Step 942 (6928240) @ Episode 8906/10000, loss: 0.0022221165709197526
Episode Reward: 17.0
Step 879 (6929119) @ Episode 8907/10000, loss: 0.0052445381879806523
Episode Reward: 17.0
Step 558 (6929677) @ Episode 8908/10000, loss: 0.0058629140257835395
Episode Reward: 7.0
Step 322 (6929999) @ Episode 8909/10000, loss: 0.0034281481057405475
 Copied model parameters to target network
Step 593 (6930270) @ Episode 8909/10000, loss: 0.0039901277050375944
Episode Reward: 8.0
Step 831 (6931101) @ Episode 8910/10000, loss: 0.0022007275838404894
Episode Rewar

[2017-11-05 10:33:43,392] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video008950.mp4


Step 1010 (6963423) @ Episode 8951/10000, loss: 0.0032652635127305984
Episode Reward: 21.0
Step 1027 (6964450) @ Episode 8952/10000, loss: 0.0045926054008305076
Episode Reward: 20.0
Step 786 (6965236) @ Episode 8953/10000, loss: 0.0033386498689651494
Episode Reward: 16.0
Step 590 (6965826) @ Episode 8954/10000, loss: 0.0083384327590465556
Episode Reward: 10.0
Step 611 (6966437) @ Episode 8955/10000, loss: 0.0192292630672454835
Episode Reward: 6.0
Step 672 (6967109) @ Episode 8956/10000, loss: 0.0017973716603592038
Episode Reward: 10.0
Step 1030 (6968139) @ Episode 8957/10000, loss: 0.0045876335352659225
Episode Reward: 30.0
Step 899 (6969038) @ Episode 8958/10000, loss: 0.0062123807147145275
Episode Reward: 18.0
Step 952 (6969990) @ Episode 8959/10000, loss: 0.0034697677474468946
Episode Reward: 17.0
Step 9 (6969999) @ Episode 8960/10000, loss: 0.0057066129520535474
 Copied model parameters to target network
Step 959 (6970949) @ Episode 8960/10000, loss: 0.0079785864800214777
Episode R

[2017-11-05 10:39:49,698] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video009000.mp4


Step 792 (7001673) @ Episode 9001/10000, loss: 0.0014591910876333714
Episode Reward: 13.0
Step 775 (7002448) @ Episode 9002/10000, loss: 0.0020706148352473974
Episode Reward: 9.0
Step 861 (7003309) @ Episode 9003/10000, loss: 0.0056273378431797035
Episode Reward: 12.0
Step 737 (7004046) @ Episode 9004/10000, loss: 0.0091101173311471945
Episode Reward: 12.0
Step 851 (7004897) @ Episode 9005/10000, loss: 0.0026165833696722984
Episode Reward: 12.0
Step 675 (7005572) @ Episode 9006/10000, loss: 0.0023312612902373075
Episode Reward: 8.0
Step 771 (7006343) @ Episode 9007/10000, loss: 0.0046516633592545993
Episode Reward: 10.0
Step 708 (7007051) @ Episode 9008/10000, loss: 0.0027314904145896435
Episode Reward: 9.0
Step 793 (7007844) @ Episode 9009/10000, loss: 0.0052925748750567445
Episode Reward: 12.0
Step 751 (7008595) @ Episode 9010/10000, loss: 0.0018517841817811131
Episode Reward: 11.0
Step 628 (7009223) @ Episode 9011/10000, loss: 0.0090574342757463463
Episode Reward: 12.0
Step 515 (700

[2017-11-05 10:45:55,762] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video009050.mp4


Step 540 (7039757) @ Episode 9051/10000, loss: 0.0038408231921494007
Episode Reward: 7.0
Step 242 (7039999) @ Episode 9052/10000, loss: 0.0028620916418731213
 Copied model parameters to target network
Step 767 (7040524) @ Episode 9052/10000, loss: 0.0028262482956051826
Episode Reward: 19.0
Step 659 (7041183) @ Episode 9053/10000, loss: 0.0017732643755152822
Episode Reward: 10.0
Step 861 (7042044) @ Episode 9054/10000, loss: 0.0050445380620658446
Episode Reward: 11.0
Step 648 (7042692) @ Episode 9055/10000, loss: 0.0036368980072438717
Episode Reward: 9.0
Step 949 (7043641) @ Episode 9056/10000, loss: 0.0038218735717236996
Episode Reward: 19.0
Step 608 (7044249) @ Episode 9057/10000, loss: 0.0032880427315831184
Episode Reward: 8.0
Step 682 (7044931) @ Episode 9058/10000, loss: 0.0043562981300055986
Episode Reward: 11.0
Step 475 (7045406) @ Episode 9059/10000, loss: 0.0026437831111252316
Episode Reward: 6.0
Step 1091 (7046497) @ Episode 9060/10000, loss: 0.0023915821220725775
Episode Rewa

[2017-11-05 10:51:51,768] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video009100.mp4


Step 592 (7077093) @ Episode 9101/10000, loss: 0.0028978395275771626
Episode Reward: 10.0
Step 402 (7077495) @ Episode 9102/10000, loss: 0.0034391803201287985
Episode Reward: 8.0
Step 724 (7078219) @ Episode 9103/10000, loss: 0.0045364778488874435
Episode Reward: 12.0
Step 776 (7078995) @ Episode 9104/10000, loss: 0.0075981570407748226
Episode Reward: 12.0
Step 607 (7079602) @ Episode 9105/10000, loss: 0.0012562009505927563
Episode Reward: 11.0
Step 397 (7079999) @ Episode 9106/10000, loss: 0.0036030332557857037
 Copied model parameters to target network
Step 780 (7080382) @ Episode 9106/10000, loss: 0.0072436323389410978
Episode Reward: 13.0
Step 884 (7081266) @ Episode 9107/10000, loss: 0.0126529969274997717
Episode Reward: 22.0
Step 1258 (7082524) @ Episode 9108/10000, loss: 0.0018170236144214869
Episode Reward: 27.0
Step 1024 (7083548) @ Episode 9109/10000, loss: 0.0026574505027383566
Episode Reward: 28.0
Step 748 (7084296) @ Episode 9110/10000, loss: 0.0008400479564443231
Episode 

[2017-11-05 10:58:02,400] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video009150.mp4


Step 947 (7116381) @ Episode 9151/10000, loss: 0.0040894076228141785
Episode Reward: 22.0
Step 728 (7117109) @ Episode 9152/10000, loss: 0.0048096673563122756
Episode Reward: 10.0
Step 1119 (7118228) @ Episode 9153/10000, loss: 0.0046961307525634766
Episode Reward: 24.0
Step 938 (7119166) @ Episode 9154/10000, loss: 0.0066555729135870933
Episode Reward: 16.0
Step 496 (7119662) @ Episode 9155/10000, loss: 0.0034707365557551384
Episode Reward: 8.0
Step 337 (7119999) @ Episode 9156/10000, loss: 0.0036760871298611164
 Copied model parameters to target network
Step 984 (7120646) @ Episode 9156/10000, loss: 0.0036979170981794596
Episode Reward: 19.0
Step 958 (7121604) @ Episode 9157/10000, loss: 0.0055660922080278453
Episode Reward: 20.0
Step 804 (7122408) @ Episode 9158/10000, loss: 0.0054816962219774728
Episode Reward: 13.0
Step 745 (7123153) @ Episode 9159/10000, loss: 0.0177128762006759643
Episode Reward: 11.0
Step 684 (7123837) @ Episode 9160/10000, loss: 0.0043398630805313595
Episode R

[2017-11-05 11:04:46,972] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video009200.mp4


Step 601 (7158909) @ Episode 9201/10000, loss: 0.0073744547553360465
Episode Reward: 9.0
Step 984 (7159893) @ Episode 9202/10000, loss: 0.1674875319004058846
Episode Reward: 17.0
Step 106 (7159999) @ Episode 9203/10000, loss: 0.0055146394297480588
 Copied model parameters to target network
Step 521 (7160414) @ Episode 9203/10000, loss: 0.0039751157164573673
Episode Reward: 7.0
Step 1038 (7161452) @ Episode 9204/10000, loss: 0.0081593198701739315
Episode Reward: 18.0
Step 604 (7162056) @ Episode 9205/10000, loss: 0.0044246576726436615
Episode Reward: 9.0
Step 642 (7162698) @ Episode 9206/10000, loss: 0.0096474802121520044
Episode Reward: 10.0
Step 607 (7163305) @ Episode 9207/10000, loss: 0.0051629664376378062
Episode Reward: 15.0
Step 1087 (7164392) @ Episode 9208/10000, loss: 0.0075859930366277695
Episode Reward: 22.0
Step 733 (7165125) @ Episode 9209/10000, loss: 0.0047555663622915745
Episode Reward: 11.0
Step 939 (7166064) @ Episode 9210/10000, loss: 0.0184939932078123142
Episode Re

[2017-11-05 11:11:08,808] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video009250.mp4


Step 741 (7199219) @ Episode 9251/10000, loss: 0.0055038174614310265
Episode Reward: 11.0
Step 780 (7199999) @ Episode 9252/10000, loss: 0.0027440637350082397
 Copied model parameters to target network
Step 944 (7200163) @ Episode 9252/10000, loss: 0.0022937580943107605
Episode Reward: 16.0
Step 1009 (7201172) @ Episode 9253/10000, loss: 0.0040626316331326964
Episode Reward: 17.0
Step 834 (7202006) @ Episode 9254/10000, loss: 0.0048664794303476813
Episode Reward: 14.0
Step 1177 (7203183) @ Episode 9255/10000, loss: 0.0036406465806066997
Episode Reward: 18.0
Step 962 (7204145) @ Episode 9256/10000, loss: 0.0022380249574780464
Episode Reward: 25.0
Step 910 (7205055) @ Episode 9257/10000, loss: 0.0208278745412826545
Episode Reward: 16.0
Step 968 (7206023) @ Episode 9258/10000, loss: 0.0049507152289152145
Episode Reward: 17.0
Step 806 (7206829) @ Episode 9259/10000, loss: 0.0023332904092967515
Episode Reward: 21.0
Step 633 (7207462) @ Episode 9260/10000, loss: 0.0045548714697360998
Episode

[2017-11-05 11:17:36,819] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video009300.mp4


Step 582 (7239999) @ Episode 9301/10000, loss: 0.0040617701597511774
 Copied model parameters to target network
Step 1383 (7240800) @ Episode 9301/10000, loss: 0.0018274032045155764
Episode Reward: 28.0
Step 914 (7241714) @ Episode 9302/10000, loss: 0.0056873476132750518
Episode Reward: 15.0
Step 1204 (7242918) @ Episode 9303/10000, loss: 0.0039437939412891865
Episode Reward: 32.0
Step 948 (7243866) @ Episode 9304/10000, loss: 0.0150764565914869376
Episode Reward: 17.0
Step 750 (7244616) @ Episode 9305/10000, loss: 0.0054957969114184385
Episode Reward: 11.0
Step 979 (7245595) @ Episode 9306/10000, loss: 0.0036785430274903774
Episode Reward: 23.0
Step 967 (7246562) @ Episode 9307/10000, loss: 0.0060701207257807255
Episode Reward: 13.0
Step 938 (7247500) @ Episode 9308/10000, loss: 0.0283583607524633472
Episode Reward: 15.0
Step 867 (7248367) @ Episode 9309/10000, loss: 0.0045471293851733214
Episode Reward: 17.0
Step 760 (7249127) @ Episode 9310/10000, loss: 0.0084251984953880313
Episode

[2017-11-05 11:24:35,113] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video009350.mp4


Step 750 (7284353) @ Episode 9351/10000, loss: 0.0020066751167178154
Episode Reward: 17.0
Step 783 (7285136) @ Episode 9352/10000, loss: 0.0036129143554717302
Episode Reward: 11.0
Step 850 (7285986) @ Episode 9353/10000, loss: 0.0051187630742788315
Episode Reward: 18.0
Step 986 (7286972) @ Episode 9354/10000, loss: 0.0047786510549485684
Episode Reward: 16.0
Step 639 (7287611) @ Episode 9355/10000, loss: 0.0051736366003751755
Episode Reward: 9.0
Step 920 (7288531) @ Episode 9356/10000, loss: 0.0044082533568143845
Episode Reward: 21.0
Step 583 (7289114) @ Episode 9357/10000, loss: 0.0093538602814078335
Episode Reward: 8.0
Step 696 (7289810) @ Episode 9358/10000, loss: 0.0120202787220478067
Episode Reward: 15.0
Step 189 (7289999) @ Episode 9359/10000, loss: 0.0030981868039816626
 Copied model parameters to target network
Step 957 (7290767) @ Episode 9359/10000, loss: 0.0086089754477143297
Episode Reward: 24.0
Step 593 (7291360) @ Episode 9360/10000, loss: 0.0042590429075062275
Episode Rew

[2017-11-05 11:30:54,277] Starting new video recorder writing to /home/mark/projects/reinforcement-learning/experiments/Breakout-v0/monitor/openaigym.video.0.14354.video009400.mp4


Step 721 (7324391) @ Episode 9401/10000, loss: 0.0066166231408715254
Episode Reward: 10.0
Step 1381 (7325772) @ Episode 9402/10000, loss: 0.0023492448963224894
Episode Reward: 29.0
Step 661 (7326433) @ Episode 9403/10000, loss: 0.0066148564219474798
Episode Reward: 10.0
Step 743 (7327176) @ Episode 9404/10000, loss: 0.0023949537426233298
Episode Reward: 12.0
Step 857 (7328033) @ Episode 9405/10000, loss: 0.0084447320550680165
Episode Reward: 17.0
Step 676 (7328709) @ Episode 9406/10000, loss: 0.0095829935744404834
Episode Reward: 10.0
Step 820 (7329529) @ Episode 9407/10000, loss: 0.0048130019567906866
Episode Reward: 13.0
Step 470 (7329999) @ Episode 9408/10000, loss: 0.0065483115613460545
 Copied model parameters to target network
Step 982 (7330511) @ Episode 9408/10000, loss: 0.0022894281428307295
Episode Reward: 16.0
Step 796 (7331307) @ Episode 9409/10000, loss: 0.0481149181723594726
Episode Reward: 9.0
Step 981 (7332288) @ Episode 9410/10000, loss: 0.0054785697720944881
Episode R