In [1]:
import gym
import itertools
import os
import random
import sys
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

if "../" not in sys.path:
  sys.path.append("../")

from lib import plotting
from collections import deque, namedtuple

%matplotlib inline

In [2]:
env = gym.envs.make("Breakout-v0")

[2017-03-02 21:16:57,349] Making new env: Breakout-v0


In [3]:
# Atari actions" 0 (no op), 1 (fire), 2 (left), 3 (right)
VALID_ACTIONS = [0, 1, 2, 3]

In [4]:
print(len(VALID_ACTIONS))

4


In [5]:
class StateProcessor(object):
    """A class to process an atari image for input into the nn"""
    def __init__(self):
        # Build the Tensorflow graph.
        with tf.variable_scope('state_processor'):
            # Input is a 210 x 160 x 3 array describing the screen
            self.input_state = tf.placeholder(shape=[210, 160, 3], dtype=tf.uint8)
            # Transform to grayscale
            self.output = tf.image.rgb_to_grayscale(self.input_state)
            # Crop top and bottom of image to make it square
            self.output = tf.image.crop_to_bounding_box(self.output,
                                                        34, 0, 160, 160)
            # resize image to 84 x 84
            self.output = tf.image.resize_images(self.output, [84, 84],
                                               method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
            self.output = tf.squeeze(self.output)
    
    def process(self, sess, state):
        """
        process an atari image
        
        Args:
            sess: A TensorFlow session object
            state: A [210, 160, 3] Atari RGB state
            
        Returns:
            A processed [84, 84, 1] state representing grayscale values
        """
        return sess.run(self.output, {self.input_state: state})
    

In [6]:
class Estimator(object):
    """Neural Network to estimate Q-Value
    
    This network is used for both the Q-Network and Target Network
    """
    
    def __init__(self, scope='estimator', summaries_dir=None):
        self.scope = scope
        # Write Tensorboard summaries to disk
        self.summary_writer = None
        with tf.variable_scope(scope):
            # Build the graph
            self._build_model()
            if summaries_dir:
                summary_dir = os.path.join(summaries_dir, "summaries_{}".format(scope))
                if not os.path.exists(summary_dir):
                    os.makedirs(summary_dir)
                self.summary_writer = tf.summary.FileWriter(summary_dir)
    
    def _build_model(self):
        """
        Builds the Tensorflow graph
        """
        
        # Placeholders for input
        # Input are 4 RGB frames of shape 160, 160 each
        self.X_pl = tf.placeholder(shape=[None, 84, 84, 4], 
                                   dtype=tf.uint8,
                                   name='X')
        # TD Target value
        self.y_pl = tf.placeholder(shape=[None], dtype=tf.float32, name='y')
        # Integer if of which action was selected
        self.actions_pl = tf.placeholder(shape=[None],
                                         dtype=tf.int32,
                                         name='actions')
        # Variables
        layer1_weights = tf.Variable(tf.truncated_normal(
                [8, 8, 4, 32], stddev=0.1))
        layer1_biases = tf.Variable(tf.zeros([32]))
        
        layer2_weights = tf.Variable(tf.truncated_normal(
                [4, 4, 32, 64], stddev=0.1))
        layer2_biases = tf.Variable(tf.zeros([64]))
        
        layer3_weights = tf.Variable(tf.truncated_normal(
                [3, 3, 64, 64], stddev=0.1))
        layer3_biases = tf.Variable(tf.zeros([64]))
        
        layer4_weights = tf.Variable(tf.truncated_normal(
                [7*7*64, 512], stddev=0.1))
        layer4_biases = tf.Variable(tf.zeros([512]))
        
        layer5_weights = tf.Variable(tf.truncated_normal(
                [512, len(VALID_ACTIONS)], stddev=0.1))
        layer5_biases = tf.Variable(tf.zeros([len(VALID_ACTIONS)]))
        
        
        X = tf.to_float(self.X_pl) / 255.0
        batch_size = tf.shape(self.X_pl)[0]

        # Three convolutional layers
        conv = tf.nn.conv2d(X, layer1_weights, [1, 4, 4, 1],  padding='VALID', name='conv1')
        relu = tf.nn.relu(conv + layer1_biases, name='relu1')
        conv = tf.nn.conv2d(relu, layer2_weights, [1, 2, 2, 1], padding='VALID', name='conv2')
        relu = tf.nn.relu(conv + layer2_biases, name='relu2')
        conv = tf.nn.conv2d(relu, layer3_weights, [1, 1, 1, 1], padding='VALID', name='conv3')
        relu = tf.nn.relu(conv + layer3_biases, name='relu3')
        shape = relu.get_shape().as_list()
        
        reshape = tf.reshape(relu, [-1, shape[1]*shape[2]*shape[3]])
        fc1 = tf.matmul(reshape, layer4_weights) + layer4_biases
        self.predictions = tf.matmul(fc1, layer5_weights) + layer5_biases
        
        # Get the predictions for the chosen actions only
        gather_indices = tf.range(batch_size) * tf.shape(self.predictions)[1]
        self.action_predictions = tf.gather(tf.reshape(self.predictions, [-1]), gather_indices)
        
        # Loss
        self.losses = tf.squared_difference(self.y_pl, self.action_predictions)
        self.loss = tf.reduce_mean(self.losses)
        
        # Optimizer Parameters from original paper
        self.optimizer = tf.train.RMSPropOptimizer(0.00025, 0.99, 0.0, 1e-6)
        self.train_op = self.optimizer.minimize(self.loss, 
                                                global_step=tf.contrib.framework.get_global_step())
        
        # Summaries for Tensorboard
        self.summaries = tf.summary.merge([
                tf.summary.scalar('loss', self.loss),
                tf.summary.histogram('loss_hist', self.losses),
                tf.summary.histogram('q_values_hist', self.predictions),
                tf.summary.scalar('max_q_value', tf.reduce_max(self.predictions))
            ])
        
    def predict(self, sess, s):
        """predictions action values.
        
        Args:
            sess: Tensorflow session
            s: State input of shape [batch_size, 4, 160, 160, 3]
                
        Returns:
            Tensor of shape [batch_size, NUM_VALID_ACTIONS] containing
            the action values.
        """
        A = sess.run(self.predictions, {self.X_pl: s})
        
        return A
        #return sess.run(self.predictions, {self.X_pl: s})
        
    def update(self, sess, s, a, y):
        """
        UPdate the estimator toward the given targets
        
        Args:
            sess: Tensorflow session object
            s: State input of size [batch_size, 4, 160, 160, 3]
            a: Chosen action of size [batch_size]
            y: Targets of shape [batch_size]
            
        Returns:
            The calculated loss of each batch
        """
        feed_dict = {self.X_pl: s, self.y_pl: y, self.actions_pl: a}
        summaries, global_step, _, loss = sess.run(
            [self.summaries, 
             tf.contrib.framework.get_global_step(), 
             self.train_op,
             self.loss], feed_dict=feed_dict)
        if self.summary_writer:
            self.summary_writer.add_summary(summaries, global_step)
        return loss

In [7]:
# For Testing

tf.reset_default_graph()
global_step = tf.Variable(0, name='global_step', trainable=False)

e = Estimator(scope='test')
sp = StateProcessor()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    observation = env.reset()
    
    observation_p = sp.process(sess, observation)
    observation = np.stack([observation_p] * 4, axis=2)
    observations = np.array([observation] * 2)
    
    print(e.predict(sess, observations))
    
    # Test training step
    y = np.array([10.0, 10.0])
    a = np.array([1, 3])
    print(e.update(sess, observations, a, y))

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


[[-6.24132586  1.84667373  0.70204931  3.91748405]
 [-6.24132586  1.84667373  0.70204931  3.91748405]]
263.781


In [8]:
def copy_model_parameters(sess, estimator1, estimator2):
    """
    Copies the model params of one estimator to another
    
    Args:
        sess: Tensorflow sesion instance
        estimator1: Estimator to copy the parameters from
        estimator2: Estimator to copy the parameters to
    """
    e1_params = [t for t in tf.trainable_variables() if t.name.startswith(estimator1.scope)]
    e1_params = sorted(e1_params, key=lambda v: v.name)
    e2_params = [t for t in tf.trainable_variables() if t.name.startswith(estimator2.scope)]
    e2_params = sorted(e2_params, key=lambda v: v.name)
    
    update_ops = []
    for e1_v, e2_v in zip(e1_params, e2_params):
        op = e2_v.assign(e1_v)
        update_ops.append(op)
    sess.run(update_ops)

In [9]:
def make_epsilon_greedy_policy(estimator, nA):
    """
    Creates an epsilon-greedy policy based on a given Q-function approximation
    
    Args:
        esimator: An estimator that returns q values for a given state
        nA: Number of actions in the environment
        
    Returns:
        A function that takes the (sess, observation, epsilon) as an argument
        and returns the probabilites for each action in the form of a 
        numpy array of length nA
    """
    
    def policy_fn(sess, observation, epsilon):
        A = np.ones(nA, dtype=float) * epsilon / nA
       
        q_values = estimator.predict(sess, np.expand_dims(observation, 0))[0]
        
        best_action = np.argmax(q_values)
        A[best_action] += (1.0 - epsilon)
        return A
    return policy_fn

In [10]:
def deep_q_learning(sess, 
                    env, 
                    q_estimator, 
                    target_estimator, 
                    state_processor, 
                    num_episodes, 
                    experiment_dir, 
                    replay_memory_size=500000, 
                    replay_memory_init_size=50000, 
                    update_target_estimator_every=10000, 
                    discount_factor=0.99, 
                    epsilon_start=1.0, 
                    epsilon_end=0.1, 
                    epsilon_decay_steps=500000, 
                    batch_size=32, 
                    record_video_every=50):
    """
    Q-learning algorithim for off-policy TD control using function approximation
    Finds the optimal greedy policy while following epsilon-greedy one
    
    Args:
        sess: Tensorflow Session object
        env: OpenAI environment
        q_estimator: Estimator object used for the q values
        target_estimator: Estimator object used for the targets
        state_processor: A StateProcessor object
        num_episodes: Number of episodes to run for
        experiment_dir: Directory to save Tensorflow summaries in
        replay_memory_size: Size of the replay memory
        replay_memory_init_size: Number of random experiences to sampel when initializing 
          the reply memory.
        update_target_estimator_every: Copy parameters from the Q estimator to the 
          target estimator every N steps
        discount_factor: Lambda time discount factor
        epsilon_start: Chance to sample a random action when taking an action.
          Epsilon is decayed over time and this is the start value
        epsilon_end: The final minimum value of epsilon after decaying is done
        epsilon_decay_steps: Number of steps to decay epsilon over
        batch_size: Size of batches to sample from the replay memory
        record_video_every: Record a video every N episodes

    Returns:
        An EpisodeStats object with two numpy arrays for episode_lengths and episode_rewards.
    
    """
    Transition = namedtuple('Transition', ['state', 'action', 'reward', 'next_state', 'done'])
    
    # The replay memory
    replay_memory = deque(maxlen=replay_memory_size)
    
    # Keeps track of useful stats
    stats = plotting.EpisodeStats(
        episode_lengths=np.zeros(num_episodes),
        episode_rewards=np.zeros(num_episodes))
    
    # Create directories for checkpoints and summaries
    checkpoint_dir = os.path.join(experiment_dir, 'checkpoints')
    checkpoint_path = os.path.join(checkpoint_dir, 'model')
    monitor_path = os.path.join(experiment_dir, 'monitor')
    
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    if not os.path.exists(monitor_path):
        os.makedirs(monitor_path)
        
    saver = tf.train.Saver()
    # Load a previous checkpoint if we find one
    latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_checkpoint:
        print("Loading model checkpoint {}...\n".format(latest_checkpoint))
        saver.restore(sess, latest_checkpoint)
    
    # Get the current time step
    total_t = sess.run(tf.contrib.framework.get_global_step())
    
    # The epsilon decay schedule
    epsilons = np.linspace(epsilon_start, epsilon_end, epsilon_decay_steps)
    
    # The policy we're following
    policy = make_epsilon_greedy_policy(
        q_estimator,
        len(VALID_ACTIONS))
    
    # Populate the replay memory with initial experience
    print("Populating replay memory...")
    state = env.reset()
    state = state_processor.process(sess, state)
    state = np.stack([state] * 4, axis=2)
    for i in range(replay_memory_init_size):
        if i % 500 == 0:
            print("Step {}".format(i))
        action_probs = policy(sess, state, epsilons[total_t])
        action = np.random.choice(np.arange(len(action_probs)), p=action_probs)
        next_state, reward, done, _ = env.step(VALID_ACTIONS[action])
        next_state = state_processor.process(sess, next_state)
        next_state = np.append(state[:,:,1:], np.expand_dims(next_state, 2), axis=2)
        replay_memory.append(Transition(state, action, reward, next_state, done))
        if done:
            state = env.reset()
            state = state_processor.process(sess, state)
            state = np.stack([state] * 4, axis=2)
        else:
            state = next_state
    
    # Record videos
    env.monitor.start(monitor_path,
                     resume=True,
                     video_callable=lambda count: count % record_video_every)
    
    for i_episode in xrange(num_episodes):
        # Save the current checkpoint
        saver.save(tf.get_default_session(), checkpoint_path)
        
        # Reset the environment
        state = env.reset()
        state = state_processor.process(sess, state)
        state = np.stack([state]*4, axis=2)
        loss = None
        
        # One step in the environment.
        for t in itertools.count():
            epsilon = epsilons[min(total_t, epsilon_decay_steps-1)]
            
            # Add epsilon to Tensorboard
            episode_summary = tf.Summary()
            episode_summary.value.add(simple_value=epsilon, tag='epsilon')
            q_estimator.summary_writer.add_summary(episode_summary, total_t)
            
            if total_t % update_target_estimator_every == 0:
                print("Copying parameters from q_estimator to target...\n")
                copy_model_parameters(sess, q_estimator, target_estimator)
            
            
            # Print step we're on, useful for debugging
            if t % 100 == 0:
                print("\rStep {} ({}) @ Episode {}/{}, loss: {}".format(
                t, total_t, i_episode+1, num_episodes, loss))
                sys.stdout.flush()
            
            # Take a step in the environment
            action_probs = policy(sess, state, epsilon)
            action = np.random.choice(len(action_probs), p=action_probs)
            next_state, reward, done, _ = env.step(VALID_ACTIONS[action])
            next_state = state_processor.process(sess, next_state)
            next_state = np.append(state[:,:,1:], np.expand_dims(next_state, 2), axis=2)
            
            transition = Transition(state, action, reward, next_state, done)
            # If our replay_memory is full, pop the first element
            replay_memory.append(transition)
            
            stats.episode_lengths[i_episode] = t
            stats.episode_rewards[i_episode] += reward
            
            #Sample minibatch from the replay memory
            samples = random.sample(replay_memory, batch_size)
            states_batch, action_batch, reward_batch, next_state_batch, done_batch = map(
            np.array, zip(*samples))
            
            # Calculate q-values and targets
            q_values_next = target_estimator.predict(sess, next_state_batch)
            targets_batch = reward_batch + np.invert(done_batch).astype(np.float32) * discount_factor * np.amax(q_values_next, axis=1)
            
            # Perform gradient descent update
            states_batch = np.array(states_batch)
            loss = q_estimator.update(sess, states_batch, action_batch, targets_batch)
            
            if done:
                break
            
            state = next_state
            total_t += 1
            
            
        # Add summaries to tensorboard
        episode_summary = tf.Summary()
        episode_summary.value.add(simple_value=stats.episode_rewards[i_episode],
                                     node_name='episode_reward', tag='episode_reward')
        episode_summary.value.add(simple_value=stats.episode_lengths[i_episode],
                                     node_name='episode_length', tag='episode_length')
        q_estimator.summary_writer.add_summary(episode_summary, total_t)
        q_estimator.summary_writer.flush()
            
        yield total_t, plotting.EpisodeStats(
            episode_lengths=stats.episode_lengths[:i_episode+1],
            episode_rewards=stats.episode_rewards[:i_episode+1])
        
    env.monitor.close()
    return


In [13]:
tf.reset_default_graph()

# Where we save our checkpoint and graphs
experiment_dir = os.path.abspath("./experiments/{}".format(env.spec.id))

# Create a global_step_variable
global_step = tf.Variable(0, name='global_step', trainable=False)

# Create estimators
q_estimator = Estimator(scope='q', summaries_dir=experiment_dir)
target_estimator = Estimator(scope='target_q')

state_processor = StateProcessor()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for t, stats in deep_q_learning(sess,
                                   env,
                                   q_estimator=q_estimator,
                                   target_estimator=target_estimator,
                                   state_processor=state_processor,
                                    num_episodes=200,
                                   experiment_dir=experiment_dir,
                                   replay_memory_size=500000,
                                   replay_memory_init_size=50000):
        print("\nEpisode Reward: {}".format(stats.episode_rewards[-1]))

Loading model checkpoint /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/checkpoints/model...

Populating replay memory...
Step 0
Step 500
Step 1000
Step 1500
Step 2000
Step 2500
Step 3000
Step 3500
Step 4000
Step 4500
Step 5000
Step 5500
Step 6000
Step 6500
Step 7000
Step 7500
Step 8000
Step 8500
Step 9000
Step 9500
Step 10000
Step 10500
Step 11000
Step 11500
Step 12000
Step 12500
Step 13000
Step 13500
Step 14000
Step 14500
Step 15000
Step 15500
Step 16000
Step 16500
Step 17000
Step 17500
Step 18000
Step 18500
Step 19000
Step 19500
Step 20000
Step 20500
Step 21000
Step 21500
Step 22000
Step 22500
Step 23000
Step 23500
Step 24000
Step 24500
Step 25000
Step 25500
Step 26000
Step 26500
Step 27000
Step 27500
Step 28000
Step 28500
Step 29000
Step 29500
Step 30000
Step 30500
Step 31000
Step 31500
Step 32000
Step 32500
Step 33000
Step 33500
Step 34000
Step 34500
Step 35000
Step 35500
Step 36000
Step 36500
Step 37000
Step 37500
Step 38000
Step 38500
Step 39000
Step 39500
St

[2017-03-02 23:33:55,186] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000001.mp4



Episode Reward: 2.0
Step 0 (80136) @ Episode 2/200, loss: None
Step 100 (80236) @ Episode 2/200, loss: 0.0593302287161


[2017-03-02 23:34:05,995] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000002.mp4



Episode Reward: 0.0
Step 0 (80312) @ Episode 3/200, loss: None
Step 100 (80412) @ Episode 3/200, loss: 0.0174886547029
Step 200 (80512) @ Episode 3/200, loss: 0.0129819680005


[2017-03-02 23:34:20,767] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000003.mp4



Episode Reward: 1.0
Step 0 (80553) @ Episode 4/200, loss: None
Step 100 (80653) @ Episode 4/200, loss: 0.0336621552706
Step 200 (80753) @ Episode 4/200, loss: 0.00769066810608
Step 300 (80853) @ Episode 4/200, loss: 0.00983962230384


[2017-03-02 23:34:41,042] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000004.mp4



Episode Reward: 3.0
Step 0 (80887) @ Episode 5/200, loss: None
Step 100 (80987) @ Episode 5/200, loss: 0.0147223453969


[2017-03-02 23:34:53,125] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000005.mp4



Episode Reward: 0.0
Step 0 (81083) @ Episode 6/200, loss: None
Step 100 (81183) @ Episode 6/200, loss: 0.0173677112907


[2017-03-02 23:35:04,929] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000006.mp4



Episode Reward: 0.0
Step 0 (81266) @ Episode 7/200, loss: None
Step 100 (81366) @ Episode 7/200, loss: 0.0119780041277
Step 200 (81466) @ Episode 7/200, loss: 0.0283915176988


[2017-03-02 23:35:18,634] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000007.mp4



Episode Reward: 1.0
Step 0 (81480) @ Episode 8/200, loss: None
Step 100 (81580) @ Episode 8/200, loss: 0.00929198786616
Step 200 (81680) @ Episode 8/200, loss: 0.0236762724817


[2017-03-02 23:35:37,303] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000008.mp4



Episode Reward: 2.0
Step 0 (81775) @ Episode 9/200, loss: None
Step 100 (81875) @ Episode 9/200, loss: 0.0325475409627


[2017-03-02 23:35:48,589] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000009.mp4



Episode Reward: 0.0
Step 0 (81951) @ Episode 10/200, loss: None
Step 100 (82051) @ Episode 10/200, loss: 0.0225248001516


[2017-03-02 23:35:59,842] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000010.mp4



Episode Reward: 0.0
Step 0 (82125) @ Episode 11/200, loss: None
Step 100 (82225) @ Episode 11/200, loss: 0.00480328407139
Step 200 (82325) @ Episode 11/200, loss: 0.0344071537256


[2017-03-02 23:36:15,648] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000011.mp4



Episode Reward: 1.0
Step 0 (82363) @ Episode 12/200, loss: None
Step 100 (82463) @ Episode 12/200, loss: 0.0261528342962
Step 200 (82563) @ Episode 12/200, loss: 0.551748752594
Step 300 (82663) @ Episode 12/200, loss: 0.00738095957786


[2017-03-02 23:36:38,494] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000012.mp4



Episode Reward: 3.0
Step 0 (82703) @ Episode 13/200, loss: None
Step 100 (82803) @ Episode 13/200, loss: 0.0144997034222
Step 200 (82903) @ Episode 13/200, loss: 0.0040804091841


[2017-03-02 23:36:54,201] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000013.mp4



Episode Reward: 1.0
Step 0 (82949) @ Episode 14/200, loss: None
Step 100 (83049) @ Episode 14/200, loss: 0.0217467285693
Step 200 (83149) @ Episode 14/200, loss: 0.0213508270681
Step 300 (83249) @ Episode 14/200, loss: 0.537101864815

Episode Reward: 3.0


[2017-03-02 23:37:19,279] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000014.mp4


Step 0 (83336) @ Episode 15/200, loss: None
Step 100 (83436) @ Episode 15/200, loss: 0.011041386053
Step 200 (83536) @ Episode 15/200, loss: 0.00549334287643


[2017-03-02 23:37:37,495] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000015.mp4



Episode Reward: 2.0
Step 0 (83625) @ Episode 16/200, loss: None
Step 100 (83725) @ Episode 16/200, loss: 0.0163880605251
Step 200 (83825) @ Episode 16/200, loss: 0.0219936855137


[2017-03-02 23:37:55,941] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000016.mp4



Episode Reward: 2.0
Step 0 (83913) @ Episode 17/200, loss: None
Step 100 (84013) @ Episode 17/200, loss: 0.00546866795048


[2017-03-02 23:38:06,964] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000017.mp4



Episode Reward: 0.0
Step 0 (84078) @ Episode 18/200, loss: None
Step 100 (84178) @ Episode 18/200, loss: 0.0152150895447
Step 200 (84278) @ Episode 18/200, loss: 0.0101504512131
Step 300 (84378) @ Episode 18/200, loss: 0.0254999026656


[2017-03-02 23:38:29,780] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000018.mp4



Episode Reward: 3.0
Step 0 (84438) @ Episode 19/200, loss: None
Step 100 (84538) @ Episode 19/200, loss: 0.00697914883494
Step 200 (84638) @ Episode 19/200, loss: 0.00246679550037


[2017-03-02 23:38:46,964] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000019.mp4



Episode Reward: 2.0
Step 0 (84708) @ Episode 20/200, loss: None
Step 100 (84808) @ Episode 20/200, loss: 0.0162519775331
Step 200 (84908) @ Episode 20/200, loss: 0.694667935371

Episode Reward: 1.0


[2017-03-02 23:39:04,386] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000020.mp4


Step 0 (84976) @ Episode 21/200, loss: None
Step 100 (85076) @ Episode 21/200, loss: 0.0100467242301

Episode Reward: 0.0


[2017-03-02 23:39:16,303] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000021.mp4


Step 0 (85152) @ Episode 22/200, loss: None
Step 100 (85252) @ Episode 22/200, loss: 0.00720302853733
Step 200 (85352) @ Episode 22/200, loss: 0.00697157625109


[2017-03-02 23:39:33,071] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000022.mp4



Episode Reward: 1.0
Step 0 (85411) @ Episode 23/200, loss: None
Step 100 (85511) @ Episode 23/200, loss: 0.00746551249176


[2017-03-02 23:39:44,608] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000023.mp4



Episode Reward: 0.0
Step 0 (85587) @ Episode 24/200, loss: None
Step 100 (85687) @ Episode 24/200, loss: 0.0196915324777
Step 200 (85787) @ Episode 24/200, loss: 0.00626177852973


[2017-03-02 23:39:59,359] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000024.mp4



Episode Reward: 1.0
Step 0 (85810) @ Episode 25/200, loss: None
Step 100 (85910) @ Episode 25/200, loss: 1.50174593925
Step 200 (86010) @ Episode 25/200, loss: 0.00728002609685


[2017-03-02 23:40:15,397] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000025.mp4



Episode Reward: 1.0
Step 0 (86048) @ Episode 26/200, loss: None
Step 100 (86148) @ Episode 26/200, loss: 0.0525597706437


[2017-03-02 23:40:26,267] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000026.mp4



Episode Reward: 0.0
Step 0 (86216) @ Episode 27/200, loss: None
Step 100 (86316) @ Episode 27/200, loss: 0.00938747450709
Step 200 (86416) @ Episode 27/200, loss: 0.012486724183


[2017-03-02 23:40:39,337] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000027.mp4



Episode Reward: 1.0
Step 0 (86420) @ Episode 28/200, loss: None
Step 100 (86520) @ Episode 28/200, loss: 0.0185246039182
Step 200 (86620) @ Episode 28/200, loss: 0.0174816921353


[2017-03-02 23:40:57,974] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000028.mp4



Episode Reward: 2.0
Step 0 (86705) @ Episode 29/200, loss: None
Step 100 (86805) @ Episode 29/200, loss: 0.00606651371345
Step 200 (86905) @ Episode 29/200, loss: 0.0108488928527
Step 300 (87005) @ Episode 29/200, loss: 0.0303117074072


[2017-03-02 23:41:18,113] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000029.mp4



Episode Reward: 2.0
Step 0 (87015) @ Episode 30/200, loss: None
Step 100 (87115) @ Episode 30/200, loss: 0.00707918498665
Step 200 (87215) @ Episode 30/200, loss: 0.672024011612

Episode Reward: 0.0


[2017-03-02 23:41:32,138] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000030.mp4


Step 0 (87215) @ Episode 31/200, loss: None
Step 100 (87315) @ Episode 31/200, loss: 0.010585132055


[2017-03-02 23:41:43,140] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000031.mp4



Episode Reward: 0.0
Step 0 (87379) @ Episode 32/200, loss: None
Step 100 (87479) @ Episode 32/200, loss: 0.670050919056


[2017-03-02 23:41:55,693] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000032.mp4



Episode Reward: 0.0
Step 0 (87573) @ Episode 33/200, loss: None
Step 100 (87673) @ Episode 33/200, loss: 0.0133252544329
Step 200 (87773) @ Episode 33/200, loss: 0.042034227401
Step 300 (87873) @ Episode 33/200, loss: 0.00646457402036


[2017-03-02 23:42:20,357] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000033.mp4



Episode Reward: 3.0
Step 0 (87935) @ Episode 34/200, loss: None
Step 100 (88035) @ Episode 34/200, loss: 0.00772649189457
Step 200 (88135) @ Episode 34/200, loss: 0.0065969703719


[2017-03-02 23:42:34,588] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000034.mp4



Episode Reward: 1.0
Step 0 (88158) @ Episode 35/200, loss: None
Step 100 (88258) @ Episode 35/200, loss: 0.0143132051453


[2017-03-02 23:42:45,659] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000035.mp4



Episode Reward: 0.0
Step 0 (88335) @ Episode 36/200, loss: None
Step 100 (88435) @ Episode 36/200, loss: 0.0214370880276
Step 200 (88535) @ Episode 36/200, loss: 0.00883594620973


[2017-03-02 23:43:00,746] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000036.mp4



Episode Reward: 1.0
Step 0 (88573) @ Episode 37/200, loss: None
Step 100 (88673) @ Episode 37/200, loss: 0.00777838751674


[2017-03-02 23:43:11,558] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000037.mp4



Episode Reward: 0.0
Step 0 (88742) @ Episode 38/200, loss: None
Step 100 (88842) @ Episode 38/200, loss: 0.00842023454607
Step 200 (88942) @ Episode 38/200, loss: 0.00780837889761


[2017-03-02 23:43:29,960] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000038.mp4



Episode Reward: 1.0
Step 0 (89027) @ Episode 39/200, loss: None
Step 100 (89127) @ Episode 39/200, loss: 0.00296986149624
Step 200 (89227) @ Episode 39/200, loss: 0.0245871543884


[2017-03-02 23:43:44,609] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000039.mp4



Episode Reward: 1.0
Step 0 (89257) @ Episode 40/200, loss: None
Step 100 (89357) @ Episode 40/200, loss: 0.0035661761649
Step 200 (89457) @ Episode 40/200, loss: 0.00702401250601


[2017-03-02 23:44:01,219] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000040.mp4



Episode Reward: 2.0
Step 0 (89511) @ Episode 41/200, loss: None
Step 100 (89611) @ Episode 41/200, loss: 0.0302499067038


[2017-03-02 23:44:12,399] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000041.mp4



Episode Reward: 0.0
Step 0 (89690) @ Episode 42/200, loss: None
Step 100 (89790) @ Episode 42/200, loss: 0.775126338005
Step 200 (89890) @ Episode 42/200, loss: 0.0316856727004
Step 300 (89990) @ Episode 42/200, loss: 0.00827863160521
Copying parameters from q_estimator to target...



[2017-03-02 23:44:35,587] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000042.mp4



Episode Reward: 3.0
Step 0 (90024) @ Episode 43/200, loss: None
Step 100 (90124) @ Episode 43/200, loss: 0.0169293545187
Step 200 (90224) @ Episode 43/200, loss: 0.0829321593046


[2017-03-02 23:44:49,887] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000043.mp4



Episode Reward: 1.0
Step 0 (90248) @ Episode 44/200, loss: None
Step 100 (90348) @ Episode 44/200, loss: 0.00990525167435


[2017-03-02 23:45:00,162] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000044.mp4



Episode Reward: 0.0
Step 0 (90411) @ Episode 45/200, loss: None
Step 100 (90511) @ Episode 45/200, loss: 0.00907676760107


[2017-03-02 23:45:11,280] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000045.mp4



Episode Reward: 0.0
Step 0 (90588) @ Episode 46/200, loss: None
Step 100 (90688) @ Episode 46/200, loss: 0.00425619911402


[2017-03-02 23:45:22,277] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000046.mp4



Episode Reward: 0.0
Step 0 (90758) @ Episode 47/200, loss: None
Step 100 (90858) @ Episode 47/200, loss: 0.0107963923365


[2017-03-02 23:45:34,970] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000047.mp4



Episode Reward: 0.0
Step 0 (90937) @ Episode 48/200, loss: None
Step 100 (91037) @ Episode 48/200, loss: 0.00664844829589


[2017-03-02 23:45:46,216] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000048.mp4



Episode Reward: 0.0
Step 0 (91106) @ Episode 49/200, loss: None
Step 100 (91206) @ Episode 49/200, loss: 0.00448409141973
Step 200 (91306) @ Episode 49/200, loss: 0.0380091033876
Step 300 (91406) @ Episode 49/200, loss: 0.02128129825


[2017-03-02 23:46:07,077] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000049.mp4



Episode Reward: 2.0
Step 0 (91420) @ Episode 50/200, loss: None
Step 100 (91520) @ Episode 50/200, loss: 0.00618929415941
Step 200 (91620) @ Episode 50/200, loss: 0.029060009867

Episode Reward: 1.0
Step 0 (91653) @ Episode 51/200, loss: None
Step 100 (91753) @ Episode 51/200, loss: 0.00819174014032
Step 200 (91853) @ Episode 51/200, loss: 0.0076104127802


[2017-03-02 23:46:35,341] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000051.mp4



Episode Reward: 1.0
Step 0 (91858) @ Episode 52/200, loss: None
Step 100 (91958) @ Episode 52/200, loss: 0.0255494508892
Step 200 (92058) @ Episode 52/200, loss: 1.27789092064
Step 300 (92158) @ Episode 52/200, loss: 0.00196851231158


[2017-03-02 23:46:59,524] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000052.mp4



Episode Reward: 3.0
Step 0 (92221) @ Episode 53/200, loss: None
Step 100 (92321) @ Episode 53/200, loss: 0.0125311110169
Step 200 (92421) @ Episode 53/200, loss: 0.0131344879046
Step 300 (92521) @ Episode 53/200, loss: 0.00663115829229


[2017-03-02 23:47:24,371] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000053.mp4



Episode Reward: 3.0
Step 0 (92588) @ Episode 54/200, loss: None
Step 100 (92688) @ Episode 54/200, loss: 0.0149645451456
Step 200 (92788) @ Episode 54/200, loss: 0.0107471607625


[2017-03-02 23:47:44,538] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000054.mp4



Episode Reward: 2.0
Step 0 (92880) @ Episode 55/200, loss: None
Step 100 (92980) @ Episode 55/200, loss: 0.0120575036854


[2017-03-02 23:47:56,520] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000055.mp4



Episode Reward: 0.0
Step 0 (93055) @ Episode 56/200, loss: None
Step 100 (93155) @ Episode 56/200, loss: 0.013817416504


[2017-03-02 23:48:07,528] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000056.mp4



Episode Reward: 0.0
Step 0 (93227) @ Episode 57/200, loss: None
Step 100 (93327) @ Episode 57/200, loss: 0.00736599741504
Step 200 (93427) @ Episode 57/200, loss: 0.0131995417178


[2017-03-02 23:48:23,115] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000057.mp4



Episode Reward: 1.0
Step 0 (93465) @ Episode 58/200, loss: None
Step 100 (93565) @ Episode 58/200, loss: 0.00599231850356
Step 200 (93665) @ Episode 58/200, loss: 0.00619571562856


[2017-03-02 23:48:39,550] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000058.mp4



Episode Reward: 1.0
Step 0 (93723) @ Episode 59/200, loss: None
Step 100 (93823) @ Episode 59/200, loss: 0.0176282413304
Step 200 (93923) @ Episode 59/200, loss: 0.00717762811109
Step 300 (94023) @ Episode 59/200, loss: 0.00590628758073


[2017-03-02 23:49:05,589] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000059.mp4



Episode Reward: 4.0
Step 0 (94108) @ Episode 60/200, loss: None
Step 100 (94208) @ Episode 60/200, loss: 0.0305300261825


[2017-03-02 23:49:18,264] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000060.mp4



Episode Reward: 0.0
Step 0 (94303) @ Episode 61/200, loss: None
Step 100 (94403) @ Episode 61/200, loss: 0.609422564507


[2017-03-02 23:49:30,919] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000061.mp4



Episode Reward: 0.0
Step 0 (94475) @ Episode 62/200, loss: None
Step 100 (94575) @ Episode 62/200, loss: 0.0200846903026

Episode Reward: 0.0


[2017-03-02 23:49:43,635] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000062.mp4


Step 0 (94652) @ Episode 63/200, loss: None
Step 100 (94752) @ Episode 63/200, loss: 0.0126857087016


[2017-03-02 23:49:55,139] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000063.mp4



Episode Reward: 0.0
Step 0 (94821) @ Episode 64/200, loss: None
Step 100 (94921) @ Episode 64/200, loss: 0.0145487524569


[2017-03-02 23:50:07,367] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000064.mp4



Episode Reward: 0.0
Step 0 (95005) @ Episode 65/200, loss: None
Step 100 (95105) @ Episode 65/200, loss: 0.0104385372251


[2017-03-02 23:50:18,720] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000065.mp4



Episode Reward: 0.0
Step 0 (95178) @ Episode 66/200, loss: None
Step 100 (95278) @ Episode 66/200, loss: 0.0254782494158
Step 200 (95378) @ Episode 66/200, loss: 0.00361299235374
Step 300 (95478) @ Episode 66/200, loss: 0.0127704162151


[2017-03-02 23:50:40,868] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000066.mp4



Episode Reward: 3.0
Step 0 (95515) @ Episode 67/200, loss: None
Step 100 (95615) @ Episode 67/200, loss: 0.0109156277031
Step 200 (95715) @ Episode 67/200, loss: 0.0185944512486


[2017-03-02 23:50:59,874] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000067.mp4



Episode Reward: 1.0
Step 0 (95767) @ Episode 68/200, loss: None
Step 100 (95867) @ Episode 68/200, loss: 0.00914355553687
Step 200 (95967) @ Episode 68/200, loss: 0.710600018501


[2017-03-02 23:51:17,365] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000068.mp4



Episode Reward: 2.0
Step 0 (96036) @ Episode 69/200, loss: None
Step 100 (96136) @ Episode 69/200, loss: 0.707311034203
Step 200 (96236) @ Episode 69/200, loss: 0.017701908946


[2017-03-02 23:51:35,634] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000069.mp4



Episode Reward: 2.0
Step 0 (96329) @ Episode 70/200, loss: None
Step 100 (96429) @ Episode 70/200, loss: 0.00790708512068


[2017-03-02 23:51:48,088] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000070.mp4



Episode Reward: 0.0
Step 0 (96515) @ Episode 71/200, loss: None
Step 100 (96615) @ Episode 71/200, loss: 0.055920753628
Step 200 (96715) @ Episode 71/200, loss: 0.89153200388


[2017-03-02 23:52:03,072] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000071.mp4



Episode Reward: 1.0
Step 0 (96753) @ Episode 72/200, loss: None
Step 100 (96853) @ Episode 72/200, loss: 0.0114015201107
Step 200 (96953) @ Episode 72/200, loss: 0.00735179334879


[2017-03-02 23:52:20,626] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000072.mp4



Episode Reward: 2.0
Step 0 (97028) @ Episode 73/200, loss: None
Step 100 (97128) @ Episode 73/200, loss: 0.0329014062881
Step 200 (97228) @ Episode 73/200, loss: 0.0159057043493


[2017-03-02 23:52:36,127] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000073.mp4



Episode Reward: 2.0
Step 0 (97276) @ Episode 74/200, loss: None
Step 100 (97376) @ Episode 74/200, loss: 0.0262424945831
Step 200 (97476) @ Episode 74/200, loss: 0.00540637597442


[2017-03-02 23:52:54,544] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000074.mp4



Episode Reward: 2.0
Step 0 (97568) @ Episode 75/200, loss: None
Step 100 (97668) @ Episode 75/200, loss: 0.0219624601305
Step 200 (97768) @ Episode 75/200, loss: 1.51861596107


[2017-03-02 23:53:07,320] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000075.mp4



Episode Reward: 1.0
Step 0 (97774) @ Episode 76/200, loss: None
Step 100 (97874) @ Episode 76/200, loss: 0.0323255099356
Step 200 (97974) @ Episode 76/200, loss: 0.0101292049512


[2017-03-02 23:53:20,528] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000076.mp4



Episode Reward: 1.0
Step 0 (97983) @ Episode 77/200, loss: None
Step 100 (98083) @ Episode 77/200, loss: 0.0162488743663
Step 200 (98183) @ Episode 77/200, loss: 0.598275125027


[2017-03-02 23:53:37,672] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000077.mp4



Episode Reward: 2.0
Step 0 (98262) @ Episode 78/200, loss: None
Step 100 (98362) @ Episode 78/200, loss: 0.0140128321946

Episode Reward: 0.0


[2017-03-02 23:53:49,742] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000078.mp4


Step 0 (98444) @ Episode 79/200, loss: None
Step 100 (98544) @ Episode 79/200, loss: 0.0194589588791
Step 200 (98644) @ Episode 79/200, loss: 0.0104292146862


[2017-03-02 23:54:07,048] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000079.mp4



Episode Reward: 1.0
Step 0 (98692) @ Episode 80/200, loss: None
Step 100 (98792) @ Episode 80/200, loss: 0.014952396974
Step 200 (98892) @ Episode 80/200, loss: 0.856789469719
Step 300 (98992) @ Episode 80/200, loss: 0.0142845548689


[2017-03-02 23:54:32,146] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000080.mp4



Episode Reward: 3.0
Step 0 (99058) @ Episode 81/200, loss: None
Step 100 (99158) @ Episode 81/200, loss: 0.00718405144289
Step 200 (99258) @ Episode 81/200, loss: 0.0369966179132
Step 300 (99358) @ Episode 81/200, loss: 0.0274928957224


[2017-03-02 23:54:52,388] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000081.mp4



Episode Reward: 3.0
Step 0 (99373) @ Episode 82/200, loss: None
Step 100 (99473) @ Episode 82/200, loss: 0.0460791438818


[2017-03-02 23:55:04,420] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000082.mp4



Episode Reward: 0.0
Step 0 (99557) @ Episode 83/200, loss: None
Step 100 (99657) @ Episode 83/200, loss: 0.0182720907032


[2017-03-02 23:55:16,483] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000083.mp4



Episode Reward: 0.0
Step 0 (99726) @ Episode 84/200, loss: None
Step 100 (99826) @ Episode 84/200, loss: 0.00536716263741
Step 200 (99926) @ Episode 84/200, loss: 0.00572584522888


[2017-03-02 23:55:30,408] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000084.mp4



Episode Reward: 1.0
Step 0 (99935) @ Episode 85/200, loss: None
Copying parameters from q_estimator to target...

Step 100 (100035) @ Episode 85/200, loss: 0.0074006812647
Step 200 (100135) @ Episode 85/200, loss: 0.0313569381833
Step 300 (100235) @ Episode 85/200, loss: 0.0219871606678


[2017-03-02 23:55:53,756] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000085.mp4



Episode Reward: 3.0
Step 0 (100293) @ Episode 86/200, loss: None
Step 100 (100393) @ Episode 86/200, loss: 0.0167733766139
Step 200 (100493) @ Episode 86/200, loss: 0.028932839632
Step 300 (100593) @ Episode 86/200, loss: 0.0194810014218


[2017-03-02 23:56:13,508] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000086.mp4



Episode Reward: 2.0
Step 0 (100600) @ Episode 87/200, loss: None
Step 100 (100700) @ Episode 87/200, loss: 0.0141499135643


[2017-03-02 23:56:25,195] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000087.mp4



Episode Reward: 0.0
Step 0 (100781) @ Episode 88/200, loss: None
Step 100 (100881) @ Episode 88/200, loss: 0.0113068232313
Step 200 (100981) @ Episode 88/200, loss: 0.0192364826798


[2017-03-02 23:56:39,506] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000088.mp4



Episode Reward: 1.0
Step 0 (101004) @ Episode 89/200, loss: None
Step 100 (101104) @ Episode 89/200, loss: 0.0264252331108
Step 200 (101204) @ Episode 89/200, loss: 0.0247431211174


[2017-03-02 23:56:56,889] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000089.mp4



Episode Reward: 2.0
Step 0 (101282) @ Episode 90/200, loss: None
Step 100 (101382) @ Episode 90/200, loss: 0.677613258362
Step 200 (101482) @ Episode 90/200, loss: 0.00575577560812


[2017-03-02 23:57:13,359] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000090.mp4



Episode Reward: 1.0
Step 0 (101537) @ Episode 91/200, loss: None
Step 100 (101637) @ Episode 91/200, loss: 0.017625592649


[2017-03-02 23:57:26,202] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000091.mp4



Episode Reward: 0.0
Step 0 (101729) @ Episode 92/200, loss: None
Step 100 (101829) @ Episode 92/200, loss: 0.677945494652
Step 200 (101929) @ Episode 92/200, loss: 0.00405716942623


[2017-03-02 23:57:43,279] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000092.mp4



Episode Reward: 2.0
Step 0 (101998) @ Episode 93/200, loss: None
Step 100 (102098) @ Episode 93/200, loss: 0.0274413414299
Step 200 (102198) @ Episode 93/200, loss: 0.0110627599061


[2017-03-02 23:57:59,910] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000093.mp4



Episode Reward: 1.0
Step 0 (102247) @ Episode 94/200, loss: None
Step 100 (102347) @ Episode 94/200, loss: 0.0219271853566
Step 200 (102447) @ Episode 94/200, loss: 0.0435685776174


[2017-03-02 23:58:18,752] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000094.mp4



Episode Reward: 2.0
Step 0 (102543) @ Episode 95/200, loss: None
Step 100 (102643) @ Episode 95/200, loss: 0.010776637122
Step 200 (102743) @ Episode 95/200, loss: 0.0187903977931
Step 300 (102843) @ Episode 95/200, loss: 0.0334018059075

Episode Reward: 2.0


[2017-03-02 23:58:39,345] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000095.mp4


Step 0 (102844) @ Episode 96/200, loss: None
Step 100 (102944) @ Episode 96/200, loss: 0.00634875148535


[2017-03-02 23:58:50,867] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000096.mp4



Episode Reward: 0.0
Step 0 (103018) @ Episode 97/200, loss: None
Step 100 (103118) @ Episode 97/200, loss: 0.00930189248174
Step 200 (103218) @ Episode 97/200, loss: 0.659845352173
Step 300 (103318) @ Episode 97/200, loss: 0.011978039518
Step 400 (103418) @ Episode 97/200, loss: 0.509165108204


[2017-03-02 23:59:17,977] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000097.mp4



Episode Reward: 4.0
Step 0 (103449) @ Episode 98/200, loss: None
Step 100 (103549) @ Episode 98/200, loss: 0.00927024707198
Step 200 (103649) @ Episode 98/200, loss: 0.0288747977465
Step 300 (103749) @ Episode 98/200, loss: 0.00605679815635


[2017-03-02 23:59:41,595] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000098.mp4



Episode Reward: 3.0
Step 0 (103799) @ Episode 99/200, loss: None
Step 100 (103899) @ Episode 99/200, loss: 0.0156582240015
Step 200 (103999) @ Episode 99/200, loss: 0.39464905858
Step 300 (104099) @ Episode 99/200, loss: 0.0244832187891


[2017-03-03 00:00:02,397] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000099.mp4



Episode Reward: 2.0
Step 0 (104118) @ Episode 100/200, loss: None
Step 100 (104218) @ Episode 100/200, loss: 0.0163256563246
Step 200 (104318) @ Episode 100/200, loss: 0.0138622224331

Episode Reward: 1.0
Step 0 (104342) @ Episode 101/200, loss: None
Step 100 (104442) @ Episode 101/200, loss: 0.0352768301964


[2017-03-03 00:00:29,951] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000101.mp4



Episode Reward: 0.0
Step 0 (104523) @ Episode 102/200, loss: None
Step 100 (104623) @ Episode 102/200, loss: 0.046380341053


[2017-03-03 00:00:43,940] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000102.mp4



Episode Reward: 0.0
Step 0 (104718) @ Episode 103/200, loss: None
Step 100 (104818) @ Episode 103/200, loss: 0.0164226964116
Step 200 (104918) @ Episode 103/200, loss: 0.018612684682


[2017-03-03 00:00:59,461] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000103.mp4



Episode Reward: 1.0
Step 0 (104961) @ Episode 104/200, loss: None
Step 100 (105061) @ Episode 104/200, loss: 0.0260361414403
Step 200 (105161) @ Episode 104/200, loss: 1.26744294167


[2017-03-03 00:01:14,935] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000104.mp4



Episode Reward: 1.0
Step 0 (105205) @ Episode 105/200, loss: None
Step 100 (105305) @ Episode 105/200, loss: 0.723968088627
Step 200 (105405) @ Episode 105/200, loss: 0.492758661509


[2017-03-03 00:01:32,862] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000105.mp4



Episode Reward: 2.0
Step 0 (105482) @ Episode 106/200, loss: None
Step 100 (105582) @ Episode 106/200, loss: 0.0359980203211
Step 200 (105682) @ Episode 106/200, loss: 0.0437374189496
Step 300 (105782) @ Episode 106/200, loss: 0.00856985524297


[2017-03-03 00:01:51,969] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000106.mp4



Episode Reward: 2.0
Step 0 (105789) @ Episode 107/200, loss: None
Step 100 (105889) @ Episode 107/200, loss: 0.0204640366137


[2017-03-03 00:02:02,761] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000107.mp4



Episode Reward: 0.0
Step 0 (105956) @ Episode 108/200, loss: None
Step 100 (106056) @ Episode 108/200, loss: 0.00514427945018
Step 200 (106156) @ Episode 108/200, loss: 0.00791836716235


[2017-03-03 00:02:20,115] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000108.mp4



Episode Reward: 2.0
Step 0 (106228) @ Episode 109/200, loss: None
Step 100 (106328) @ Episode 109/200, loss: 0.00764200324193


[2017-03-03 00:02:32,954] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000109.mp4



Episode Reward: 0.0
Step 0 (106422) @ Episode 110/200, loss: None
Step 100 (106522) @ Episode 110/200, loss: 0.0303205084056
Step 200 (106622) @ Episode 110/200, loss: 0.0487152673304
Step 300 (106722) @ Episode 110/200, loss: 0.0214802213013


[2017-03-03 00:02:55,422] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000110.mp4



Episode Reward: 3.0
Step 0 (106772) @ Episode 111/200, loss: None
Step 100 (106872) @ Episode 111/200, loss: 0.0220184735954
Step 200 (106972) @ Episode 111/200, loss: 0.0386112183332
Step 300 (107072) @ Episode 111/200, loss: 0.013256999664


[2017-03-03 00:03:15,359] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000111.mp4



Episode Reward: 3.0
Step 0 (107083) @ Episode 112/200, loss: None
Step 100 (107183) @ Episode 112/200, loss: 1.36192131042
Step 200 (107283) @ Episode 112/200, loss: 0.0201474763453


[2017-03-03 00:03:32,674] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000112.mp4



Episode Reward: 1.0
Step 0 (107353) @ Episode 113/200, loss: None
Step 100 (107453) @ Episode 113/200, loss: 0.015531883575
Step 200 (107553) @ Episode 113/200, loss: 0.51393455267
Step 300 (107653) @ Episode 113/200, loss: 0.00730903912336


[2017-03-03 00:03:55,938] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000113.mp4



Episode Reward: 3.0
Step 0 (107719) @ Episode 114/200, loss: None
Step 100 (107819) @ Episode 114/200, loss: 0.0186246857047
Step 200 (107919) @ Episode 114/200, loss: 0.0134583190084
Step 300 (108019) @ Episode 114/200, loss: 0.00674092583358


[2017-03-03 00:04:14,896] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000114.mp4



Episode Reward: 2.0
Step 0 (108025) @ Episode 115/200, loss: None
Step 100 (108125) @ Episode 115/200, loss: 0.0109279137105
Step 200 (108225) @ Episode 115/200, loss: 0.0291218515486
Step 300 (108325) @ Episode 115/200, loss: 0.0156447514892


[2017-03-03 00:04:37,920] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000115.mp4



Episode Reward: 3.0
Step 0 (108396) @ Episode 116/200, loss: None
Step 100 (108496) @ Episode 116/200, loss: 0.0085719563067
Step 200 (108596) @ Episode 116/200, loss: 0.042918857187
Step 300 (108696) @ Episode 116/200, loss: 0.639650344849


[2017-03-03 00:04:58,101] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000116.mp4



Episode Reward: 3.0
Step 0 (108719) @ Episode 117/200, loss: None
Step 100 (108819) @ Episode 117/200, loss: 0.0101734958589
Step 200 (108919) @ Episode 117/200, loss: 0.0103447698057


[2017-03-03 00:05:12,579] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000117.mp4



Episode Reward: 1.0
Step 0 (108954) @ Episode 118/200, loss: None
Step 100 (109054) @ Episode 118/200, loss: 0.00989988446236


[2017-03-03 00:05:25,620] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000118.mp4



Episode Reward: 0.0
Step 0 (109141) @ Episode 119/200, loss: None
Step 100 (109241) @ Episode 119/200, loss: 0.00564570631832
Step 200 (109341) @ Episode 119/200, loss: 0.00966477207839


[2017-03-03 00:05:41,641] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000119.mp4



Episode Reward: 1.0
Step 0 (109380) @ Episode 120/200, loss: None
Step 100 (109480) @ Episode 120/200, loss: 0.0250389501452


[2017-03-03 00:05:52,749] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000120.mp4



Episode Reward: 0.0
Step 0 (109545) @ Episode 121/200, loss: None
Step 100 (109645) @ Episode 121/200, loss: 0.0111880954355

Episode Reward: 0.0


[2017-03-03 00:06:05,605] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000121.mp4


Step 0 (109728) @ Episode 122/200, loss: None
Step 100 (109828) @ Episode 122/200, loss: 0.00918005406857
Step 200 (109928) @ Episode 122/200, loss: 0.00877332966775


[2017-03-03 00:06:23,129] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000122.mp4



Episode Reward: 2.0
Step 0 (109995) @ Episode 123/200, loss: None
Copying parameters from q_estimator to target...

Step 100 (110095) @ Episode 123/200, loss: 0.0124228289351
Step 200 (110195) @ Episode 123/200, loss: 0.020364176482
Step 300 (110295) @ Episode 123/200, loss: 0.80257409811


[2017-03-03 00:06:43,180] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000123.mp4



Episode Reward: 3.0
Step 0 (110310) @ Episode 124/200, loss: None
Step 100 (110410) @ Episode 124/200, loss: 1.23486924171
Step 200 (110510) @ Episode 124/200, loss: 0.024655379355
Step 300 (110610) @ Episode 124/200, loss: 0.521246254444


[2017-03-03 00:07:03,597] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000124.mp4



Episode Reward: 2.0
Step 0 (110613) @ Episode 125/200, loss: None
Step 100 (110713) @ Episode 125/200, loss: 0.699862122536
Step 200 (110813) @ Episode 125/200, loss: 0.00577748566866
Step 300 (110913) @ Episode 125/200, loss: 0.00602686218917


[2017-03-03 00:07:29,035] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000125.mp4



Episode Reward: 3.0
Step 0 (110969) @ Episode 126/200, loss: None
Step 100 (111069) @ Episode 126/200, loss: 0.022294126451
Step 200 (111169) @ Episode 126/200, loss: 0.00687329145148


[2017-03-03 00:07:46,311] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000126.mp4



Episode Reward: 2.0
Step 0 (111242) @ Episode 127/200, loss: None
Step 100 (111342) @ Episode 127/200, loss: 0.0122993588448
Step 200 (111442) @ Episode 127/200, loss: 0.01866687648


[2017-03-03 00:08:00,784] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000127.mp4



Episode Reward: 1.0
Step 0 (111453) @ Episode 128/200, loss: None
Step 100 (111553) @ Episode 128/200, loss: 0.0207412913442
Step 200 (111653) @ Episode 128/200, loss: 0.0103257149458


[2017-03-03 00:08:18,297] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000128.mp4



Episode Reward: 1.0
Step 0 (111694) @ Episode 129/200, loss: None
Step 100 (111794) @ Episode 129/200, loss: 0.00864312238991


[2017-03-03 00:08:30,180] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000129.mp4



Episode Reward: 0.0
Step 0 (111876) @ Episode 130/200, loss: None
Step 100 (111976) @ Episode 130/200, loss: 0.0161317773163


[2017-03-03 00:08:42,225] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000130.mp4



Episode Reward: 0.0
Step 0 (112048) @ Episode 131/200, loss: None
Step 100 (112148) @ Episode 131/200, loss: 0.0198588259518
Step 200 (112248) @ Episode 131/200, loss: 0.0148852486163
Step 300 (112348) @ Episode 131/200, loss: 0.0131444334984


[2017-03-03 00:09:01,906] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000131.mp4



Episode Reward: 2.0
Step 0 (112348) @ Episode 132/200, loss: None
Step 100 (112448) @ Episode 132/200, loss: 0.00716013181955
Step 200 (112548) @ Episode 132/200, loss: 0.71109944582
Step 300 (112648) @ Episode 132/200, loss: 1.30075705051


[2017-03-03 00:09:24,984] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000132.mp4



Episode Reward: 3.0
Step 0 (112682) @ Episode 133/200, loss: None
Step 100 (112782) @ Episode 133/200, loss: 0.0366691127419
Step 200 (112882) @ Episode 133/200, loss: 0.0124457506463


[2017-03-03 00:09:40,442] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000133.mp4



Episode Reward: 1.0
Step 0 (112903) @ Episode 134/200, loss: None
Step 100 (113003) @ Episode 134/200, loss: 0.00826137140393


[2017-03-03 00:09:52,916] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000134.mp4



Episode Reward: 0.0
Step 0 (113090) @ Episode 135/200, loss: None
Step 100 (113190) @ Episode 135/200, loss: 0.0384222567081
Step 200 (113290) @ Episode 135/200, loss: 0.00821128673851

Episode Reward: 1.0


[2017-03-03 00:10:07,762] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000135.mp4


Step 0 (113315) @ Episode 136/200, loss: None
Step 100 (113415) @ Episode 136/200, loss: 0.00683580106124
Step 200 (113515) @ Episode 136/200, loss: 0.0229372195899
Step 300 (113615) @ Episode 136/200, loss: 0.043290220201


[2017-03-03 00:10:29,241] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000136.mp4



Episode Reward: 3.0
Step 0 (113648) @ Episode 137/200, loss: None
Step 100 (113748) @ Episode 137/200, loss: 0.00833017192781
Step 200 (113848) @ Episode 137/200, loss: 0.00679991114885
Step 300 (113948) @ Episode 137/200, loss: 0.063198633492
Step 400 (114048) @ Episode 137/200, loss: 0.0381130129099


[2017-03-03 00:11:01,403] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000137.mp4



Episode Reward: 5.0
Step 0 (114127) @ Episode 138/200, loss: None
Step 100 (114227) @ Episode 138/200, loss: 0.00499798823148
Step 200 (114327) @ Episode 138/200, loss: 0.0126374484971

Episode Reward: 2.0


[2017-03-03 00:11:21,683] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000138.mp4


Step 0 (114416) @ Episode 139/200, loss: None
Step 100 (114516) @ Episode 139/200, loss: 0.0118887415156
Step 200 (114616) @ Episode 139/200, loss: 0.00306380423717
Step 300 (114716) @ Episode 139/200, loss: 0.00790291279554


[2017-03-03 00:11:47,970] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000139.mp4



Episode Reward: 4.0
Step 0 (114798) @ Episode 140/200, loss: None
Step 100 (114898) @ Episode 140/200, loss: 0.0188116580248
Step 200 (114998) @ Episode 140/200, loss: 0.022332329303


[2017-03-03 00:12:06,765] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000140.mp4



Episode Reward: 2.0
Step 0 (115075) @ Episode 141/200, loss: None
Step 100 (115175) @ Episode 141/200, loss: 0.006392987445
Step 200 (115275) @ Episode 141/200, loss: 0.00958992354572

Episode Reward: 0.0


[2017-03-03 00:12:19,771] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000141.mp4


Step 0 (115277) @ Episode 142/200, loss: None
Step 100 (115377) @ Episode 142/200, loss: 0.0101998848841


[2017-03-03 00:12:31,844] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000142.mp4



Episode Reward: 0.0
Step 0 (115459) @ Episode 143/200, loss: None
Step 100 (115559) @ Episode 143/200, loss: 0.0419499129057
Step 200 (115659) @ Episode 143/200, loss: 0.00927314255387


[2017-03-03 00:12:47,390] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000143.mp4



Episode Reward: 1.0
Step 0 (115690) @ Episode 144/200, loss: None
Step 100 (115790) @ Episode 144/200, loss: 0.010236277245
Step 200 (115890) @ Episode 144/200, loss: 0.00768918730319


[2017-03-03 00:13:00,867] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000144.mp4



Episode Reward: 0.0
Step 0 (115894) @ Episode 145/200, loss: None
Step 100 (115994) @ Episode 145/200, loss: 0.0352223403752
Step 200 (116094) @ Episode 145/200, loss: 0.0256797727197
Step 300 (116194) @ Episode 145/200, loss: 0.0161489639431


[2017-03-03 00:13:23,429] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000145.mp4



Episode Reward: 3.0
Step 0 (116237) @ Episode 146/200, loss: None
Step 100 (116337) @ Episode 146/200, loss: 0.0119197573513
Step 200 (116437) @ Episode 146/200, loss: 0.709294140339


[2017-03-03 00:13:39,789] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000146.mp4



Episode Reward: 1.0
Step 0 (116484) @ Episode 147/200, loss: None
Step 100 (116584) @ Episode 147/200, loss: 0.674540281296


[2017-03-03 00:13:51,588] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000147.mp4



Episode Reward: 0.0
Step 0 (116665) @ Episode 148/200, loss: None
Step 100 (116765) @ Episode 148/200, loss: 0.011566998437
Step 200 (116865) @ Episode 148/200, loss: 0.0177053064108


[2017-03-03 00:14:09,951] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000148.mp4



Episode Reward: 2.0
Step 0 (116949) @ Episode 149/200, loss: None
Step 100 (117049) @ Episode 149/200, loss: 0.0209628716111
Step 200 (117149) @ Episode 149/200, loss: 0.0182128660381
Step 300 (117249) @ Episode 149/200, loss: 0.00586482742801


[2017-03-03 00:14:29,449] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000149.mp4



Episode Reward: 2.0
Step 0 (117260) @ Episode 150/200, loss: None
Step 100 (117360) @ Episode 150/200, loss: 0.0229475982487

Episode Reward: 0.0
Step 0 (117422) @ Episode 151/200, loss: None
Step 100 (117522) @ Episode 151/200, loss: 0.664972662926
Step 200 (117622) @ Episode 151/200, loss: 0.0175640136003


[2017-03-03 00:14:54,591] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000151.mp4



Episode Reward: 1.0
Step 0 (117662) @ Episode 152/200, loss: None
Step 100 (117762) @ Episode 152/200, loss: 0.0211319960654
Step 200 (117862) @ Episode 152/200, loss: 0.00636719167233


[2017-03-03 00:15:09,182] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000152.mp4



Episode Reward: 1.0
Step 0 (117889) @ Episode 153/200, loss: None
Step 100 (117989) @ Episode 153/200, loss: 0.0220385976136


[2017-03-03 00:15:19,713] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000153.mp4



Episode Reward: 0.0
Step 0 (118057) @ Episode 154/200, loss: None
Step 100 (118157) @ Episode 154/200, loss: 0.0383275598288


[2017-03-03 00:15:30,741] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000154.mp4



Episode Reward: 0.0
Step 0 (118231) @ Episode 155/200, loss: None
Step 100 (118331) @ Episode 155/200, loss: 0.0102635929361
Step 200 (118431) @ Episode 155/200, loss: 0.0125499656424
Step 300 (118531) @ Episode 155/200, loss: 0.00575282238424


[2017-03-03 00:15:51,840] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000155.mp4



Episode Reward: 3.0
Step 0 (118567) @ Episode 156/200, loss: None
Step 100 (118667) @ Episode 156/200, loss: 0.0228636134416
Step 200 (118767) @ Episode 156/200, loss: 0.715530991554


[2017-03-03 00:16:09,913] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000156.mp4



Episode Reward: 2.0
Step 0 (118855) @ Episode 157/200, loss: None
Step 100 (118955) @ Episode 157/200, loss: 0.0249354317784
Step 200 (119055) @ Episode 157/200, loss: 0.0336152762175


[2017-03-03 00:16:26,373] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000157.mp4



Episode Reward: 1.0
Step 0 (119111) @ Episode 158/200, loss: None
Step 100 (119211) @ Episode 158/200, loss: 0.00961958058178


[2017-03-03 00:16:37,672] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000158.mp4



Episode Reward: 0.0
Step 0 (119290) @ Episode 159/200, loss: None
Step 100 (119390) @ Episode 159/200, loss: 0.013068029657
Step 200 (119490) @ Episode 159/200, loss: 0.0173372775316
Step 300 (119590) @ Episode 159/200, loss: 0.0106634441763


[2017-03-03 00:17:00,216] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000159.mp4



Episode Reward: 3.0
Step 0 (119632) @ Episode 160/200, loss: None
Step 100 (119732) @ Episode 160/200, loss: 0.00691850297153


[2017-03-03 00:17:12,336] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000160.mp4



Episode Reward: 0.0
Step 0 (119814) @ Episode 161/200, loss: None
Step 100 (119914) @ Episode 161/200, loss: 0.0110204881057
Copying parameters from q_estimator to target...

Step 200 (120014) @ Episode 161/200, loss: 0.00465364567935


[2017-03-03 00:17:26,004] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000161.mp4



Episode Reward: 1.0
Step 0 (120033) @ Episode 162/200, loss: None
Step 100 (120133) @ Episode 162/200, loss: 0.0136067867279


[2017-03-03 00:17:37,868] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000162.mp4



Episode Reward: 0.0
Step 0 (120224) @ Episode 163/200, loss: None
Step 100 (120324) @ Episode 163/200, loss: 0.02165508084
Step 200 (120424) @ Episode 163/200, loss: 0.0108251608908


[2017-03-03 00:17:51,386] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000163.mp4



Episode Reward: 0.0
Step 0 (120427) @ Episode 164/200, loss: None
Step 100 (120527) @ Episode 164/200, loss: 0.0257173832506
Step 200 (120627) @ Episode 164/200, loss: 0.736310482025
Step 300 (120727) @ Episode 164/200, loss: 0.0100843384862


[2017-03-03 00:18:17,558] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000164.mp4



Episode Reward: 3.0
Step 0 (120825) @ Episode 165/200, loss: None
Step 100 (120925) @ Episode 165/200, loss: 0.0345843397081
Step 200 (121025) @ Episode 165/200, loss: 0.00578455813229
Step 300 (121125) @ Episode 165/200, loss: 0.0167578645051


[2017-03-03 00:18:43,453] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000165.mp4



Episode Reward: 4.0
Step 0 (121220) @ Episode 166/200, loss: None
Step 100 (121320) @ Episode 166/200, loss: 0.0141335260123
Step 200 (121420) @ Episode 166/200, loss: 0.0297844018787
Step 300 (121520) @ Episode 166/200, loss: 0.0155425108969


[2017-03-03 00:19:04,173] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000166.mp4



Episode Reward: 2.0
Step 0 (121533) @ Episode 167/200, loss: None
Step 100 (121633) @ Episode 167/200, loss: 0.0472697652876
Step 200 (121733) @ Episode 167/200, loss: 0.541656434536
Step 300 (121833) @ Episode 167/200, loss: 0.0484282523394


[2017-03-03 00:19:25,152] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000167.mp4



Episode Reward: 2.0
Step 0 (121842) @ Episode 168/200, loss: None
Step 100 (121942) @ Episode 168/200, loss: 0.0134950205684
Step 200 (122042) @ Episode 168/200, loss: 0.049408711493
Step 300 (122142) @ Episode 168/200, loss: 0.0185552984476
Step 400 (122242) @ Episode 168/200, loss: 0.0553379431367


[2017-03-03 00:19:56,073] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000168.mp4



Episode Reward: 5.0
Step 0 (122331) @ Episode 169/200, loss: None
Step 100 (122431) @ Episode 169/200, loss: 0.0109431147575
Step 200 (122531) @ Episode 169/200, loss: 0.0086714187637
Step 300 (122631) @ Episode 169/200, loss: 0.0229242965579


[2017-03-03 00:20:17,697] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000169.mp4



Episode Reward: 3.0
Step 0 (122670) @ Episode 170/200, loss: None
Step 100 (122770) @ Episode 170/200, loss: 0.510651230812
Step 200 (122870) @ Episode 170/200, loss: 0.0103399530053
Step 300 (122970) @ Episode 170/200, loss: 0.0177948325872


[2017-03-03 00:20:38,253] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000170.mp4



Episode Reward: 2.0
Step 0 (123000) @ Episode 171/200, loss: None
Step 100 (123100) @ Episode 171/200, loss: 0.00351075432263


[2017-03-03 00:20:50,235] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000171.mp4



Episode Reward: 0.0
Step 0 (123192) @ Episode 172/200, loss: None
Step 100 (123292) @ Episode 172/200, loss: 0.0273295808583
Step 200 (123392) @ Episode 172/200, loss: 0.00623866543174
Step 300 (123492) @ Episode 172/200, loss: 0.0205923188478


[2017-03-03 00:21:09,155] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000172.mp4



Episode Reward: 2.0
Step 0 (123497) @ Episode 173/200, loss: None
Step 100 (123597) @ Episode 173/200, loss: 0.0198714509606
Step 200 (123697) @ Episode 173/200, loss: 0.0256917923689


[2017-03-03 00:21:22,680] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000173.mp4



Episode Reward: 1.0
Step 0 (123714) @ Episode 174/200, loss: None
Step 100 (123814) @ Episode 174/200, loss: 0.0099570164457


[2017-03-03 00:21:34,655] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000174.mp4



Episode Reward: 0.0
Step 0 (123908) @ Episode 175/200, loss: None
Step 100 (124008) @ Episode 175/200, loss: 0.0184492040426
Step 200 (124108) @ Episode 175/200, loss: 0.325709164143
Step 300 (124208) @ Episode 175/200, loss: 0.047975756228


[2017-03-03 00:21:55,893] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000175.mp4



Episode Reward: 3.0
Step 0 (124248) @ Episode 176/200, loss: None
Step 100 (124348) @ Episode 176/200, loss: 0.0482251383364
Step 200 (124448) @ Episode 176/200, loss: 0.0225409045815


[2017-03-03 00:22:12,714] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000176.mp4



Episode Reward: 1.0
Step 0 (124514) @ Episode 177/200, loss: None
Step 100 (124614) @ Episode 177/200, loss: 0.0141227543354
Step 200 (124714) @ Episode 177/200, loss: 0.0101378820837


[2017-03-03 00:22:31,552] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000177.mp4



Episode Reward: 2.0
Step 0 (124807) @ Episode 178/200, loss: None
Step 100 (124907) @ Episode 178/200, loss: 0.581315636635


[2017-03-03 00:22:42,130] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000178.mp4



Episode Reward: 0.0
Step 0 (124973) @ Episode 179/200, loss: None
Step 100 (125073) @ Episode 179/200, loss: 0.00998274888843
Step 200 (125173) @ Episode 179/200, loss: 0.0466098114848


[2017-03-03 00:22:59,928] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000179.mp4



Episode Reward: 1.0
Step 0 (125230) @ Episode 180/200, loss: None
Step 100 (125330) @ Episode 180/200, loss: 0.728019595146
Step 200 (125430) @ Episode 180/200, loss: 0.0262552574277
Step 300 (125530) @ Episode 180/200, loss: 0.0112041477114


[2017-03-03 00:23:24,329] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000180.mp4



Episode Reward: 4.0
Step 0 (125599) @ Episode 181/200, loss: None
Step 100 (125699) @ Episode 181/200, loss: 0.0294275432825
Step 200 (125799) @ Episode 181/200, loss: 0.00841241516173


[2017-03-03 00:23:41,172] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000181.mp4



Episode Reward: 1.0
Step 0 (125845) @ Episode 182/200, loss: None
Step 100 (125945) @ Episode 182/200, loss: 0.0269569419324
Step 200 (126045) @ Episode 182/200, loss: 0.62333971262


[2017-03-03 00:23:56,554] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000182.mp4



Episode Reward: 1.0
Step 0 (126070) @ Episode 183/200, loss: None
Step 100 (126170) @ Episode 183/200, loss: 0.00548506621271
Step 200 (126270) @ Episode 183/200, loss: 0.571334183216


[2017-03-03 00:24:15,806] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000183.mp4



Episode Reward: 2.0
Step 0 (126361) @ Episode 184/200, loss: None
Step 100 (126461) @ Episode 184/200, loss: 0.00687950849533
Step 200 (126561) @ Episode 184/200, loss: 0.0319221243262


[2017-03-03 00:24:29,289] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000184.mp4



Episode Reward: 0.0
Step 0 (126561) @ Episode 185/200, loss: None
Step 100 (126661) @ Episode 185/200, loss: 0.0114036947489
Step 200 (126761) @ Episode 185/200, loss: 0.0149519816041
Step 300 (126861) @ Episode 185/200, loss: 0.0165387876332


[2017-03-03 00:24:50,493] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000185.mp4



Episode Reward: 3.0
Step 0 (126869) @ Episode 186/200, loss: None
Step 100 (126969) @ Episode 186/200, loss: 0.0160427838564


[2017-03-03 00:25:02,249] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000186.mp4



Episode Reward: 0.0
Step 0 (127035) @ Episode 187/200, loss: None
Step 100 (127135) @ Episode 187/200, loss: 0.0102230533957


[2017-03-03 00:25:14,690] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000187.mp4



Episode Reward: 0.0
Step 0 (127215) @ Episode 188/200, loss: None
Step 100 (127315) @ Episode 188/200, loss: 0.805564165115


[2017-03-03 00:25:26,144] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000188.mp4



Episode Reward: 0.0
Step 0 (127397) @ Episode 189/200, loss: None
Step 100 (127497) @ Episode 189/200, loss: 0.0215444397181
Step 200 (127597) @ Episode 189/200, loss: 0.0208150558174
Step 300 (127697) @ Episode 189/200, loss: 0.011538086459
Step 400 (127797) @ Episode 189/200, loss: 0.0209173690528


[2017-03-03 00:25:51,249] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000189.mp4



Episode Reward: 4.0
Step 0 (127800) @ Episode 190/200, loss: None
Step 100 (127900) @ Episode 190/200, loss: 0.00676039094105
Step 200 (128000) @ Episode 190/200, loss: 0.00769323529676


[2017-03-03 00:26:07,809] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000190.mp4



Episode Reward: 1.0
Step 0 (128048) @ Episode 191/200, loss: None
Step 100 (128148) @ Episode 191/200, loss: 0.0188391264528


[2017-03-03 00:26:19,126] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000191.mp4



Episode Reward: 0.0
Step 0 (128227) @ Episode 192/200, loss: None
Step 100 (128327) @ Episode 192/200, loss: 0.00884436722845
Step 200 (128427) @ Episode 192/200, loss: 0.577327787876


[2017-03-03 00:26:34,026] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000192.mp4



Episode Reward: 1.0
Step 0 (128457) @ Episode 193/200, loss: None
Step 100 (128557) @ Episode 193/200, loss: 0.00861285813153
Step 200 (128657) @ Episode 193/200, loss: 0.00582063570619
Step 300 (128757) @ Episode 193/200, loss: 0.027069054544


[2017-03-03 00:26:54,435] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000193.mp4



Episode Reward: 3.0
Step 0 (128783) @ Episode 194/200, loss: None
Step 100 (128883) @ Episode 194/200, loss: 0.00611700117588
Step 200 (128983) @ Episode 194/200, loss: 0.030742527917


[2017-03-03 00:27:12,864] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000194.mp4



Episode Reward: 1.0
Step 0 (129026) @ Episode 195/200, loss: None
Step 100 (129126) @ Episode 195/200, loss: 0.0120208263397
Step 200 (129226) @ Episode 195/200, loss: 0.00688909739256
Step 300 (129326) @ Episode 195/200, loss: 0.00949986185879


[2017-03-03 00:27:33,014] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000195.mp4



Episode Reward: 2.0
Step 0 (129333) @ Episode 196/200, loss: None
Step 100 (129433) @ Episode 196/200, loss: 0.0144713213667
Step 200 (129533) @ Episode 196/200, loss: 0.00575319258496
Step 300 (129633) @ Episode 196/200, loss: 0.0378997474909


[2017-03-03 00:27:55,332] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000196.mp4



Episode Reward: 3.0
Step 0 (129658) @ Episode 197/200, loss: None
Step 100 (129758) @ Episode 197/200, loss: 0.00762471044436
Step 200 (129858) @ Episode 197/200, loss: 0.00479747727513


[2017-03-03 00:28:10,510] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000197.mp4



Episode Reward: 0.0
Step 0 (129875) @ Episode 198/200, loss: None
Step 100 (129975) @ Episode 198/200, loss: 0.00616876780987
Copying parameters from q_estimator to target...

Step 200 (130075) @ Episode 198/200, loss: 0.00834610871971


[2017-03-03 00:28:26,397] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000198.mp4



Episode Reward: 1.0
Step 0 (130119) @ Episode 199/200, loss: None
Step 100 (130219) @ Episode 199/200, loss: 0.0441041067243

Episode Reward: 0.0


[2017-03-03 00:28:37,826] Starting new video recorder writing to /home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor/openaigym.video.2.4056.video000199.mp4


Step 0 (130291) @ Episode 200/200, loss: None
Step 100 (130391) @ Episode 200/200, loss: 0.0409513115883
Step 200 (130491) @ Episode 200/200, loss: 0.0338000915945


[2017-03-03 00:28:55,020] Finished writing results. You can upload them to the scoreboard via gym.upload('/home/cully/git/reinforcement-learning/DQN/experiments/Breakout-v0/monitor')



Episode Reward: 1.0


In [16]:
stats.

array([ 1.,  0.,  1.,  3.,  0.,  0.,  0.,  1.,  1.,  1.])

In [104]:
state = env.reset()

In [121]:
Transition = namedtuple('Transition', ['state', 'action', 'reward', 'next_state', 'done'])
state = env.reset()
print state.shape
with tf.Session() as sess:
    state = sp.process(sess, state)
    #print(state)
    print(state.shape)
    state = np.stack([state]*4, axis=2)
    print(state.shape)
    action = np.random.randint(4)
    next_state, reward, done, _ = env.step(action)
    print(next_state.shape)
    next_state = sp.process(sess, next_state)
    print(next_state.shape)
    print(state[:,:,1:].shape)
    next_state = np.append(state[:,:,1:], np.expand_dims(next_state, 2), axis=2)
    print(next_state.shape)
    trans = Transition(state, action, reward, next_state, done)
    print(trans)

(210, 160, 3)
(84, 84)
(84, 84, 4)
(210, 160, 3)
(84, 84)
(84, 84, 3)
(84, 84, 4)
Transition(state=array([[[142, 142, 142, 142],
        [142, 142, 142, 142],
        [142, 142, 142, 142],
        ..., 
        [142, 142, 142, 142],
        [142, 142, 142, 142],
        [142, 142, 142, 142]],

       [[142, 142, 142, 142],
        [142, 142, 142, 142],
        [142, 142, 142, 142],
        ..., 
        [142, 142, 142, 142],
        [142, 142, 142, 142],
        [142, 142, 142, 142]],

       [[142, 142, 142, 142],
        [142, 142, 142, 142],
        [142, 142, 142, 142],
        ..., 
        [142, 142, 142, 142],
        [142, 142, 142, 142],
        [142, 142, 142, 142]],

       ..., 
       [[142, 142, 142, 142],
        [142, 142, 142, 142],
        [142, 142, 142, 142],
        ..., 
        [142, 142, 142, 142],
        [142, 142, 142, 142],
        [142, 142, 142, 142]],

       [[127, 127, 127, 127],
        [127, 127, 127, 127],
        [127, 127, 127, 127],
        ..., 
