<a href="https://colab.research.google.com/github/royaditya12/Reinforcement-Learning-Projects-Self/blob/main/DQN_CartPole.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import gymnasium as gym
import numpy as np
import random
from collections import deque
import tensorflow as tf
from tensorflow.keras import layers, Model
import wandb
import os

In [6]:
# Seed every random number generator this notebook draws from (Python's
# `random`, NumPy and TensorFlow) with the same fixed value.
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

In [7]:
# Module-level replay buffer shared by everything in this file. It is bounded
# at 2000 entries: appending to a full buffer discards the oldest entry.
memory = deque([], 2000)

In [8]:
class DQN(Model):
    """Fully connected Q-network.

    Two hidden Dense layers of 24 ReLU units feed a linear output layer with
    one unit per action.
    """

    def __init__(self, action_size, **kwargs):
        """Create the three Dense layers; extra kwargs go to the Keras base class."""
        super().__init__(**kwargs)
        self.action_size = action_size
        self.d1 = layers.Dense(24, activation='relu', name='d1')
        self.d2 = layers.Dense(24, activation='relu', name='d2')
        self.d3 = layers.Dense(action_size, activation='linear', name='d3')

    def call(self, x):
        """Forward pass: d1 -> d2 -> d3."""
        hidden = self.d2(self.d1(x))
        return self.d3(hidden)

    def get_config(self):
        """Return the base config extended with ``action_size`` (needed to reload a saved model)."""
        return {**super().get_config(), "action_size": self.action_size}

    @classmethod
    def from_config(cls, config):
        """Rebuild the network from a dict produced by ``get_config``."""
        return cls(**config)


In [9]:
class Agent:
    """Epsilon-greedy Double-DQN agent with an online and a target network.

    Transitions are stored in the module-level ``memory`` deque (shared by
    every Agent instance in this file), not on the instance.

    Args:
        state_size: Length of one observation vector.
        action_size: Number of discrete actions.
        gamma: Discount factor applied to the bootstrapped next-state value.
        epsilon: Initial probability of choosing a random action.
        epsilon_min: Lower bound for epsilon (stored here; decay is applied by the caller).
        epsilon_decay: Multiplicative decay factor (stored here; applied by the caller).
        learning_rate: Learning rate of the Adam optimizer.
    """

    def __init__(self, state_size, action_size, gamma=0.99, epsilon=1.0, epsilon_min=0.01, epsilon_decay=0.995, learning_rate=0.001):
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.learning_rate = learning_rate

        self.model = self._build_model()
        self.target_model = self._build_model()
        # Both networks already own their variables (see _build_model), so this
        # really copies the online weights into the target network.
        self.update_target_model()

        self.optimizer = self._build_optimizer()

    def _build_model(self):
        """Create a DQN and force creation of its variables.

        A subclassed Keras model only creates its layer variables the first
        time it is called. Without the dummy forward pass, ``get_weights()``
        would return an empty list right after construction, so the initial
        target sync would copy nothing and an early ``save_model`` would
        store a model without weights.
        """
        model = DQN(self.action_size)
        model(tf.zeros((1, int(self.state_size))))
        return model

    def _build_optimizer(self):
        """Return a fresh Adam optimizer using the configured learning rate."""
        return tf.keras.optimizers.Adam(learning_rate=self.learning_rate)

    def update_target_model(self):
        """Copy the online network's weights into the target network."""
        self.target_model.set_weights(self.model.get_weights())

    def remember(self, state, action, reward, next_state, done):
        """Append one transition tuple to the shared replay buffer."""
        memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Pick an action for a single (un-batched) state, epsilon-greedily.

        Returns:
            int: a random action with probability ``epsilon``, otherwise the
            action with the highest predicted Q-value.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        q_values = self.model(np.array([state]))
        return int(np.argmax(q_values[0].numpy()))

    def save_model(self, filepath):
        """Save the online network to ``filepath``."""
        self.model.save(filepath)

    def load_model(self, filepath):
        """Load the online and target networks from a saved model file.

        The optimizer is recreated as well: an optimizer that has already
        applied gradients is tied to the variables of the networks being
        replaced here.
        """
        self.model = tf.keras.models.load_model(filepath, custom_objects={"DQN": DQN})
        self.target_model = tf.keras.models.load_model(filepath, custom_objects={"DQN": DQN})
        self.optimizer = self._build_optimizer()

    def replay(self, batch_size):
        """Run one gradient step per sampled transition and return the mean loss.

        Up to ``batch_size`` transitions are sampled from the shared buffer
        (fewer if the buffer holds fewer). For non-terminal transitions the
        target is ``reward + gamma * Q_target(next_state, argmax_a Q_online(next_state, a))``;
        for terminal ones it is just ``reward``.

        Returns:
            float: mean squared TD error over the sampled transitions, or
            ``0.0`` when nothing could be sampled.
        """
        sample_size = min(batch_size, len(memory))
        if sample_size <= 0:
            # Empty buffer or non-positive batch size: nothing to learn from.
            return 0.0

        minibatch = random.sample(memory, sample_size)
        total_loss = 0.0
        for state, action, reward, next_state, done in minibatch:
            # The target is a constant for this update, so it is computed
            # outside the gradient tape.
            if done:
                target = float(reward)
            else:
                next_batch = np.array([next_state])
                # Online network selects the action, target network evaluates it.
                next_action = np.argmax(self.model(next_batch)[0].numpy())
                next_value = self.target_model(next_batch)[0][next_action]
                target = float(reward) + self.gamma * next_value

            with tf.GradientTape() as tape:
                q_values = self.model(np.array([state]), training=True)
                q_value = q_values[0][action]
                loss = tf.reduce_mean(tf.square(target - q_value))

            grads = tape.gradient(loss, self.model.trainable_variables)
            self.optimizer.apply_gradients(zip(grads, self.model.trainable_variables))
            total_loss += float(loss.numpy())

        return total_loss / len(minibatch)


In [13]:
def train_dqn(config=None):
    """Train a DQN agent on CartPole-v1 inside one W&B run (one sweep trial).

    ``batch_size``, ``gamma``, ``learning_rate`` and ``update_target_every``
    are read from ``wandb.config``. The episode count (500), the per-episode
    step cap (500) and the epsilon schedule (start 1.0, floor 0.01, x0.995
    per episode) are fixed here. One replay pass is run per episode, and
    checkpoints are written to ``./cartpole_model_sweep_<run id>/``.

    Args:
        config: Optional configuration forwarded to ``wandb.init``.
    """
    with wandb.init(config=config) as run:
        config = wandb.config

        # The replay buffer is module-level, so drop transitions left by a previous run.
        memory.clear()

        env = gym.make('CartPole-v1')
        try:
            state_size = env.observation_space.shape[0]
            action_size = env.action_space.n

            # Hyperparameters supplied by the sweep
            batch_size = config.batch_size
            gamma = config.gamma
            learning_rate = config.learning_rate
            update_target_every = config.update_target_every

            # Fixed settings
            n_episodes = 500
            max_steps = 500
            epsilon = 1.0
            epsilon_min = 0.01
            epsilon_decay = 0.995

            output_dir = f'./cartpole_model_sweep_{run.id}/'
            os.makedirs(output_dir, exist_ok=True)

            agent = Agent(state_size, action_size, gamma=gamma, epsilon=epsilon,
                          epsilon_min=epsilon_min, epsilon_decay=epsilon_decay,
                          learning_rate=learning_rate)
            done = False

            for e in range(n_episodes):
                state = env.reset()[0]
                total_reward = 0

                for time_t in range(max_steps):
                    action = agent.act(state)
                    next_state, reward, terminated, truncated, _ = env.step(action)
                    # The episode ends on either signal ...
                    done = terminated or truncated
                    # ... but only a true termination is stored as terminal.
                    # A time-limit truncation is not a failure state, so the
                    # learning target must still bootstrap from next_state.
                    agent.remember(state, action, reward, next_state, terminated)
                    state = next_state
                    total_reward += reward

                    if done:
                        print(f"Episode: {e}/{n_episodes}, Score: {time_t}, Epsilon: {agent.epsilon:.2f}")
                        break

                # One replay pass per episode, once the buffer holds more than a batch.
                loss = None
                if len(memory) > batch_size:
                    loss = agent.replay(batch_size)

                # Decay exploration once per episode until the floor is reached.
                if agent.epsilon > agent.epsilon_min:
                    agent.epsilon *= agent.epsilon_decay

                # Periodically sync the target network with the online network.
                if e % update_target_every == 0:
                    agent.update_target_model()

                wandb.log({
                    'score': time_t if done else max_steps,
                    'total_reward': total_reward,
                    'loss': loss if loss is not None else 0,
                })

                # Checkpoint every 100 episodes (including episode 0).
                if e % 100 == 0:
                    agent.save_model(os.path.join(output_dir, f'model_{e}.keras'))

            # Final checkpoint, named after the total episode count.
            agent.save_model(os.path.join(output_dir, f'model_{n_episodes}.keras'))
        finally:
            # Release the environment even if training raised.
            env.close()


In [None]:
# W&B sweep definition: random search over four hyperparameters, ranking
# runs by the logged 'total_reward' metric (higher is better).
sweep_config = {
    'method': 'random',
    'metric': {'name': 'total_reward', 'goal': 'maximize'},
    'parameters': {
        'gamma': {'values': [0.90, 0.95, 0.99]},
        'learning_rate': {'values': [0.0001, 0.0005, 0.001, 0.005]},
        'batch_size': {'values': [16, 32, 64]},
        'update_target_every': {'values': [5, 10, 20]},
    },
}

In [None]:
if __name__ == "__main__":
    # Authenticate with Weights & Biases before creating the sweep.
    wandb.login()

    # Register the sweep described by sweep_config under the given project and keep its id.
    sweep_id = wandb.sweep(sweep_config, project="dqn-cartpole-sweep")

    # Start a sweep agent that executes train_dqn for each trial it receives.
    # NOTE(review): no `count` is passed; with a random-search sweep this call
    # may never return on its own -- confirm whether a run limit is wanted.
    wandb.agent(sweep_id, function=train_dqn)

    print("\n=== Sweep completed! ===")

Create sweep with ID: pixl50yn
Sweep URL: https://wandb.ai/adityaroy-iit-kharagpur/dqn-cartpole-sweep/sweeps/pixl50yn


[34m[1mwandb[0m: Agent Starting Run: yiy1xep2 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	update_target_every: 10


Episode: 0/500, Score: 10, Epsilon: 1.00
Episode: 1/500, Score: 46, Epsilon: 0.99


  return saving_lib.save_model(model, filepath)


Episode: 2/500, Score: 42, Epsilon: 0.99
Episode: 3/500, Score: 10, Epsilon: 0.99
Episode: 4/500, Score: 13, Epsilon: 0.98
Episode: 5/500, Score: 13, Epsilon: 0.98
Episode: 6/500, Score: 20, Epsilon: 0.97
Episode: 7/500, Score: 21, Epsilon: 0.97
Episode: 8/500, Score: 16, Epsilon: 0.96
Episode: 9/500, Score: 13, Epsilon: 0.96
Episode: 10/500, Score: 17, Epsilon: 0.95
Episode: 11/500, Score: 19, Epsilon: 0.95
Episode: 12/500, Score: 8, Epsilon: 0.94
Episode: 13/500, Score: 16, Epsilon: 0.94
Episode: 14/500, Score: 18, Epsilon: 0.93
Episode: 15/500, Score: 45, Epsilon: 0.93
Episode: 16/500, Score: 22, Epsilon: 0.92
Episode: 17/500, Score: 13, Epsilon: 0.92
Episode: 18/500, Score: 18, Epsilon: 0.91
Episode: 19/500, Score: 23, Epsilon: 0.91
Episode: 20/500, Score: 15, Epsilon: 0.90
Episode: 21/500, Score: 8, Epsilon: 0.90
Episode: 22/500, Score: 33, Epsilon: 0.90
Episode: 23/500, Score: 36, Epsilon: 0.89
Episode: 24/500, Score: 15, Epsilon: 0.89
Episode: 25/500, Score: 16, Epsilon: 0.88
Ep

0,1
loss,▂▁▁▄▁▃▂▁▄▁▅▄█▂▃▁▅▂▃▇█▁▆▁▁▁▄▁▁▁▁▁▁▁▁▁▆▂▁▅
score,▁▁▁▁▂▁▁▄▂▁▃▃▂▂▂▃▄▂▂▂▂▃▃▃▃▂▄▅▅▅▄▅▃█▃▅▅▆▄▆
total_reward,▁▁▁▁▁▁▁▂▁▁▁▂▂▄▂▆▂▂▅▂▁▂▄▄▄▆▅▆▅▅▆▇█▁▄▅▆▅▇▁

0,1
loss,0.06473
score,296.0
total_reward,297.0


[34m[1mwandb[0m: Agent Starting Run: 3vn29ymi with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	update_target_every: 5


Episode: 0/500, Score: 19, Epsilon: 1.00
Episode: 1/500, Score: 41, Epsilon: 0.99
Episode: 2/500, Score: 26, Epsilon: 0.99
Episode: 3/500, Score: 10, Epsilon: 0.99
Episode: 4/500, Score: 34, Epsilon: 0.98
Episode: 5/500, Score: 11, Epsilon: 0.98
Episode: 6/500, Score: 27, Epsilon: 0.97
Episode: 7/500, Score: 43, Epsilon: 0.97
Episode: 8/500, Score: 23, Epsilon: 0.96
Episode: 9/500, Score: 21, Epsilon: 0.96
Episode: 10/500, Score: 11, Epsilon: 0.95
Episode: 11/500, Score: 12, Epsilon: 0.95
Episode: 12/500, Score: 9, Epsilon: 0.94
Episode: 13/500, Score: 13, Epsilon: 0.94
Episode: 14/500, Score: 25, Epsilon: 0.93
Episode: 15/500, Score: 33, Epsilon: 0.93
Episode: 16/500, Score: 17, Epsilon: 0.92
Episode: 17/500, Score: 12, Epsilon: 0.92
Episode: 18/500, Score: 14, Epsilon: 0.91
Episode: 19/500, Score: 32, Epsilon: 0.91
Episode: 20/500, Score: 28, Epsilon: 0.90
Episode: 21/500, Score: 16, Epsilon: 0.90
Episode: 22/500, Score: 10, Epsilon: 0.90
Episode: 23/500, Score: 12, Epsilon: 0.89
Epi

0,1
loss,▂▁▅▄▅█▇▆▃▃▁▃▆▁▄▁▃▁▃▁▄▅▄▃▁▁▁▁▂▃▁▁▁▁▁▁▁▁▁▁
score,▁▁▁▁▁▁▁▁▁▁▁▂▁▁▂▂▁▁▂▁▂▃▂▃▁▂▂▂▂▃▃▃▄▃▃▃▁▃▅█
total_reward,▁▁▁▁▁▂▁▁▁▂▁▁▁▁▁▁▁▁▃▁▁▁▂▂▁▂▂▂▂▅▅▄▅▄▅█▂███

0,1
loss,0.00251
score,499.0
total_reward,500.0


[34m[1mwandb[0m: Agent Starting Run: k3oohx69 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	update_target_every: 5


Episode: 0/500, Score: 32, Epsilon: 1.00
Episode: 1/500, Score: 12, Epsilon: 0.99
Episode: 2/500, Score: 44, Epsilon: 0.99
Episode: 3/500, Score: 20, Epsilon: 0.99
Episode: 4/500, Score: 13, Epsilon: 0.98
Episode: 5/500, Score: 48, Epsilon: 0.98
Episode: 6/500, Score: 19, Epsilon: 0.97
Episode: 7/500, Score: 9, Epsilon: 0.97
Episode: 8/500, Score: 23, Epsilon: 0.96
Episode: 9/500, Score: 11, Epsilon: 0.96
Episode: 10/500, Score: 13, Epsilon: 0.95
Episode: 11/500, Score: 22, Epsilon: 0.95
Episode: 12/500, Score: 16, Epsilon: 0.94
Episode: 13/500, Score: 39, Epsilon: 0.94
Episode: 14/500, Score: 15, Epsilon: 0.93
Episode: 15/500, Score: 28, Epsilon: 0.93
Episode: 16/500, Score: 18, Epsilon: 0.92
Episode: 17/500, Score: 11, Epsilon: 0.92
Episode: 18/500, Score: 19, Epsilon: 0.91
Episode: 19/500, Score: 37, Epsilon: 0.91
Episode: 20/500, Score: 22, Epsilon: 0.90
Episode: 21/500, Score: 11, Epsilon: 0.90
Episode: 22/500, Score: 13, Epsilon: 0.90
Episode: 23/500, Score: 13, Epsilon: 0.89
Epi

0,1
loss,▁▂▂▂▆▃█▇▄▁▅▁▄▁▃▅▁▄▁▁▁▆▅▁▅▁█▁▅█▁▁▆▄▁▃▁▁▁▁
score,▃▁▂▁▁▂▄▃▁▁▁▁▃▁▂▁▁▂▂▃▂▂▃▂▂▄▂▃▃▄▂▃▂▅█▄▃▅▄▃
total_reward,▃▁▁▁▃▁▂▂▂▃▄▁▂▂▁▄▃▂▂▁▂▄▄▃▅▄▃█▆▃▅▃▃▃▇▅▄▄▃▆

0,1
loss,0.03001
score,52.0
total_reward,53.0


[34m[1mwandb[0m: Agent Starting Run: bajxtbqk with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	update_target_every: 20


Episode: 0/500, Score: 17, Epsilon: 1.00
Episode: 1/500, Score: 20, Epsilon: 0.99
Episode: 2/500, Score: 38, Epsilon: 0.99
Episode: 3/500, Score: 11, Epsilon: 0.99
Episode: 4/500, Score: 10, Epsilon: 0.98
Episode: 5/500, Score: 15, Epsilon: 0.98
Episode: 6/500, Score: 37, Epsilon: 0.97
Episode: 7/500, Score: 14, Epsilon: 0.97
Episode: 8/500, Score: 10, Epsilon: 0.96
Episode: 9/500, Score: 14, Epsilon: 0.96
Episode: 10/500, Score: 19, Epsilon: 0.95
Episode: 11/500, Score: 21, Epsilon: 0.95
Episode: 12/500, Score: 10, Epsilon: 0.94
Episode: 13/500, Score: 10, Epsilon: 0.94
Episode: 14/500, Score: 14, Epsilon: 0.93
Episode: 15/500, Score: 30, Epsilon: 0.93
Episode: 16/500, Score: 22, Epsilon: 0.92
Episode: 17/500, Score: 11, Epsilon: 0.92
Episode: 18/500, Score: 19, Epsilon: 0.91
Episode: 19/500, Score: 18, Epsilon: 0.91
Episode: 20/500, Score: 16, Epsilon: 0.90
Episode: 21/500, Score: 21, Epsilon: 0.90
Episode: 22/500, Score: 27, Epsilon: 0.90
Episode: 23/500, Score: 20, Epsilon: 0.89
Ep

0,1
loss,▁▁▁▁▁▁▁▁▁▁▁▂▂▁▂▁▁▂▁▁▂▂▁▁▁▁▁▁▃▁▄▁▁█▁▁▁▁▁▁
score,▂▁▁▁▁▁▂▁▁▂▁▁▂▂▄▄▁▂▁▃▂▂▃▂▄▁▄▄▃█▆▄▃▂▃▁▁▄▂▃
total_reward,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▂▂▂▂▂▁▁▂▂▃▃▃▁▁▂█▂▃▃▃▃▁

0,1
loss,0.05085
score,60.0
total_reward,61.0


[34m[1mwandb[0m: Agent Starting Run: t013dcq1 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	update_target_every: 20


Episode: 0/500, Score: 10, Epsilon: 1.00
Episode: 1/500, Score: 16, Epsilon: 0.99
Episode: 2/500, Score: 8, Epsilon: 0.99
Episode: 3/500, Score: 14, Epsilon: 0.99
Episode: 4/500, Score: 10, Epsilon: 0.98
Episode: 5/500, Score: 24, Epsilon: 0.98
Episode: 6/500, Score: 39, Epsilon: 0.97
Episode: 7/500, Score: 9, Epsilon: 0.97
Episode: 8/500, Score: 11, Epsilon: 0.96
Episode: 9/500, Score: 27, Epsilon: 0.96
Episode: 10/500, Score: 17, Epsilon: 0.95
Episode: 11/500, Score: 32, Epsilon: 0.95
Episode: 12/500, Score: 24, Epsilon: 0.94
Episode: 13/500, Score: 12, Epsilon: 0.94
Episode: 14/500, Score: 23, Epsilon: 0.93
Episode: 15/500, Score: 36, Epsilon: 0.93
Episode: 16/500, Score: 14, Epsilon: 0.92
Episode: 17/500, Score: 12, Epsilon: 0.92
Episode: 18/500, Score: 16, Epsilon: 0.91
Episode: 19/500, Score: 17, Epsilon: 0.91
Episode: 20/500, Score: 13, Epsilon: 0.90
Episode: 21/500, Score: 20, Epsilon: 0.90
Episode: 22/500, Score: 19, Epsilon: 0.90
Episode: 23/500, Score: 33, Epsilon: 0.89
Epis

0,1
loss,▁▁▁▁▁▁▁▁▄▂▁▃▂▁▃▃▄▃▅▁▄█▄▂▆▇▄▂▆▆▇▆▄█▅▃▁▅▄▂
score,█▄▃▂▃▂▂▂▂▂▃▂▂▁▂▃▂▂▃▃▂▁▁▂▁▁▁▃▂▂▁▁▂▁▂▁▂▂▃▁
total_reward,█▃▇▃▃▇▂▂▂▂▃▂▂▂▁▂▂▃▁▁▂▂▁▃▁▂▁▂▄▁▁▂▂▁▂▁▁▁▁▁

0,1
loss,2.75764
score,8.0
total_reward,9.0


[34m[1mwandb[0m: Agent Starting Run: pof9oybh with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	update_target_every: 20


Episode: 0/500, Score: 22, Epsilon: 1.00
Episode: 1/500, Score: 12, Epsilon: 0.99
Episode: 2/500, Score: 17, Epsilon: 0.99
Episode: 3/500, Score: 13, Epsilon: 0.99
Episode: 4/500, Score: 27, Epsilon: 0.98
Episode: 5/500, Score: 28, Epsilon: 0.98
Episode: 6/500, Score: 15, Epsilon: 0.97
Episode: 7/500, Score: 19, Epsilon: 0.97
Episode: 8/500, Score: 14, Epsilon: 0.96
Episode: 9/500, Score: 16, Epsilon: 0.96
Episode: 10/500, Score: 24, Epsilon: 0.95
Episode: 11/500, Score: 24, Epsilon: 0.95
Episode: 12/500, Score: 18, Epsilon: 0.94
Episode: 13/500, Score: 20, Epsilon: 0.94
Episode: 14/500, Score: 16, Epsilon: 0.93
Episode: 15/500, Score: 26, Epsilon: 0.93
Episode: 16/500, Score: 15, Epsilon: 0.92
Episode: 17/500, Score: 13, Epsilon: 0.92
Episode: 18/500, Score: 37, Epsilon: 0.91
Episode: 19/500, Score: 13, Epsilon: 0.91
Episode: 20/500, Score: 32, Epsilon: 0.90
Episode: 21/500, Score: 13, Epsilon: 0.90
Episode: 22/500, Score: 15, Epsilon: 0.90
Episode: 23/500, Score: 35, Epsilon: 0.89
Ep

0,1
loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▂▁▁▁▂▂▃▁▂▁▂▁▁▃▅▄▂▃█▁▁█▃
score,▁▁▁▁▁▁▁▁▃▂▁▇█▅▁▁▇▁▂▂▄▄▅▆▁▂▅▁▁▄▁▁▁▂▁▁▁▁▁▁
total_reward,▁▁▁▁▁▁▁▂▁▁▂▂▂▃▄▁▁▄▃▁▁▁▄▁▄▅▄▅▁▁▇▇█▁▁▁▂▁▁▁

0,1
loss,16.76323
score,12.0
total_reward,13.0


[34m[1mwandb[0m: Agent Starting Run: o9lbxxzd with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	update_target_every: 20
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


Episode: 0/500, Score: 29, Epsilon: 1.00
Episode: 1/500, Score: 10, Epsilon: 0.99
Episode: 2/500, Score: 15, Epsilon: 0.99
Episode: 3/500, Score: 35, Epsilon: 0.99
Episode: 4/500, Score: 18, Epsilon: 0.98
Episode: 5/500, Score: 14, Epsilon: 0.98
Episode: 6/500, Score: 15, Epsilon: 0.97
Episode: 7/500, Score: 9, Epsilon: 0.97
Episode: 8/500, Score: 8, Epsilon: 0.96
Episode: 9/500, Score: 17, Epsilon: 0.96
Episode: 10/500, Score: 20, Epsilon: 0.95
Episode: 11/500, Score: 21, Epsilon: 0.95
Episode: 12/500, Score: 10, Epsilon: 0.94
Episode: 13/500, Score: 14, Epsilon: 0.94
Episode: 14/500, Score: 27, Epsilon: 0.93
Episode: 15/500, Score: 27, Epsilon: 0.93
Episode: 16/500, Score: 13, Epsilon: 0.92
Episode: 17/500, Score: 17, Epsilon: 0.92
Episode: 18/500, Score: 11, Epsilon: 0.91
Episode: 19/500, Score: 19, Epsilon: 0.91
Episode: 20/500, Score: 22, Epsilon: 0.90
Episode: 21/500, Score: 13, Epsilon: 0.90
Episode: 22/500, Score: 12, Epsilon: 0.90
Episode: 23/500, Score: 29, Epsilon: 0.89
Epis

0,1
loss,▂▂▂▂▂▂▂▁▂▂▂▂▁▁▃▁▄▇▁▆▂▃▄▇▅▄▅▄▃█▁▆▃▃▄▂▂▁▂▄
score,▃▄▃▅▁█▄▆▂▂▂▆▄▂▃▁▁▂▂▂▁▄▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
total_reward,▃▆▅▂▃▂▄▄▂▁▂▂█▂▆▁▅▁▁▁▁▁▂▁▁▁▁▂▁▂▁▂▂▁▁▂▁▁▁▁

0,1
loss,0.83832
score,8.0
total_reward,9.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: hwfb8k8y with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	update_target_every: 20


Episode: 0/500, Score: 16, Epsilon: 1.00
Episode: 1/500, Score: 39, Epsilon: 0.99
Episode: 2/500, Score: 48, Epsilon: 0.99
Episode: 3/500, Score: 13, Epsilon: 0.99
Episode: 4/500, Score: 15, Epsilon: 0.98
Episode: 5/500, Score: 15, Epsilon: 0.98
Episode: 6/500, Score: 18, Epsilon: 0.97
Episode: 7/500, Score: 13, Epsilon: 0.97
Episode: 8/500, Score: 16, Epsilon: 0.96
Episode: 9/500, Score: 13, Epsilon: 0.96
Episode: 10/500, Score: 21, Epsilon: 0.95
Episode: 11/500, Score: 14, Epsilon: 0.95
Episode: 12/500, Score: 12, Epsilon: 0.94
Episode: 13/500, Score: 17, Epsilon: 0.94
Episode: 14/500, Score: 19, Epsilon: 0.93
Episode: 15/500, Score: 55, Epsilon: 0.93
Episode: 16/500, Score: 16, Epsilon: 0.92
Episode: 17/500, Score: 10, Epsilon: 0.92
Episode: 18/500, Score: 34, Epsilon: 0.91
Episode: 19/500, Score: 10, Epsilon: 0.91
Episode: 20/500, Score: 16, Epsilon: 0.90
Episode: 21/500, Score: 31, Epsilon: 0.90
Episode: 22/500, Score: 8, Epsilon: 0.90
Episode: 23/500, Score: 14, Epsilon: 0.89
Epi

0,1
loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▂▁▂▁▁▁▂▃▁▂▃▁▃▇▅▂▄██▃▂▄
score,▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▅▁▄▃▆█▁▇▁▂▄▂▁▂▃▃▁▂▁▃▁▁▄▃
total_reward,▁▁▁▁▁▂▂▂▁▁▁▁▁▁▁▁▁▁▅▁█▄▁▃▃▃▂▄▂▄▁▁▁▃▁▄▁▄▁▄

0,1
loss,2.15982
score,59.0
total_reward,60.0


[34m[1mwandb[0m: Agent Starting Run: xh3faf1t with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	update_target_every: 20


Episode: 0/500, Score: 21, Epsilon: 1.00
Episode: 1/500, Score: 11, Epsilon: 0.99
Episode: 2/500, Score: 16, Epsilon: 0.99
Episode: 3/500, Score: 21, Epsilon: 0.99
Episode: 4/500, Score: 38, Epsilon: 0.98
Episode: 5/500, Score: 18, Epsilon: 0.98
Episode: 6/500, Score: 20, Epsilon: 0.97
Episode: 7/500, Score: 10, Epsilon: 0.97
Episode: 8/500, Score: 13, Epsilon: 0.96
Episode: 9/500, Score: 14, Epsilon: 0.96
Episode: 10/500, Score: 28, Epsilon: 0.95
Episode: 11/500, Score: 31, Epsilon: 0.95
Episode: 12/500, Score: 12, Epsilon: 0.94
Episode: 13/500, Score: 16, Epsilon: 0.94
Episode: 14/500, Score: 16, Epsilon: 0.93
Episode: 15/500, Score: 15, Epsilon: 0.93
Episode: 16/500, Score: 20, Epsilon: 0.92
Episode: 17/500, Score: 17, Epsilon: 0.92
Episode: 18/500, Score: 22, Epsilon: 0.91
Episode: 19/500, Score: 30, Epsilon: 0.91
Episode: 20/500, Score: 19, Epsilon: 0.90
Episode: 21/500, Score: 24, Epsilon: 0.90
Episode: 22/500, Score: 32, Epsilon: 0.90
Episode: 23/500, Score: 38, Epsilon: 0.89
Ep

0,1
loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▃▃▁▂▁▂▁▃▁▂▃▇▄█
score,▁▁▁▂▁▁▂▁▂▁▁▁▁▂▂▁▁▆▅█▅█▁▃▆▄▂▃▁▄▃▁▂▂▃▃▂▁▁▂
total_reward,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▄▂█▂▄▁▃▁▃▁▂▃▃▂▁▂▂▁▃▂▁▂

0,1
loss,14.91498
score,95.0
total_reward,96.0


[34m[1mwandb[0m: Agent Starting Run: l3a7p5ci with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	learning_rate: 0.0005
[34m[1mwandb[0m: 	update_target_every: 10


Episode: 0/500, Score: 14, Epsilon: 1.00
Episode: 1/500, Score: 29, Epsilon: 0.99
Episode: 2/500, Score: 33, Epsilon: 0.99
Episode: 3/500, Score: 25, Epsilon: 0.99
Episode: 4/500, Score: 13, Epsilon: 0.98
Episode: 5/500, Score: 22, Epsilon: 0.98
Episode: 6/500, Score: 25, Epsilon: 0.97
Episode: 7/500, Score: 16, Epsilon: 0.97
Episode: 8/500, Score: 21, Epsilon: 0.96
Episode: 9/500, Score: 22, Epsilon: 0.96
Episode: 10/500, Score: 26, Epsilon: 0.95
Episode: 11/500, Score: 21, Epsilon: 0.95
Episode: 12/500, Score: 41, Epsilon: 0.94
Episode: 13/500, Score: 29, Epsilon: 0.94
Episode: 14/500, Score: 10, Epsilon: 0.93
Episode: 15/500, Score: 28, Epsilon: 0.93
Episode: 16/500, Score: 29, Epsilon: 0.92
Episode: 17/500, Score: 17, Epsilon: 0.92
Episode: 18/500, Score: 8, Epsilon: 0.91
Episode: 19/500, Score: 14, Epsilon: 0.91
Episode: 20/500, Score: 12, Epsilon: 0.90
Episode: 21/500, Score: 13, Epsilon: 0.90
Episode: 22/500, Score: 33, Epsilon: 0.90
Episode: 23/500, Score: 9, Epsilon: 0.89
Epis

0,1
loss,▁▁▁▁▁▂▂▄▃▃▁▂▁▂▁▁▃▂▅▁▁▁▁▄▁▁▁▁▁▁▁▁▁▁█▁▁▁▅▁
score,▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▃▂▁▂▃▃▃▁▂▃▄▃▆▄▁▅█▄▃▁▁▁▁▃
total_reward,▁▁▁▁▁▁▁▁▁▂▂▁▁▂▂▄▁▃▂▅▂▂▃▃▃▃▄▅▆▆▅█▅█▄▁▄▁▅▂

0,1
loss,8.74965
score,200.0
total_reward,201.0


[34m[1mwandb[0m: Agent Starting Run: yin3xet6 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	update_target_every: 20


Episode: 0/500, Score: 11, Epsilon: 1.00
Episode: 1/500, Score: 14, Epsilon: 0.99
Episode: 2/500, Score: 74, Epsilon: 0.99
Episode: 3/500, Score: 15, Epsilon: 0.99
Episode: 4/500, Score: 23, Epsilon: 0.98
Episode: 5/500, Score: 14, Epsilon: 0.98
Episode: 6/500, Score: 22, Epsilon: 0.97
Episode: 7/500, Score: 30, Epsilon: 0.97
Episode: 8/500, Score: 10, Epsilon: 0.96
Episode: 9/500, Score: 9, Epsilon: 0.96
Episode: 10/500, Score: 14, Epsilon: 0.95
Episode: 11/500, Score: 10, Epsilon: 0.95
Episode: 12/500, Score: 14, Epsilon: 0.94
Episode: 13/500, Score: 18, Epsilon: 0.94
Episode: 14/500, Score: 20, Epsilon: 0.93
Episode: 15/500, Score: 20, Epsilon: 0.93
Episode: 16/500, Score: 18, Epsilon: 0.92
Episode: 17/500, Score: 38, Epsilon: 0.92
Episode: 18/500, Score: 27, Epsilon: 0.91
Episode: 19/500, Score: 13, Epsilon: 0.91
Episode: 20/500, Score: 48, Epsilon: 0.90
Episode: 21/500, Score: 24, Epsilon: 0.90
Episode: 22/500, Score: 13, Epsilon: 0.90
Episode: 23/500, Score: 11, Epsilon: 0.89
Epi

0,1
loss,▁▁▁▁▁▂▁▁▁▁▁▁▁▃▂▂▁▂▁▁▁▃▁▃▃▂▃▄█▅▅▃▁▅▄▁▃▂▆▁
score,▃▂▁▂▂▁▄▄▁▄▇▄▆▄▂▁▃▄▂▂▁▁▇▃▄▆▆▂▂▂█▂▁▁█▃▅▄▂▇
total_reward,▁▁▁▂▁▃▁▁▁▃▂▃▅█▃▁▅▁▃▁▁▆▂▄▂▅▅▆▁▁▁▁▃▄▅▃▄▅▄▃

0,1
loss,0.33424
score,82.0
total_reward,83.0


[34m[1mwandb[0m: Agent Starting Run: tihu7pts with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	update_target_every: 20


Episode: 0/500, Score: 23, Epsilon: 1.00
Episode: 1/500, Score: 17, Epsilon: 0.99
Episode: 2/500, Score: 33, Epsilon: 0.99
Episode: 3/500, Score: 10, Epsilon: 0.99
Episode: 4/500, Score: 42, Epsilon: 0.98
Episode: 5/500, Score: 13, Epsilon: 0.98
Episode: 6/500, Score: 12, Epsilon: 0.97
Episode: 7/500, Score: 10, Epsilon: 0.97
Episode: 8/500, Score: 17, Epsilon: 0.96
Episode: 9/500, Score: 31, Epsilon: 0.96
Episode: 10/500, Score: 21, Epsilon: 0.95
Episode: 11/500, Score: 14, Epsilon: 0.95
Episode: 12/500, Score: 28, Epsilon: 0.94
Episode: 13/500, Score: 23, Epsilon: 0.94
Episode: 14/500, Score: 22, Epsilon: 0.93
Episode: 15/500, Score: 16, Epsilon: 0.93
Episode: 16/500, Score: 18, Epsilon: 0.92
Episode: 17/500, Score: 38, Epsilon: 0.92
Episode: 18/500, Score: 10, Epsilon: 0.91
Episode: 19/500, Score: 11, Epsilon: 0.91
Episode: 20/500, Score: 11, Epsilon: 0.90
Episode: 21/500, Score: 36, Epsilon: 0.90
Episode: 22/500, Score: 15, Epsilon: 0.90
Episode: 23/500, Score: 12, Epsilon: 0.89
Ep

0,1
loss,▁▁▁▁▁▁▂▁▁▁▁▁▂▁▁▂▃▁▆▃▁▄▄▃▂▃▂▂▄▅█▄▄▂▄▄▃▂▄▃
score,▁▂▁▁▂▁▂▂▁▁▁▁▂▁▁▅▂▅▁▆▂▁▁▁▁▁▁▅▁▁▄▄▄▅▁█▁▁▆▇
total_reward,▁▁▁▁▃▁▃█▁▃▃▄▂▄▄▁▁▃▂▂▂▁▃▁▄▃▃▃▁▃▃▂▁▄▃▃▁▂▁▄

0,1
loss,0.59141
score,100.0
total_reward,101.0


[34m[1mwandb[0m: Agent Starting Run: l3smsdtu with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	learning_rate: 0.0005
[34m[1mwandb[0m: 	update_target_every: 5


Episode: 0/500, Score: 20, Epsilon: 1.00
Episode: 1/500, Score: 11, Epsilon: 0.99
Episode: 2/500, Score: 14, Epsilon: 0.99
Episode: 3/500, Score: 10, Epsilon: 0.99
Episode: 4/500, Score: 39, Epsilon: 0.98
Episode: 5/500, Score: 25, Epsilon: 0.98
Episode: 6/500, Score: 10, Epsilon: 0.97
Episode: 7/500, Score: 12, Epsilon: 0.97
Episode: 8/500, Score: 22, Epsilon: 0.96
Episode: 9/500, Score: 30, Epsilon: 0.96
Episode: 10/500, Score: 21, Epsilon: 0.95
Episode: 11/500, Score: 32, Epsilon: 0.95
Episode: 12/500, Score: 24, Epsilon: 0.94
Episode: 13/500, Score: 19, Epsilon: 0.94
Episode: 14/500, Score: 10, Epsilon: 0.93
Episode: 15/500, Score: 16, Epsilon: 0.93
Episode: 16/500, Score: 46, Epsilon: 0.92
Episode: 17/500, Score: 15, Epsilon: 0.92
Episode: 18/500, Score: 14, Epsilon: 0.91
Episode: 19/500, Score: 18, Epsilon: 0.91
Episode: 20/500, Score: 13, Epsilon: 0.90
Episode: 21/500, Score: 10, Epsilon: 0.90
Episode: 22/500, Score: 11, Epsilon: 0.90
Episode: 23/500, Score: 10, Epsilon: 0.89
Ep

0,1
loss,▂▂▂▂▂▂▄▃▄█▁▇▃▁▄▆▁▆▂▆▃▂▂▁▃▃▁▁▃▂▁▁▄▁▁▁▄▁▆▁
score,▁▁▁▁▁▁▁▁▁▁▂▂▂▂▃▂▃█▄▅█▆▄▇▄▃▁▅▃▄▃▄▄▆▃▃▃▆█▂
total_reward,▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▂▁▂▂▁▁▃▃█▄▅███▄▅▃█▃▃▃▃██▄

0,1
loss,0.00966
score,202.0
total_reward,203.0


[34m[1mwandb[0m: Agent Starting Run: nruutyqr with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	update_target_every: 5


Episode: 0/500, Score: 21, Epsilon: 1.00
Episode: 1/500, Score: 13, Epsilon: 0.99
Episode: 2/500, Score: 15, Epsilon: 0.99
Episode: 3/500, Score: 38, Epsilon: 0.99
Episode: 4/500, Score: 41, Epsilon: 0.98
Episode: 5/500, Score: 48, Epsilon: 0.98
Episode: 6/500, Score: 17, Epsilon: 0.97
Episode: 7/500, Score: 13, Epsilon: 0.97
Episode: 8/500, Score: 17, Epsilon: 0.96
Episode: 9/500, Score: 14, Epsilon: 0.96
Episode: 10/500, Score: 18, Epsilon: 0.95
Episode: 11/500, Score: 16, Epsilon: 0.95
Episode: 12/500, Score: 12, Epsilon: 0.94
Episode: 13/500, Score: 20, Epsilon: 0.94
Episode: 14/500, Score: 51, Epsilon: 0.93
Episode: 15/500, Score: 20, Epsilon: 0.93
Episode: 16/500, Score: 11, Epsilon: 0.92
Episode: 17/500, Score: 18, Epsilon: 0.92
Episode: 18/500, Score: 30, Epsilon: 0.91
Episode: 19/500, Score: 40, Epsilon: 0.91
Episode: 20/500, Score: 13, Epsilon: 0.90
Episode: 21/500, Score: 13, Epsilon: 0.90
Episode: 22/500, Score: 63, Epsilon: 0.90
Episode: 23/500, Score: 15, Epsilon: 0.89
Ep

0,1
loss,▁▁▁▁▁▁▁▁▁▁▂▃▁▁▁█▁▁▁▆▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▁▁▁
score,▁▁▁▁▁▁▁▁▁▁▂▂▁▁▃▃▃▃▁▁▃▃▂▅▂▄▃▅▄▆▁▃▃▃▁▂▄█▃▅
total_reward,▂▁▁▂▁▁▁▂▁▁▁▁▂▂▁▃▄▃▃▃▃▅█▄▅▃▁▄▃▂▅▅█▅▃▅▄▄▃▃

0,1
loss,28.26465
score,237.0
total_reward,238.0


[34m[1mwandb[0m: Agent Starting Run: nwtso3jd with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	update_target_every: 5


Episode: 0/500, Score: 36, Epsilon: 1.00
Episode: 1/500, Score: 19, Epsilon: 0.99
Episode: 2/500, Score: 24, Epsilon: 0.99
Episode: 3/500, Score: 11, Epsilon: 0.99
Episode: 4/500, Score: 24, Epsilon: 0.98
Episode: 5/500, Score: 15, Epsilon: 0.98
Episode: 6/500, Score: 10, Epsilon: 0.97
Episode: 7/500, Score: 17, Epsilon: 0.97
Episode: 8/500, Score: 11, Epsilon: 0.96
Episode: 9/500, Score: 25, Epsilon: 0.96
Episode: 10/500, Score: 14, Epsilon: 0.95
Episode: 11/500, Score: 16, Epsilon: 0.95
Episode: 12/500, Score: 9, Epsilon: 0.94
Episode: 13/500, Score: 11, Epsilon: 0.94
Episode: 14/500, Score: 25, Epsilon: 0.93
Episode: 15/500, Score: 21, Epsilon: 0.93
Episode: 16/500, Score: 11, Epsilon: 0.92
Episode: 17/500, Score: 14, Epsilon: 0.92
Episode: 18/500, Score: 17, Epsilon: 0.91
Episode: 19/500, Score: 14, Epsilon: 0.91
Episode: 20/500, Score: 14, Epsilon: 0.90
Episode: 21/500, Score: 11, Epsilon: 0.90
Episode: 22/500, Score: 14, Epsilon: 0.90
Episode: 23/500, Score: 18, Epsilon: 0.89
Epi

0,1
loss,▁▂▁▂▃▃▃▂▇▂▄▄▄▄▄▁▅▂▃▄▂▅▃▄▄▁▃▂▁▁▄▃▄▂▄▂▂▁▁█
score,▂▃▁▁▂█▂▁▂▂▁▂▂▂▂▁▁▁▁▁▁▁▂▁▂▂▁▃▂▂▂▁▂▂▃▄▁▂▁▁
total_reward,▄▂▃▆▁▂▂▄▂▂▂▄▁▂▃▂▁▁▂▁▂▂▂▂▂▂▄▂▂▂▃▂▃▃█▂▁▁▂▂

0,1
loss,13.52518
score,9.0
total_reward,10.0


[34m[1mwandb[0m: Agent Starting Run: 4eorh3nz with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	update_target_every: 10


Episode: 0/500, Score: 10, Epsilon: 1.00
Episode: 1/500, Score: 21, Epsilon: 0.99
Episode: 2/500, Score: 37, Epsilon: 0.99
Episode: 3/500, Score: 8, Epsilon: 0.99
Episode: 4/500, Score: 13, Epsilon: 0.98
Episode: 5/500, Score: 22, Epsilon: 0.98
Episode: 6/500, Score: 18, Epsilon: 0.97
Episode: 7/500, Score: 42, Epsilon: 0.97
Episode: 8/500, Score: 12, Epsilon: 0.96
Episode: 9/500, Score: 9, Epsilon: 0.96
Episode: 10/500, Score: 14, Epsilon: 0.95
Episode: 11/500, Score: 23, Epsilon: 0.95
Episode: 12/500, Score: 16, Epsilon: 0.94
Episode: 13/500, Score: 35, Epsilon: 0.94
Episode: 14/500, Score: 11, Epsilon: 0.93
Episode: 15/500, Score: 65, Epsilon: 0.93
Episode: 16/500, Score: 11, Epsilon: 0.92
Episode: 17/500, Score: 23, Epsilon: 0.92
Episode: 18/500, Score: 32, Epsilon: 0.91
Episode: 19/500, Score: 9, Epsilon: 0.91
Episode: 20/500, Score: 12, Epsilon: 0.90
Episode: 21/500, Score: 25, Epsilon: 0.90
Episode: 22/500, Score: 12, Epsilon: 0.90
Episode: 23/500, Score: 32, Epsilon: 0.89
Episo

0,1
loss,▂▂▁▂▁▁▁▁▁▂▆▃▂▁▂▃▁▁▁▁▁▁▁▁▁▁█▆▁▁▁▁▁▅▁▁▁▁▁▁
score,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃▄▂▄▃▄▃▂▂▁▂▁▂▂▄▂▂▃▂▂▃▄█▄▅
total_reward,▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▁▁▁▂▃▂▂▃▂▄▄▁▂▅▁▄▄▁▃▃▄█▇▂▃

0,1
loss,0.0279
score,271.0
total_reward,272.0


[34m[1mwandb[0m: Agent Starting Run: jrt788in with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	update_target_every: 5


Episode: 0/500, Score: 15, Epsilon: 1.00
Episode: 1/500, Score: 11, Epsilon: 0.99
Episode: 2/500, Score: 17, Epsilon: 0.99
Episode: 3/500, Score: 32, Epsilon: 0.99
Episode: 4/500, Score: 25, Epsilon: 0.98
Episode: 5/500, Score: 13, Epsilon: 0.98
Episode: 6/500, Score: 21, Epsilon: 0.97
Episode: 7/500, Score: 15, Epsilon: 0.97
Episode: 8/500, Score: 31, Epsilon: 0.96
Episode: 9/500, Score: 13, Epsilon: 0.96
Episode: 10/500, Score: 17, Epsilon: 0.95
Episode: 11/500, Score: 13, Epsilon: 0.95
Episode: 12/500, Score: 25, Epsilon: 0.94
Episode: 13/500, Score: 9, Epsilon: 0.94
Episode: 14/500, Score: 37, Epsilon: 0.93
Episode: 15/500, Score: 15, Epsilon: 0.93
Episode: 16/500, Score: 15, Epsilon: 0.92
Episode: 17/500, Score: 21, Epsilon: 0.92
Episode: 18/500, Score: 17, Epsilon: 0.91
Episode: 19/500, Score: 18, Epsilon: 0.91
Episode: 20/500, Score: 13, Epsilon: 0.90
Episode: 21/500, Score: 9, Epsilon: 0.90
Episode: 22/500, Score: 30, Epsilon: 0.90
Episode: 23/500, Score: 31, Epsilon: 0.89
Epis

0,1
loss,▂▂▂▂▃▃▃▆▂▁▁▇▅▅█▄▂▁█▂▂▁█▃▅▄▄▁▆▂▂▂▁▆▁▅▁▁▂▁
score,▂▂▂▆▂▆█▂▆▂▅▂▄▁▂▃▁▂▁▂▁▂▁▁▁▁▂▁▁▂▁▁▁▁▂▁▁▁▁▁
total_reward,▃█▆▃▄▂▂▂▂▂▂▂▄▂▁▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▂▂▂▂▂▂▂▂

0,1
loss,2.66247
score,8.0
total_reward,9.0


[34m[1mwandb[0m: Agent Starting Run: g74xxg5y with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	update_target_every: 10


Episode: 0/500, Score: 16, Epsilon: 1.00
Episode: 1/500, Score: 25, Epsilon: 0.99
Episode: 2/500, Score: 44, Epsilon: 0.99
Episode: 3/500, Score: 13, Epsilon: 0.99
Episode: 4/500, Score: 10, Epsilon: 0.98
Episode: 5/500, Score: 9, Epsilon: 0.98
Episode: 6/500, Score: 12, Epsilon: 0.97
Episode: 7/500, Score: 11, Epsilon: 0.97
Episode: 8/500, Score: 17, Epsilon: 0.96
Episode: 9/500, Score: 37, Epsilon: 0.96
Episode: 10/500, Score: 17, Epsilon: 0.95
Episode: 11/500, Score: 27, Epsilon: 0.95
Episode: 12/500, Score: 30, Epsilon: 0.94
Episode: 13/500, Score: 20, Epsilon: 0.94
Episode: 14/500, Score: 27, Epsilon: 0.93
Episode: 15/500, Score: 17, Epsilon: 0.93
Episode: 16/500, Score: 22, Epsilon: 0.92
Episode: 17/500, Score: 32, Epsilon: 0.92
Episode: 18/500, Score: 12, Epsilon: 0.91
Episode: 19/500, Score: 10, Epsilon: 0.91
Episode: 20/500, Score: 13, Epsilon: 0.90
Episode: 21/500, Score: 11, Epsilon: 0.90
Episode: 22/500, Score: 14, Epsilon: 0.90
Episode: 23/500, Score: 17, Epsilon: 0.89
Epi

0,1
loss,▁▁▁▁▁▁▁▁▁▂▃▂▂▂▃▂▃▂▃█▃▃▇▅▄▇▅█▅▄▃▂▄▄▃▄▅▂▄▇
score,▁▁▁▁▂▁▁▁▁▁▂▁▁▁▁▂▁▁▁▁▁▂▁▁▁▁▁▁▁▁█▃▁▁▁▂▁▁▁▁
total_reward,▂▂▂▂▁▂▂▂▁▂▃▂▂▁▂▂▃▁▁▁▁▁▁▁▁█▆▄▂▂▁▁▁▁▁▁▂▅▆▂

0,1
loss,4.54065
score,10.0
total_reward,11.0


[34m[1mwandb[0m: Agent Starting Run: aku945x6 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	learning_rate: 0.0005
[34m[1mwandb[0m: 	update_target_every: 5


Episode: 0/500, Score: 16, Epsilon: 1.00
Episode: 1/500, Score: 16, Epsilon: 0.99
Episode: 2/500, Score: 15, Epsilon: 0.99
Episode: 3/500, Score: 12, Epsilon: 0.99
Episode: 4/500, Score: 11, Epsilon: 0.98
Episode: 5/500, Score: 19, Epsilon: 0.98
Episode: 6/500, Score: 24, Epsilon: 0.97
Episode: 7/500, Score: 13, Epsilon: 0.97
Episode: 8/500, Score: 16, Epsilon: 0.96
Episode: 9/500, Score: 34, Epsilon: 0.96
Episode: 10/500, Score: 24, Epsilon: 0.95
Episode: 11/500, Score: 8, Epsilon: 0.95
Episode: 12/500, Score: 18, Epsilon: 0.94
Episode: 13/500, Score: 24, Epsilon: 0.94
Episode: 14/500, Score: 14, Epsilon: 0.93
Episode: 15/500, Score: 19, Epsilon: 0.93
Episode: 16/500, Score: 15, Epsilon: 0.92
Episode: 17/500, Score: 21, Epsilon: 0.92
Episode: 18/500, Score: 16, Epsilon: 0.91
Episode: 19/500, Score: 13, Epsilon: 0.91
Episode: 20/500, Score: 22, Epsilon: 0.90
Episode: 21/500, Score: 50, Epsilon: 0.90
Episode: 22/500, Score: 10, Epsilon: 0.90
Episode: 23/500, Score: 17, Epsilon: 0.89
Epi

0,1
loss,▂▃▃▆▃▇▅▅▁▃▄█▇▆▆▁▁▁▁▁▄▁▅▃▅▁▁▁▁▅▁▁▁▁▁▁▁▁▄▁
score,▁▁▁▁▁▁▃▁▁▁▁▁▁▂▂▁▂▂▁▁▂▂▁▅▄▂▅▃▁▃▆██▂▄▃▄▃▄▄
total_reward,▁▁▁▁▁▁▂▁▁▁▂▂▂▂▁▁▂▁▅▁▁▃▃██▂▂▅▄▁▃▄██▄█▁▁▄█

0,1
loss,0.00909
score,499.0
total_reward,500.0


[34m[1mwandb[0m: Agent Starting Run: 43flwwbn with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	learning_rate: 0.0005
[34m[1mwandb[0m: 	update_target_every: 5


Episode: 0/500, Score: 23, Epsilon: 1.00
Episode: 1/500, Score: 9, Epsilon: 0.99
Episode: 2/500, Score: 15, Epsilon: 0.99
Episode: 3/500, Score: 28, Epsilon: 0.99
Episode: 4/500, Score: 11, Epsilon: 0.98
Episode: 5/500, Score: 16, Epsilon: 0.98
Episode: 6/500, Score: 16, Epsilon: 0.97
Episode: 7/500, Score: 36, Epsilon: 0.97
Episode: 8/500, Score: 12, Epsilon: 0.96
Episode: 9/500, Score: 25, Epsilon: 0.96
Episode: 10/500, Score: 8, Epsilon: 0.95
Episode: 11/500, Score: 17, Epsilon: 0.95
Episode: 12/500, Score: 27, Epsilon: 0.94
Episode: 13/500, Score: 22, Epsilon: 0.94
Episode: 14/500, Score: 29, Epsilon: 0.93
Episode: 15/500, Score: 26, Epsilon: 0.93
Episode: 16/500, Score: 12, Epsilon: 0.92
Episode: 17/500, Score: 9, Epsilon: 0.92
Episode: 18/500, Score: 24, Epsilon: 0.91
Episode: 19/500, Score: 67, Epsilon: 0.91
Episode: 20/500, Score: 19, Epsilon: 0.90
Episode: 21/500, Score: 24, Epsilon: 0.90
Episode: 22/500, Score: 15, Epsilon: 0.90
Episode: 23/500, Score: 20, Epsilon: 0.89
Episo

0,1
loss,▁▁▁▁▁▂▃▄▂▆▁▄▄█▃▂▂▂▄▁▄▃▃▁▁▁▁▁▄▁▄▁▁▁▄▄▄▄▁▁
score,▂▁▂▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▂▂▁█▂▂▂▂▂▃▇▃▁▂▂▃▃▂▁
total_reward,▂▂▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▂▁▁▁▂▂▂▄█▃▂▂▇▃▃▇▂▂▂▃▂▁

0,1
loss,0.36651
score,20.0
total_reward,21.0


[34m[1mwandb[0m: Agent Starting Run: bet5rg10 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	learning_rate: 0.0005
[34m[1mwandb[0m: 	update_target_every: 20


Episode: 0/500, Score: 11, Epsilon: 1.00
Episode: 1/500, Score: 11, Epsilon: 0.99
Episode: 2/500, Score: 13, Epsilon: 0.99
Episode: 3/500, Score: 15, Epsilon: 0.99
Episode: 4/500, Score: 16, Epsilon: 0.98
Episode: 5/500, Score: 38, Epsilon: 0.98
Episode: 6/500, Score: 14, Epsilon: 0.97
Episode: 7/500, Score: 39, Epsilon: 0.97
Episode: 8/500, Score: 15, Epsilon: 0.96
Episode: 9/500, Score: 20, Epsilon: 0.96
Episode: 10/500, Score: 17, Epsilon: 0.95
Episode: 11/500, Score: 14, Epsilon: 0.95
Episode: 12/500, Score: 17, Epsilon: 0.94
Episode: 13/500, Score: 43, Epsilon: 0.94
Episode: 14/500, Score: 9, Epsilon: 0.93
Episode: 15/500, Score: 12, Epsilon: 0.93
Episode: 16/500, Score: 12, Epsilon: 0.92
Episode: 17/500, Score: 26, Epsilon: 0.92
Episode: 18/500, Score: 49, Epsilon: 0.91
Episode: 19/500, Score: 19, Epsilon: 0.91
Episode: 20/500, Score: 12, Epsilon: 0.90
Episode: 21/500, Score: 18, Epsilon: 0.90
Episode: 22/500, Score: 16, Epsilon: 0.90
Episode: 23/500, Score: 28, Epsilon: 0.89
Epi

0,1
loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▃▁▂▁▁██▃▃▁▁▁▃▁▁▁▁▁▁▂▁▁
score,▁▁▁▁▁▁▁▁▁▂▁▂▂▁▂▁▁▂▂▂▃▂▂▁▂▄▄▅▅▃▄█▄▄▅▆▅▅▄▄
total_reward,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▂▁▁▂▂▁▂▁▁▁▁▂▂▂▂▂▃▃▄▄▅▅█▅

0,1
loss,0.09213
score,282.0
total_reward,283.0


[34m[1mwandb[0m: Agent Starting Run: so01yekv with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	update_target_every: 20


Episode: 0/500, Score: 16, Epsilon: 1.00
Episode: 1/500, Score: 21, Epsilon: 0.99
Episode: 2/500, Score: 16, Epsilon: 0.99
Episode: 3/500, Score: 23, Epsilon: 0.99
Episode: 4/500, Score: 12, Epsilon: 0.98
Episode: 5/500, Score: 10, Epsilon: 0.98
Episode: 6/500, Score: 15, Epsilon: 0.97
Episode: 7/500, Score: 15, Epsilon: 0.97
Episode: 8/500, Score: 21, Epsilon: 0.96
Episode: 9/500, Score: 21, Epsilon: 0.96
Episode: 10/500, Score: 12, Epsilon: 0.95
Episode: 11/500, Score: 12, Epsilon: 0.95
Episode: 12/500, Score: 13, Epsilon: 0.94
Episode: 13/500, Score: 11, Epsilon: 0.94
Episode: 14/500, Score: 17, Epsilon: 0.93
Episode: 15/500, Score: 16, Epsilon: 0.93
Episode: 16/500, Score: 10, Epsilon: 0.92
Episode: 17/500, Score: 13, Epsilon: 0.92
Episode: 18/500, Score: 10, Epsilon: 0.91
Episode: 19/500, Score: 13, Epsilon: 0.91
Episode: 20/500, Score: 24, Epsilon: 0.90
Episode: 21/500, Score: 15, Epsilon: 0.90
Episode: 22/500, Score: 49, Epsilon: 0.90
Episode: 23/500, Score: 65, Epsilon: 0.89
Ep

0,1
loss,▁▁▁▁▁▁▁▁▁▃▂▂▃▂▁▂▁▁▄▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▄▁▇▁▁█
score,▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▂▁▂▂▁▃▁▂▁▁▂▃▆▇█▃▃▁█▄▇▅
total_reward,▁▁▁▁▁▁▁▁▂▁▁▁▂▁▁▁▃▁▂▂▃▂▂▁▁▇▂▂█▇▃▂▃▆▂▅▇▇█▃

0,1
loss,0.02941
score,189.0
total_reward,190.0


[34m[1mwandb[0m: Agent Starting Run: po8gojzu with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	learning_rate: 0.0005
[34m[1mwandb[0m: 	update_target_every: 5


Episode: 0/500, Score: 15, Epsilon: 1.00
Episode: 1/500, Score: 13, Epsilon: 0.99
Episode: 2/500, Score: 21, Epsilon: 0.99
Episode: 3/500, Score: 35, Epsilon: 0.99
Episode: 4/500, Score: 23, Epsilon: 0.98
Episode: 5/500, Score: 26, Epsilon: 0.98
Episode: 6/500, Score: 14, Epsilon: 0.97
Episode: 7/500, Score: 31, Epsilon: 0.97
Episode: 8/500, Score: 24, Epsilon: 0.96
Episode: 9/500, Score: 24, Epsilon: 0.96
Episode: 10/500, Score: 23, Epsilon: 0.95
Episode: 11/500, Score: 16, Epsilon: 0.95
Episode: 12/500, Score: 25, Epsilon: 0.94
Episode: 13/500, Score: 10, Epsilon: 0.94
Episode: 14/500, Score: 41, Epsilon: 0.93
Episode: 15/500, Score: 19, Epsilon: 0.93
Episode: 16/500, Score: 22, Epsilon: 0.92
Episode: 17/500, Score: 18, Epsilon: 0.92
Episode: 18/500, Score: 9, Epsilon: 0.91
Episode: 19/500, Score: 14, Epsilon: 0.91
Episode: 20/500, Score: 26, Epsilon: 0.90
Episode: 21/500, Score: 12, Epsilon: 0.90
Episode: 22/500, Score: 9, Epsilon: 0.90
Episode: 23/500, Score: 11, Epsilon: 0.89
Epis

0,1
loss,▁▂▆▁▆▄▄▃█▂▃▅▅▅▆▄▄▃▄▁▃▁▅▅▆▅▁▁▁▅▁▁▁▅▁▃▆▂▇▇
score,▁▁▁▁▁▁▁▂▃▁▁▁▂▁▂▂▁▂▁▃▂▇█▂▂▂▇▇█▃▂▅▄▄▄▅▆▆▅▁
total_reward,▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▂▁▁▄▄▁▃▅▂▆▂▆▅█▃▃▃▄▃█▇▄▅▁

0,1
loss,12.67703
score,13.0
total_reward,14.0


[34m[1mwandb[0m: Agent Starting Run: au5a46jc with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	gamma: 0.95
[34m[1mwandb[0m: 	learning_rate: 0.005
[34m[1mwandb[0m: 	update_target_every: 10


Episode: 0/500, Score: 28, Epsilon: 1.00
Episode: 1/500, Score: 16, Epsilon: 0.99
Episode: 2/500, Score: 11, Epsilon: 0.99
Episode: 3/500, Score: 16, Epsilon: 0.99
Episode: 4/500, Score: 22, Epsilon: 0.98
Episode: 5/500, Score: 14, Epsilon: 0.98
Episode: 6/500, Score: 22, Epsilon: 0.97
Episode: 7/500, Score: 11, Epsilon: 0.97
Episode: 8/500, Score: 9, Epsilon: 0.96
Episode: 9/500, Score: 14, Epsilon: 0.96
Episode: 10/500, Score: 15, Epsilon: 0.95
Episode: 11/500, Score: 12, Epsilon: 0.95
Episode: 12/500, Score: 11, Epsilon: 0.94
Episode: 13/500, Score: 40, Epsilon: 0.94
Episode: 14/500, Score: 11, Epsilon: 0.93
Episode: 15/500, Score: 19, Epsilon: 0.93
Episode: 16/500, Score: 30, Epsilon: 0.92
Episode: 17/500, Score: 19, Epsilon: 0.92
Episode: 18/500, Score: 10, Epsilon: 0.91
Episode: 19/500, Score: 21, Epsilon: 0.91
Episode: 20/500, Score: 10, Epsilon: 0.90
Episode: 21/500, Score: 16, Epsilon: 0.90
Episode: 22/500, Score: 15, Epsilon: 0.90
Episode: 23/500, Score: 14, Epsilon: 0.89
Epi

0,1
loss,▁▁▁▂▂▁▃▂▂▃▂▂▂▁▂▂▅▄▁▆▅▂▂▁▁▁▇▅█▃▁▁▁█▂▄▁▅▁▂
score,▁▁▁▁▁▁▁▁▁▁▂▁▁▂▁▃▄▁▅▁▂▆▃▁▃▃▁█▄▁▄▁▄▄▁▃▅▄▃▃
total_reward,▁▁▁▁▁▁▂▁▂▂▃▁▁▃▄▂▃▂▂▃▂▁█▃▁▁▂▃▂▂▁▁▁▂▁▂▂▃▁▂

0,1
loss,0.64178
score,142.0
total_reward,143.0


[34m[1mwandb[0m: Agent Starting Run: yt8ny51z with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	update_target_every: 20


Episode: 0/500, Score: 8, Epsilon: 1.00
Episode: 1/500, Score: 10, Epsilon: 0.99
Episode: 2/500, Score: 22, Epsilon: 0.99
Episode: 3/500, Score: 22, Epsilon: 0.99
Episode: 4/500, Score: 36, Epsilon: 0.98
Episode: 5/500, Score: 51, Epsilon: 0.98
Episode: 6/500, Score: 62, Epsilon: 0.97
Episode: 7/500, Score: 35, Epsilon: 0.97
Episode: 8/500, Score: 20, Epsilon: 0.96
Episode: 9/500, Score: 12, Epsilon: 0.96
Episode: 10/500, Score: 14, Epsilon: 0.95
Episode: 11/500, Score: 23, Epsilon: 0.95
Episode: 12/500, Score: 12, Epsilon: 0.94
Episode: 13/500, Score: 25, Epsilon: 0.94
