<a href="https://colab.research.google.com/github/spindouken/atlas-machine_learning/blob/main/reinforcement_learning/deep_q_learning/Deep_Q_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This project was finalized 1/10/2024.<br>
Author: Mason Counts

Resources:<br>
https://www.cs.toronto.edu/~vmnih/docs/dqn.pdf<br>
https://gymnasium.farama.org/environments/atari/breakout/#breakout<br>
https://gymnasium.farama.org/api/wrappers/<br>
https://gymnasium.farama.org/api/env/<br>

# **DEEP Q-LEARNING WITH A DEEP Q-NETWORK (DQN)**

#### environment set-up

In [None]:
import os
import tensorflow as tf
# This notebook expects TensorFlow 2.11.0. If another version is loaded, install
# 2.11.0 and kill the kernel process so the runtime restarts; re-run this cell
# after the restart.
if tf.__version__!='2.11.0':
  !pip install tensorflow==2.11.0 --quiet
  os.kill(os.getpid(), 9)  # send signal 9 to this process to force the restart

In [None]:
# gym[atari] now has ale-py integrated
!pip install gym[atari]



In [None]:
# installs the gym extra that accepts the Atari ROM license (needed for ALE/Breakout-v5 below)
pip install gym[accept-rom-license]



In [None]:
# keras-rl2 supplies the `rl` package imported further down (agent, policy, memory, callbacks)
pip install keras-rl2 --quiet

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/52.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━[0m [32m41.0/52.1 kB[0m [31m989.8 kB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.1/52.1 kB[0m [31m879.0 kB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# mount Google Drive; the weights, logs and videos used below live under /content/drive/MyDrive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


#### ***make gym env***

In [None]:
# this is the format suggested by ALE dev blog post
import gym

# create the Breakout environment; every keyword below is passed straight to gym.make
env = gym.make('ALE/Breakout-v5',
    obs_type='rgb',                   # ram | rgb | grayscale
    frameskip=4,                      # frame skip
    mode=None,                        # game mode, see Machado et al. 2018
    difficulty=None,                  # game difficulty, see Machado et al. 2018
    repeat_action_probability=0.25,   # Sticky action probability
    full_action_space=False,          # Use all actions
    render_mode='rgb_array',          # None | human | rgb_array
)

# Quoted guidance on why render_mode is supplied to gym.make above:
# "The one keyword argument of note is render_mode.
#   To realize the benefits of natively rendering with SDL we must supply the render mode when constructing the environment.
#    We highly discourage using env.render()."

  deprecation(
  deprecation(


#### we do a little environment analysis.. is gud

In [None]:
# list the human-readable name of each discrete action (recorded output: NOOP, FIRE, RIGHT, LEFT)
env.unwrapped.get_action_meanings()

['NOOP', 'FIRE', 'RIGHT', 'LEFT']

In [None]:
# Reset the environment once and report what kind of object comes back
initial_observation = env.reset()

print(
    f"Initial Observation Type: {type(initial_observation)}",
    f"Initial Observation Shape: {getattr(initial_observation, 'shape', 'No shape attribute')}",
    sep="\n",
)


Initial Observation Type: <class 'numpy.ndarray'>
Initial Observation Shape: (210, 160, 3)


In [None]:
# the reset above returned a numpy array (not a tuple), so label it accordingly
print(f"Contents of the initial observation array: {initial_observation}")

Contents of the initial observation tuple: [[[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 ...

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]]


In [None]:
# Print action space details
print(f"Action Space: {env.action_space}")
# sample() draws one action from the space; the recorded run printed Discrete(4) and action 1
print(f"Sample Action: {env.action_space.sample()}")

Action Space: Discrete(4)
Sample Action: 1


In [None]:
# Print observation space details
print(f"Observation Space: {env.observation_space}")
# NOTE: the sample is a full (210, 160, 3) uint8 array, so this prints a large block of pixel values
print(f"Sample Observation: {env.observation_space.sample()}")

Observation Space: Box(0, 255, (210, 160, 3), uint8)
Sample Observation: [[[221  46 189]
  [130 197  36]
  [ 73  55 175]
  ...
  [ 88 205 169]
  [157 105 217]
  [ 44  70 204]]

 [[ 46 243 231]
  [252  61 199]
  [ 35 235  54]
  ...
  [ 91  88 251]
  [ 97 220 178]
  [ 11  69 142]]

 [[202  46 200]
  [ 70  93  78]
  [228   0  35]
  ...
  [116 188 183]
  [ 29 124 115]
  [112 239 121]]

 ...

 [[138 133 247]
  [ 97 187 180]
  [179 218  51]
  ...
  [186 247 156]
  [123  81  15]
  [ 72  56 112]]

 [[206 217 238]
  [138 171 153]
  [240  60  84]
  ...
  [ 24 191   6]
  [186 254 210]
  [ 57  87  87]]

 [[114 249 104]
  [236  32 130]
  [166  11  22]
  ...
  [ 57  97  75]
  [190 219   9]
  [  9 245  47]]]


In [None]:
# Take one randomly sampled action and look at everything env.step hands back
action = env.action_space.sample()
result = env.step(action)

# The old step API yields 4 values; the new one inserts 'truncated' as a 5th
if len(result) != 4:
    next_state, reward, done, truncated, info = result
else:
    next_state, reward, done, info = result
    truncated = None  # Old API does not return 'truncated'

# Report each component of the step on its own line
print(
    f"Next State Type: {type(next_state)}",
    f"Next State Shape: {getattr(next_state, 'shape', 'No shape attribute')}",
    f"Reward: {reward}",
    f"Done: {done}",
    f"Truncated: {truncated}",
    f"Info: {info}",
    sep="\n",
)

Next State Type: <class 'numpy.ndarray'>
Next State Shape: (210, 160, 3)
Reward: 0.0
Done: False
Truncated: None
Info: {'lives': 5, 'episode_frame_number': 4, 'frame_number': 4, 'rgb': array([[[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       ...,

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]]], dtype=uint8)}


  logger.deprecation(


#### **Imports and hyperparameters**

In [None]:
import gym
import ale_py
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Convolution2D, Activation, Permute
from tensorflow.keras.optimizers.legacy import Adam
from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.callbacks import ModelIntervalCheckpoint, FileLogger
from PIL import Image
import numpy as np
from rl.core import Processor
# `plt` must be the pyplot module (plt.clf / plt.imshow are used later), not the matplotlib package
import matplotlib.pyplot as plt

In [None]:
# Seed every random source used by this notebook, not only numpy, so runs are repeatable.
import random

random.seed(123)                # Python's own generator (presumably used by library sampling code - verify)
np.random.seed(123)
tf.random.set_seed(123)         # TensorFlow ops, including Keras weight initialisation
env.action_space.seed(123)      # env.action_space.sample() calls in the inspection cells
env.seed(123)                   # kept last so the cell still displays the env seeds

(693650678, 2973253328)

In [None]:
# Hyperparameters
# Read by the model, memory, policy, agent and training cells below.
DQN_CONFIG = {
    'learning_rate': 1e-4,  # passed to the Adam optimizer
    'training_steps': 60000,  # total number of steps to train for (including warmup)
    'input_shape': (84, 84),  # size each frame is resized to by AtariProcessor
    'memory_limit': 1000000,  # `limit` of the SequentialMemory replay buffer
    'window_length': 4,  # frames per state; also the first axis of the model input
    'nb_steps_warmup': 40000,  # number of steps to take before training begins
    'target_model_update': 20000,  # passed to DQNAgent as target_model_update
    'enable_dueling_network': False,
    'dueling_type': None,
    # epsilon greedy action selection parameters below (policy)
    'eps_max': 1.0,
    'eps_min': 0.1,
    'eps_test': 0.05,
    # NOTE(review): same value as nb_steps_warmup, and the training log already shows
    #  mean_eps at 0.100000 in the first episodes that report a loss - confirm that
    #  finishing the epsilon anneal during warmup is intended
    'eps_nb_steps': 40000
}

#### **the code that does the preprocessing, indicates the policy, implements the DQN agent, and does the learnin and stuff**

In [None]:
class AtariProcessor(Processor):
    """
    Processor for Atari frames: shrinks and grayscales observations, rescales
    state batches to [0, 1], clips rewards and strips non-scalar info entries.
    """
    def process_observation(self, observation):
        """
        Resize an RGB frame to DQN_CONFIG['input_shape'] and convert it to grayscale.

        Args:
            observation: array with 3 dimensions (height, width, channel)

        Returns:
            uint8 array of shape DQN_CONFIG['input_shape'], read as (height, width)
        """
        assert observation.ndim == 3  # (height, width, channel)
        height, width = DQN_CONFIG['input_shape']
        img = Image.fromarray(observation)
        # PIL's resize expects (width, height) while the numpy shape is (height, width);
        # passing the shape unswapped only worked because the configured shape is square
        img = img.resize((width, height)).convert('L')  # resize and convert to grayscale
        processed_observation = np.array(img)
        assert processed_observation.shape == (height, width)
        return processed_observation.astype('uint8')  # saves storage in experience memory

    def process_state_batch(self, batch):
        """
        Convert a batch of stored uint8 frames to float32 values in [0, 1].
        """
        processed_batch = batch.astype('float32') / 255.
        return processed_batch

    def process_reward(self, reward):
        """
        Clip the reward to the range [-1, 1].
        """
        return np.clip(reward, -1., 1.)

    def process_info(self, info):
        """
        filter out non-scalar values from the info dictionary
        """
        scalar_info = {k: v for k, v in info.items() if np.isscalar(v)}
        return scalar_info

In [None]:
nb_actions = env.action_space.n

# Convolutional Q-network: three conv layers followed by a 512-unit dense layer
#   and one linear output per action. These layer sizes match the network of
#   Mnih et al. (2015), "Human-level control through deep reinforcement learning";
#   the earlier paper is at http://www.cs.toronto.edu/~vmnih/docs/dqn.pdf
input_shape = (DQN_CONFIG['window_length'],) + DQN_CONFIG['input_shape']

model = Sequential()
# move the frame-window axis last: (window, 84, 84) -> (84, 84, window)
model.add(Permute((2, 3, 1), input_shape=input_shape))

model.add(Convolution2D(32, (8, 8), strides=(4, 4)))
model.add(Activation('relu'))
model.add(Convolution2D(64, (4, 4), strides=(2, 2)))
model.add(Activation('relu'))
model.add(Convolution2D(64, (3, 3), strides=(1, 1)))
model.add(Activation('relu'))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
# summary() prints the table itself and returns None, so wrapping it in print()
# only appended a stray "None" to the output
model.summary()


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 permute_1 (Permute)         (None, 84, 84, 4)         0         
                                                                 
 conv2d_3 (Conv2D)           (None, 20, 20, 32)        8224      
                                                                 
 activation_5 (Activation)   (None, 20, 20, 32)        0         
                                                                 
 conv2d_4 (Conv2D)           (None, 9, 9, 64)          32832     
                                                                 
 activation_6 (Activation)   (None, 9, 9, 64)          0         
                                                                 
 conv2d_5 (Conv2D)           (None, 7, 7, 64)          36928     
                                                                 
 activation_7 (Activation)   (None, 7, 7, 64)         

In [None]:
# Assemble the agent: replay memory, an epsilon-greedy Q policy whose epsilon is
#  annealed linearly, the Q-network built above, and an Adam optimizer.

memory = SequentialMemory(
    limit=DQN_CONFIG['memory_limit'],
    window_length=DQN_CONFIG['window_length'],
)

policy = LinearAnnealedPolicy(
    EpsGreedyQPolicy(),
    attr='eps',
    value_max=DQN_CONFIG['eps_max'],
    value_min=DQN_CONFIG['eps_min'],
    value_test=DQN_CONFIG['eps_test'],
    nb_steps=DQN_CONFIG['eps_nb_steps'],
)

dqn = DQNAgent(
    model=model,
    nb_actions=env.action_space.n,
    memory=memory,
    nb_steps_warmup=DQN_CONFIG['nb_steps_warmup'],
    enable_dueling_network=DQN_CONFIG['enable_dueling_network'],
    dueling_type=DQN_CONFIG['dueling_type'],
    target_model_update=DQN_CONFIG['target_model_update'],
    policy=policy,
    processor=AtariProcessor(),
)

dqn.compile(Adam(learning_rate=DQN_CONFIG['learning_rate']), metrics=['mae'])


In [None]:
# check the shape of an observation after the agent's processor has handled it
observation = env.reset()
processed_observation = dqn.processor.process_observation(observation)
print(f"Processed Observation Shape: {processed_observation.shape}")

# run a single step to test
action = env.action_space.sample()
observation, reward, done, info = env.step(action)
# Report the frame's shape and only the scalar info entries: printing the raw frame
# and the 'rgb' array inside `info` floods the output with pixel values.
print(f"Next Observation Shape: {observation.shape}, Reward: {reward}, Done: {done}, "
      f"Info: {dqn.processor.process_info(info)}")


Processed Observation Shape: (84, 84)
Next Observation: [[[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 ...

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]], Reward: 0.0, Done: False, Info: {'lives': 5, 'episode_frame_number': 4, 'frame_number': 4, 'rgb': array([[[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       ...,

       [[0, 0, 0],
        [0, 

  logger.deprecation(


In [None]:
weights_filename = '/content/drive/MyDrive/dqn/DQN_Breakout_Weights.h5'
checkpoint_weights_filename = '/content/drive/MyDrive/dqn_breakout_weights_{step}.h5f'
log_filename = '/content/drive/MyDrive/dqn_breakout_log.json'

# Create the output folders up front: a missing folder would otherwise only surface
# when the first file is written, i.e. possibly after the whole training run.
for output_path in (weights_filename, checkpoint_weights_filename, log_filename):
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

# callbacks interval (in steps) between weight checkpoints
interval = 50000

callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=interval)]
callbacks += [FileLogger(log_filename, interval=100)]

# train
training_steps = DQN_CONFIG['training_steps']
dqn.fit(env, callbacks=callbacks, nb_steps=training_steps, log_interval=10000, verbose=2)

# save the final weights after training completes
dqn.save_weights(weights_filename, overwrite=True)

Training for 60000 steps ...


  updates=self.state_updates,


   173/60000: episode: 1, duration: 2.590s, episode steps: 173, steps per second:  67, episode reward:  1.000, mean reward:  0.006 [ 0.000,  1.000], mean action: 1.520 [0.000, 3.000],  loss: --, mae: --, mean_q: --, mean_eps: --
   451/60000: episode: 2, duration: 2.809s, episode steps: 278, steps per second:  99, episode reward:  3.000, mean reward:  0.011 [ 0.000,  1.000], mean action: 1.518 [0.000, 3.000],  loss: --, mae: --, mean_q: --, mean_eps: --
   678/60000: episode: 3, duration: 1.877s, episode steps: 227, steps per second: 121, episode reward:  2.000, mean reward:  0.009 [ 0.000,  1.000], mean action: 1.520 [0.000, 3.000],  loss: --, mae: --, mean_q: --, mean_eps: --
   931/60000: episode: 4, duration: 2.147s, episode steps: 253, steps per second: 118, episode reward:  3.000, mean reward:  0.012 [ 0.000,  1.000], mean action: 1.482 [0.000, 3.000],  loss: --, mae: --, mean_q: --, mean_eps: --
  1076/60000: episode: 5, duration: 1.139s, episode steps: 145, steps per second: 12

  updates=self.state_updates,


 40107/60000: episode: 193, duration: 29.662s, episode steps: 275, steps per second:   9, episode reward:  1.000, mean reward:  0.004 [ 0.000,  1.000], mean action: 0.593 [0.000, 3.000],  loss: 0.002629, mae: 0.043687, mean_q: 0.063893, mean_eps: 0.100000
 40304/60000: episode: 194, duration: 51.738s, episode steps: 197, steps per second:   4, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 1.335 [0.000, 3.000],  loss: 0.002820, mae: 0.043621, mean_q: 0.061571, mean_eps: 0.100000
 40609/60000: episode: 195, duration: 77.474s, episode steps: 305, steps per second:   4, episode reward:  4.000, mean reward:  0.013 [ 0.000,  1.000], mean action: 1.456 [0.000, 3.000],  loss: 0.003167, mae: 0.044038, mean_q: 0.059911, mean_eps: 0.100000
 40799/60000: episode: 196, duration: 48.936s, episode steps: 190, steps per second:   4, episode reward:  1.000, mean reward:  0.005 [ 0.000,  1.000], mean action: 1.547 [0.000, 3.000],  loss: 0.003092, mae: 0.045522, mean_q: 0.062

#### **agent play while we evaluate it and record the gameplay (*we hopes*)**

In [None]:
# not yet implemented
# purpose: for user to be able to pull the weights from this project's training
#  to evaluate the agent and/or continue training
# NOTE(review): the account/repository in this URL differ from the Colab badge link at the
#  top of the notebook, and the path is not a raw-file URL - confirm the real location
#  of policy.h5 before relying on this download
!wget https://github.com/spindoken/atlasschool-machine_learning/reinforcement_learning/Deep_Q_Learning/policy.h5 -O policy.h5

code to evaluate agent without recording or rendering

In [None]:
weights_filename = '/content/drive/MyDrive/dqn/DQN_Breakout_Weights.h5'

# NOTE(review): the recorded output of this cell is a "Layer count mismatch" ValueError
#  (model expected 5 layers, file holds 6) - the file presumably contains weights saved
#  from a different architecture than the current `model`; confirm which run wrote it
dqn.load_weights(weights_filename)

# evaluate for a single episode (nb_episodes=1), without rendering
dqn.test(env, nb_episodes=1, visualize=False)

ValueError: Layer count mismatch when loading weights from file. Model expected 5 layers, found 6 saved layers.

The code below will save the AI gameplay videos to the specified folder. A Video will be saved for each AI episode.<br>
The AI will utilize the weights filename that is specified in order to play the specified amount of episodes.
I was not able to get test to work properly with recording. I am unsure if this is due to a bug from using an odd combination of a certain tf version and other libraries or if I didn't properly initialize the agent for testing.

In [None]:
import gym
from gym.wrappers import RecordEpisodeStatistics, RecordVideo

# wrap environment with the RecordEpisodeStatistics wrapper
# NOTE: both wrappers rebind `env`, so re-running this cell wraps the already-wrapped env again
env = RecordEpisodeStatistics(env)  # this will record the episode statistics

# wrap environment with RecordVideo wrapper
# this wrapper will record each episode of the agent playing
env = RecordVideo(env, '/content/drive/MyDrive/dqn/AIgameplayVODs', episode_trigger=lambda episode: True)

# load the trained weights
weights_file = '/content/drive/MyDrive/dqn/DQN_Breakout_Weights.h5'
dqn.load_weights(weights_file)

# evaluate AI
nb_episodes = 1  # number of runs for the AI to play
# pass the variable (it used to be ignored in favour of a hard-coded 1)
dqn.test(env, nb_episodes=nb_episodes, visualize=False)

env.close()

  deprecation(
  logger.warn(


Testing for 1 episodes ...


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.warn(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


Episode 1: reward: 0.000, steps: 27000


code below will load a video from drive and play it in colab

In [None]:
from IPython.display import HTML
from base64 import b64encode

def play_video(video_path):
    """
    workaround to display a video in colab

    The file is read fully, base64-encoded and embedded in a <video> tag as a data URL.

    Args:
        video_path: Path to the video file.

    Returns:
        The HTML object wrapping the <video> markup.
    """
    # the context manager closes the file handle even if reading fails
    with open(video_path, 'rb') as video_file:
        mp4 = video_file.read()
    data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
    return HTML("""
    <video width=400 controls>
        <source src="%s" type="video/mp4">
    </video>
    """ % data_url)

# play episode 0 from the folder that the RecordVideo cell above writes to
video_path = '/content/drive/MyDrive/dqn/AIgameplayVODs/rl-video-episode-0.mp4'
play_video(video_path)



#### renderer dump
The code below is not currently functioning as intended.

In [None]:
from rl.callbacks import Callback
import matplotlib.pyplot as plt
from IPython import display

class Render(Callback):
    """
    Callback that redraws the current game frame in the notebook output after every step.
    """
    def on_step_end(self, step, logs=None):
        """
        Draw the frame returned by env.render and replace the previous output.

        Args:
            step: index of the step that just ended (not used here)
            logs: values passed in by the caller (not used here); defaults to None
                rather than a shared mutable dict
        """
        plt.clf()
        # NOTE(review): uses the notebook-level `env`, which was created with
        #  render_mode='rgb_array'; confirm render(mode=...) returns a single frame here
        plt.imshow(env.render(mode='rgb_array'))
        display.display(plt.gcf())
        display.clear_output(wait=True)

In [None]:
# needs the cell defining Render to have been run first; the recorded output is a NameError for Render
dqn.test(env, nb_episodes=1, visualize=False, callbacks=[Render()])

NameError: name 'Render' is not defined

# **DEEP Q-LEARNING WITH A  DOUBLE DUELING Q-NETWORK (DDQN)**

## Full Environment Set-Up
This includes installs, imports, config, lr scheduler, processing, policy, dqn, and model. <br>
You will have to run the first code block (tf version set-up) twice, as it has a built-in runtime reset that reloads TensorFlow in the environment.

In [None]:
import os
import tensorflow as tf
# check if tf version is 2.11.0, if not: install it
if tf.__version__ != '2.11.0':
    !pip install tensorflow==2.11.0 --quiet
    os.kill(os.getpid(), 9)

# install all required libraries
!pip install gym[atari] gym[accept-rom-license] keras-rl2 --quiet

from google.colab import drive
drive.mount('/content/drive')

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.1/52.1 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m27.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m434.7/434.7 kB[0m [31m31.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for AutoROM.accept-rom-license (pyproject.toml) ... [?25l[?25hdone
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Flatten, Conv2D, Activation, Permute, Convolution2D, Input, Lambda
from tensorflow.keras.optimizers.legacy import Adam
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.callbacks import ModelIntervalCheckpoint, FileLogger
from PIL import Image
import numpy as np
from rl.core import Processor
import gym
import ale_py
import matplotlib.pyplot as plt
from gym.wrappers import RecordEpisodeStatistics, RecordVideo

# make the gym environment using rgb mode
env = gym.make('ALE/Breakout-v5', obs_type='rgb', frameskip=4, mode=None, difficulty=None, repeat_action_probability=0.25, full_action_space=False, render_mode='rgb_array')

# random seeds for reproducibility: seed every generator involved, not only numpy
import random

random.seed(123)                # Python's own generator (presumably used by library sampling code - verify)
np.random.seed(123)
tf.random.set_seed(123)         # TensorFlow ops, including Keras weight initialisation
env.action_space.seed(123)      # env.action_space.sample() calls
env.seed(123)

# hyperparameters for easy adjusting
# Read by the learning-rate schedule, processor, model builder, memory, policy and agent below.
DQN_CONFIG = {
    'initial_learning_rate': 1e-4,  # starting value of the ExponentialDecay schedule
    'decay_steps': 100000,  # ExponentialDecay decay_steps
    'decay_rate': 0.96,  # ExponentialDecay decay_rate
    'staircase': True,  # ExponentialDecay staircase flag
    'input_shape': (84, 84),  # size each frame is resized to by AtariProcessor
    'memory_limit': 1000000,  # `limit` of the SequentialMemory replay buffer
    'window_length': 4,  # frames per state; also the first axis of the model input
    'target_model_update': 10000,  # passed to DQNAgent as target_model_update
    'enable_dueling_network': True,  # passed to DQNAgent as enable_dueling_network
    'dueling_type': 'avg',  # passed to DQNAgent as dueling_type
    # epsilon hyperparameters below
    'eps_max': 1.0,
    'eps_min': 0.1,
    'eps_test': 0.05,
    'eps_nb_steps': 250000,  # nb_steps of the LinearAnnealedPolicy
    # training step hyperparameters below
    'training_steps': 500000,  # total training steps (including warmup)
    'nb_steps_warmup': 20000  # amount of steps to warmup for
}

# preprocessing observations
class AtariProcessor(Processor):
    """
    Processor for Atari games: processes the observation from the environment.

    Also rescales state batches to [0, 1], clips rewards and strips
    non-scalar entries from the info dictionary.
    """
    def process_observation(self, observation):
        """
        Resize an RGB frame to DQN_CONFIG['input_shape'] and convert it to grayscale.

        Args:
            observation: array with 3 dimensions (height, width, channel)

        Returns:
            uint8 array of shape DQN_CONFIG['input_shape'], read as (height, width)
        """
        assert observation.ndim == 3  # (height, width, channel)
        height, width = DQN_CONFIG['input_shape']
        img = Image.fromarray(observation)
        # PIL's resize expects (width, height) while the numpy shape is (height, width);
        # passing the shape unswapped only worked because the configured shape is square
        img = img.resize((width, height)).convert('L')  # resize and convert to grayscale
        processed_observation = np.array(img)
        assert processed_observation.shape == (height, width)
        return processed_observation.astype('uint8')  # saves storage in experience memory

    def process_state_batch(self, batch):
        """
        Convert a batch of stored uint8 frames to float32 values in [0, 1].
        """
        processed_batch = batch.astype('float32') / 255.
        return processed_batch

    def process_reward(self, reward):
        """
        Clip the reward to the range [-1, 1].
        """
        return np.clip(reward, -1., 1.)

    def process_info(self, info):
        """
        Filter out non-scalar values from the info dictionary
        """
        scalar_info = {k: v for k, v in info.items() if np.isscalar(v)}
        return scalar_info

# learning rate schedule setup
# Exponentially decaying learning rate, configured entirely from DQN_CONFIG
lr_schedule = ExponentialDecay(
    DQN_CONFIG['initial_learning_rate'],
    DQN_CONFIG['decay_steps'],
    DQN_CONFIG['decay_rate'],
    staircase=DQN_CONFIG['staircase'],
)

# implementing dueling DQN Model architecture
def build_dueling_dqn_model(input_shape, nb_actions):
    """
    Build a convolutional network with separate value and advantage streams.

    Args:
        input_shape: per-frame shape; DQN_CONFIG['window_length'] is prepended to it
        nb_actions: number of outputs of the advantage stream (one per action)

    Returns:
        A Keras Model whose output is value + (advantages - mean of advantages).
    """
    frames = Input(shape=(DQN_CONFIG['window_length'],) + input_shape)
    # move the window axis last before the convolutions
    features = Permute((2, 3, 1))(frames)
    conv_specs = (
        (32, (8, 8), (4, 4)),
        (64, (4, 4), (2, 2)),
        (64, (3, 3), (1, 1)),
    )
    for filters, kernel_size, strides in conv_specs:
        features = Convolution2D(filters, kernel_size, strides=strides, activation='relu')(features)
    features = Flatten()(features)

    # value stream: a single scalar per state
    value_hidden = Dense(512, activation='relu')(features)
    state_value = Dense(1, activation='linear')(value_hidden)

    # advantage stream: one number per action
    advantage_hidden = Dense(512, activation='relu')(features)
    action_advantages = Dense(nb_actions, activation='linear')(advantage_hidden)

    def aggregate_streams(args):
        # value plus the advantages centred on their mean over the action axis
        value, advantages = args
        return value + (advantages - tf.reduce_mean(advantages, axis=1, keepdims=True))

    q_values = Lambda(aggregate_streams)([state_value, action_advantages])

    return Model(inputs=frames, outputs=q_values)

# build dueling DQN model
model = build_dueling_dqn_model(DQN_CONFIG['input_shape'], env.action_space.n)
# summary() prints the table itself and returns None; wrapping it in print() added a stray "None"
model.summary()

# DQN agent setup
# NOTE(review): `model` already combines its own value/advantage streams, and the agent is
#  also given enable_dueling_network=True. keras-rl is understood to rebuild the network
#  head when that flag is set, so the model summarised above may not be the one that is
#  trained and saved - confirm which of the two dueling mechanisms is intended.
memory = SequentialMemory(limit=DQN_CONFIG['memory_limit'], window_length=DQN_CONFIG['window_length'])
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=DQN_CONFIG['eps_max'], value_min=DQN_CONFIG['eps_min'], value_test=DQN_CONFIG['eps_test'], nb_steps=DQN_CONFIG['eps_nb_steps'])
dqn = DQNAgent(
    model=model,
    nb_actions=env.action_space.n,
    memory=memory,
    nb_steps_warmup=DQN_CONFIG['nb_steps_warmup'],
    # this section is titled "double dueling": request Double DQN explicitly
    # instead of relying on the library default
    enable_double_dqn=True,
    enable_dueling_network=DQN_CONFIG['enable_dueling_network'],
    dueling_type=DQN_CONFIG['dueling_type'],
    target_model_update=DQN_CONFIG['target_model_update'],
    policy=policy,
    processor=AtariProcessor(),
)
dqn.compile(Adam(learning_rate=lr_schedule), metrics=['mae'])




  deprecation(
  deprecation(


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 4, 84, 84)]  0           []                               
                                                                                                  
 permute (Permute)              (None, 84, 84, 4)    0           ['input_1[0][0]']                
                                                                                                  
 conv2d (Conv2D)                (None, 20, 20, 32)   8224        ['permute[0][0]']                
                                                                                                  
 conv2d_1 (Conv2D)              (None, 9, 9, 64)     32832       ['conv2d[0][0]']                 
                                                                                              

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


## Initial Training

In [None]:
# checkpoint indicator for saving weights during training
checkpoint_weights_filename = '/content/drive/MyDrive/dqn/BetaDDQN/dqn_weights_{step}.h5'
final_weights_filename = '/content/drive/MyDrive/dqn/BetaDDQN/dqn_final_weights.h5'

# create the output folder before training so that neither a checkpoint nor the
# final save can fail on a missing directory after a long run
os.makedirs(os.path.dirname(final_weights_filename), exist_ok=True)

callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=50000)]  # Save every 50,000 steps

# train
dqn.fit(env, nb_steps=DQN_CONFIG['training_steps'], callbacks=callbacks, verbose=2)

# save final weights
dqn.save_weights(final_weights_filename, overwrite=True)

## Evaluate Agent

In [None]:
from gym.wrappers import RecordVideo

def evaluate_agent(env, dqn_agent, nb_episodes, video_folder):
    """
    test the DQN agent for a specified number of episodes and record videos

    Args:
        env: environment to evaluate on; it is wrapped in RecordVideo here
        dqn_agent: agent exposing test(env, nb_episodes=..., visualize=...)
        nb_episodes: number of episodes to play
        video_folder: folder handed to RecordVideo for the recordings
    """
    # wrap the environment to record videos (the trigger returns True for every episode)
    env = RecordVideo(env, video_folder, episode_trigger=lambda episode: True)

    try:
        dqn_agent.test(env, nb_episodes=nb_episodes, visualize=False)
    finally:
        # close the wrapper even when testing raises
        env.close()

# load weights for the agent to utilize (the checkpoint written at step 150000)
weights_file = '/content/drive/MyDrive/dqn/BetaDDQN/dqn_weights_150000.h5'
dqn.load_weights(weights_file)

# test the agent
# NOTE(review): the recorded output shows reward 0.000 after 27000 steps in both episodes -
#  check the evaluation videos to see what the agent is doing
evaluate_agent(env, dqn, nb_episodes=2, video_folder='/content/drive/MyDrive/dqn/BetaDDQN/dqn_evaluation_videos')

print("Evaluation completed.")


  deprecation(
  logger.warn(
  logger.deprecation(


Testing for 2 episodes ...


See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


Episode 1: reward: 0.000, steps: 27000


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


Episode 2: reward: 0.000, steps: 27000
Evaluation completed.


In [None]:
from IPython.display import HTML
from base64 import b64encode

def play_video(video_path):
    """
    workaround to display a video in colab

    The file is read fully, base64-encoded and embedded in a <video> tag as a data URL.
    (Same helper as the play_video defined earlier in this notebook.)

    Args:
        video_path: Path to the video file.

    Returns:
        The HTML object wrapping the <video> markup.
    """
    # the context manager closes the file handle even if reading fails
    with open(video_path, 'rb') as video_file:
        mp4 = video_file.read()
    data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
    return HTML("""
    <video width=400 controls>
        <source src="%s" type="video/mp4">
    </video>
    """ % data_url)

# play one recorded episode (episode 74, per the file name) from a Drive folder of training videos
video_path = '/content/drive/MyDrive/dqn/BetaDDQN/training_videos (continued training of 150k step model)/rl-video-episode-74.mp4'
play_video(video_path)


In [None]:
import os
from IPython.display import HTML, display
from base64 import b64encode

def _natural_sort_key(name):
    """Split a file name into text and integer parts so 'episode-2' sorts before 'episode-10'."""
    import re  # local import: only this helper needs it

    # re.split with a capturing group alternates text (even index) and digit runs (odd index)
    return [int(part) if index % 2 else part
            for index, part in enumerate(re.split(r'(\d+)', name))]


def play_videos_in_sequence(folder_path, speed=1.0):
    """
    play videos in sequence from a specified folder in google drive

    Every .mp4 file in the folder is embedded as its own <video> element, in
    natural (numeric-aware) file-name order.

    Args:
        folder_path: Path to the folder containing video files.
        speed: Playback speed (1.0 is normal speed, >1.0 is faster, <1.0 is slower).
    """
    # float() both validates the value and keeps arbitrary text out of the markup
    playback_rate = float(speed)

    # list all video files in the folder
    video_files = sorted(
        (name for name in os.listdir(folder_path) if name.endswith('.mp4')),
        key=_natural_sort_key,
    )

    # display each video
    for video_file in video_files:
        video_path = os.path.join(folder_path, video_file)
        # the context manager closes each file as soon as it has been read
        with open(video_path, 'rb') as video:
            mp4 = video.read()
        data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
        # playbackRate is a property of the media element rather than an HTML attribute,
        # so it is applied from an event handler once the metadata has loaded
        html = HTML("""
        <video width=400 controls autoplay loop onloadedmetadata="this.playbackRate=%s">
            <source src="%s" type="video/mp4">
        </video>
        """ % (playback_rate, data_url))
        display(html)

# show every .mp4 in the folder that the evaluate_agent call above records into
video_folder = '/content/drive/MyDrive/dqn/BetaDDQN/dqn_evaluation_videos/'
play_videos_in_sequence(video_folder, speed=1.5)  # can adjust playback speed as needed


## Code for Training Continuation <br>
The purpose of the code below is to load the weights and memory files from the training in order to continue training.<br>
The code below was crafted to work with the full environment set-up section. This means that some things, like the policy and other config values, are simply overwritten here before recompiling the dqn.

In [None]:
# NOTE(review): the account/repository in this URL differ from the Colab badge link at the
#  top of the notebook, and the path is not a raw-file URL - confirm the real location
#  of policy.h5 before relying on this download
!wget https://github.com/spindoken/atlasschool-machine_learning/reinforcement_learning/Deep_Q_Learning/policy.h5 -O policy.h5

In [None]:
from gym.wrappers import RecordVideo, TimeLimit

# Function to wrap the environment
def wrap_env_for_recording(env, video_folder, max_video_length):
    """
    Cap the episode length at `max_video_length` steps, then record every episode
    into `video_folder`; returns the wrapped environment.
    """
    time_limited_env = TimeLimit(env, max_episode_steps=max_video_length)
    # the trigger answers True for every episode, so each one is recorded
    return RecordVideo(time_limited_env, video_folder, episode_trigger=lambda episode: True)

# Wrap the environment
# NOTE: this rebinds `env`, so re-running the cell wraps the already-wrapped env again
video_folder = '/content/drive/MyDrive/dqn/BetaDDQN/300k_play_eps.2start'
max_video_length = 1500  # Set the maximum length of video recording (in steps)
env = wrap_env_for_recording(env, video_folder, max_video_length)

  deprecation(


In [None]:
import os
import pickle

# save and load replay memory (useful for retraining)
def save_replay_memory(dqn_agent, filename):
    """Pickle the agent's `memory` attribute into `filename` (overwriting it)."""
    with open(filename, 'wb') as memory_handle:
        pickle.dump(dqn_agent.memory, memory_handle)

def load_replay_memory(dqn_agent, filename):
    """Restore a pickled replay memory and attach it to the agent.

    Args:
        dqn_agent: object whose `memory` attribute is replaced by the loaded buffer.
        filename: path of a pickle file, e.g. one written by save_replay_memory.

    Note:
        Unpickling can execute arbitrary code, so only load files you created yourself.
    """
    with open(filename, mode='rb') as source:
        restored_memory = pickle.Unpickler(source).load()
        dqn_agent.memory = restored_memory

# --- Continuation settings ---
nb_steps_warmup = 5000  # didn't have replay memory saved before 200k, so have to warm up a bit (around 5% of total is good)
additional_training_steps = 100000  # total training steps to continue training for (including warmup)

# Adjust the epsilon parameters for continued training (assuming linear decay in epsilon-greedy strategy)
epsilon_max_continued = 0.2  # New starting epsilon (ideally the value epsilon had reached at the end of the previous session)
epsilon_min_continued = 0.1
epsilon_test_continued = 0.05  # Test epsilon value

# Set up the policy with the new epsilon values; epsilon is annealed from max to min
# over the whole continuation run (nb_steps=additional_training_steps)
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=epsilon_max_continued, value_min=epsilon_min_continued, value_test=epsilon_test_continued, nb_steps=additional_training_steps)

# Reinitialize the DQN agent with warmup steps (model, memory and DQN_CONFIG come from the set-up section)
dqn = DQNAgent(model=model, nb_actions=env.action_space.n, memory=memory, nb_steps_warmup=nb_steps_warmup,
               enable_dueling_network=DQN_CONFIG['enable_dueling_network'], dueling_type=DQN_CONFIG['dueling_type'],
               target_model_update=DQN_CONFIG['target_model_update'], policy=policy, processor=AtariProcessor())

# Recompile the agent (done before loading the weights below)
dqn.compile(Adam(learning_rate=lr_schedule), metrics=['mae'])

# Checkpoint files written by the previous training session
memory_file = '/content/drive/MyDrive/dqn/BetaDDQN/dqn_memory(3rd_run).pkl'
weights_file = '/content/drive/MyDrive/dqn/BetaDDQN/dqn_300k(3rd_run).h5'

# Load weights and replay memory if they exist.
# Each file is loaded exactly once, and only behind its existence check: an unconditional
# load_weights() ahead of the check would crash on a missing file and make the check pointless.
if os.path.exists(weights_file):
    dqn.load_weights(weights_file)
else:
    # Best effort: keep going with whatever weights `model` currently holds, but say so loudly.
    print(f'WARNING: weights file not found, training continues from the current model weights: {weights_file}')
if os.path.exists(memory_file):
    load_replay_memory(dqn, memory_file)
else:
    print(f'WARNING: replay memory file not found, training continues with the current memory: {memory_file}')

# Continue training
dqn.fit(env, nb_steps=additional_training_steps, verbose=2)

# Save weights and replay memory after training.
# Remember to back up (and/or rename) this output weights file before continuing another
# training session or it will be OVERWRITTEN!
new_weights_file = '/content/drive/MyDrive/dqn/BetaDDQN/dqn_new_weights.h5'
dqn.save_weights(new_weights_file, overwrite=True)
# The replay memory is written back to memory_file, replacing the snapshot that was loaded above.
save_replay_memory(dqn, memory_file)


Training for 100000 steps ...


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.warn(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  updates=self.state_updates,
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


   728/100000: episode: 1, duration: 10.746s, episode steps: 728, steps per second:  68, episode reward: 14.000, mean reward:  0.019 [ 0.000,  1.000], mean action: 1.889 [0.000, 3.000],  loss: --, mae: --, mean_q: --, mean_eps: --


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


  1346/100000: episode: 2, duration: 6.800s, episode steps: 618, steps per second:  91, episode reward: 13.000, mean reward:  0.021 [ 0.000,  1.000], mean action: 2.078 [0.000, 3.000],  loss: --, mae: --, mean_q: --, mean_eps: --


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


  2070/100000: episode: 3, duration: 9.503s, episode steps: 724, steps per second:  76, episode reward: 16.000, mean reward:  0.022 [ 0.000,  1.000], mean action: 1.970 [0.000, 3.000],  loss: --, mae: --, mean_q: --, mean_eps: --


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


  2564/100000: episode: 4, duration: 5.612s, episode steps: 494, steps per second:  88, episode reward:  7.000, mean reward:  0.014 [ 0.000,  1.000], mean action: 2.152 [0.000, 3.000],  loss: --, mae: --, mean_q: --, mean_eps: --


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


  3042/100000: episode: 5, duration: 7.328s, episode steps: 478, steps per second:  65, episode reward:  8.000, mean reward:  0.017 [ 0.000,  1.000], mean action: 2.038 [0.000, 3.000],  loss: --, mae: --, mean_q: --, mean_eps: --


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


  3481/100000: episode: 6, duration: 4.759s, episode steps: 439, steps per second:  92, episode reward:  5.000, mean reward:  0.011 [ 0.000,  1.000], mean action: 2.262 [0.000, 3.000],  loss: --, mae: --, mean_q: --, mean_eps: --


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


  4188/100000: episode: 7, duration: 9.487s, episode steps: 707, steps per second:  75, episode reward: 14.000, mean reward:  0.020 [ 0.000,  1.000], mean action: 1.885 [0.000, 3.000],  loss: --, mae: --, mean_q: --, mean_eps: --


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


  4581/100000: episode: 8, duration: 4.883s, episode steps: 393, steps per second:  80, episode reward:  6.000, mean reward:  0.015 [ 0.000,  1.000], mean action: 1.911 [0.000, 3.000],  loss: --, mae: --, mean_q: --, mean_eps: --


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  updates=self.state_updates,


  5093/100000: episode: 9, duration: 28.077s, episode steps: 512, steps per second:  18, episode reward:  7.000, mean reward:  0.014 [ 0.000,  1.000], mean action: 2.033 [0.000, 3.000],  loss: 0.009379, mae: 0.418895, mean_q: 0.569104, mean_eps: 0.194953


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


  5663/100000: episode: 10, duration: 133.660s, episode steps: 570, steps per second:   4, episode reward: 10.000, mean reward:  0.018 [ 0.000,  1.000], mean action: 1.926 [0.000, 3.000],  loss: 0.004011, mae: 0.424337, mean_q: 0.580501, mean_eps: 0.194623


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


  6251/100000: episode: 11, duration: 134.980s, episode steps: 588, steps per second:   4, episode reward: 13.000, mean reward:  0.022 [ 0.000,  1.000], mean action: 1.813 [0.000, 3.000],  loss: 0.003115, mae: 0.418181, mean_q: 0.575242, mean_eps: 0.194044


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


  6651/100000: episode: 12, duration: 97.328s, episode steps: 400, steps per second:   4, episode reward:  5.000, mean reward:  0.013 [ 0.000,  1.000], mean action: 2.000 [0.000, 3.000],  loss: 0.002640, mae: 0.414959, mean_q: 0.571806, mean_eps: 0.193550


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


  7047/100000: episode: 13, duration: 97.410s, episode steps: 396, steps per second:   4, episode reward:  6.000, mean reward:  0.015 [ 0.000,  1.000], mean action: 1.977 [0.000, 3.000],  loss: 0.002605, mae: 0.415605, mean_q: 0.571546, mean_eps: 0.193152


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


  7609/100000: episode: 14, duration: 133.517s, episode steps: 562, steps per second:   4, episode reward: 11.000, mean reward:  0.020 [ 0.000,  1.000], mean action: 2.034 [0.000, 3.000],  loss: 0.002699, mae: 0.414358, mean_q: 0.571194, mean_eps: 0.192673


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


  8256/100000: episode: 15, duration: 150.750s, episode steps: 647, steps per second:   4, episode reward: 12.000, mean reward:  0.019 [ 0.000,  1.000], mean action: 1.773 [0.000, 3.000],  loss: 0.002651, mae: 0.421371, mean_q: 0.580629, mean_eps: 0.192068


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


  8724/100000: episode: 16, duration: 104.271s, episode steps: 468, steps per second:   4, episode reward:  9.000, mean reward:  0.019 [ 0.000,  1.000], mean action: 2.124 [0.000, 3.000],  loss: 0.002518, mae: 0.420155, mean_q: 0.580454, mean_eps: 0.191511


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


  9269/100000: episode: 17, duration: 122.638s, episode steps: 545, steps per second:   4, episode reward: 10.000, mean reward:  0.018 [ 0.000,  1.000], mean action: 1.897 [0.000, 3.000],  loss: 0.002492, mae: 0.424940, mean_q: 0.585524, mean_eps: 0.191004


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 10007/100000: episode: 18, duration: 166.902s, episode steps: 738, steps per second:   4, episode reward: 16.000, mean reward:  0.022 [ 0.000,  1.000], mean action: 1.717 [0.000, 3.000],  loss: 0.002426, mae: 0.416226, mean_q: 0.573266, mean_eps: 0.190363


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 10565/100000: episode: 19, duration: 125.736s, episode steps: 558, steps per second:   4, episode reward:  8.000, mean reward:  0.014 [ 0.000,  1.000], mean action: 1.900 [0.000, 3.000],  loss: 0.004178, mae: 0.446971, mean_q: 0.609804, mean_eps: 0.189715


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 11098/100000: episode: 20, duration: 125.370s, episode steps: 533, steps per second:   4, episode reward:  8.000, mean reward:  0.015 [ 0.000,  1.000], mean action: 1.612 [0.000, 3.000],  loss: 0.002696, mae: 0.445126, mean_q: 0.610571, mean_eps: 0.189169


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 11663/100000: episode: 21, duration: 137.418s, episode steps: 565, steps per second:   4, episode reward: 12.000, mean reward:  0.021 [ 0.000,  1.000], mean action: 1.943 [0.000, 3.000],  loss: 0.002655, mae: 0.445782, mean_q: 0.611824, mean_eps: 0.188620


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 12181/100000: episode: 22, duration: 120.880s, episode steps: 518, steps per second:   4, episode reward: 12.000, mean reward:  0.023 [ 0.000,  1.000], mean action: 1.817 [0.000, 3.000],  loss: 0.002326, mae: 0.447805, mean_q: 0.615568, mean_eps: 0.188079


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 12741/100000: episode: 23, duration: 127.822s, episode steps: 560, steps per second:   4, episode reward: 12.000, mean reward:  0.021 [ 0.000,  1.000], mean action: 1.809 [0.000, 3.000],  loss: 0.002403, mae: 0.441664, mean_q: 0.607093, mean_eps: 0.187539


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 13453/100000: episode: 24, duration: 162.783s, episode steps: 712, steps per second:   4, episode reward: 15.000, mean reward:  0.021 [ 0.000,  1.000], mean action: 1.937 [0.000, 3.000],  loss: 0.002187, mae: 0.444274, mean_q: 0.610418, mean_eps: 0.186904


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 14069/100000: episode: 25, duration: 140.034s, episode steps: 616, steps per second:   4, episode reward: 13.000, mean reward:  0.021 [ 0.000,  1.000], mean action: 1.779 [0.000, 3.000],  loss: 0.002347, mae: 0.442982, mean_q: 0.608110, mean_eps: 0.186240


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 14804/100000: episode: 26, duration: 168.526s, episode steps: 735, steps per second:   4, episode reward: 14.000, mean reward:  0.019 [ 0.000,  1.000], mean action: 1.793 [0.000, 3.000],  loss: 0.002186, mae: 0.444948, mean_q: 0.612186, mean_eps: 0.185564


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 15523/100000: episode: 27, duration: 167.553s, episode steps: 719, steps per second:   4, episode reward: 14.000, mean reward:  0.019 [ 0.000,  1.000], mean action: 1.894 [0.000, 3.000],  loss: 0.002153, mae: 0.445041, mean_q: 0.611979, mean_eps: 0.184837


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 16267/100000: episode: 28, duration: 172.704s, episode steps: 744, steps per second:   4, episode reward: 15.000, mean reward:  0.020 [ 0.000,  1.000], mean action: 1.870 [0.000, 3.000],  loss: 0.002021, mae: 0.441513, mean_q: 0.608774, mean_eps: 0.184106


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 16715/100000: episode: 29, duration: 103.895s, episode steps: 448, steps per second:   4, episode reward:  7.000, mean reward:  0.016 [ 0.000,  1.000], mean action: 1.958 [0.000, 3.000],  loss: 0.002140, mae: 0.446099, mean_q: 0.613811, mean_eps: 0.183510


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 17536/100000: episode: 30, duration: 189.972s, episode steps: 821, steps per second:   4, episode reward: 17.000, mean reward:  0.021 [ 0.000,  1.000], mean action: 1.853 [0.000, 3.000],  loss: 0.001938, mae: 0.443658, mean_q: 0.610744, mean_eps: 0.182875


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 18100/100000: episode: 31, duration: 129.027s, episode steps: 564, steps per second:   4, episode reward:  9.000, mean reward:  0.016 [ 0.000,  1.000], mean action: 1.959 [0.000, 3.000],  loss: 0.002162, mae: 0.449044, mean_q: 0.617332, mean_eps: 0.182182


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 18791/100000: episode: 32, duration: 162.047s, episode steps: 691, steps per second:   4, episode reward: 13.000, mean reward:  0.019 [ 0.000,  1.000], mean action: 1.909 [0.000, 3.000],  loss: 0.002144, mae: 0.445980, mean_q: 0.612778, mean_eps: 0.181555


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 19465/100000: episode: 33, duration: 148.925s, episode steps: 674, steps per second:   5, episode reward: 15.000, mean reward:  0.022 [ 0.000,  1.000], mean action: 1.843 [0.000, 3.000],  loss: 0.001901, mae: 0.443156, mean_q: 0.608647, mean_eps: 0.180873


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 20069/100000: episode: 34, duration: 133.415s, episode steps: 604, steps per second:   5, episode reward: 11.000, mean reward:  0.018 [ 0.000,  1.000], mean action: 1.949 [0.000, 3.000],  loss: 0.002674, mae: 0.447794, mean_q: 0.615041, mean_eps: 0.180234


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 20636/100000: episode: 35, duration: 122.873s, episode steps: 567, steps per second:   5, episode reward: 11.000, mean reward:  0.019 [ 0.000,  1.000], mean action: 2.009 [0.000, 3.000],  loss: 0.003270, mae: 0.471692, mean_q: 0.644214, mean_eps: 0.179648


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 21331/100000: episode: 36, duration: 154.285s, episode steps: 695, steps per second:   5, episode reward: 14.000, mean reward:  0.020 [ 0.000,  1.000], mean action: 2.058 [0.000, 3.000],  loss: 0.002377, mae: 0.465881, mean_q: 0.636473, mean_eps: 0.179017


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 22111/100000: episode: 37, duration: 174.402s, episode steps: 780, steps per second:   4, episode reward: 17.000, mean reward:  0.022 [ 0.000,  1.000], mean action: 1.897 [0.000, 3.000],  loss: 0.002198, mae: 0.466290, mean_q: 0.637573, mean_eps: 0.178280


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 22773/100000: episode: 38, duration: 157.490s, episode steps: 662, steps per second:   4, episode reward: 10.000, mean reward:  0.015 [ 0.000,  1.000], mean action: 2.021 [0.000, 3.000],  loss: 0.002249, mae: 0.469042, mean_q: 0.641946, mean_eps: 0.177559


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 23398/100000: episode: 39, duration: 145.956s, episode steps: 625, steps per second:   4, episode reward:  9.000, mean reward:  0.014 [ 0.000,  1.000], mean action: 1.994 [0.000, 3.000],  loss: 0.002005, mae: 0.464169, mean_q: 0.636467, mean_eps: 0.176915


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 24024/100000: episode: 40, duration: 141.288s, episode steps: 626, steps per second:   4, episode reward: 13.000, mean reward:  0.021 [ 0.000,  1.000], mean action: 1.631 [0.000, 3.000],  loss: 0.001917, mae: 0.465948, mean_q: 0.638805, mean_eps: 0.176290


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 24678/100000: episode: 41, duration: 144.442s, episode steps: 654, steps per second:   5, episode reward: 13.000, mean reward:  0.020 [ 0.000,  1.000], mean action: 1.873 [0.000, 3.000],  loss: 0.002008, mae: 0.465425, mean_q: 0.637583, mean_eps: 0.175650


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 25225/100000: episode: 42, duration: 121.959s, episode steps: 547, steps per second:   4, episode reward:  9.000, mean reward:  0.016 [ 0.000,  1.000], mean action: 2.005 [0.000, 3.000],  loss: 0.001996, mae: 0.469058, mean_q: 0.642783, mean_eps: 0.175049


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 25844/100000: episode: 43, duration: 137.413s, episode steps: 619, steps per second:   5, episode reward: 12.000, mean reward:  0.019 [ 0.000,  1.000], mean action: 2.052 [0.000, 3.000],  loss: 0.001972, mae: 0.467595, mean_q: 0.642410, mean_eps: 0.174466


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 26549/100000: episode: 44, duration: 154.745s, episode steps: 705, steps per second:   5, episode reward: 15.000, mean reward:  0.021 [ 0.000,  1.000], mean action: 2.000 [0.000, 3.000],  loss: 0.001936, mae: 0.468170, mean_q: 0.642334, mean_eps: 0.173804


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 27190/100000: episode: 45, duration: 143.266s, episode steps: 641, steps per second:   4, episode reward: 12.000, mean reward:  0.019 [ 0.000,  1.000], mean action: 1.966 [0.000, 3.000],  loss: 0.001933, mae: 0.473524, mean_q: 0.649670, mean_eps: 0.173131


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 27852/100000: episode: 46, duration: 147.603s, episode steps: 662, steps per second:   4, episode reward: 11.000, mean reward:  0.017 [ 0.000,  1.000], mean action: 2.187 [0.000, 3.000],  loss: 0.001826, mae: 0.465632, mean_q: 0.639260, mean_eps: 0.172480


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 28372/100000: episode: 47, duration: 116.374s, episode steps: 520, steps per second:   4, episode reward:  8.000, mean reward:  0.015 [ 0.000,  1.000], mean action: 2.171 [0.000, 3.000],  loss: 0.001934, mae: 0.466539, mean_q: 0.640827, mean_eps: 0.171889


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 28976/100000: episode: 48, duration: 133.291s, episode steps: 604, steps per second:   5, episode reward: 11.000, mean reward:  0.018 [ 0.000,  1.000], mean action: 1.937 [0.000, 3.000],  loss: 0.001929, mae: 0.462287, mean_q: 0.634934, mean_eps: 0.171327


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 29496/100000: episode: 49, duration: 115.441s, episode steps: 520, steps per second:   5, episode reward:  9.000, mean reward:  0.017 [ 0.000,  1.000], mean action: 2.115 [0.000, 3.000],  loss: 0.001811, mae: 0.464807, mean_q: 0.638905, mean_eps: 0.170764


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 30349/100000: episode: 50, duration: 190.357s, episode steps: 853, steps per second:   4, episode reward: 20.000, mean reward:  0.023 [ 0.000,  1.000], mean action: 1.528 [0.000, 3.000],  loss: 0.004242, mae: 0.479993, mean_q: 0.658684, mean_eps: 0.170078


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 30657/100000: episode: 51, duration: 71.828s, episode steps: 308, steps per second:   4, episode reward:  5.000, mean reward:  0.016 [ 0.000,  1.000], mean action: 1.390 [0.000, 3.000],  loss: 0.006896, mae: 0.505714, mean_q: 0.701635, mean_eps: 0.169497


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 30997/100000: episode: 52, duration: 75.154s, episode steps: 340, steps per second:   5, episode reward:  6.000, mean reward:  0.018 [ 0.000,  1.000], mean action: 1.450 [0.000, 3.000],  loss: 0.007054, mae: 0.499913, mean_q: 0.699363, mean_eps: 0.169174


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 31276/100000: episode: 53, duration: 61.869s, episode steps: 279, steps per second:   5, episode reward:  4.000, mean reward:  0.014 [ 0.000,  1.000], mean action: 1.520 [0.000, 3.000],  loss: 0.007038, mae: 0.502574, mean_q: 0.706375, mean_eps: 0.168864


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 31505/100000: episode: 54, duration: 52.702s, episode steps: 229, steps per second:   4, episode reward:  3.000, mean reward:  0.013 [ 0.000,  1.000], mean action: 1.467 [0.000, 3.000],  loss: 0.007555, mae: 0.513682, mean_q: 0.723127, mean_eps: 0.168610


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 31909/100000: episode: 55, duration: 90.204s, episode steps: 404, steps per second:   4, episode reward:  7.000, mean reward:  0.017 [ 0.000,  1.000], mean action: 1.517 [0.000, 3.000],  loss: 0.006362, mae: 0.501048, mean_q: 0.709759, mean_eps: 0.168294


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 32415/100000: episode: 56, duration: 112.639s, episode steps: 506, steps per second:   4, episode reward: 10.000, mean reward:  0.020 [ 0.000,  1.000], mean action: 1.555 [0.000, 3.000],  loss: 0.006587, mae: 0.503859, mean_q: 0.714429, mean_eps: 0.167839


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 32951/100000: episode: 57, duration: 118.215s, episode steps: 536, steps per second:   5, episode reward: 11.000, mean reward:  0.021 [ 0.000,  1.000], mean action: 1.392 [0.000, 3.000],  loss: 0.006447, mae: 0.502670, mean_q: 0.713369, mean_eps: 0.167318


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 33103/100000: episode: 58, duration: 34.017s, episode steps: 152, steps per second:   4, episode reward:  1.000, mean reward:  0.007 [ 0.000,  1.000], mean action: 1.283 [0.000, 3.000],  loss: 0.006575, mae: 0.498473, mean_q: 0.709657, mean_eps: 0.166974


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 33692/100000: episode: 59, duration: 134.753s, episode steps: 589, steps per second:   4, episode reward: 11.000, mean reward:  0.019 [ 0.000,  1.000], mean action: 1.433 [0.000, 3.000],  loss: 0.006221, mae: 0.510690, mean_q: 0.725189, mean_eps: 0.166603


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 34115/100000: episode: 60, duration: 96.507s, episode steps: 423, steps per second:   4, episode reward:  9.000, mean reward:  0.021 [ 0.000,  1.000], mean action: 1.532 [0.000, 3.000],  loss: 0.006485, mae: 0.505676, mean_q: 0.719855, mean_eps: 0.166097


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 34467/100000: episode: 61, duration: 77.448s, episode steps: 352, steps per second:   5, episode reward:  6.000, mean reward:  0.017 [ 0.000,  1.000], mean action: 1.415 [0.000, 3.000],  loss: 0.006039, mae: 0.502780, mean_q: 0.716523, mean_eps: 0.165710


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 34936/100000: episode: 62, duration: 106.967s, episode steps: 469, steps per second:   4, episode reward:  8.000, mean reward:  0.017 [ 0.000,  1.000], mean action: 1.678 [0.000, 3.000],  loss: 0.006913, mae: 0.508384, mean_q: 0.725822, mean_eps: 0.165299


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 35358/100000: episode: 63, duration: 94.307s, episode steps: 422, steps per second:   4, episode reward:  7.000, mean reward:  0.017 [ 0.000,  1.000], mean action: 1.637 [0.000, 3.000],  loss: 0.005873, mae: 0.502377, mean_q: 0.717804, mean_eps: 0.164854


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 36052/100000: episode: 64, duration: 155.895s, episode steps: 694, steps per second:   4, episode reward: 18.000, mean reward:  0.026 [ 0.000,  1.000], mean action: 1.782 [0.000, 3.000],  loss: 0.006175, mae: 0.507298, mean_q: 0.724941, mean_eps: 0.164296


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 36540/100000: episode: 65, duration: 108.643s, episode steps: 488, steps per second:   4, episode reward:  9.000, mean reward:  0.018 [ 0.000,  1.000], mean action: 1.535 [0.000, 3.000],  loss: 0.006002, mae: 0.503723, mean_q: 0.717617, mean_eps: 0.163705


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 37071/100000: episode: 66, duration: 118.015s, episode steps: 531, steps per second:   4, episode reward: 11.000, mean reward:  0.021 [ 0.000,  1.000], mean action: 1.482 [0.000, 3.000],  loss: 0.005351, mae: 0.502847, mean_q: 0.714997, mean_eps: 0.163195


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 37669/100000: episode: 67, duration: 132.942s, episode steps: 598, steps per second:   4, episode reward: 13.000, mean reward:  0.022 [ 0.000,  1.000], mean action: 1.732 [0.000, 3.000],  loss: 0.006006, mae: 0.505548, mean_q: 0.722193, mean_eps: 0.162631


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 38169/100000: episode: 68, duration: 118.541s, episode steps: 500, steps per second:   4, episode reward: 11.000, mean reward:  0.022 [ 0.000,  1.000], mean action: 1.838 [0.000, 3.000],  loss: 0.005163, mae: 0.504000, mean_q: 0.719172, mean_eps: 0.162082


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 38738/100000: episode: 69, duration: 137.349s, episode steps: 569, steps per second:   4, episode reward: 12.000, mean reward:  0.021 [ 0.000,  1.000], mean action: 1.717 [0.000, 3.000],  loss: 0.005208, mae: 0.505850, mean_q: 0.722039, mean_eps: 0.161547


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 39301/100000: episode: 70, duration: 131.153s, episode steps: 563, steps per second:   4, episode reward: 12.000, mean reward:  0.021 [ 0.000,  1.000], mean action: 1.799 [0.000, 3.000],  loss: 0.005027, mae: 0.503429, mean_q: 0.717787, mean_eps: 0.160981


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 39694/100000: episode: 71, duration: 87.453s, episode steps: 393, steps per second:   4, episode reward:  6.000, mean reward:  0.015 [ 0.000,  1.000], mean action: 1.618 [0.000, 3.000],  loss: 0.005496, mae: 0.506486, mean_q: 0.722067, mean_eps: 0.160503


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 40081/100000: episode: 72, duration: 86.028s, episode steps: 387, steps per second:   4, episode reward:  8.000, mean reward:  0.021 [ 0.000,  1.000], mean action: 1.313 [0.000, 3.000],  loss: 0.006227, mae: 0.515471, mean_q: 0.734251, mean_eps: 0.160113


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 40565/100000: episode: 73, duration: 107.759s, episode steps: 484, steps per second:   4, episode reward:  9.000, mean reward:  0.019 [ 0.000,  1.000], mean action: 1.583 [0.000, 3.000],  loss: 0.006358, mae: 0.564113, mean_q: 0.791314, mean_eps: 0.159677


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 41092/100000: episode: 74, duration: 116.775s, episode steps: 527, steps per second:   5, episode reward: 11.000, mean reward:  0.021 [ 0.000,  1.000], mean action: 1.759 [0.000, 3.000],  loss: 0.005921, mae: 0.558688, mean_q: 0.789523, mean_eps: 0.159172


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 41627/100000: episode: 75, duration: 120.540s, episode steps: 535, steps per second:   4, episode reward: 11.000, mean reward:  0.021 [ 0.000,  1.000], mean action: 1.703 [0.000, 3.000],  loss: 0.005490, mae: 0.558572, mean_q: 0.790453, mean_eps: 0.158641


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 41904/100000: episode: 76, duration: 62.737s, episode steps: 277, steps per second:   4, episode reward:  4.000, mean reward:  0.014 [ 0.000,  1.000], mean action: 1.585 [0.000, 3.000],  loss: 0.005908, mae: 0.561219, mean_q: 0.795576, mean_eps: 0.158235


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 42259/100000: episode: 77, duration: 79.764s, episode steps: 355, steps per second:   4, episode reward:  6.000, mean reward:  0.017 [ 0.000,  1.000], mean action: 1.549 [0.000, 3.000],  loss: 0.005190, mae: 0.552264, mean_q: 0.783839, mean_eps: 0.157919


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 42536/100000: episode: 78, duration: 62.412s, episode steps: 277, steps per second:   4, episode reward:  4.000, mean reward:  0.014 [ 0.000,  1.000], mean action: 1.599 [0.000, 3.000],  loss: 0.005128, mae: 0.557009, mean_q: 0.788761, mean_eps: 0.157603


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 42979/100000: episode: 79, duration: 100.038s, episode steps: 443, steps per second:   4, episode reward:  8.000, mean reward:  0.018 [ 0.000,  1.000], mean action: 1.634 [0.000, 3.000],  loss: 0.005440, mae: 0.555810, mean_q: 0.786271, mean_eps: 0.157243


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 43855/100000: episode: 80, duration: 193.583s, episode steps: 876, steps per second:   5, episode reward: 21.000, mean reward:  0.024 [ 0.000,  1.000], mean action: 1.517 [0.000, 3.000],  loss: 0.005514, mae: 0.555272, mean_q: 0.787801, mean_eps: 0.156584


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 44209/100000: episode: 81, duration: 81.689s, episode steps: 354, steps per second:   4, episode reward:  6.000, mean reward:  0.017 [ 0.000,  1.000], mean action: 1.520 [0.000, 3.000],  loss: 0.004747, mae: 0.558554, mean_q: 0.793898, mean_eps: 0.155969


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 44749/100000: episode: 82, duration: 120.089s, episode steps: 540, steps per second:   4, episode reward: 11.000, mean reward:  0.020 [ 0.000,  1.000], mean action: 1.559 [0.000, 3.000],  loss: 0.005308, mae: 0.552183, mean_q: 0.784360, mean_eps: 0.155522


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 45451/100000: episode: 83, duration: 155.518s, episode steps: 702, steps per second:   5, episode reward: 15.000, mean reward:  0.021 [ 0.000,  1.000], mean action: 1.701 [0.000, 3.000],  loss: 0.004965, mae: 0.553144, mean_q: 0.785638, mean_eps: 0.154901


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 46094/100000: episode: 84, duration: 143.654s, episode steps: 643, steps per second:   4, episode reward: 13.000, mean reward:  0.020 [ 0.000,  1.000], mean action: 1.708 [0.000, 3.000],  loss: 0.004878, mae: 0.556841, mean_q: 0.788612, mean_eps: 0.154228


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 46574/100000: episode: 85, duration: 105.983s, episode steps: 480, steps per second:   5, episode reward:  9.000, mean reward:  0.019 [ 0.000,  1.000], mean action: 1.627 [0.000, 3.000],  loss: 0.005202, mae: 0.557442, mean_q: 0.792328, mean_eps: 0.153667


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 46930/100000: episode: 86, duration: 78.415s, episode steps: 356, steps per second:   5, episode reward:  6.000, mean reward:  0.017 [ 0.000,  1.000], mean action: 1.654 [0.000, 3.000],  loss: 0.004995, mae: 0.557314, mean_q: 0.791953, mean_eps: 0.153249


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 47303/100000: episode: 87, duration: 83.426s, episode steps: 373, steps per second:   4, episode reward:  6.000, mean reward:  0.016 [ 0.000,  1.000], mean action: 1.678 [0.000, 3.000],  loss: 0.004952, mae: 0.554518, mean_q: 0.788055, mean_eps: 0.152884


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 47753/100000: episode: 88, duration: 101.592s, episode steps: 450, steps per second:   4, episode reward:  9.000, mean reward:  0.020 [ 0.000,  1.000], mean action: 1.771 [0.000, 3.000],  loss: 0.004451, mae: 0.554454, mean_q: 0.788457, mean_eps: 0.152473


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 48151/100000: episode: 89, duration: 89.449s, episode steps: 398, steps per second:   4, episode reward:  6.000, mean reward:  0.015 [ 0.000,  1.000], mean action: 1.663 [0.000, 3.000],  loss: 0.004904, mae: 0.554509, mean_q: 0.787415, mean_eps: 0.152049


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 48781/100000: episode: 90, duration: 141.147s, episode steps: 630, steps per second:   4, episode reward: 14.000, mean reward:  0.022 [ 0.000,  1.000], mean action: 1.819 [0.000, 3.000],  loss: 0.004993, mae: 0.555365, mean_q: 0.790261, mean_eps: 0.151534


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 49342/100000: episode: 91, duration: 125.771s, episode steps: 561, steps per second:   4, episode reward: 12.000, mean reward:  0.021 [ 0.000,  1.000], mean action: 1.380 [0.000, 3.000],  loss: 0.005115, mae: 0.557178, mean_q: 0.794069, mean_eps: 0.150939


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 49720/100000: episode: 92, duration: 85.453s, episode steps: 378, steps per second:   4, episode reward:  7.000, mean reward:  0.019 [ 0.000,  1.000], mean action: 1.733 [0.000, 3.000],  loss: 0.004601, mae: 0.556822, mean_q: 0.791768, mean_eps: 0.150470


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 50179/100000: episode: 93, duration: 102.866s, episode steps: 459, steps per second:   4, episode reward:  8.000, mean reward:  0.017 [ 0.000,  1.000], mean action: 1.634 [0.000, 3.000],  loss: 0.005484, mae: 0.577777, mean_q: 0.812788, mean_eps: 0.150051


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 50704/100000: episode: 94, duration: 117.027s, episode steps: 525, steps per second:   4, episode reward: 10.000, mean reward:  0.019 [ 0.000,  1.000], mean action: 1.680 [0.000, 3.000],  loss: 0.004778, mae: 0.610138, mean_q: 0.846923, mean_eps: 0.149559


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 50976/100000: episode: 95, duration: 61.551s, episode steps: 272, steps per second:   4, episode reward:  5.000, mean reward:  0.018 [ 0.000,  1.000], mean action: 1.757 [0.000, 3.000],  loss: 0.004749, mae: 0.606643, mean_q: 0.839826, mean_eps: 0.149161


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 51635/100000: episode: 96, duration: 146.581s, episode steps: 659, steps per second:   4, episode reward: 14.000, mean reward:  0.021 [ 0.000,  1.000], mean action: 1.599 [0.000, 3.000],  loss: 0.004487, mae: 0.608318, mean_q: 0.844313, mean_eps: 0.148695


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 52312/100000: episode: 97, duration: 150.388s, episode steps: 677, steps per second:   5, episode reward: 14.000, mean reward:  0.021 [ 0.000,  1.000], mean action: 1.919 [0.000, 3.000],  loss: 0.004233, mae: 0.607508, mean_q: 0.841767, mean_eps: 0.148027


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 52972/100000: episode: 98, duration: 148.959s, episode steps: 660, steps per second:   4, episode reward: 14.000, mean reward:  0.021 [ 0.000,  1.000], mean action: 1.920 [0.000, 3.000],  loss: 0.003859, mae: 0.607311, mean_q: 0.841027, mean_eps: 0.147359


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 53444/100000: episode: 99, duration: 109.853s, episode steps: 472, steps per second:   4, episode reward:  9.000, mean reward:  0.019 [ 0.000,  1.000], mean action: 1.860 [0.000, 3.000],  loss: 0.003948, mae: 0.601405, mean_q: 0.830526, mean_eps: 0.146793


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 53902/100000: episode: 100, duration: 104.954s, episode steps: 458, steps per second:   4, episode reward:  8.000, mean reward:  0.017 [ 0.000,  1.000], mean action: 1.803 [0.000, 3.000],  loss: 0.003920, mae: 0.605283, mean_q: 0.837356, mean_eps: 0.146328


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 54518/100000: episode: 101, duration: 147.290s, episode steps: 616, steps per second:   4, episode reward: 14.000, mean reward:  0.023 [ 0.000,  1.000], mean action: 1.779 [0.000, 3.000],  loss: 0.003935, mae: 0.603080, mean_q: 0.834526, mean_eps: 0.145791


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 54821/100000: episode: 102, duration: 73.849s, episode steps: 303, steps per second:   4, episode reward:  6.000, mean reward:  0.020 [ 0.000,  1.000], mean action: 1.815 [0.000, 3.000],  loss: 0.003919, mae: 0.605932, mean_q: 0.837691, mean_eps: 0.145331


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 55446/100000: episode: 103, duration: 144.067s, episode steps: 625, steps per second:   4, episode reward: 15.000, mean reward:  0.024 [ 0.000,  1.000], mean action: 1.710 [0.000, 3.000],  loss: 0.003965, mae: 0.607943, mean_q: 0.839932, mean_eps: 0.144867


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 55962/100000: episode: 104, duration: 115.975s, episode steps: 516, steps per second:   4, episode reward: 10.000, mean reward:  0.019 [ 0.000,  1.000], mean action: 1.641 [0.000, 3.000],  loss: 0.003859, mae: 0.610339, mean_q: 0.842936, mean_eps: 0.144297


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 56369/100000: episode: 105, duration: 92.454s, episode steps: 407, steps per second:   4, episode reward:  7.000, mean reward:  0.017 [ 0.000,  1.000], mean action: 1.899 [0.000, 3.000],  loss: 0.003988, mae: 0.603973, mean_q: 0.836302, mean_eps: 0.143835


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 56972/100000: episode: 106, duration: 134.103s, episode steps: 603, steps per second:   4, episode reward: 13.000, mean reward:  0.022 [ 0.000,  1.000], mean action: 1.753 [0.000, 3.000],  loss: 0.003895, mae: 0.607213, mean_q: 0.839264, mean_eps: 0.143330


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 57499/100000: episode: 107, duration: 118.019s, episode steps: 527, steps per second:   4, episode reward: 12.000, mean reward:  0.023 [ 0.000,  1.000], mean action: 1.740 [0.000, 3.000],  loss: 0.004021, mae: 0.606411, mean_q: 0.839391, mean_eps: 0.142765


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 57727/100000: episode: 108, duration: 53.345s, episode steps: 228, steps per second:   4, episode reward:  3.000, mean reward:  0.013 [ 0.000,  1.000], mean action: 1.566 [0.000, 3.000],  loss: 0.003437, mae: 0.603117, mean_q: 0.835459, mean_eps: 0.142388


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 58336/100000: episode: 109, duration: 134.525s, episode steps: 609, steps per second:   5, episode reward: 14.000, mean reward:  0.023 [ 0.000,  1.000], mean action: 1.824 [0.000, 3.000],  loss: 0.003704, mae: 0.603887, mean_q: 0.835871, mean_eps: 0.141969


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 59016/100000: episode: 110, duration: 151.803s, episode steps: 680, steps per second:   4, episode reward: 16.000, mean reward:  0.024 [ 0.000,  1.000], mean action: 1.787 [0.000, 3.000],  loss: 0.003794, mae: 0.606760, mean_q: 0.838974, mean_eps: 0.141325


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 59624/100000: episode: 111, duration: 135.200s, episode steps: 608, steps per second:   4, episode reward: 14.000, mean reward:  0.023 [ 0.000,  1.000], mean action: 1.794 [0.000, 3.000],  loss: 0.003812, mae: 0.606516, mean_q: 0.839502, mean_eps: 0.140681


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 59981/100000: episode: 112, duration: 79.401s, episode steps: 357, steps per second:   4, episode reward:  5.000, mean reward:  0.014 [ 0.000,  1.000], mean action: 1.630 [0.000, 3.000],  loss: 0.003557, mae: 0.602083, mean_q: 0.833895, mean_eps: 0.140198


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 60521/100000: episode: 113, duration: 121.526s, episode steps: 540, steps per second:   4, episode reward: 11.000, mean reward:  0.020 [ 0.000,  1.000], mean action: 1.554 [0.000, 3.000],  loss: 0.004484, mae: 0.635779, mean_q: 0.871780, mean_eps: 0.139749


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 61249/100000: episode: 114, duration: 163.865s, episode steps: 728, steps per second:   4, episode reward: 17.000, mean reward:  0.023 [ 0.000,  1.000], mean action: 1.867 [0.000, 3.000],  loss: 0.003510, mae: 0.632872, mean_q: 0.866530, mean_eps: 0.139116


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 61929/100000: episode: 115, duration: 152.551s, episode steps: 680, steps per second:   4, episode reward: 15.000, mean reward:  0.022 [ 0.000,  1.000], mean action: 2.006 [0.000, 3.000],  loss: 0.003873, mae: 0.636528, mean_q: 0.870131, mean_eps: 0.138412


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 62459/100000: episode: 116, duration: 119.277s, episode steps: 530, steps per second:   4, episode reward: 11.000, mean reward:  0.021 [ 0.000,  1.000], mean action: 1.889 [0.000, 3.000],  loss: 0.003835, mae: 0.634815, mean_q: 0.867919, mean_eps: 0.137807


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 63014/100000: episode: 117, duration: 126.149s, episode steps: 555, steps per second:   4, episode reward: 11.000, mean reward:  0.020 [ 0.000,  1.000], mean action: 1.535 [0.000, 3.000],  loss: 0.003419, mae: 0.632811, mean_q: 0.866214, mean_eps: 0.137264


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 63794/100000: episode: 118, duration: 176.991s, episode steps: 780, steps per second:   4, episode reward: 15.000, mean reward:  0.019 [ 0.000,  1.000], mean action: 1.812 [0.000, 3.000],  loss: 0.003578, mae: 0.633519, mean_q: 0.866667, mean_eps: 0.136597


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 64302/100000: episode: 119, duration: 115.760s, episode steps: 508, steps per second:   4, episode reward: 10.000, mean reward:  0.020 [ 0.000,  1.000], mean action: 1.648 [0.000, 3.000],  loss: 0.003524, mae: 0.632538, mean_q: 0.865550, mean_eps: 0.135953


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 64934/100000: episode: 120, duration: 143.518s, episode steps: 632, steps per second:   4, episode reward: 15.000, mean reward:  0.024 [ 0.000,  1.000], mean action: 1.948 [0.000, 3.000],  loss: 0.003438, mae: 0.633404, mean_q: 0.867875, mean_eps: 0.135383


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 65672/100000: episode: 121, duration: 168.210s, episode steps: 738, steps per second:   4, episode reward: 18.000, mean reward:  0.024 [ 0.000,  1.000], mean action: 1.875 [0.000, 3.000],  loss: 0.003375, mae: 0.634806, mean_q: 0.868747, mean_eps: 0.134698


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 66145/100000: episode: 122, duration: 107.330s, episode steps: 473, steps per second:   4, episode reward:  9.000, mean reward:  0.019 [ 0.000,  1.000], mean action: 1.727 [0.000, 3.000],  loss: 0.003374, mae: 0.635783, mean_q: 0.871289, mean_eps: 0.134092


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 66780/100000: episode: 123, duration: 142.313s, episode steps: 635, steps per second:   4, episode reward: 14.000, mean reward:  0.022 [ 0.000,  1.000], mean action: 1.696 [0.000, 3.000],  loss: 0.003220, mae: 0.635717, mean_q: 0.869955, mean_eps: 0.133538


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 67215/100000: episode: 124, duration: 98.202s, episode steps: 435, steps per second:   4, episode reward:  8.000, mean reward:  0.018 [ 0.000,  1.000], mean action: 1.754 [0.000, 3.000],  loss: 0.003244, mae: 0.636356, mean_q: 0.871381, mean_eps: 0.133003


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 67763/100000: episode: 125, duration: 122.737s, episode steps: 548, steps per second:   4, episode reward: 12.000, mean reward:  0.022 [ 0.000,  1.000], mean action: 1.821 [0.000, 3.000],  loss: 0.003345, mae: 0.634682, mean_q: 0.868602, mean_eps: 0.132512


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 68348/100000: episode: 126, duration: 130.958s, episode steps: 585, steps per second:   4, episode reward: 12.000, mean reward:  0.021 [ 0.000,  1.000], mean action: 1.771 [0.000, 3.000],  loss: 0.003432, mae: 0.633609, mean_q: 0.868071, mean_eps: 0.131945


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


 69071/100000: episode: 127, duration: 162.618s, episode steps: 723, steps per second:   4, episode reward: 14.000, mean reward:  0.019 [ 0.000,  1.000], mean action: 1.860 [0.000, 3.000],  loss: 0.003233, mae: 0.634627, mean_q: 0.869280, mean_eps: 0.131291


  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  logger.deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(


In [None]:
# Save the agent's current weights to an .h5 file under the mounted Google Drive
# folder so they are kept outside the Colab runtime.
# NOTE(review): `dqn` is defined in an earlier cell (presumably the keras-rl2 agent
# trained above) and the BetaDDQN folder is assumed to already exist — confirm.
new_weights_file = '/content/drive/MyDrive/dqn/BetaDDQN/dqn_new_weights.h5'
# overwrite=True: presumably replaces an existing file at this path without prompting — verify.
dqn.save_weights(new_weights_file, overwrite=True)

### Play Videos

In [None]:
from IPython.display import HTML
from base64 import b64encode

def play_video(video_path):
    """
    workaround to display a video in colab

    The whole file is read into memory and inlined into the page as a base64
    ``data:`` URL, so the notebook output grows with the size of the video.

    Args:
        video_path: Path to the video file.

    Returns:
        An ``IPython.display.HTML`` object wrapping a ``<video>`` player. It is
        rendered when it is the last expression of a cell (or passed to
        ``display``).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(video_path, 'rb') as video_file:
        mp4 = video_file.read()
    data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
    return HTML("""
    <video width=400 controls>
        <source src="%s" type="video/mp4">
    </video>
    """ % data_url)

# Preview a single recording: the episode-0 video stored in the Drive training_videos folder.
video_path = '/content/drive/MyDrive/dqn/BetaDDQN/training_videos/rl-video-episode-0.mp4'
# Left as the last expression of the cell so the returned player is rendered as the cell output.
play_video(video_path)

In [None]:
import os
from IPython.display import HTML, display
from base64 import b64encode

def play_videos_in_sequence(folder_path, speed=1.0):
    """
    play videos in sequence from a specified folder in google drive

    Every ``.mp4`` file in ``folder_path`` is embedded as its own player, in
    natural (numeric-aware) filename order, so ``...-episode-8.mp4`` comes
    before ``...-episode-27.mp4``. Each player autoplays and loops; "in
    sequence" refers to the order in which the players are displayed.

    Args:
        folder_path: Path to the folder containing video files.
        speed: Playback speed (1.0 is normal speed, >1.0 is faster, <1.0 is slower).
            Must be greater than zero.

    Raises:
        ValueError: If ``speed`` is not greater than zero.
        OSError: If the folder cannot be listed or a video cannot be read.
    """
    import re  # local import: only needed for the natural-sort key below

    if speed <= 0:
        raise ValueError("speed must be a positive number, got %r" % (speed,))

    def _natural_sort_key(file_name):
        """Split a name into text/number parts so digit runs compare numerically."""
        # re.split with a capturing group alternates text, digits, text, ...
        # so every odd index is a digit run.
        parts = re.split(r'(\d+)', file_name)
        return [int(part) if index % 2 else part for index, part in enumerate(parts)]

    # list all video files in the folder, ordered by episode number rather
    # than lexicographically
    video_files = sorted(
        (file for file in os.listdir(folder_path) if file.endswith('.mp4')),
        key=_natural_sort_key,
    )

    # display each video
    for video_file in video_files:
        video_path = os.path.join(folder_path, video_file)
        # context manager closes each handle before the next file is opened
        with open(video_path, 'rb') as video_handle:
            mp4 = video_handle.read()
        data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
        # playbackRate is applied once metadata has loaded, because the
        # browser resets it to the default rate when a new resource is loaded
        html = HTML("""
        <video width=400 controls autoplay loop onloadedmetadata="this.playbackRate = %g;">
            <source src="%s" type="video/mp4">
        </video>
        """ % (speed, data_url))
        display(html)

# Embed every .mp4 recording found in the Drive training_videos folder.
video_folder = '/content/drive/MyDrive/dqn/BetaDDQN/training_videos'
# The requested speed is passed through to play_videos_in_sequence's `speed` argument.
play_videos_in_sequence(video_folder, speed=1.5)  # Adjust speed as needed