In [1]:
from sumTree import Memory
from DQN import DQNAgent
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
import datetime
import tensorflow as tf
import numpy as np
import gymnasium as gym

In [2]:
def preprocess_observation(obs):
    """Turn an environment frame into a flat, scaled feature vector.

    Args:
        obs: Array exposing ``.shape``. A 3-D array whose last axis has
            length 3 is treated as RGB and collapsed to a single luminance
            channel; any other shape is passed through unchanged.

    Returns:
        A 1-D array holding the values divided by 255.0 (this maps to
        [0, 1] only if the input is in the 0-255 range, which is assumed
        here rather than checked).
    """
    looks_like_rgb = len(obs.shape) == 3 and obs.shape[-1] == 3
    if looks_like_rgb:
        # Weighted sum over the colour axis: R, G, B luminance coefficients.
        luminance_weights = [0.2989, 0.5870, 0.1140]
        obs = np.dot(obs, luminance_weights)

    scaled = obs / 255.0
    return scaled.flatten()

In [None]:
# Train a DQN agent on the discrete-action CarRacing environment, logging
# per-episode metrics to TensorBoard and saving the model periodically.

# --- Logging / checkpoint configuration --------------------------------------
# TensorBoard run directory; the timestamp suffix gives every run its own folder.
log_dir = "logs/dqn_carRacing" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# Use a separate log directory per agent type, e.g. logs/dqn or Log/ddqn.
# NOTE: the two Keras callbacks below are only constructed (and, for the
# checkpoint callback, attached to the model); nothing in this cell passes
# them to a fit() call.
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)


checkpoint_path = "checkpoints/dqn_carRacing_checkpoint_{epoch:02d}.weights.h5"
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,  # Only save weights, not the full model
    save_best_only=False,
    save_freq='epoch',       # Save every epoch (you can change this)
    verbose=1
)

model_save_path = "saved_models/dqn_model_carRacing"


# Per-episode metrics, appended once at the end of every episode.
episode_rewards = []
episode_epsilons = []

# Writer used for the custom scalar summaries logged in the training loop.
writer = tf.summary.create_file_writer(log_dir)


# --- Environment and agent ----------------------------------------------------
env = gym.make("CarRacing-v2", render_mode="human", lap_complete_percent=0.95, domain_randomize=False, continuous=False)
# Length of the vector produced by preprocess_observation for one frame
# (assumes 96x96 RGB frames, as documented for CarRacing -- TODO confirm).
state_size = 96 * 96
action_size = env.action_space.n

agent = DQNAgent(state_size, action_size)
scores = []
EPISODES = 2000
avg_window = 100

save_interval = 100

# Attach the model to the checkpoint callback once, before training starts.
checkpoint_callback.set_model(agent.model)

# --- Training loop ------------------------------------------------------------
try:
    for e in range(EPISODES):
        # Seed only the very first reset: re-seeding with the same value on
        # every episode restarts the environment RNG identically each time.
        state, _ = env.reset(seed=42 if e == 0 else None, options={"randomize": True})
        state = preprocess_observation(state)
        state = np.reshape(state, [1, state_size])
        done = False
        time = 0  # number of environment steps taken in this episode
        total_reward = 0

        while not done:
            time += 1
            action = agent.act(state)
            next_state, reward, terminated, truncated, _ = env.step(action)
            next_state = preprocess_observation(next_state)
            next_state = np.reshape(next_state, [1, state_size])
            # Store the true termination flag only, as before; the last argument
            # is assumed to be the terminal marker used by the agent -- confirm
            # against DQN.py.
            agent.memorize(state, action, reward, next_state, terminated)
            state = next_state

            total_reward += reward

            # The episode is over on either signal. Stepping an environment
            # after truncation is invalid, so both must end the loop.
            done = terminated or truncated

        # Exactly one score per episode.
        scores.append(total_reward)
        if e % 100 == 0 and e > 1:
            print("episode: {}/{}, Score Mean: {} / Median: {} ".format(e, EPISODES, int(np.mean(scores)), int(np.median(scores))))
            print("Beta {:.5f} / Eps: {:.5f}".format(agent.memory.beta, agent.epsilon))

        # Train only once the replay memory reports more than 1000 entries
        # (n_entries is presumably the count of stored transitions).
        if agent.memory.tree.n_entries > 1000:
            agent.replay()

        # Periodic full-model snapshot.
        if e % save_interval == 0:
            agent.model.save(f"{model_save_path}_episode_{e}.h5")

        episode_rewards.append(total_reward)
        episode_epsilons.append(agent.epsilon)

        with writer.as_default():
            tf.summary.scalar('Total Reward', total_reward, step=e)
            tf.summary.scalar('Epsilon', agent.epsilon, step=e)

            # Compute and log the average reward over the last 100 episodes
            if len(scores) >= avg_window:
                avg_reward = np.mean(scores[-avg_window:])
                tf.summary.scalar('Average Reward (last 100 episodes)', avg_reward, step=e)
finally:
    # Always release the render window and push pending summaries to disk,
    # even when training is interrupted or raises.
    writer.flush()
    env.close()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 140ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms

In [None]:
from tensorflow.keras.models import load_model
