In [1]:
import numpy as np
from collections import deque
import sys
import os

# Make the project's sibling `src` directory importable from this notebook.
# The path is built relative to the current working directory, which is
# assumed to be the notebook's own directory — TODO confirm for other launchers.
notebook_dir = os.getcwd()
src_dir = os.path.join(notebook_dir, '..', 'src')

# Re-running this cell must not pile up duplicate entries on sys.path,
# so only register the directory the first time.
if src_dir not in sys.path:
    sys.path.append(src_dir)

import game
import dqn

import importlib

# Re-execute the local modules so that edits made on disk since the kernel
# first imported them are picked up without restarting the kernel.
for _local_module in (game, dqn):
    importlib.reload(_local_module)

# --- Values forwarded to the dqn.DQN constructor in this cell ---
GAMMA = 0.975                # passed as `gamma`
EXPLORATION_DECAY = 0.95     # passed as `exploration_decay`
LEARNING_RATE_DECAY = 0.99   # passed as `learning_rate_decay`
LEARNING_RATE = 0.0001       # passed as `learning_rate_max`

# Passed as `batch_size`; this cell also uses it as the replay-memory
# warm-up threshold and as the length of the moving-average loss window.
BATCH_SIZE = 128

# Exclusive upper bound of the episode loop (the last episode index is
# EPISODES - 1).
EPISODES = 135

# Not referenced anywhere else in this cell — presumably consumed by other
# code; verify before removing.
UPDATE_TARGET_EVERY = 10

# Create the game environment, then a DQN agent whose input/output sizes are
# read from that environment.
env = game.Environment()

# Gather the constructor arguments first so the call itself stays short.
agent_config = dict(
    state_shape=env.ENVIRONMENT_SHAPE,
    action_size=env.ACTION_SPACE_SIZE,
    batch_size=BATCH_SIZE,
    learning_rate_max=LEARNING_RATE,
    learning_rate_decay=LEARNING_RATE_DECAY,
    exploration_decay=EXPLORATION_DECAY,
    gamma=GAMMA,
)
agent = dqn.DQN(**agent_config)

# agent.save(f'models/-1.h5')
# Load a hard-coded checkpoint file (presumably the state saved after
# episode 130 — verify against the files in models/).
agent.load('models/130.h5')

# Start from a fresh environment; expand_dims adds a leading axis of size 1
# in front of the observation before it is handed to the agent.
state = np.expand_dims(env.reset(), axis=0)

# Sliding window of the most recent losses, used for the moving-average loss.
most_recent_losses = deque(maxlen=BATCH_SIZE)

# One row per finished episode, appended by the training loop.
log = []

# Fill up the replay memory until it holds at least one full batch, so the
# agent has BATCH_SIZE transitions available before training starts.
while agent.memory.length() < BATCH_SIZE:
    action = agent.act(state)
    next_state, reward, done, score = env.step(action)
    next_state = np.expand_dims(next_state, axis=0)
    agent.remember(state, action, reward, next_state, done)

    if done:
        # The episode ended during warm-up: begin a new one instead of
        # stepping a finished environment, matching the training loop's
        # reset-per-episode protocol.
        state = np.expand_dims(env.reset(), axis=0)
    else:
        state = next_state

# First episode index of this run, kept as a named constant so the resume
# point is easy to find and change.
# NOTE(review): the checkpoint loaded earlier in this cell is 'models/130.h5',
# yet training resumes at 129, so episodes 129 and 130 are re-run and their
# checkpoint files are overwritten — confirm this is intended.
START_EPISODE = 129

for e in range(START_EPISODE, EPISODES):
    # Every episode starts from a freshly reset environment, with the same
    # leading axis added to the observation as everywhere else in this cell.
    state = np.expand_dims(env.reset(), axis=0)
    done = False
    step = 0
    ma_loss = None  # stays None until at least one loss has been recorded

    while not done:
        # Act, observe the outcome, and store the transition.
        action = agent.act(state)
        next_state, reward, done, score = env.step(action)
        next_state = np.expand_dims(next_state, axis=0)
        agent.remember(state, action, reward, next_state, done)

        state = next_state
        step += 1

        # Run one replay step; the episode index is forwarded to the agent.
        loss = agent.replay(episode=e)

        # replay() may return None; keeping a None in the window would make
        # the mean below raise, so only real losses are recorded.
        if loss is not None:
            most_recent_losses.append(loss)
            # The trailing "\r" rewrites the same console line on each step.
            print(f"Step: {step}. Score: {score}. -- Loss: {loss}", end="          \r")

        # The window persists across episodes, so the average can be
        # refreshed even on a step that produced no new loss.
        if most_recent_losses:
            ma_loss = np.array(most_recent_losses).mean()

    # The while-loop only exits once `done` is True, so the episode summary
    # is emitted here. Guard the format spec: ma_loss is None when no loss
    # has been recorded yet.
    ma_loss_text = "n/a" if ma_loss is None else f"{ma_loss:.6f}"
    print(f"Episode {e}/{EPISODES-1} completed with {step} steps. Score: {score:.0f}. LR: {agent.learning_rate:.6f}. EP: {agent.exploration_rate:.2f}. MA loss: {ma_loss_text}")

    # Row layout: episode, steps, score, learning rate, exploration rate,
    # moving-average loss.
    log.append([e, step, score, agent.learning_rate, agent.exploration_rate, ma_loss])

    # Checkpoint the agent after every episode, one file per episode index.
    agent.save(f'models/{e}.h5')

pygame 2.6.0 (SDL 2.28.4, Python 3.10.0)
Hello from the pygame community. https://www.pygame.org/contribute.html
Episode 129/134 completed with 393 steps. Score: 5. LR: 0.000027. EP: 0.01. MA loss: 0.002264
Episode 130/134 completed with 249 steps. Score: 5. LR: 0.000027. EP: 0.01. MA loss: 0.001832
Episode 131/134 completed with 393 steps. Score: 5. LR: 0.000027. EP: 0.01. MA loss: 0.001638
Episode 132/134 completed with 393 steps. Score: 5. LR: 0.000027. EP: 0.01. MA loss: 0.001541
Episode 133/134 completed with 249 steps. Score: 5. LR: 0.000026. EP: 0.01. MA loss: 0.001541
Episode 134/134 completed with 609 steps. Score: 5. LR: 0.000026. EP: 0.01. MA loss: 0.001576
