In [None]:
import os
# Switch the working directory to the project folder on Google Drive; the
# weight files written and read later in this notebook use relative paths.
# NOTE(review): this path presumably only exists after Drive has been mounted
# at /content/drive (the drive.mount cell) — confirm the cell execution order.
os.chdir('/content/drive/MyDrive/AI_Racing_Game')


In [None]:
import tensorflow
import numpy as np
import random
import pygame

from collections import deque

from tensorflow.keras.layers import Conv2D, Flatten, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam, RMSprop, Adamax, Adagrad
from tensorflow.keras.losses import mse

from environment import Environment

pygame 2.6.1 (SDL 2.28.4, Python 3.11.12)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [None]:
from google.colab import drive
# Mount Google Drive under /content/drive so the project folder is reachable.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
class DQNAgent:
    """Deep Q-Network agent with a replay buffer and a separate target network.

    The online network (``model``) is trained on minibatches sampled from
    ``memory``; the target network (``targetModel``) supplies the bootstrap
    values and is only refreshed when ``updateTargetModel`` is called.
    """

    def __init__(self, stateShape, actionSize):
        """Create the agent and both networks.

        Args:
            stateShape: Shape of a single observation; it is passed to the
                first ``Conv2D`` layer as ``input_shape`` (presumably
                ``(height, width, channels)`` — confirm against Environment).
            actionSize: Number of discrete actions, i.e. the width of the
                network's output layer.
        """
        self.stateShape = stateShape
        self.actionSize = actionSize
        self.memory = deque(maxlen=10000)  # oldest transitions are dropped first
        self.gamma = 0.99  # discount factor for future reward
        # As epsilon shrinks the agent takes fewer random actions, which is
        # mostly desirable. The floor could be 0.01 so some exploration (and
        # therefore learning) continues.
        # TODO: research whether letting epsilon drop this early is a good idea.
        self.epsilon = 1.0
        self.epsilonMin = 0.01
        # Epsilon is multiplied by this factor after every replay step.
        self.epsilonDecay = 0.995
        self.lr = 0.0005
        self.model = self._buildModel()
        self.targetModel = self._buildModel()
        self.updateTargetModel()

    def _buildModel(self):
        """Build and compile the convolutional Q-network.

        Returns:
            A compiled Keras ``Sequential`` model with one linear output per
            action, trained with MSE loss and the Adam optimizer.
        """
        model = Sequential()
        model.add(Conv2D(16, (4, 4), strides=2, padding="same",
                         input_shape=self.stateShape, activation="relu"))
        model.add(Conv2D(32, (3, 3), strides=1, padding="same", activation="relu"))
        model.add(Flatten())
        model.add(Dense(128, activation="relu"))
        model.add(Dense(self.actionSize, activation="linear"))

        model.compile(loss=mse, optimizer=Adam(learning_rate=self.lr))
        return model

    def updateTargetModel(self):
        """Copy the online network's weights into the target network."""
        self.targetModel.set_weights(self.model.get_weights())

    def remember(self, state, action, reward, nextState, done):
        """Append one transition tuple to the replay buffer."""
        self.memory.append((state, action, reward, nextState, done))

    def act(self, state):
        """Choose an action for ``state`` with an epsilon-greedy policy.

        Args:
            state: A single observation (without a batch axis).

        Returns:
            The chosen action index as a Python ``int``.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.actionSize)
        # verbose=0: this runs once per environment step, and the default
        # setting prints a progress bar for every call.
        qValues = self.model.predict(state[np.newaxis, ...], verbose=0)
        return int(np.argmax(qValues[0]))

    def replay(self, batch_size):
        """Train the online network on one random minibatch from memory.

        Does nothing while fewer than ``batch_size`` transitions are stored.
        After the training step epsilon is decayed, never below ``epsilonMin``.

        Args:
            batch_size: Number of transitions to sample.
        """
        if len(self.memory) < batch_size:
            return

        minibatch = random.sample(self.memory, batch_size)
        states, actions, rewards, next_states, dones = (
            np.array(column) for column in zip(*minibatch)
        )

        # Start from the current predictions so that only the entry of the
        # action actually taken contributes to the loss.
        targets = self.model.predict(states, verbose=0)
        next_q_values = self.targetModel.predict(next_states, verbose=0)

        # Terminal transitions have no future return, so only bootstrap from
        # the target network where the episode did not end.
        future = np.where(
            dones.astype(bool),
            0.0,
            self.gamma * np.max(next_q_values, axis=1),
        )
        targets[np.arange(batch_size), actions] = rewards + future

        self.model.fit(states, targets, epochs=1, verbose=0)

        if self.epsilon > self.epsilonMin:
            self.epsilon = max(self.epsilonMin, self.epsilon * self.epsilonDecay)

    def load(self, name):
        """Load weights for the online network from the file ``name``."""
        self.model.load_weights(name)

    def save(self, name):
        """Save the online network's weights to the file ``name``."""
        self.model.save_weights(name)

In [None]:
def trainAgent(episodes=20, batchSize=32):
    """Train a fresh DQNAgent on the racing Environment.

    Each step's transition is stored in the agent's replay memory, and once
    the memory holds more than ``batchSize`` transitions a replay step is run
    after every non-terminal step. The target network is refreshed every 10
    episodes. Weights are written every 50 episodes and also after the last
    episode, so the final state of training is always saved.

    Args:
        episodes: Number of episodes to play.
        batchSize: Minibatch size handed to ``agent.replay``.
    """
    env = Environment()
    try:
        stateShape = env.observationSpace
        actionSize = env.actionSpace
        agent = DQNAgent(stateShape, actionSize)

        for e in range(episodes):
            state = np.reshape(env.reset(), stateShape).astype(np.float32)
            done = False

            while not done:
                action = agent.act(state)
                nextState, reward, done, info = env.step(action)
                nextState = np.reshape(nextState, stateShape).astype(np.float32)

                agent.remember(state, action, reward, nextState, done)
                state = nextState

                if done:
                    print(f"Episode: {e+1}/{episodes}, Score: {info['score']}, Epsilon: {agent.epsilon:.2f}")
                    break

                if len(agent.memory) > batchSize:
                    agent.replay(batchSize)

            if e % 10 == 0:
                agent.updateTargetModel()

            # Saving only on multiples of 50 would keep nothing but the
            # episode-0 weights for short runs, so also save the last episode.
            if e % 50 == 0 or e == episodes - 1:
                agent.save(f"dqn_model_{e}.weights.h5")
    finally:
        # Release the environment even if training is interrupted or fails.
        env.close()

In [None]:
def testAgent(weightsPath="dqn_model_0.weights.h5"):
    """Play one rendered episode with saved weights and print the score.

    Args:
        weightsPath: Weights file to load into the agent's online network.
            Defaults to the file written after the first training episode.
    """
    env = Environment()
    try:
        stateShape = env.observationSpace
        actionSize = env.actionSpace

        agent = DQNAgent(stateShape, actionSize)
        agent.load(weightsPath)
        agent.epsilon = 0.01  # keep a small chance of a random action

        # Cast to float32 so the input matches what trainAgent feeds the network.
        state = np.reshape(env.reset(), stateShape).astype(np.float32)
        done = False

        while not done:
            action = agent.act(state)
            nextState, _, done, info = env.step(action)
            state = np.reshape(nextState, stateShape).astype(np.float32)
            env.render()
            pygame.time.delay(50)  # pause 50 ms between rendered frames

        print(f"Test Score: {info['score']}")
    finally:
        # Release the environment even if the episode fails part-way.
        env.close()

In [None]:
# Run training with the default settings defined in trainAgent.
trainAgent()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36

In [None]:
# Play one rendered evaluation episode using the weights file testAgent loads.
testAgent()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 227ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2