<a href="https://colab.research.google.com/github/melchilegion/Qwasar.io/blob/main/atari_games.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **SETTING UP THE ENVIRONMENT**

In [1]:
!pip install gym[atari] tensorflow keras opencv-python

Collecting ale-py~=0.7.5 (from gym[atari])
  Downloading ale_py-0.7.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.1 kB)
Downloading ale_py-0.7.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ale-py
Successfully installed ale-py-0.7.5


# **IMPORT LIBRARIES**

In [2]:
import gym
import numpy as np
import random
import matplotlib.pyplot as plt
from collections import deque
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

  from jax import xla_computation as _xla_computation


# **BUILDING THE DQN MODELS**

In [3]:
def build_model(state_size, action_size):
    model = keras.Sequential()
    model.add(layers.Dense(24, input_dim=state_size, activation='relu'))
    model.add(layers.Dense(24, activation='relu'))
    model.add(layers.Dense(action_size, activation='linear'))
    model.compile(loss='mse', optimizer=keras.optimizers.Adam(learning_rate=0.001))
    return model

# **IMPLEMENTING THE DQN ALGORITHM**

In [4]:
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # discount rate
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.model = build_model(state_size, action_size)

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])  # returns action

    def replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target += self.gamma * np.amax(self.model.predict(next_state)[0])
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

# **TRAINING THE MODEL FOR EACH GAME**

# **CartPole**

In [5]:
def train_cartpole():
    env = gym.make('CartPole-v1')
    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n
    agent = DQNAgent(state_size, action_size)
    episodes = 1000
    batch_size = 32

    for e in range(episodes):
        state = env.reset()
        state = np.reshape(state, [1, state_size])
        for time in range(500):
            action = agent.act(state)
            next_state, reward, done, _ = env.step(action)
            reward = reward if not done else -10
            next_state = np.reshape(next_state, [1, state_size])
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            if done:
                print(f"Episode: {e}/{episodes}, score: {time}, e: {agent.epsilon:.2}")
                break
            if len(agent.memory) > batch_size:
                agent.replay(batch_size)

# **Space Invaders**

In [6]:
def train_space_invaders():
    env = gym.make('SpaceInvaders-v0')
    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n
    agent = DQNAgent(state_size, action_size)
    episodes = 1000
    batch_size = 32

    for e in range(episodes):
        state = env.reset()
        state = np.reshape(state, [1, state_size])
        for time in range(500):
            action = agent.act(state)
            next_state, reward, done, _ = env.step(action)
            reward = reward if not done else -10
            next_state = np.reshape(next_state, [1, state_size])
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            if done:
                print(f"Episode: {e}/{episodes}, score: {time}, e: {agent.epsilon:.2}")
                break
            if len(agent.memory) > batch_size:
                agent.replay(batch_size)

# **Pacman**

In [7]:
def train_pacman():
    env = gym.make('MsPacman-v0')
    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n
    agent = DQNAgent(state_size, action_size)
    episodes = 1000
    batch_size = 32

    for e in range(episodes):
        state = env.reset()
        state = np.reshape(state, [1, state_size])
        for time in range(500):
            action = agent.act(state)
            next_state, reward, done, _ = env.step(action)
            reward = reward if not done else -10
            next_state = np.reshape(next_state, [1, state_size])
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            if done:
                print(f"Episode: {e}/{episodes}, score: {time}, e: {agent.epsilon:.2}")
                break
            if len(agent.memory) > batch_size:
                agent.replay(batch_size)

In [8]:
def train_cartpole():
    env = gym.make('CartPole-v0')
    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n
    agent = DQNAgent(state_size, action_size) # Make sure DQNAgent is defined
    episodes = 1000
    batch_size = 32
    scores = []

    for e in range(episodes):
        state = env.reset()

        # If state is a tuple, try reshaping only the first element
        if isinstance(state, tuple):
            state = np.reshape(state[0], [1, state_size])
        else:
            state = np.reshape(state, [1, state_size])

        score = 0
        for time in range(500):
            action = agent.act(state)
            next_state, reward, done, _ = env.step(action)
            reward = reward if not done else -10

            if isinstance(next_state, tuple):
                next_state = np.reshape(next_state[0], [1, state_size])
            else:
                next_state = np.reshape(next_state, [1, state_size])

            agent.remember(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done:
                print(f"Episode: {e}/{episodes}, score: {score}, e: {agent.epsilon:.2}")
                scores.append(score)
                break
            if len(agent.memory) > batch_size:
                agent.replay(batch_size)

    return scores

In [9]:
def train_space_invaders():
    env = gym.make('SpaceInvaders-v0')
    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n
    agent = DQNAgent(state_size, action_size) # Make sure DQNAgent is defined
    episodes = 1000
    batch_size = 32
    scores = []

    for e in range(episodes):
        state = env.reset()

        # If state is a tuple, try reshaping only the first element
        if isinstance(state, tuple):
            state = np.reshape(state[0], [1, state_size])
        else:
            state = np.reshape(state, [1, state_size])

        score = 0
        for time in range(500):
            action = agent.act(state)
            next_state, reward, done, _ = env.step(action)
            reward = reward if not done else -10

            if isinstance(next_state, tuple):
                next_state = np.reshape(next_state[0], [1, state_size])
            else:
                next_state = np.reshape(next_state, [1, state_size])

            agent.remember(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done:
                print(f"Episode: {e}/{episodes}, score: {score}, e: {agent.epsilon:.2}")
                scores.append(score)
                break
            if len(agent.memory) > batch_size:
                agent.replay(batch_size)

    return scores

In [10]:
def train_pacman():
    env = gym.make('MsPacman-v0')
    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n
    agent = DQNAgent(state_size, action_size) # Make sure DQNAgent is defined
    episodes = 1000
    batch_size = 32
    scores = []

    for e in range(episodes):
        state = env.reset()

        # If state is a tuple, try reshaping only the first element
        if isinstance(state, tuple):
            state = np.reshape(state[0], [1, state_size])
        else:
            state = np.reshape(state, [1, state_size])

        score = 0
        for time in range(500):
            action = agent.act(state)
            next_state, reward, done, _ = env.step(action)
            reward = reward if not done else -10

            if isinstance(next_state, tuple):
                next_state = np.reshape(next_state[0], [1, state_size])
            else:
                next_state = np.reshape(next_state, [1, state_size])

            agent.remember(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done:
                print(f"Episode: {e}/{episodes}, score: {score}, e: {agent.epsilon:.2}")
                scores.append(score)
                break
            if len(agent.memory) > batch_size:
                agent.replay(batch_size)

    return scores

# **VISUALIZING RESULTS**

In [None]:
def plot_scores(scores, title):
    plt.plot(scores)
    plt.title(title)
    plt.xlabel('Episode')
    plt.ylabel('Score')
    plt.show()
    plt.savefig(f"{title}.png")
    plt.close()
    return scores

if __name__ == "__main__":
    scores = train_cartpole()
    plot_scores(scores, "CartPole")
    scores = train_space_invaders()
    plot_scores(scores, "SpaceInvaders")
    scores = train_pacman()
    plot_scores(scores, "Pacman")
    plt.plot(scores)
    plt.title("Pacman")
    plt.xlabel('Episode')
    plt.ylabel('Score')
    plt.savefig("Pacman.png")
    plt.close()
    plt.plot(scores)
    plt.title("SpaceInvaders")
    plt.xlabel('Episode')
    plt.ylabel('Score')
    plt.savefig("SpaceInvaders.png")
    plt.close()
    plt.plot(scores)
    plt.title("CartPole")
    plt.xlabel('Episode')
    plt.ylabel('Score')
    plt.savefig("CartPole.png")
    plt.close()
    plt.plot(scores)
    plt.title("Pacman")
    plt.xlabel('Episode')
    plt.ylabel('Score')
    plt.show(scores)

  logger.warn(
  deprecation(
  deprecation(
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  if not isinstance(terminated, (bool, np.bool8)):


Episode: 0/1000, score: 10.0, e: 1.0
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━