In [None]:
#!pip install tensorflow
#!pip install keras
#!pip install gym
#!pip install gym_super_mario_bros

Importing all the libraries and dependencies

In [None]:
import random
import gym
import numpy as np
import tensorflow as tf
from tensorflow import keras
from collections import deque
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten
from keras.optimizers import Adam
from keras import backend as K
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
from nes_py.wrappers import BinarySpaceToDiscreteSpaceEnv
import cv2

Creating the Agent Class

In [None]:
# Number of training episodes the main training loop iterates over.
EPISODES = 5000


#Creating the agent class
class DQNAgent:
    """Deep Q-learning agent with an online network, a target network and
    a bounded replay memory.

    The online network (``self.model``) is trained on replayed transitions
    and used for action selection; the target network (``self.target_model``)
    supplies the bootstrap value for non-terminal transitions and is only
    changed by ``update_target_model``.

    Args:
        state_size: Number of input features the network receives
            (passed to the first layer as ``input_dim``).
        action_size: Number of discrete actions (width of the output layer).
        gamma: Discount rate applied to the bootstrapped next-state value.
        epsilon: Initial exploration rate for epsilon-greedy action selection.
        epsilon_min: Lower bound the exploration rate decays towards.
        epsilon_decay: Multiplicative decay applied to epsilon per replay call.
        learning_rate: Step size handed to the Adam optimizer.
        memory_size: Maximum number of transitions kept in the replay memory;
            the oldest entries are discarded first.
    """

    def __init__(self, state_size, action_size, gamma=0.99, epsilon=1.0,
                 epsilon_min=0.01, epsilon_decay=0.99, learning_rate=0.001,
                 memory_size=2000):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=memory_size)
        self.gamma = gamma    # discount rate
        self.epsilon = epsilon  # exploration rate
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        # 0.001 is Adam's documented default; the previously hard-coded 0.5
        # is orders of magnitude too large for Adam to converge.
        self.learning_rate = learning_rate
        self.model = self._build_model()
        self.target_model = self._build_model()
        # Start with both networks holding identical weights.
        self.update_target_model()

    def _huber_loss(self, target, prediction):
        """Pseudo-Huber loss (delta = 1): mean of sqrt(1 + error^2) - 1.

        The mean is taken over the last axis.
        """
        error = prediction - target
        return K.mean(K.sqrt(1+K.square(error))-1, axis=-1)

    def _build_model(self):
        """Build and compile the fully connected Q-network.

        Returns:
            A compiled Keras ``Sequential`` model mapping ``state_size``
            inputs to ``action_size`` linear outputs (one Q-value per action).
        """
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(48, activation='tanh'))
        # NOTE(review): softmax as a *hidden* activation squeezes the layer
        # onto a probability simplex, which is unusual for a Q-network.
        # Left unchanged here; confirm it is intentional.
        model.add(Dense(72, activation='softmax'))
        model.add(Dense(96, activation='sigmoid'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss=self._huber_loss,
                      optimizer=Adam(lr=self.learning_rate))
        return model

    def update_target_model(self):
        """Copy the online network's weights into the target network."""
        self.target_model.set_weights(self.model.get_weights())

    def remember(self, state, action, reward, next_state, done):
        """Append one transition tuple to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action with an epsilon-greedy policy.

        With probability ``epsilon`` a uniformly random action index is
        returned; otherwise the index of the largest value in the first row
        of the online network's prediction for ``state``.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])  # returns action

    def replay(self, batch_size):
        """Train the online network on a random minibatch of stored transitions.

        For each sampled transition the online prediction is used as the
        regression target, with the entry of the taken action replaced by
        ``reward`` (terminal) or ``reward + gamma * max(target_net(next_state))``
        (non-terminal). Afterwards epsilon is decayed once, never dropping
        below ``epsilon_min``.

        Does nothing when fewer than ``batch_size`` transitions are stored,
        instead of letting ``random.sample`` raise ``ValueError``.
        """
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = self.model.predict(state)
            if done:
                target[0][action] = reward
            else:
                # Bootstrap from the target network, not the online one.
                next_q = self.target_model.predict(next_state)[0]
                target[0][action] = reward + self.gamma * np.amax(next_q)
            self.model.fit(state, target, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            # Clamp so the final decay step cannot undershoot the floor.
            self.epsilon = max(self.epsilon_min,
                               self.epsilon * self.epsilon_decay)

    def load(self, name):
        """Load online-network weights from the file ``name``."""
        self.model.load_weights(name)

    def save(self, name):
        """Save online-network weights to the file ``name``."""
        self.model.save_weights(name)

In [None]:
if __name__ == "__main__":
    # Train a DQNAgent on Super Mario Bros using flattened grayscale frames.
    env = gym.make("SuperMarioBros-v0")
    env = BinarySpaceToDiscreteSpaceEnv(env, SIMPLE_MOVEMENT)

    # The Q-network is fully connected, so each grayscale frame is flattened
    # into a single row of height * width pixels.
    # Assumes observation_space.shape is (height, width, channels) — TODO confirm.
    frame_height, frame_width = env.observation_space.shape[:2]
    state_size = frame_height * frame_width
    action_size = env.action_space.n
    agent = DQNAgent(state_size, action_size)
    # agent.load("./save/cartpole-ddqn.h5")
    batch_size = 24
    max_steps = 250  # hard cap on environment steps per episode

    def preprocess(frame):
        """Convert a colour frame to a (1, state_size) grayscale row vector."""
        # Gym-style environments deliver RGB frames, hence RGB2GRAY rather
        # than OpenCV's native BGR ordering.
        gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
        return np.reshape(gray, [1, state_size])

    try:
        for e in range(EPISODES):
            state = preprocess(env.reset())
            episode_reward = 0
            for step in range(max_steps):
                env.render()
                action = agent.act(state)
                next_state, reward, done, _ = env.step(action)
                next_state = preprocess(next_state)
                # Penalise the transition that ends the episode.
                reward = reward if not done else -50
                episode_reward += reward
                agent.remember(state, action, reward, next_state, done)
                state = next_state
                if len(agent.memory) > batch_size:
                    agent.replay(batch_size)
                if done:
                    # Stepping a finished environment without reset is invalid.
                    break
            # Sync the target network once per episode; syncing on every step
            # would make it identical to the online network at all times.
            agent.update_target_model()
            # One summary line per episode: "score" is the index of the last
            # step taken and "r" is the reward accumulated over the episode.
            print("episode: {}/{}, score: {}, e: {:.2}, r: {}".format(e, EPISODES, step, agent.epsilon, episode_reward))
            # if e % 10 == 0:
            #     agent.save("./save/cartpole-ddqn.h5")
    finally:
        # Release the emulator and render window even if training is interrupted.
        env.close()

In [None]:
# Inspect the second entry of the environment's observation shape
# (presumably the frame width — confirm against the environment docs).
env.observation_space.shape[1]

In [None]:
# `target` only exists as a local variable inside DQNAgent.replay, so reading
# it here raises NameError on a fresh kernel. Query the online network
# directly to inspect the Q-value it assigns to the most recent action in the
# latest state left behind by the training cell.
agent.model.predict(state)[0][action]