In [1]:
# -*- coding: utf-8 -*-
"""
Created on Sat Jul  3 15:32:19 2021

@author: Matheus
"""

import gym
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten
from keras.optimizers import Adam
import random

#hyperparameters
# alpha: learning rate handed to the Adam optimizer in DQN.__init__.
# NOTE(review): 0.1 is far above Adam's usual range (Keras' default is 0.001);
# the value is kept as-is here, confirm before a long training run.
alpha = 0.1
# gamma: discount factor applied to the bootstrapped next-state value
# (1 means future rewards are not discounted).
gamma = 1
# epsilon: initial probability of taking a random (exploratory) action.
epsilon = 0.99
# epsilon_decay: factor epsilon is multiplied by after each DQN.update() that trains.
epsilon_decay = 0.99

class DQN:
    """Deep Q-network agent with a small experience-replay buffer.

    The network maps a batch of 210x160x3 frames to one Q-value per action.
    Transitions are stored with memory_add() and replayed by update().
    """

    def __init__(self, action_space):
        """Build and compile the Q-network.

        Parameters
        ----------
        action_space : int
            Number of discrete actions; sets the size of the output layer.
        """
        #hyperparameters
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        
        #experience replay
        self.memory = []
        self.batch_size = 32
        
        #AI model
        self.model = Sequential()
        # Every conv layer needs a non-linearity: without it the first two
        # convolutions collapse into a single linear map. Activations carry no
        # weights, so previously saved weight files still load.
        self.model.add(Conv2D(32, (8,8), strides=(4,4), activation='relu', input_shape=(210,160,3)))
        self.model.add(Conv2D(64, (4,4), strides=(2,2), activation='relu'))
        self.model.add(Conv2D(64, (3,3), activation='relu'))
        self.model.add(Flatten())
        self.model.add(Dense(512, activation='relu'))
        self.model.add(Dense(256, activation='relu'))
        self.model.add(Dense(action_space, activation='linear'))
        self.model.compile(loss='mse', optimizer=Adam(learning_rate=self.alpha))
        
    def update(self):
        """Train on one random minibatch of stored transitions, then decay epsilon.

        Does nothing (and leaves epsilon untouched) while fewer than
        ``batch_size`` transitions are stored.
        """
        if len(self.memory) < self.batch_size:
            return
        
        #train the model
        mem_sample = random.sample(self.memory, self.batch_size)
        for state, action, reward, next_state, done in mem_sample:
            q = self.model.predict(state, verbose=0)
            if done:
                # Terminal transition: there is no future value to bootstrap from.
                target = reward
            else:
                # Bellman target. The learning rate is applied by the optimizer,
                # so the target itself must not be scaled by alpha.
                target = reward + self.gamma * np.max(self.model.predict(next_state, verbose=0))
            # Only the taken action's Q-value is moved; the others keep their
            # predicted value and therefore contribute zero loss.
            q[0, action] = target
            self.model.fit(state, q, verbose=0)
        
        #decay epsilon
        self.epsilon *= epsilon_decay
        
    def memory_add(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer.

        Once more than 512 transitions are stored, the oldest 40% are dropped.
        """
        self.memory.append((state, action, reward, next_state, done))
        
        #clear memory
        if len(self.memory) > 512:
            pct = int(len(self.memory) * 0.4)
            self.memory = self.memory[pct:]
        
    def choose_action(self, state, env):
        """Pick an action epsilon-greedily.

        With probability ``self.epsilon`` a random action is sampled from
        ``env.action_space``; otherwise the index of the largest predicted
        Q-value for ``state`` is returned.
        """
        if random.random() < self.epsilon:
            #exploration
            return env.action_space.sample()
        #exploitation
        return np.argmax(self.model.predict(state, verbose=0))
    
    def save(self):
        """Write the network weights to 'weights.h5' in the working directory."""
        self.model.save_weights('weights.h5')
        
    def load(self):
        """Read the network weights from 'weights.h5' in the working directory."""
        self.model.load_weights('weights.h5')
        
        
def get_state(state):
    """Convert a raw frame into the array layout the network is fed.

    The input is turned into a NumPy array, every value is divided by 255
    (presumably mapping 8-bit pixel values into [0, 1]), and the result is
    reshaped to (-1, 210, 160, 3) so a single frame becomes a batch of one.
    The input object itself is not modified.
    """
    frame = np.asarray(state)
    scaled = np.true_divide(frame, 255)
    return scaled.reshape((-1, 210, 160, 3))
        
def game(episodes=10000, render=True):
    """Train a DQN agent on Breakout, then play one evaluation episode.

    Parameters
    ----------
    episodes : int
        Number of training episodes (default 10000, as before).
    render : bool
        Whether to draw the environment every step (default True, as before).
        Turning it off makes training considerably faster.

    After every training episode one replay minibatch is trained, the weights
    are saved, and the episode score is printed.
    """
    import os  # local import: only needed for the weights-file check below

    #make environment and model
    env = gym.make('Breakout-v0')
    dqn = DQN(env.action_space.n)

    # Resume from earlier weights when they exist. Loading once is enough:
    # the in-memory model is always at least as recent as the file it saves.
    # The path must match the one used by DQN.save()/DQN.load().
    if os.path.exists('weights.h5'):
        dqn.load()

    try:
        #start loop and train
        for episode in range(episodes):
            #sanitize state img
            state = get_state(env.reset())
            score = 0
            done = False

            while not done:
                if render:
                    env.render()

                #get random or best action
                action = dqn.choose_action(state, env)

                #get next_state after action
                next_state, reward, done, info = env.step(action)
                step_reward = reward

                #skip 4 frames
                for _ in range(4):
                    # Never step an environment that has already finished.
                    if done:
                        break
                    next_state, reward, done, info = env.step(0)
                    step_reward += reward

                score += step_reward

                #save
                # Store the transition as the agent experiences it: the reward
                # collected over the skipped frames and the frame (and done
                # flag) it will actually act from next.
                next_state = get_state(next_state)
                dqn.memory_add(state, action, step_reward, next_state, done)

                #go to next_state
                state = next_state

            #train ai and show score
            dqn.update()
            dqn.save()
            print('Episode', episode, 'score', score)

        #test ai
        state = get_state(env.reset())
        done = False
        while not done:
            if render:
                env.render()
            action = dqn.choose_action(state, env)
            next_state, reward, done, info = env.step(action)
            # Normalise each frame exactly once.
            state = get_state(next_state)
    finally:
        # Release the emulator / render window even if training is interrupted.
        env.close()
        

In [None]:
# Run the full training loop followed by one evaluation episode.
# Each training episode prints one "Episode <n> score <s>" line.
game()

Episode 0 score 4.0
Episode 1 score 2.0
Episode 2 score 1.0
Episode 3 score 0.0
Episode 4 score 0.0
Episode 5 score 1.0
Episode 6 score 1.0
Episode 7 score 0.0
Episode 8 score 1.0
Episode 9 score 5.0
Episode 10 score 0.0
Episode 11 score 1.0
Episode 12 score 2.0
Episode 13 score 0.0
Episode 14 score 1.0
Episode 15 score 1.0
Episode 16 score 3.0
Episode 17 score 1.0
Episode 18 score 1.0
Episode 19 score 2.0
Episode 20 score 0.0
Episode 21 score 0.0
Episode 22 score 0.0
Episode 23 score 0.0
Episode 24 score 2.0
Episode 25 score 2.0
Episode 26 score 2.0
Episode 27 score 1.0
Episode 28 score 1.0
Episode 29 score 2.0
Episode 30 score 2.0
Episode 31 score 1.0
Episode 32 score 2.0
Episode 33 score 2.0
Episode 34 score 0.0
Episode 35 score 5.0
Episode 36 score 0.0
Episode 37 score 3.0
Episode 38 score 2.0
Episode 39 score 1.0
Episode 40 score 2.0
Episode 41 score 3.0
Episode 42 score 2.0
Episode 43 score 2.0
Episode 44 score 0.0
Episode 45 score 0.0
Episode 46 score 1.0
Episode 47 score 4.0
Ep