# Playing Atari Games Using DQN

In [1]:
import tensorflow as tf
print(tf.__version__)

2.0.0


In [2]:
import random
import gym
import numpy as np
from collections import deque

In [3]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import Adam

In [33]:
env = gym.make("MsPacman-v0")

In [34]:
state_size = (88, 80, 1)

In [35]:
action_size = env.action_space.n

In [36]:
action_size

9

## Preprocess the Game Screen

In [37]:
color = np.array([210, 164, 74]).mean()

In [52]:
def preprocess_state(state):
    
    # crop and resize the image
    image = state[1:176:2, ::2]
    
    # convert the image to greyscale
    image = image.mean(axis=2)
    
    # improve image contrast
    image[image==color] =0
    
    # normalize the image
    image = (image - 128) / 128-1
    
    # reshape the image
    image = np.expand_dims(image.reshape(88, 80, 1), axis=0)
    
    return image

In [53]:
class DQN:
    def __init__(self, state_size, action_size):
        
        # define the state size
        self.state_size = state_size
        
        # define the action size
        self.action_size = action_size
        
        # define the replay buffer
        self.reply_buffer = deque(maxlen=5000)
        
        # define the discount factor
        self.gama = 0.9
        
        # define the epsilon value
        self.epsilon = 0.8
        
        # define the update rate at which we want to update the target network
        self.update_rate = 1000
        
        # define the main network
        self.main_network = self.build_network()
        
        # define the target network
        self.target_network = self.build_network()
        
        # copy the weights of the main network to the target network
        self.target_network.set_weights(self.main_network.get_weights())
        
        
    # let's define a function called build_network which is essentialy your DQN
    
    def build_network(self):
        model = Sequential()
        model.add(Conv2D(32, (8, 8), strides=4, padding='same', input_shape=self.state_size))
        model.add(Activation('relu'))
        
        model.add(Conv2D(64, (4, 4), strides=2, padding='same'))
        model.add(Activation('relu'))
                  
        model.add(Conv2D(64, (3, 3), strides=1, padding='same'))
        model.add(Activation('relu'))
        model.add(Flatten())
        
        model.add(Dense(512, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
                  
        model.compile(loss='mse', optimizer=Adam())
                  
        return model
                  
        
    # we learned that in DQN, to take care of exploration-exploitation trade off, we select action
    # using the epsilon-greedy policy. So, now we define the function called epsilon-greedy 
    # for selecting action using the epsilon-greedy policy.
                  
    def epsilon_greedy(self, state):
        if random.uniform(0, 1) < self.epsilon:
              return np.random.randint(self.action_size)
                  
        Q_values = self.main_network.predict(state)
                  
        return np.argmax(Q_value[0])\
                  
    # train the network
                  
    def train(self, batch_size):
                  
        # sample a mini batch of transition from the replay buffer
        minibatch = random.sample(self.replay_buffer, batch_size)
                  
        # compute the Q value using the target network
        for state, action, reward, next_state, done in minibatch:
            if not done:
                  target_Q = (reward, self.gamma * np.amax(self.target_network.predict(next_state)))
            else:
                  target_Q = reward
                
            # compute the Q value using the main nework
            Q_values = self.main_network.predict(state)
                  
            Q_values[0][action] = target_Q
                  
            # train the main network
            self.main_network.fit(state, Q_values, epochs=1, verbose=0)
                  
            
    # update the target network weights by coping from the main network
    def update_target_network(self):
        self.target_network.set_weights(self.main_network.get_weights())
            
                  
            

## Training the Network

In [54]:
num_episodes = 500

In [55]:
num_timesteps = 20000

In [56]:
batch_size = 8

In [57]:
num_screens = 4

In [58]:
dqn = DQN(state_size, action_size)

In [63]:
done = False 
time_step = 0
for i in range(num_episodes):
    
    # set return to 0
    Return = 0
    
    # preprocess the game screen
    state = preprocess_state(env.reset())
    
    # for each step in the episode
    for t in range(num_timesteps):
        
        # render the environment 
        env.render()
        
        # update the timestep
        time_step = time_step + 1
        
        # update the target network
        if time_step % dqn.update_rate == 0:
            dqn.update_target_network
        
        # select the action
        action = dqn.epsilon_greedy(state)
        
        # perform the selected action
        next_state, reward, done, _ = evn.step(action)
        
        # preprocess the next state
        next_state = preprocess_state(next_state)
        
        # store the transition information
        dqn.store_transition(state, action, reward, next_state, done)
        
        # update current state to the next state
        state = next_state
        
        # update the return
        Return = Return + reward
        
        # if the episode is done then print the return 
        if done:
            print('Episode :', i, ', ', 'Return', Return )
            break
            
        # if the number of transition in the replay buffer is greater than batch size
        # then train the network
        if len(dqn.replay_buffer) > batch_size:
            dqn.train(batch_size)
        
            
        
        
        

MissingFunctionException: glCreateShader is not exported by the available OpenGL driver.  VERSION_2_0 is required for this functionality.