In [1]:
# getting in all the imports

# Standard libraries for util support
from __future__ import print_function, division
import os
import time
import random
import json
from collections import namedtuple

# Libraries for processing and rendering
import matplotlib
import matplotlib.pyplot as plt
import scipy.ndimage
from PIL import Image
from skimage.transform import resize
import numpy as np
%matplotlib inline

# Libraries for games and deep learning
import gym
import keras
# import tensorflow as tf



  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# util functions
def time_it(some_function):

    """
    Decorator that reports how long a call to ``some_function`` takes.

    The wrapped function's own return value is discarded; the wrapper
    returns a human-readable string containing the elapsed wall-clock
    time in seconds (measured with ``time.time()``).

    Parameters
    ----------
    some_function : callable
        The function to time. It may take any positional or keyword
        arguments; they are forwarded unchanged.

    Returns
    -------
    callable
        A wrapper that runs ``some_function`` and returns the timing
        message as a string.
    """
    # Local import so this cell does not depend on an edit to the import cell.
    import functools

    @functools.wraps(some_function)  # keep the name/docstring of the timed function
    def wrapper(*args, **kwargs):
        t1 = time.time()
        some_function(*args, **kwargs)
        t2 = time.time()
        return "Time it took to run the function: " + str((t2 - t1)) + "\n"
    return wrapper

In [3]:
# iterators

# Epsilon Greedy Iterator
def select_action(possible_actions, iteration, num_actions, num_episode, start_value = 1, end_value = 0.1,
                        num_iterations = 5e6, decay = 0.1,repeat = False):
    """
    Pick an action with an epsilon-greedy policy.

    Epsilon moves linearly from ``start_value`` towards ``end_value`` over
    ``num_iterations`` steps and is additionally scaled by
    ``1 / (num_episode + 1)``. With probability epsilon a uniformly random
    action index in ``[0, num_actions)`` is returned, otherwise the index of
    the largest entry of ``possible_actions``.

    Parameters
    ----------
    possible_actions : array-like
        Scores (e.g. Q-values) per action; ``np.argmax`` is applied to it.
    iteration : int
        Current step counter.
    num_actions : int
        Number of actions to draw from when exploring.
    num_episode : int
        Current episode index, used for the per-episode scaling.
    start_value, end_value : float
        Epsilon at iteration 0 and at/after ``num_iterations``.
    num_iterations : number
        Length of the linear schedule.
    decay : float
        Currently unused (the exponential weighting is disabled).
    repeat : bool
        If true, ``iteration`` wraps around modulo ``num_iterations``.

    Returns
    -------
    (action, epsilon)
        The chosen action index and the epsilon that was used.
    """
    slope = (end_value - start_value) / num_iterations
    # Exponential weighting is switched off, so the weight is a constant 1.
    weight = 1
    per_episode_scale = 1 / (num_episode + 1)

    step = iteration % num_iterations if repeat else iteration

    # Linear schedule while inside the window, constant end value afterwards.
    if step < num_iterations:
        base = step * slope + start_value
    else:
        base = end_value
    epsilon = base * weight * per_episode_scale

    explore = np.random.random() < epsilon
    if explore:
        # Exploration: uniformly random action.
        return np.random.randint(low=0, high=num_actions), epsilon

    # Exploitation: action with the highest score.
    return np.argmax(possible_actions), epsilon
    

In [4]:
#### Setting the working and environment variables

# Game to play with
env_name = 'Breakout-v0'

# Create the game-environment using OpenAI Gym.
env = gym.make(env_name)

# The number of possible actions that the agent may take in every step.
num_actions = env.action_space.n

# getting actions names
# action_names = env.unwrapped.get_action_meanings()


# Height of each image-frame in the state.
state_height = 105

# Width of each image-frame in the state.
state_width = 80

# Number of images in the state.
# (A preceding `state_channels = 2` was overwritten on the very next
# statement and never read, so the dead assignment has been removed.)
# NOTE(review): MotionTracer.get_state() stacks 2 images and buildmodel()
# defaults to img_channels=2, so 4 only affects state_shape and rp_size
# below - confirm which value is intended.
state_channels = 4

# Shape of the state-array.
state_shape = [state_height, state_width, state_channels]

# Set replay memory size based on RAM to be used
# (unit is not stated anywhere in this notebook - TODO confirm).
replay_ram_space = 4

# getting replay size based on RAM space available
rp_size = int(np.ceil((replay_ram_space*1024*1024)/(state_height*state_width*state_channels*100))*100000)


DependencyNotInstalled: No module named 'atari_py'. (HINT: you can install Atari dependencies by running 'pip install gym[atari]'.)

In [None]:
# Implementing Motion tracer for arbitrary channels

def _pre_process_image(image,state_img_size):
    """
    Turn a raw colour frame into a resized gray-scale image.

    Parameters
    ----------
    image : array
        Frame indexed as ``image[row, col, channel]``; channels 0, 1 and 2
        are read as red, green and blue.
    state_img_size : sequence of two ints
        Target ``(height, width)`` handed to ``resize`` as ``output_shape``.

    Returns
    -------
    array
        The weighted gray-scale image resized to ``state_img_size``.
    """
    # Split the frame into its three colour planes.
    red, green, blue = (image[:, :, channel] for channel in range(3))

    # Weighted sum of the colour planes (luma weights 0.299 / 0.587 / 0.114).
    gray = 0.2990 * red + 0.5870 * green + 0.1140 * blue

    # Resize with skimage.transform.resize (order=3, wrap-around edges).
    return resize(gray, output_shape=state_img_size, order = 3, mode='wrap')


class MotionTracer:
    """
    Builds a two-image state (latest frame + motion trace) from game frames.

    Each processed frame is compared with the previous one; pixels whose
    gray value changed by more than 20 are marked with 255 and added to a
    decaying copy of the previous trace, which leaves a fading "tail"
    behind moving objects.

    Parameters
    ----------
    image : array
        First raw frame; it is pre-processed with ``_pre_process_image``.
    decay : float
        Factor applied to the previous trace on every ``process`` call.
    num_images : int
        Channel count used for ``state_shape``.
        NOTE(review): ``get_state`` always stacks exactly 2 images, so
        ``get_inputs`` only works when ``num_images`` is 2 - confirm intent.
    state_height, state_width : int
        Size each frame is resized to.
    """

    def __init__(self, image, decay=0.75, num_images=2, state_height=105, state_width = 80):

        # Size of each image in the state.
        self.state_img_size = np.array([state_height, state_width])
        self.state_shape = [state_height, state_width, num_images]

        # Preprocessing the image
        img = _pre_process_image(image,self.state_img_size)

        # The first frame is the last input; there is no motion trace yet.
        # np.float64 replaces the np.float alias, which no longer exists in
        # current NumPy releases (it was an alias of the builtin float).
        self.last_input = img.astype(np.float64)
        self.last_output = np.zeros_like(img)

        # Storing the inputs to the class
        self.decay = decay
        self.num_images = num_images

    def process(self, image):
        """Consume a new raw frame and return the updated motion trace."""

        img = _pre_process_image(image,self.state_img_size)
        img_dif = img - self.last_input

        # Copy the contents of the input-image to the last input.
        self.last_input[:] = img[:]

        # Binary motion mask: 255 where the pixel changed by more than 20.
        img_motion = np.where(np.abs(img_dif) > 20, 255.0, 0.0)

        # Add some of the previous output. This recurrent formula
        # is what gives the trace / tail.
        output = img_motion + self.decay * self.last_output

        # Ensure the pixel-values are within the allowed bounds.
        output = np.clip(output, 0.0, 255.0)

        # Set the last output.
        self.last_output = output

        return output

    def get_state(self):
        """Return the current state: last input and trace stacked as uint8."""

        # Stack the last input and output images along a new last axis.
        state = np.dstack([self.last_input, self.last_output])

        # Convert to 8-bit integer.
        # This is done to save space in the replay-memory.
        state = state.astype(np.uint8)
        return state

    def get_inputs(self):
        """Return the current state with a leading batch axis of size 1."""
        inputs = np.zeros(shape=[1] + self.state_shape, dtype=np.uint8)
        inputs[0:1] = self.get_state()
        return inputs
    

In [5]:
# Building the neural network architecture

from keras.layers import Conv2D, Dense,Flatten, Dropout
from keras.models import Sequential
from keras.optimizers import Adagrad

def buildmodel(num_actions, batch_size, img_rows=105, img_cols=80, img_channels = 2):
    """
    Build and compile the convolutional Q-network.

    The network is three ``Conv2D`` layers followed by ``Flatten``, four
    ``Dense`` layers (with one ``Dropout``) and a linear output layer with
    one unit per action. It is compiled with mean-squared-error loss and an
    ``Adagrad`` optimiser.

    Parameters
    ----------
    num_actions : int
        Number of units in the output layer.
    batch_size : int
        Accepted for interface compatibility; not used by this function.
    img_rows, img_cols, img_channels : int
        Input shape in channels-last order.

    Returns
    -------
    keras.models.Sequential
        The compiled model.
    """
    # One shared initializer object for every layer that specifies it.
    init = keras.initializers.TruncatedNormal(mean=0.0, stddev=0.02, seed=None)

    # The layers in the order they are stacked.
    layer_stack = [
        Conv2D(filters=16, kernel_size=3, strides=2, padding='same', activation='relu',
               kernel_initializer=init, data_format="channels_last",
               input_shape=(img_rows, img_cols, img_channels)),
        Conv2D(filters=32, kernel_size=3, strides=2, padding='same', activation='relu',
               kernel_initializer=init),
        Conv2D(filters=64, kernel_size=2, strides=1, padding='same', activation='relu',
               kernel_initializer=init),
        Flatten(),
        Dense(1024, activation='relu', kernel_initializer=init),
        Dense(1024, activation='relu', kernel_initializer=init),
        Dropout(0.2),
        Dense(512, activation='relu', kernel_initializer=init),
        Dense(512, activation='relu', kernel_initializer=init),
        # Linear output: one value per action.
        Dense(num_actions, activation=None),
    ]

    model = Sequential()
    for layer in layer_stack:
        model.add(layer)

    optimizer = Adagrad(lr=1e-6)
    model.compile(loss='mse', optimizer=optimizer)
    print("Model is built")
    return model

In [6]:

# One type of Experience replay
class ExperienceReplay(object):
    """
    Bounded replay memory that also builds Q-learning training batches.

    Parameters
    ----------
    max_memory : int
        Maximum number of stored transitions; the oldest one is dropped
        once the limit is exceeded.
    discount : float
        Discount factor (gamma) applied to the next state's best Q-value.
    """

    def __init__(self, max_memory=100, discount=.9):
        self.max_memory = max_memory
        self.memory = list()
        self.discount = discount

    def remember(self, states, game_over):
        """
        Store one transition.

        ``states`` is ``[state_t, action_t, reward_t, state_t+1]`` and
        ``game_over`` flags whether the transition ended the episode.
        """
        # memory[i] = [[state_t, action_t, reward_t, state_t+1], game_over?]
        self.memory.append([states, game_over])
        if len(self.memory) > self.max_memory:
            del self.memory[0]

    def get_batch(self, model, batch_size=10):
        """
        Sample transitions (with replacement) and build training targets.

        Parameters
        ----------
        model : object
            Must expose ``output_shape`` (last entry = number of actions)
            and ``predict(batch)`` returning one row of Q-values per sample.
        batch_size : int
            Upper bound on the number of samples; the batch holds
            ``min(len(memory), batch_size)`` rows.

        Returns
        -------
        (inputs, targets)
            ``inputs`` is a uint8 array of sampled states; ``targets`` holds
            the model's own predictions for those states, except that the
            entry of the action actually taken is replaced by ``reward``
            (terminal transition) or
            ``reward + discount * max_a' Q(s', a')``.
        """
        len_memory = len(self.memory)
        num_actions = model.output_shape[-1]
        env_dim = list(self.memory[0][0][0].shape)
        n_samples = min(len_memory, batch_size)

        inputs = np.zeros(shape=[n_samples] + env_dim, dtype=np.uint8)
        next_inputs = np.zeros(shape=[n_samples] + env_dim, dtype=np.uint8)
        targets = np.zeros((n_samples, num_actions))
        if n_samples == 0:
            return inputs, targets

        actions = np.zeros(n_samples, dtype=np.intp)
        rewards = np.zeros(n_samples, dtype=np.float64)
        terminal = np.zeros(n_samples, dtype=bool)

        # First gather the whole batch ...
        for i, idx in enumerate(np.random.randint(0, len_memory, size=n_samples)):
            (state_t, action_t, reward_t, state_tp1), game_over = self.memory[idx]
            inputs[i] = state_t
            next_inputs[i] = state_tp1
            actions[i] = action_t
            rewards[i] = reward_t
            terminal[i] = game_over

        # ... then predict once per array, so every row uses the Q-values of
        # its OWN state / next state (previously row 0 of a partly filled
        # batch was used for every sample).
        # There should be no target values for actions not taken, so start
        # from the model's current predictions.
        targets[:] = model.predict(inputs)
        next_q_max = np.max(model.predict(next_inputs), axis=1)

        # reward_t for terminal transitions,
        # reward_t + gamma * max_a' Q(s', a') otherwise.
        targets[np.arange(n_samples), actions] = np.where(
            terminal, rewards, rewards + self.discount * next_q_max)
        return inputs, targets
    
#     def get_action_from_state(self, state, model):
#         env_dim = [self.memory[0][0][0].shape[0], self.memory[0][0][0].shape[1], self.memory[0][0][0].shape[2]]
#         inputs = np.zeros(shape=[1] + env_dim, dtype=np.uint8)
#         inputs[0:1]= state
#         return model.predict(inputs)[0]
        
    
# Another type of Experience replay
# Record type for one stored step; field order matters for positional push().
Transition = namedtuple('Transition', ['state', 'action', 'next_state', 'reward'])


class ReplayMemory(object):
    """Fixed-capacity ring buffer of ``Transition`` records."""

    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
        # Index of the slot the next push() writes to.
        self.position = 0

    def push(self, *args):
        """Store a transition, overwriting the oldest one when full."""
        has_room = self.capacity > len(self.memory)
        if has_room:
            # Grow the list by one placeholder slot, filled in just below.
            self.memory.append(None)
        slot = self.position
        self.memory[slot] = Transition(*args)
        # Advance the write cursor, wrapping around at capacity.
        self.position = (slot + 1) % self.capacity

    def sample(self, batch_size):
        """Return ``batch_size`` distinct stored transitions in random order."""
        return random.sample(self.memory, batch_size)

    def __len__(self):
        """Number of transitions currently stored."""
        return len(self.memory)

In [7]:
def run_dqn(env,model,memory,episodes,iterations,batch_size = 32):
    """
    Play episodes in ``env`` and train ``model`` from replayed transitions.

    For every episode the environment is reset and up to ``iterations``
    steps are played with actions chosen by ``select_action``. Each
    transition is stored in ``memory``. After the episode, ``model`` is
    trained on one batch drawn from ``memory`` and a summary line is
    printed. Every 100 episodes the weights are written to ``model.h5`` and
    the architecture to ``model.json``.

    Parameters
    ----------
    env : object
        Environment exposing ``action_space.n``, ``reset()``,
        ``render(...)`` and ``step(action=...)`` returning
        ``(frame, reward, game_over, info)``.
    model : object
        Exposes ``predict``, ``train_on_batch``, ``save_weights`` and
        ``to_json``.
    memory : object
        Exposes ``remember(states, game_over)`` and
        ``get_batch(model, batch_size=...)``.
    episodes : int
        Number of episodes to play.
    iterations : int
        Maximum number of steps per episode.
    batch_size : int
        Batch size requested from ``memory`` for each training step.
    """
    num_action = env.action_space.n
    env.reset()

    for e in range(episodes):
        loss = []
        game_over = False
        env.reset()

        # Get the initial frame; fall back to the observation returned by
        # reset() when rendering to an RGB array is not possible.
        try:
            screen = env.render(mode='rgb_array')
        except Exception:
            screen = env.reset()
            print(screen.shape)
        motion_tracer = MotionTracer(screen)

        netReward = 0
        for itr in range(iterations):
            state = motion_tracer.get_state()
            possible_actions = model.predict(motion_tracer.get_inputs())[0]
            action,_ = select_action(possible_actions=possible_actions, iteration=itr, num_actions=num_action,
                                     num_episode=e, num_iterations=iterations)

            img, reward, game_over, info = env.step(action=action)

            # When the game is over, feed a blank frame shaped like the RAW
            # frame. The previous code passed zeros shaped like the
            # 2-channel state, which made the pre-processing fail on the
            # missing third channel; the bare except then ended the episode
            # without storing the terminal transition or its reward.
            frame = np.zeros_like(img) if game_over else img
            motion_tracer.process(frame)
            next_state = motion_tracer.get_state()

            # Adding to replay memory
            memory.remember([state, action, reward, next_state], game_over)

            netReward += reward
            env.render()
            time.sleep(0.05)

            # Stop stepping an environment that has already finished.
            if game_over:
                break

        inputs, targets = memory.get_batch(model, batch_size=batch_size)
        loss.append(model.train_on_batch(inputs, targets))
        # NOTE(review): the value printed under "Win count" is the episode's
        # summed reward, not a count of wins.
        print("Episode {:02d} | Iteration {:03d} | Loss {:.8f} | Win count {}".format(
             e, itr,np.mean(loss), netReward))

        if (e+1) % 100 == 0:
            print('Saving Model!')
            # Save trained model weights and architecture, this will be used by the visualization code
            model.save_weights("model.h5", overwrite=True)
            # to_json() already returns a JSON string; json.dump stores that
            # string as a JSON value, so a reader has to json.load it first.
            with open("model.json", "w") as outfile:
                json.dump(model.to_json(), outfile)
            
            

In [11]:
# Manual overrides of values that the environment-setup cell computes.
# NOTE(review): num_actions is hard-coded instead of being read from
# env.action_space.n; the network built from it gets 8 outputs - confirm
# this matches the environment's real action count.
num_actions = 8
# NOTE(review): rp_size is passed to ExperienceReplay as max_memory, so the
# replay memory keeps only the 2 most recent transitions and every training
# batch is capped at 2 samples - presumably a debugging value; confirm.
rp_size = 2

In [12]:
# Making Model
model = buildmodel(num_actions,32)

# Making replay memory
memory = ExperienceReplay(rp_size)
# memory = ReplayMemory(rp_size)

run_dqn(env,model,memory,episodes = 1000,iterations =  1000, batch_size = 128 )

Model is built


NameError: name 'env' is not defined