In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # Join the directory being visited with the file name to get the full path.
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:

import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt


In [None]:
# 10x10 maze layout: 1 marks a free cell, 0 marks a wall.
# The grid is stored as floats so that fractional marker values
# (such as the agent mark) can later be written into copies of it.
maze = np.array(
    [
        [1, 0, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 0, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 0, 1, 1, 1, 1],
        [0, 0, 1, 0, 0, 1, 0, 1, 1, 1],
        [1, 1, 0, 1, 0, 1, 0, 0, 0, 1],
        [1, 1, 0, 1, 0, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 0, 1, 1],
    ],
    dtype=float,
)


In [None]:
# Display the total number of cells in the maze array (rows * columns).
maze.size

In [None]:
# Marker values for cells. `agent_mark` is the value written into the agent's
# cell by Qmaze; `visited` is presumably a marker for visited cells, but it is
# not referenced in the code visible here -- TODO confirm.
visited, agent_mark = 0.8, 0.5

# Integer codes for the four moves the agent can make.
LEFT, UP, RIGHT, DOWN = range(4)

# Human-readable name for each action code.
actions = dict(zip((LEFT, UP, RIGHT, DOWN), ('left', 'up', 'right', 'down')))

# NOTE(review): presumably the exploration rate used during training;
# its use is outside the visible part of the notebook -- confirm.
epsilon = 0.1
        

In [None]:
class Qmaze(object):
    """Grid-maze environment in which an agent walks towards a target cell.

    The maze is a 2D array where 0.0 marks a wall and 1.0 marks a free cell.
    The target is always the bottom-right cell. The agent state is a tuple
    ``(row, col, mode)`` where ``mode`` is one of ``'start'``, ``'valid'``,
    ``'invalid'`` or ``'blocked'``.

    Relies on the module-level constants ``agent_mark``, ``LEFT``, ``UP``,
    ``RIGHT`` and ``DOWN``.
    """

    def __init__(self,maze,agent=(0,0)):
        """Build the environment and place the agent.

        Args:
            maze: 2D array-like of 0.0 (wall) / 1.0 (free) values.
            agent: ``(row, col)`` starting cell; must be a free, non-target cell.

        Raises:
            Exception: if the target cell is a wall, or if ``agent`` is not
                one of the free cells.
        """
        self._maze = np.array(maze)
        nrows, ncols = self._maze.shape
        self.target = (nrows-1, ncols-1)
        # Validate the target BEFORE building the free-cell list: the original
        # order removed the target from the list first, so a blocked target
        # surfaced as an unrelated ValueError from list.remove().
        if self._maze[self.target] == 0.0:
            raise Exception("Target cell cannot be blocked in the maze.")
        # The target cell is not counted as a free cell.
        self.free_cells = [
            (r, c)
            for r in range(nrows)
            for c in range(ncols)
            if self._maze[r, c] == 1.0 and (r, c) != self.target
        ]
        if agent not in self.free_cells:
            raise Exception("Agent must be in a free cell")
        self.reset(agent)

    def reset(self, agent):
        """Restore the pristine maze and put the agent at ``agent`` (row, col)."""
        self.agent = agent
        self.maze = np.copy(self._maze)
        row, col = agent  # position of agent
        self.maze[row, col] = agent_mark
        self.state = (row, col, 'start')
        # The game is lost once total_reward drops below this threshold.
        self.min_reward = -50
        self.total_reward = 0
        self.visited = set()

    def update_state(self, action):
        """Apply ``action`` and update ``self.state``.

        The current cell is recorded as visited. If no move is possible the
        mode becomes ``'blocked'``; if ``action`` is allowed the agent moves
        and the mode becomes ``'valid'``; otherwise the agent stays in place
        and the mode becomes ``'invalid'``.
        """
        agent_row, agent_col, mode = self.state
        nrow, ncol, nmode = agent_row, agent_col, mode
        if self.maze[agent_row, agent_col] > 0.0:
            self.visited.add((agent_row, agent_col))  # marking visited cell

        valid_actions = self.valid_actions()
        if not valid_actions:
            nmode = 'blocked'
        elif action in valid_actions:
            nmode = 'valid'
            if action == LEFT:
                ncol -= 1
            elif action == UP:
                nrow -= 1
            elif action == RIGHT:
                ncol += 1
            elif action == DOWN:
                nrow += 1
        else:
            # Invalid action: position is unchanged. Previously this branch
            # hung off `if action == DOWN` and assigned the unused local
            # `mode`, so the 'invalid' mode was never recorded.
            nmode = 'invalid'
        self.state = (nrow, ncol, nmode)  # updating the state to new value

    def get_reward(self):
        """Return the reward for the current state.

        Returns:
            1.0 on the target cell; ``min_reward - 1`` when blocked; -0.75
            after an invalid move; -0.25 when standing on an already visited
            cell; -0.04 after a valid move to a new cell. Falls through to
            ``None`` only for a mode outside those handled (e.g. ``'start'``
            before any move has been made).
        """
        agent_row, agent_col, mode = self.state
        if (agent_row, agent_col) == self.target:
            return 1.0
        if mode == 'blocked':
            return self.min_reward - 1
        # Checked before the visited test: after an invalid move the agent is
        # still on its (already visited) cell, so the visited test would
        # otherwise always win and the invalid penalty would be unreachable.
        if mode == 'invalid':
            return -0.75
        if (agent_row, agent_col) in self.visited:
            return -0.25
        if mode == 'valid':
            return -0.04

    def act(self,action):
        """Perform ``action`` and return ``(envstate, reward, status)``."""
        self.update_state(action)
        reward = self.get_reward()
        self.total_reward += reward
        status = self.game_status()
        envstate = self.observe()
        return envstate, reward, status

    def observe(self):
        """Return the drawn environment flattened to shape ``(1, nrows*ncols)``."""
        canvas = self.draw_env()
        envstate = canvas.reshape((1, -1))
        return envstate

    def draw_env(self):
        """Return a copy of the maze with only walls, free cells and the agent."""
        canvas = np.copy(self.maze)
        # Clear pre-existing marks: every non-wall cell becomes a plain free cell.
        canvas[canvas > 0.0] = 1.0
        # Draw the agent at its current position.
        row, col, _ = self.state
        canvas[row, col] = agent_mark
        return canvas

    def game_status(self):
        """Return ``'lose'``, ``'win'`` or ``'not over'`` for the current state."""
        if self.total_reward < self.min_reward:
            return 'lose'
        agent_row, agent_col, _ = self.state
        if (agent_row, agent_col) == self.target:
            return 'win'

        return 'not over'

    def valid_actions(self, cell= None):
        """List the action codes that lead to an in-bounds, non-wall cell.

        Args:
            cell: optional ``(row, col)``; defaults to the agent's position.

        Returns:
            Action codes in ascending order (LEFT, UP, RIGHT, DOWN).
        """
        if cell is None:
            row, col, _ = self.state
        else:
            row, col = cell
        nrows, ncols = self.maze.shape
        # Checking every direction independently also handles single-row and
        # single-column mazes, where the original `elif` border tests left the
        # opposite out-of-bounds move in the list.
        offsets = ((LEFT, 0, -1), (UP, -1, 0), (RIGHT, 0, 1), (DOWN, 1, 0))
        allowed = []
        for action, d_row, d_col in offsets:
            n_row, n_col = row + d_row, col + d_col
            in_bounds = 0 <= n_row < nrows and 0 <= n_col < ncols
            if in_bounds and self.maze[n_row, n_col] != 0.0:
                allowed.append(action)
        return allowed
        
        
        

In [None]:
def show(qmaze):
    """Draw the maze on the current matplotlib axes and return the image.

    Visited cells, the agent's cell and the target (bottom-right) cell are
    overwritten with distinct grey levels before rendering with a gray colormap.

    Args:
        qmaze: environment exposing ``maze`` (2D array), ``visited``
            (iterable of ``(row, col)``) and ``state`` (``(row, col, mode)``).

    Returns:
        The image object returned by ``plt.imshow``.
    """
    plt.grid(True)
    nrows, ncols = qmaze.maze.shape
    ax = plt.gca()
    # Grid lines sit on cell borders. The x axis spans columns and the y axis
    # spans rows (the original used nrows for both, which is wrong for
    # non-square mazes).
    ax.set_xticks(np.arange(0.5, ncols, 1))
    ax.set_yticks(np.arange(0.5, nrows, 1))
    ax.set_xticklabels([])
    ax.set_yticklabels([])
    canvas = np.copy(qmaze.maze)
    for row, col in qmaze.visited:
        canvas[row, col] = 0.6  # visited cell
    agent_row, agent_col, _ = qmaze.state
    canvas[agent_row, agent_col] = 0.3   # agent cell
    canvas[nrows-1, ncols-1] = 0.9 # target cell
    img = plt.imshow(canvas, interpolation='none', cmap='gray')
    return img

In [None]:
# Quick check of the environment: build it with the agent at the default (0, 0) cell.
qmaze = Qmaze(maze)
# act() returns (envstate, reward, status): `canvas` receives the flattened
# environment state and `game_over` receives the status string
# ('win', 'lose' or 'not over'), not a boolean.
canvas, reward, game_over = qmaze.act(DOWN)
print("reward = ", reward)

In [None]:
def play_game(model, qmaze, agent_cell):
    """Play one episode greedily with ``model`` starting from ``agent_cell``.

    At every step the action with the highest predicted value is taken.

    Args:
        model: object whose ``predict(envstate)`` returns a batch of action
            values; row 0 is used.
        qmaze: environment providing ``reset``, ``observe`` and ``act``.
        agent_cell: starting ``(row, col)`` passed to ``qmaze.reset``.

    Returns:
        True when the episode ends with status ``'win'``, False on ``'lose'``.
    """
    final_result = {'win': True, 'lose': False}
    qmaze.reset(agent_cell)
    envstate = qmaze.observe()
    while True:
        # Choose the greedy action for the current observation.
        q_values = model.predict(envstate)
        best_action = np.argmax(q_values[0])

        # Apply it; the reward is not needed when merely playing.
        envstate, _reward, game_status = qmaze.act(best_action)
        if game_status in final_result:
            return final_result[game_status]


In [None]:
def completion_check(model, qmaze):
    """Check whether ``model`` wins the game from every free cell.

    Cells are tried in ``qmaze.free_cells`` order and the check stops at the
    first failure: a cell with no valid actions, or a game that is not won.

    Returns:
        True if every free cell has a valid action and leads to a win,
        False otherwise.
    """
    return all(
        qmaze.valid_actions(start_cell) and play_game(model, qmaze, start_cell)
        for start_cell in qmaze.free_cells
    )

In [None]:
class Experience(object):
    def __init__(self, model, max_memory = 100, discount = 0.95):
        self.model = model
        self.max_memory - max_memory
        self.discount = discount
        self.memory = list()
        self.num_actions = model.output_shape[-1]
    
    def predict(self,envstate):
        return self.model.predict(envstate)[0]
    
    def get_data(self, data_size = 10):
        env_size = self.memory[0][0].shape[1]
        mem_size = len(self.memory)