### Import Libraries
#### Run `pip install pyamaze` before executing this notebook if the package is not already installed

In [1]:
from pyamaze import maze, agent, COLOR, textLabel
import tracemalloc as memory_trace
import time
from IPython.display import display
import pandas as pd
import copy

### Implement MDP Value Iteration Algorithm Functionality

In [2]:
class MDP_Value_Iteration_Search:
    """Solve a square pyamaze maze with value iteration and report run statistics.

    Cells are addressed as ``(row, column)`` tuples. The search starts at
    ``(maze_size, maze_size)`` and ends at the goal cell reported by the
    loaded maze. Typical use is to construct the object with the maze size and
    call :meth:`execute_mdp_value_iteration_search`.
    """

    # Row/column offset applied to a (row, column) cell when moving one step
    # in each direction. Iteration order (N, S, E, W) matches the sweep order.
    _DIRECTION_OFFSETS = {'N': (-1, 0), 'S': (1, 0), 'E': (0, 1), 'W': (0, -1)}

    def __init__(self, maze_size):
        """Remember the side length of the (square) maze to be solved."""
        self.maze_size = maze_size

    def load_maze(self):
        """Load ``Maze_<size>X<size>.csv`` (relative path) into a new pyamaze maze."""
        m = maze()
        maze_name = 'Maze_' + str(self.maze_size) + 'X' + str(self.maze_size)
        m.CreateMaze(loadMaze=maze_name + '.csv')
        return m

    def start_memory_tracing(self):
        """Restart tracemalloc so the peak counter only covers the coming run."""
        memory_trace.stop()
        memory_trace.start()

    def stop_memory_tracing(self):
        """Return ``(current, peak)`` traced bytes and switch tracing off.

        Tracing is stopped after the counters are read so that later notebook
        cells do not keep paying the tracemalloc overhead.
        """
        memory_size, memory_peak = memory_trace.get_traced_memory()
        memory_trace.stop()
        return memory_size, memory_peak

    def initialize_maze(self):
        """Load the maze and record its goal cell and the fixed start cell."""
        self.maze = self.load_maze()
        self.goal_node = self.maze._goal
        self.start_node = (self.maze_size, self.maze_size)

    def initialise_cost(self):
        """Create the value, reward and per-direction tables for a fresh run.

        The goal cell starts with value 10 and reward 100; every other cell
        starts at 0. ``transition_dictionary`` mirrors the shape of the maze's
        ``maze_map`` with every direction set to 0.
        """
        goal = self.maze._goal
        self.transition_value = {node: 10 if node == goal else 0 for node in self.maze.grid}
        self.transition_reward = {node: 100 if node == goal else 0 for node in self.maze.grid}

        # Same keys as maze_map, every entry zeroed; built fresh so the maze's
        # own wall map is never mutated.
        self.transition_dictionary = {
            node: {direction: 0 for direction in walls}
            for node, walls in self.maze.maze_map.items()
        }

        # Multiplier applied to each direction's backed-up value (all 1 here).
        self.initial_transition_value = {'N': 1, 'S': 1, 'E': 1, 'W': 1}

        self.gamma = 0.9            # discount factor
        self.threshold = 0.000001   # per-cell change below which a value is left as-is

    @staticmethod
    def _move(node, direction):
        """Return the (row, column) cell one step from ``node`` in ``direction``."""
        row_step, column_step = MDP_Value_Iteration_Search._DIRECTION_OFFSETS[direction]
        return (node[0] + row_step, node[1] + column_step)

    def _run_value_iteration(self):
        """Sweep all cells until no cell's value changes by more than the threshold."""
        has_value_converged = False

        while not has_value_converged:
            has_value_converged = True

            for current_node in self.maze.grid:
                open_sides = self.maze.maze_map[current_node]
                candidate_values = []

                for direction in self._DIRECTION_OFFSETS:
                    if open_sides[direction] != 1:
                        continue  # wall on this side

                    next_node = self._move(current_node, direction)
                    next_transtion_value = self.initial_transition_value[direction] * (
                        self.transition_reward[current_node]
                        + self.transition_value[next_node] * self.gamma
                    )
                    candidate_values.append(next_transtion_value)
                    self.transition_dictionary[current_node][direction] = next_transtion_value

                # A fully walled-in cell has nothing to back up from; leave its
                # value untouched instead of calling max() on an empty list.
                if not candidate_values:
                    continue

                best_transtion_value = max(candidate_values)
                if abs(best_transtion_value - self.transition_value[current_node]) > self.threshold:
                    has_value_converged = False
                    self.transition_value[current_node] = best_transtion_value

    def execute_mdp_value_iteration_search(self):
        """Run value iteration on the maze, display the path and return statistics.

        Returns:
            pandas.DataFrame: one row with the columns 'Maze Size',
            'Time Taken (in ms)', 'Memory Consumed (in MB)' and
            'Number of Cell in Shortest Path to Goal'.

        Raises:
            ValueError: if the greedy policy never reaches the goal
                (see :meth:`find_goal_nodes`).
        """
        self.initialize_maze()
        self.initialise_cost()

        # perf_counter is a monotonic, high-resolution clock meant for
        # measuring elapsed time.
        start_time = time.perf_counter() * 1000
        self.start_memory_tracing()
        try:
            self._run_value_iteration()
            time_taken = time.perf_counter() * 1000 - start_time
        finally:
            # Always switch tracing off again, even if the sweep failed.
            _, memory_peak = self.stop_memory_tracing()

        memory_consumed = round((memory_peak / (1024 * 1024)), 3)
        goal_nodes = self.find_goal_nodes(self.transition_dictionary, self.start_node, self.goal_node)
        # goal_nodes has one entry per step, so the path has one more cell.
        number_of_cells = len(goal_nodes) + 1

        self.display_mdp_value_iteration_path(goal_nodes, time_taken, memory_consumed, number_of_cells)

        statistics_columns = ['Maze Size', 'Time Taken (in ms)', 'Memory Consumed (in MB)',
                              'Number of Cell in Shortest Path to Goal']
        statistics_dict = {
            'Maze Size': str(self.maze_size) + 'X' + str(self.maze_size),
            'Time Taken (in ms)': time_taken,
            'Memory Consumed (in MB)': memory_consumed,
            'Number of Cell in Shortest Path to Goal': number_of_cells,
        }

        # DataFrame.append was removed in pandas 2.0; build the single row directly.
        return pd.DataFrame([statistics_dict], columns=statistics_columns)

    def find_goal_nodes(self, transition_dictionary, start_node, goal_node):
        """Follow the best direction of every cell from start to goal.

        Args:
            transition_dictionary: mapping ``cell -> {direction: value}``.
            start_node: cell to start walking from.
            goal_node: cell at which the walk stops.

        Returns:
            dict: ``{cell: next_cell}`` for every step taken, in walk order.

        Raises:
            ValueError: if the walk returns to a cell it has already left,
                which means the greedy policy can never reach the goal.
        """
        goal_nodes = {}
        current_node = start_node

        while current_node != goal_node:
            if current_node in goal_nodes:
                # The policy is deterministic, so revisiting a cell would
                # repeat the same steps forever.
                raise ValueError(
                    f'Greedy policy revisits cell {current_node} before reaching goal {goal_node}'
                )

            best_transition_policy = self.find_best_transition_direction(transition_dictionary[current_node])
            print(f'\nCurrent Cell: {current_node}, Best Transition State for this cell: {transition_dictionary[current_node]}, Best Transition Direction: {best_transition_policy}')

            next_node = self._move(current_node, best_transition_policy)
            goal_nodes[current_node] = next_node
            current_node = next_node

        return goal_nodes

    def find_best_transition_direction(self, current_node):
        """Return the direction key with the largest value (first one wins ties).

        Args:
            current_node: the ``{direction: value}`` mapping of one cell.
        """
        return max(current_node, key=current_node.get)

    def display_mdp_value_iteration_path(self, goal_nodes, time_taken, memory_consumed, len_goal_nodes):
        """Trace the found path in the pyamaze view and label it with the run statistics."""
        goal_path = agent(self.maze, x=self.maze_size, y=self.maze_size, footprints=True, color=COLOR.cyan)
        self.maze.tracePath({goal_path: goal_nodes}, delay=100)

        textLabel(self.maze, 'Maze Size ', str(self.maze_size) + 'X' + str(self.maze_size))
        textLabel(self.maze, 'Time Taken (in ms) ', time_taken)
        textLabel(self.maze, 'Memory Consumed (in MB) ', memory_consumed)
        textLabel(self.maze, 'Number of Cell in Shortest Path to Goal ', len_goal_nodes)

        self.maze.run()

### Executing MDP Value Iteration for Maze Size 20 X 20

In [3]:
# Solve the 20 x 20 maze (loaded from Maze_20X20.csv) and collect the one-row statistics table.
mdp_value_iteration_20 = MDP_Value_Iteration_Search(20)

statistics = mdp_value_iteration_20.execute_mdp_value_iteration_search()

# Styler.applymap is deprecated since pandas 2.1 in favour of Styler.map;
# use whichever name the installed pandas provides.
statistics_styler = statistics.style
style_each_cell = statistics_styler.map if hasattr(statistics_styler, 'map') else statistics_styler.applymap
statistics = style_each_cell(lambda x:'white-space:nowrap')
display(statistics)



Current Cell: (20, 20), Best Transition State for this cell: {'E': 0, 'W': 0.50262952954014, 'N': 0.6205313753175258, 'S': 0}, Best Transition Direction: N

Current Cell: (19, 20), Best Transition State for this cell: {'E': 0, 'W': 0.6894802886495355, 'N': 0.5584782377857732, 'S': 0.5584782377857732}, Best Transition Direction: W

Current Cell: (19, 19), Best Transition State for this cell: {'E': 0.6205313753175258, 'W': 0.766089209610595, 'N': 0, 'S': 0}, Best Transition Direction: W

Current Cell: (19, 18), Best Transition State for this cell: {'E': 0.6894802886495355, 'W': 0, 'N': 0.8512112156418341, 'S': 0}, Best Transition Direction: N

Current Cell: (18, 18), Best Transition State for this cell: {'E': 0, 'W': 0, 'N': 0.9457902396020379, 'S': 0.766089209610595}, Best Transition Direction: N

Current Cell: (17, 18), Best Transition State for this cell: {'E': 1.0508790267434387, 'W': 0, 'N': 0, 'S': 0.8512112156418341}, Best Transition Direction: E

Current Cell: (17, 19), Best Tra

Unnamed: 0,Maze Size,Time Taken (in ms),Memory Consumed (in MB),Number of Cell in Shortest Path to Goal
0,20X20,136.666992,0.029,65


### Executing MDP Value Iteration for Maze Size 30 X 30

In [4]:
# Solve the 30 x 30 maze (loaded from Maze_30X30.csv) and collect the one-row statistics table.
mdp_value_iteration_30 = MDP_Value_Iteration_Search(30)

statistics = mdp_value_iteration_30.execute_mdp_value_iteration_search()

# Styler.applymap is deprecated since pandas 2.1 in favour of Styler.map;
# use whichever name the installed pandas provides.
statistics_styler = statistics.style
style_each_cell = statistics_styler.map if hasattr(statistics_styler, 'map') else statistics_styler.applymap
statistics = style_each_cell(lambda x:'white-space:nowrap')
display(statistics)



Current Cell: (30, 30), Best Transition State for this cell: {'E': 0, 'W': 0.013975028535276934, 'N': 0.01725421358312726, 'S': 0}, Best Transition Direction: N

Current Cell: (29, 30), Best Transition State for this cell: {'E': 0, 'W': 0.019172331166870445, 'N': 0, 'S': 0.015528792224814533}, Best Transition Direction: W

Current Cell: (29, 29), Best Transition State for this cell: {'E': 0.01725421358312726, 'W': 0.021302590185411606, 'N': 0, 'S': 0}, Best Transition Direction: W

Current Cell: (29, 28), Best Transition State for this cell: {'E': 0.019172331166870445, 'W': 0, 'N': 0.023670527391630815, 'S': 0}, Best Transition Direction: N

Current Cell: (28, 28), Best Transition State for this cell: {'E': 0.026300585990700905, 'W': 0, 'N': 0, 'S': 0.021302590185411606}, Best Transition Direction: E

Current Cell: (28, 29), Best Transition State for this cell: {'E': 0.029223856064174485, 'W': 0.023670527391630815, 'N': 0, 'S': 0}, Best Transition Direction: E

Current Cell: (28, 30),

Unnamed: 0,Maze Size,Time Taken (in ms),Memory Consumed (in MB),Number of Cell in Shortest Path to Goal
0,30X30,331.145264,0.064,99


### Executing MDP Value Iteration for Maze Size 40 X 40

In [5]:
# Solve the 40 x 40 maze (loaded from Maze_40X40.csv) and collect the one-row statistics table.
mdp_value_iteration_40 = MDP_Value_Iteration_Search(40)

statistics = mdp_value_iteration_40.execute_mdp_value_iteration_search()

# Styler.applymap is deprecated since pandas 2.1 in favour of Styler.map;
# use whichever name the installed pandas provides.
statistics_styler = statistics.style
style_each_cell = statistics_styler.map if hasattr(statistics_styler, 'map') else statistics_styler.applymap
statistics = style_each_cell(lambda x:'white-space:nowrap')
display(statistics)



Current Cell: (40, 40), Best Transition State for this cell: {'E': 0, 'W': 0.0008985837170999086, 'N': 0.0007269683437947913, 'S': 0}, Best Transition Direction: W

Current Cell: (40, 39), Best Transition State for this cell: {'E': 0.0008087253453899178, 'W': 0.0008087253453899178, 'N': 0.000999409093506714, 'S': 0}, Best Transition Direction: N

Current Cell: (39, 39), Best Transition State for this cell: {'E': 0, 'W': 0.0011104545483407932, 'N': 0, 'S': 0.0008985837170999086}, Best Transition Direction: W

Current Cell: (39, 38), Best Transition State for this cell: {'E': 0.000999409093506714, 'W': 0.0012348211282188109, 'N': 0, 'S': 0}, Best Transition Direction: W

Current Cell: (39, 37), Best Transition State for this cell: {'E': 0.0011104545483407932, 'W': 0.0011104545483407932, 'N': 0.0011104545483407932, 'S': 0.0013720234757986787}, Best Transition Direction: S

Current Cell: (40, 37), Best Transition State for this cell: {'E': 0, 'W': 0.0015254532698386802, 'N': 0.00123482112

Unnamed: 0,Maze Size,Time Taken (in ms),Memory Consumed (in MB),Number of Cell in Shortest Path to Goal
0,40X40,606.377197,0.112,127
