<a href="https://colab.research.google.com/github/samp3209/personalprojects/blob/main/ex2_table.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import random
import numpy as np
import tqdm.notebook as tqdm
import matplotlib.pyplot as plt
import ipywidgets as widgets
from tqdm import tqdm


In [22]:
# FOUR ROOM ENVIRONMENT
class FourRooms(object):
    def __init__(self):
        # define the four room as a 2-D array for easy state space reference and visualization
        # 0 represents an empty cell; 1 represents a wall cell
        self.four_room_space = np.array([[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
                                         [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
                                         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                                         [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
                                         [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
                                         [1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0],
                                         [0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1],
                                         [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
                                         [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
                                         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                                         [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]])

        # find the positions for all empty cells
        # not that: the origin for a 2-D numpy array is located at top-left while the origin for the FourRooms is at
        # the bottom-left. The following codes performs the re-projection.
        empty_cells = np.where(self.four_room_space == 0.0)
        self.state_space = [[col, 10 - row] for row, col in zip(empty_cells[0], empty_cells[1])]

        # define the action space
        self.action_space = {'LEFT': np.array([-1, 0]),
                             'RIGHT': np.array([1, 0]),
                             'DOWN': np.array([0, -1]),
                             'UP': np.array([0, 1])}

        # define the start state
        self.start_state = [0, 0]

        # define the goal state
        self.goal_state = [10, 10]


    def reset(self):
        """
        Reset the agent's state to the start state [0, 0]
        Return both the start state and reward
        """
        state = self.start_state  # reset the agent to [0, 0]
        reward = 0  # reward is 0
        return state, reward


    def step(self, state, act):
        """
        Args:
            state: a list variable containing x, y integer coordinates. (i.e., [1, 1]).
            act: a string variable (i.e., "UP"). All feasible values are ["UP", "DOWN", "LEFT", "RIGHT"].
        Output args:
            next_state: a list variable containing x, y integer coordinates (i.e., [1, 1])
            reward: an integer. it can be either 0 or 1.
        """

        # CODE HERE: implement the stochastic dynamics as described in Q1.
        # Please note, we provide you with the deterministic transition function "take_action" below.
        # Therefore, you only have to implement the logics of the stochasticity.
        perpendicular_actions = {
          'UP': ['LEFT', 'RIGHT'],
          'DOWN': ['LEFT', 'RIGHT'],
          'LEFT': ['UP', 'DOWN'],
          'RIGHT': ['UP', 'DOWN']
      }
        if np.random.random() < 0.8:
          next_state = self.take_action(state, act)
        else:
          perpendicular_action = np.random.choice(perpendicular_actions[act])
          next_state = self.take_action(state, perpendicular_action)

        # CODE HERE: compute the reward based on the resulting state
        reward = 1 if next_state == self.goal_state else 0


        # return the current state, reward
        return next_state, reward


    """ DO NOT CHANGE BELOW """
    def take_action(self, state, act):
        """
        Input args:
            state (list): a list variable containing x, y integer coordinates. (i.e., [1, 1]).
            act (string): a string variable (i.e., "UP"). All feasible values are ["UP", "DOWN", "LEFT", "RIGHT"].
        Output args:
            next_state (list): a list variable containing x, y integer coordinates (i.e., [1, 1])
        """
        state = np.array(state)
        next_state = state + self.action_space[act]
        return next_state.tolist() if next_state.tolist() in self.state_space else state.tolist()


    def build_table(self):
      table = []
      perpendicular_actions = {
              'UP': ['LEFT', 'RIGHT'],
              'DOWN': ['LEFT', 'RIGHT'],
              'LEFT': ['UP', 'DOWN'],
              'RIGHT': ['UP', 'DOWN']
          }
      for s in self.state_space:
        if s == self.goal_state:
          continue

        for a in self.action_space:
          sprime = self.take_action(s, a)
          rprime = 1 if sprime == self.goal_state else 0
          table.append({'s':s,'a': a, 's_prime': sprime, 'r': rprime, 'p':0.8})

          for a in perpendicular_actions:
            sprime_perpendicular = self.take_action(s, a)
            rprime_perpendicular = 1 if sprime_perpendicular == self.goal_state else 0
            table.append({'s':s,'a': a, 's_prime': sprime, 'r': rprime, 'p':0.1})

      return table


In [25]:
import pandas as pd
env = FourRooms()
table = env.build_table()
df = pd.DataFrame(table, columns=['s', 'a', 's_prime', 'r', 'p'])
df = df.map(lambda x: tuple(x) if isinstance(x, list) else x)
df.drop_duplicates(inplace=True)
df.head()

Unnamed: 0,s,a,s_prime,r,p
0,"(0, 10)",LEFT,"(0, 10)",0,0.8
1,"(0, 10)",UP,"(0, 10)",0,0.1
2,"(0, 10)",DOWN,"(0, 10)",0,0.1
3,"(0, 10)",LEFT,"(0, 10)",0,0.1
4,"(0, 10)",RIGHT,"(0, 10)",0,0.1


In [26]:
df.to_csv('four_rooms_table.csv', index=False)