In [10]:
import itertools
import numpy as np 
from visualizeEnvironment import *

In [6]:
# Epsilon transition: the agent's action takes effect with probability 1 - epsilon,
# and the agent stays in its current state with probability epsilon.
# The transition is deterministic when epsilon = 0.

class SetupEpsilonTransition(object):
    """Factory for epsilon-noisy transition tables over a set of states.

    An instance stores the collection of valid states and the collection of
    actions. Calling it with an ``epsilon`` returns a nested dict
    ``{state: {action: {resultingState: probability}}}``.
    """

    def __init__(self, stateSet, actionSet):
        self.stateSet = stateSet
        self.actionSet = actionSet

    def __call__(self, epsilon=0):
        """Return the transition table for every (state, action) pair.

        ``epsilon`` is the probability that a move which would change the
        state has no effect; it defaults to 0.
        """
        table = {}
        for state in self.stateSet:
            perAction = {}
            for action in self.actionSet:
                perAction[action] = self.getStateActionTransition(state, action, epsilon)
            table[state] = perAction
        return table

    def getStateActionTransition(self, currentState, action, epsilon):
        """Return ``{resultingState: probability}`` for one state/action pair."""
        landing = self.getNextState(currentState, action)
        # A move that leaves the agent in place, or a noise level of zero,
        # has a single certain outcome.
        if currentState == landing or epsilon == 0:
            return {landing: 1}
        # Otherwise the move succeeds with 1 - epsilon and fails with epsilon.
        return {landing: 1 - epsilon, currentState: epsilon}

    def getNextState(self, state, action):
        """Return ``state`` shifted by ``action``, or ``state`` itself when
        the shifted position is not one of the known states."""
        candidate = tuple(state[axis] + action[axis] for axis in range(len(state)))
        return candidate if candidate in self.stateSet else state

In [7]:
# Action vectors, in order: E, N, W, S, Stay
allActions = [(1, 0), (0, 1), (-1, 0), (0, -1), (0, 0)]

# Grid dimensions and the full set of grid locations
gridWidth, gridHeight = 5, 5
gridSet = set(itertools.product(range(gridWidth), range(gridHeight)))

# Locations removed from the grid to form environment C
barriersC = {(2, 1), (3, 1), (4, 1)}

# State set of environment C: every grid location that is not a barrier
stateSetC = list(gridSet - barriersC)

# Goal locations
goal1, goal2 = (0, 4), (4, 4)

# Probability that an action has no effect
epsilon = 0.1

In [8]:
# Transition-table factory over environment C's states and the five actions
getTransition = SetupEpsilonTransition(stateSet=stateSetC, actionSet=allActions)

# Transition table for environment C at the configured noise level
environmentC = getTransition(epsilon=epsilon)

In [9]:
# Show the nested transition table: state -> action -> {resultingState: probability}
environmentC

{(3, 2): {(1, 0): {(4, 2): 0.9, (3, 2): 0.1},
  (0, 1): {(3, 3): 0.9, (3, 2): 0.1},
  (-1, 0): {(2, 2): 0.9, (3, 2): 0.1},
  (0, -1): {(3, 2): 1},
  (0, 0): {(3, 2): 1}},
 (1, 3): {(1, 0): {(2, 3): 0.9, (1, 3): 0.1},
  (0, 1): {(1, 4): 0.9, (1, 3): 0.1},
  (-1, 0): {(0, 3): 0.9, (1, 3): 0.1},
  (0, -1): {(1, 2): 0.9, (1, 3): 0.1},
  (0, 0): {(1, 3): 1}},
 (0, 0): {(1, 0): {(1, 0): 0.9, (0, 0): 0.1},
  (0, 1): {(0, 1): 0.9, (0, 0): 0.1},
  (-1, 0): {(0, 0): 1},
  (0, -1): {(0, 0): 1},
  (0, 0): {(0, 0): 1}},
 (3, 0): {(1, 0): {(4, 0): 0.9, (3, 0): 0.1},
  (0, 1): {(3, 0): 1},
  (-1, 0): {(2, 0): 0.9, (3, 0): 0.1},
  (0, -1): {(3, 0): 1},
  (0, 0): {(3, 0): 1}},
 (1, 4): {(1, 0): {(2, 4): 0.9, (1, 4): 0.1},
  (0, 1): {(1, 4): 1},
  (-1, 0): {(0, 4): 0.9, (1, 4): 0.1},
  (0, -1): {(1, 3): 0.9, (1, 4): 0.1},
  (0, 0): {(1, 4): 1}},
 (1, 1): {(1, 0): {(1, 1): 1},
  (0, 1): {(1, 2): 0.9, (1, 1): 0.1},
  (-1, 0): {(0, 1): 0.9, (1, 1): 0.1},
  (0, -1): {(1, 0): 0.9, (1, 1): 0.1},
  (0, 0): {(1

In [None]:
class SetupDeterministicTransitionWithBarrier(object):
    """Factory for deterministic transition tables with blocked transitions.

    An instance stores the collection of valid states and the collection of
    actions. Calling it returns a nested dict
    ``{state: {action: {resultingState: 1}}}``: every action has exactly one
    outcome. A move leaves the agent in place when the shifted position is
    not a known state, or when the (state, nextState) transition is listed
    as a barrier.
    """

    def __init__(self, stateSet, actionSet):
        """Store the state and action collections.

        The first positional argument has always been stored as
        ``self.stateSet`` and the second as ``self.actionSet``; the parameter
        names previously said the opposite, so keyword callers got the two
        collections crossed. Positional behavior is unchanged.
        """
        self.stateSet = stateSet
        self.actionSet = actionSet

    def __call__(self, stateNextStateTransitionBarriers=()):
        """Return the transition table for every (state, action) pair.

        ``stateNextStateTransitionBarriers`` is assumed to be an iterable of
        ``(state, nextState)`` pairs naming transitions that are blocked in
        that direction -- NOTE(review): confirm this shape against the
        intended caller. ``None`` or an empty iterable means no barriers.
        """
        barriers = self._normalizeBarriers(stateNextStateTransitionBarriers)
        transitionTable = {state: self.getStateTransition(state, barriers)
                           for state in self.stateSet}
        return transitionTable

    def getStateTransition(self, state, transitionBarriers=frozenset()):
        """Return ``{action: {resultingState: 1}}`` for one state."""
        actionTransitionDistribution = {
            action: self.getStateActionTransition(state, action, transitionBarriers)
            for action in self.actionSet
        }
        return actionTransitionDistribution

    def getStateActionTransition(self, currentState, action, transitionBarriers=frozenset()):
        """Return the single-outcome distribution for one state/action pair.

        The transition is deterministic, so the resulting state always has
        probability 1 (the earlier version read an undefined ``self.epsilon``
        and raised ``AttributeError`` on every successful move).
        """
        nextState = self.getNextState(currentState, action, transitionBarriers)
        return {nextState: 1}

    def getNextState(self, state, action, transitionBarriers=frozenset()):
        """Return ``state`` shifted by ``action``, or ``state`` itself when
        the move leaves the known states or crosses a listed barrier."""
        potentialNextState = tuple(state[i] + action[i] for i in range(len(state)))
        if potentialNextState not in self.stateSet:
            return state
        # Barriers are directed: only the listed (from, to) pair is blocked.
        if (tuple(state), potentialNextState) in transitionBarriers:
            return state
        return potentialNextState

    @staticmethod
    def _normalizeBarriers(barriers):
        """Convert an iterable of (state, nextState) pairs into a frozenset of
        tuple pairs so membership checks are hashable and fast."""
        if barriers is None:
            return frozenset()
        return frozenset((tuple(fromState), tuple(toState))
                         for fromState, toState in barriers)