In [3]:
import numpy as np

In [22]:
# --- State space sizes -------------------------------------------------
# Agent and target each occupy one cell of a 3x3 grid; the call flag is binary.
AGENT_STATES = 3*3
TARGET_STATES = 3*3
CALL_STATES = 2

# --- Action space sizes ------------------------------------------------
AGENT_ACTIONS = 5
TARGET_ACTIONS = 5
CALL_ACTIONS = 2 # (IF ON->(ON, OFF)  OFF->(OFF, ON)) 0-> OFF, 1 -> ON

# --- Valid values for each state component (used for validation) --------
AGENT_STATE_VALUES = tuple(range(AGENT_STATES))
TARGET_STATE_VALUES = tuple(range(TARGET_STATES))
CALL_STATE_VALUES = tuple(range(CALL_STATES))

# --- Valid values for each action component (used for validation) -------
# These must be containers (not counts) because they are tested with `in`.
AGENT_ACTION_VALUES = tuple(range(AGENT_ACTIONS))
TARGET_ACTION_VALUES = tuple(range(TARGET_ACTIONS))
CALL_ACTION_VALUES = tuple(range(CALL_ACTIONS))
# Alias kept for the originally (mis)spelled name.
CALL_ACTIONS_VALUES = CALL_ACTION_VALUES

# --- Sizes of the joint spaces ------------------------------------------
NUM_STATES = AGENT_STATES*TARGET_STATES*CALL_STATES
NUM_ACTIONS = AGENT_ACTIONS*TARGET_ACTIONS*CALL_ACTIONS
NUM_OBSERVATIONS = 6

# Movement actions; ACTION_MAP gives the integer code of each name, which is
# also its index in ACTIONS and in TARGET_ACTION_PROB.
ACTIONS = ['UP', 'RIGHT', 'DOWN', 'LEFT', 'STAY']
ACTION_MAP = {'UP':0, 'RIGHT':1, 'DOWN':2, 'LEFT':3, 'STAY':4}
TARGET_ACTION_PROB = [0.15, 0.15, 0.15, 0.15, 0.40]

# Values of the call flag.
CALL = 1
NOT_CALL = 0

# Probability weight given to the agent's intended move; the remaining
# 1 - X goes to the opposite direction.
X = 0.89

In [13]:
class State:
    '''
    Joint state made of the agent's cell, the target's cell and the call flag.

    Each component must be one of AGENT_STATE_VALUES, TARGET_STATE_VALUES and
    CALL_STATE_VALUES respectively. A state can be converted to and from a
    single integer in range(NUM_STATES) with getHash() / fromHash().
    '''
    def __init__(self, agent, target, call):
        '''
        Stores the three components after validating them.
        Raises ValueError if any component is outside its allowed values.
        '''
        if (agent not in AGENT_STATE_VALUES) or (target not in TARGET_STATE_VALUES) or (call not in CALL_STATE_VALUES):
            raise ValueError(
                'invalid state components: agent=%r, target=%r, call=%r'
                % (agent, target, call))
        self.agent = agent
        self.target = target
        self.call = call

    def asTuple(self):
        '''Returns the state as the tuple (agent, target, call).'''
        return (self.agent, self.target, self.call)

    def asList(self):
        '''Returns the state as the list [agent, target, call].'''
        return [self.agent, self.target, self.call]

    def getHash(self):
        '''
        Returns the state encoded as one integer: a mixed-radix number whose
        digits are (agent, target, call), with call the least significant.
        '''
        return (self.agent * TARGET_STATES * CALL_STATES +
                self.target * CALL_STATES +
                self.call)

    @classmethod
    def fromHash(cls, num):
        '''
        Inverse of getHash(): builds the state encoded by the integer num.
        Raises ValueError if num is not an int or is outside range(NUM_STATES).
        '''
        # bool is a subclass of int but is not a meaningful state code.
        if isinstance(num, bool) or not isinstance(num, int):
            raise ValueError('state hash must be an int, got %r' % (num,))
        if not 0 <= num < NUM_STATES:
            raise ValueError('state hash out of range: %r' % (num,))

        # Integer division throughout: true division would yield a float
        # (and a non-integral one for odd remainders), failing validation.
        agent, rest = divmod(num, TARGET_STATES * CALL_STATES)
        target, call = divmod(rest, CALL_STATES)
        return cls(agent, target, call)
            

In [14]:
class Action:
    '''
    Joint action made of the agent's move, the target's move and the call
    action.

    Each component is an integer code: agent in range(AGENT_ACTIONS), target
    in range(TARGET_ACTIONS), call in range(CALL_ACTIONS). An action can be
    converted to and from a single integer in range(NUM_ACTIONS) with
    getHash() / fromHash().
    '''
    def __init__(self, agent, target, call):
        '''
        Stores the three components after validating them.
        Raises ValueError if any component is outside its allowed range.
        '''
        # Validate against ranges built from the count constants; the counts
        # themselves are ints and cannot be used on the right side of `in`.
        if (agent not in range(AGENT_ACTIONS)) or (target not in range(TARGET_ACTIONS)) or (call not in range(CALL_ACTIONS)):
            raise ValueError(
                'invalid action components: agent=%r, target=%r, call=%r'
                % (agent, target, call))
        self.agent = agent
        self.target = target
        self.call = call

    def asTuple(self):
        '''Returns the action as the tuple (agent, target, call).'''
        return (self.agent, self.target, self.call)

    def asList(self):
        '''Returns the action as the list [agent, target, call].'''
        return [self.agent, self.target, self.call]

    def getHash(self):
        '''
        Returns the action encoded as one integer: a mixed-radix number whose
        digits are (agent, target, call), with call the least significant.
        '''
        return (self.agent * TARGET_ACTIONS * CALL_ACTIONS +
                self.target * CALL_ACTIONS +
                self.call)

    @classmethod
    def fromHash(cls, num):
        '''
        Inverse of getHash(): builds the action encoded by the integer num.
        Raises ValueError if num is not an int or is outside range(NUM_ACTIONS).
        '''
        # bool is a subclass of int but is not a meaningful action code.
        if isinstance(num, bool) or not isinstance(num, int):
            raise ValueError('action hash must be an int, got %r' % (num,))
        # Bound by the number of joint actions, not the number of states.
        if not 0 <= num < NUM_ACTIONS:
            raise ValueError('action hash out of range: %r' % (num,))

        # Integer division throughout: true division would yield a float
        # (and a non-integral one for odd remainders), failing validation.
        agent, rest = divmod(num, TARGET_ACTIONS * CALL_ACTIONS)
        target, call = divmod(rest, CALL_ACTIONS)
        return cls(agent, target, call)

In [15]:
def action_valid(current_state, action):
    '''
    Tells whether a movement action may be taken from the cell current_state.

    Each directional action is blocked from the three cells listed for it
    below; any action that is not one of the four directions is always valid.
    '''
    # (action name, cells from which that action is not allowed)
    blocked_cells = (
        ('UP', (0, 1, 2)),
        ('RIGHT', (2, 5, 8)),
        ('LEFT', (0, 3, 6)),
        ('DOWN', (6, 7, 8)),
    )
    for name, cells in blocked_cells:
        if action == ACTION_MAP[name]:
            return current_state not in cells

    return True

In [18]:
# Precondition: the caller has already checked that the move is valid.
def move_target(current_state, action):
    '''
    Returns the cell reached from current_state by applying action.

    UP/DOWN shift the cell index by 3, LEFT/RIGHT by 1, STAY leaves it as is.
    Returns None when action matches none of the codes in ACTION_MAP.
    '''
    # (action name, change in cell index)
    shifts = (('UP', -3), ('LEFT', -1), ('RIGHT', 1), ('DOWN', 3))
    for name, delta in shifts:
        if action == ACTION_MAP[name]:
            return current_state + delta
    if action == ACTION_MAP['STAY']:
        return current_state
    return None

In [19]:
# Gives the possible outcomes, as [probability, cell] pairs, of an agent action.
def move_agent(curr_state, action):
    '''
    Returns the list of [probability, cell] outcomes for the agent.

    STAY yields the single outcome [1, curr_state]. A directional action
    yields two outcomes, in this order: the intended direction with weight X
    and the opposite direction with weight 1-X. Whenever a direction is not
    valid from curr_state, that outcome keeps the agent in curr_state.
    An unrecognised action yields an empty list.
    '''
    if action == ACTION_MAP['STAY']:
        return [[1, curr_state]]

    # (intended direction, opposite direction, index shift of the intended move)
    directions = (
        ('UP', 'DOWN', -3),
        ('DOWN', 'UP', 3),
        ('RIGHT', 'LEFT', 1),
        ('LEFT', 'RIGHT', -1),
    )
    for intended, opposite, step in directions:
        if action != ACTION_MAP[intended]:
            continue

        if action_valid(curr_state, action):
            forward_cell = curr_state + step
        else:
            forward_cell = curr_state

        if action_valid(curr_state, ACTION_MAP[opposite]):
            backward_cell = curr_state - step
        else:
            backward_cell = curr_state

        return [[X, forward_cell], [1-X, backward_cell]]

    return []

In [21]:
def generateTransitions():
    '''
    Generates a list of ['action', 'current-state' , 'end-state', 'probability']

    'action' and the two states are the integer hashes of the joint action
    and joint states. For every (state, action) pair one entry is emitted per
    combination of agent outcome (from move_agent) and call-flag outcome, in
    that nesting order. The target moves deterministically according to the
    target component of the action (staying put if the move is not valid),
    and every entry is weighted by TARGET_ACTION_PROB of that component, so
    the entries of one (state, action) pair sum to that weight.
    '''
    # Call-flag dynamics depend only on the current flag:
    # current flag -> list of [probability, next flag].
    call_outcomes = {
        NOT_CALL: [[0.4, CALL], [0.6, NOT_CALL]],
        CALL: [[0.2, NOT_CALL], [0.8, CALL]],
    }

    result = []
    for state in range(NUM_STATES):
        # Decoding the state does not depend on the action, so do it once.
        state_obj = State.fromHash(state)
        curr_agent, curr_target, curr_call = state_obj.agent, state_obj.target, state_obj.call
        res_call = call_outcomes[curr_call]

        for action in range(NUM_ACTIONS):
            action_obj = Action.fromHash(action)
            act_agent, act_target = action_obj.agent, action_obj.target

            # agent: one outcome for STAY, two for a directional move
            res_agent = move_agent(curr_agent, act_agent)

            # target: same weight whether or not the move is possible;
            # an invalid move leaves the target where it is
            prob = TARGET_ACTION_PROB[act_target]
            if action_valid(curr_target, act_target):
                next_target = move_target(curr_target, act_target)
            else:
                next_target = curr_target

            # Combine every agent outcome with every call outcome instead of
            # assuming exactly two agent outcomes.
            for agent_prob, next_agent in res_agent:
                for call_prob, next_call in res_call:
                    end_state = State(next_agent, next_target, next_call)
                    result.append([action, state, end_state.getHash(),
                                   agent_prob * prob * call_prob])

    return result
            
            

In [23]:
def generateObservation():
    '''
    Generates a list of ['end-state', 'observation', 'probability'].

    Observation codes (0-based index of O1..O6):
    O1 -> TARGET == AGENT
    O2 -> TARGET RIGHT OF AGENT
    O3 -> TARGET BELOW OF AGENT
    O4 -> TARGET LEFT OF AGENT
    O5 -> TARGET ABOVE OF AGENT
    O6 -> NONE OF THE ABOVE

    For every state hash one entry per observation is emitted, in observation
    order; the probability is 1.0 for the observation that holds and 0.0 for
    the others.
    '''
    # Cells are numbered row by row on a grid 3 cells wide.
    grid_width = 3
    # (row offset, column offset) of the target relative to the agent for
    # observations O1..O5, in that order.
    relative_positions = ((0, 0), (0, 1), (1, 0), (0, -1), (-1, 0))

    result = []
    for state in range(NUM_STATES):
        st = State.fromHash(state)

        # Compare in (row, column) space: comparing flat indices with +/-1
        # would treat the last cell of a row and the first cell of the next
        # row as horizontal neighbours.
        agent_row, agent_col = divmod(st.agent, grid_width)
        target_row, target_col = divmod(st.target, grid_width)
        offset = (target_row - agent_row, target_col - agent_col)

        matches = [offset == position for position in relative_positions]
        # O6 holds exactly when none of O1..O5 does.
        matches.append(not any(matches))

        for obs, holds in enumerate(matches):
            result.append([state, obs, 1.0 if holds else 0.0])

    return result
                
    

In [24]:
def generateReward():
    '''
    Builds the reward table as a list of ['state', 'reward'] entries, one per
    state hash in increasing order.

    The reward is 19.0 when the agent and the target are in the same cell and
    -1.0 otherwise.
    '''
    rewards = []
    for code in range(NUM_STATES):
        decoded = State.fromHash(code)
        same_cell = decoded.agent == decoded.target
        rewards.append([code, 19.0 if same_cell else -1.0])

    return rewards