In [1]:
import itertools
import numpy as np 
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import functools 
from itertools import product
import math
from BuildWorldClass import buildWorld 
from BuildMDPClass import buildMDP
from ValueIterationWithSoftmax import ValueIteration
from ActionInterpretationClass import actionInterpretation 

In [7]:
def buildUtilitySpace(variableColours, variableReward, constantRewardDict):
    """Enumerate every reward assignment over the variable colours.

    Args:
        variableColours: sequence of colour keys whose reward is unknown.
        variableReward: candidate reward values each variable colour may take.
        constantRewardDict: colour -> reward entries shared by every assignment.

    Returns:
        A list with one colour -> reward dict per element of the Cartesian
        product ``variableReward ** len(variableColours)``, in
        ``itertools.product`` order. Each dict also contains the entries of
        ``constantRewardDict``.
    """
    utilitySpace = []
    for rewardCombination in product(variableReward, repeat=len(variableColours)):
        colourReward = dict(zip(variableColours, rewardCombination))
        # Applied last, so a constant entry wins if a key appears in both.
        colourReward.update(constantRewardDict)
        utilitySpace.append(colourReward)
    return utilitySpace

def buildPoliciesAndMDPs(dimensions, stateSpace, utilitySpace, beliefSpace, actions, valueTable, hyperparameters):
    """Build one MDP and one policy per reward assignment and key them by belief.

    Args:
        dimensions: grid dimensions forwarded to ``buildMDP``.
        stateSpace: state -> colour mapping forwarded to ``buildMDP``.
        utilitySpace: iterable of colour -> reward dicts; one MDP is built
            for each entry.
        beliefSpace: iterable of hashable beliefs. Entry ``i`` is paired with
            the MDP/policy built from ``utilitySpace`` entry ``i``, so both
            must have the same length and ordering.
        actions: action set forwarded to ``ValueIteration``.
        valueTable: initial value table forwarded to every ``ValueIteration``
            (the same object is handed to each one).
        hyperparameters: 4-tuple ``(convergenceTolerance, gamma, alpha, eps)``.

    Returns:
        dict mapping each belief to a ``(MDP, policy)`` tuple.

    Raises:
        ValueError: if ``beliefSpace`` and ``utilitySpace`` differ in length.
            Pairing is positional, so a mismatch would otherwise silently
            drop beliefs or attach them to the wrong MDP.
    """
    convergenceTolerance, gamma, alpha, eps = hyperparameters

    utilities = list(utilitySpace)
    beliefs = list(beliefSpace)
    # Fail fast, before any (potentially expensive) value iteration is run.
    if len(beliefs) != len(utilities):
        raise ValueError(
            "beliefSpace and utilitySpace must have the same length to be paired "
            "positionally (got {} beliefs and {} utilities)".format(len(beliefs), len(utilities))
        )

    beliefMDPs = [buildMDP(dimensions, stateSpace, colourReward) for colourReward in utilities]
    # Calling an MDP object yields its (rewardFunction, transitionPdf) pair.
    rewardAndTransitionFunctions = [mdp() for mdp in beliefMDPs]
    valueIterations = [
        ValueIteration(actions, transitionPdf, rewardFunction, valueTable, convergenceTolerance, gamma, alpha, eps)
        for rewardFunction, transitionPdf in rewardAndTransitionFunctions
    ]
    # Calling a ValueIteration object runs it; index 1 of the result is used as the policy.
    valueAndPolicyTables = [runValueIteration() for runValueIteration in valueIterations]
    beliefPolicies = [valueAndPolicyTable[1] for valueAndPolicyTable in valueAndPolicyTables]

    return {
        belief: (mdp, policy)
        for belief, mdp, policy in zip(beliefs, beliefMDPs, beliefPolicies)
    }

def buildWorlds(utilitySpace, transitionSpace):
    """Create one world per (reward assignment, transition mode) combination.

    Args:
        utilitySpace: iterable of colour -> reward dicts.
        transitionSpace: iterable of values passed to ``buildWorld`` as its
            second argument (named ``isDeterministic`` here).

    Returns:
        A list of ``buildWorld`` instances in ``itertools.product`` order,
        i.e. ``transitionSpace`` varies fastest.
    """
    worlds = []
    for colourReward, isDeterministic in product(utilitySpace, transitionSpace):
        worlds.append(buildWorld(colourReward, isDeterministic))
    return worlds
            
def main():
    """Run the action-interpretation demo on a hard-coded coloured grid.

    Enumerates every reward assignment for the variable colours, builds one
    world, MDP and policy per assignment, puts a uniform prior over the
    resulting (world, goal) beliefs, and prints the value returned by
    ``actionInterpretation`` for one specific (world, goal) pair under two
    fixed trajectories.
    """
    dimensions = (5,6)
    goals = [(5,2)]
    actions = {(-1,0),(0,1),(0,-1),(1,0)}
    # state -> colour of that cell
    stateSpace = {
        (0,0): 'white', (0,1): 'white', (0,2): 'white', (0,3): 'white', (0,4): 'white',
        (1,0): 'blue', (1,1): 'orange', (1,2): 'orange', (1,3): 'orange', (1,4): 'orange',
        (2,0): 'blue', (2,1): 'purple', (2,2): 'purple', (2,3): 'purple', (2,4): 'orange',
        (3,0): 'blue', (3,1): 'purple', (3,2): 'blue', (3,3): 'purple', (3,4): 'orange',
        (4,0): 'blue', (4,1): 'blue', (4,2): 'blue', (4,3): 'purple', (4,4): 'orange',
        (5,0): 'white', (5,1): 'white', (5,2): 'yellow', (5,3): 'white', (5,4): 'white',
    }

    convergenceTolerance = 10e-7  # note: 10e-7 == 1e-6
    gamma = 0.94
    alpha = 20
    eps = 0.01
    hyperparameters = (convergenceTolerance, gamma, alpha, eps)
    valueTable = {key: 0 for key in stateSpace.keys()}

    # Reward assignment of the world whose probability is queried below.
    colourReward = {'white': 0, 'orange': -2, 'purple': 0, 'blue':0, 'yellow': 10}
    variableColours = ['orange', 'purple', 'blue']
    variableReward = [0, -2]
    constantRewardDict = {'white': 0, 'yellow': 10}

    utilitySpace = buildUtilitySpace(variableColours, variableReward, constantRewardDict)
    transitionSpace = [True]
    worlds = buildWorlds(utilitySpace, transitionSpace)
    # buildPoliciesAndMDPs pairs beliefs with utilitySpace entries by position,
    # which lines up here only because there is a single goal and a single
    # transition mode.
    beliefSpace = [(world, goal) for world, goal in product(worlds, goals)]
    beliefPoliciesAndMDPs = buildPoliciesAndMDPs(dimensions, stateSpace, utilitySpace, beliefSpace, actions, valueTable, hyperparameters)
    # Uniform prior over all beliefs.
    priors = {belief: (1/len(beliefSpace)) for belief in beliefSpace}

    beliefVector = actionInterpretation(beliefPoliciesAndMDPs, priors)
    # The lists below appear to alternate state, action, state, ... — TODO confirm
    # against ActionInterpretationClass.
    trajectory = [(0,2), (1,0), (1,2), (1,0), (2,2), (1,0), (3,2), (1,0), (4,2), (1,0), (5,2)]
    trajectory2 = [(0,2), (0,-1), (0,1), (0,-1), (0,0), (1,0), (1,0), (1,0), (2,0), (0,1), (2,1), (0,1), (2,2),(1,0), (3,2), (1,0), (4,2), (1,0), (5,2)]
    testWorld = buildWorld(colourReward, True)
    probability1 = beliefVector(trajectory, testWorld, goals[0])
    probability2 = beliefVector(trajectory2, testWorld, goals[0])
    print("Probability of the given world and goal for trajectory 1 is " + str(probability1))
    print("Probability of the given world and goal for trajectory 2 is " + str(probability2))
    
    
    
    
# Entry point: run the demo only when this code is executed directly
# (as a script or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

Probability of the given world and goal for trajectory 1 is 0.0038231297086904984
Probability of the given world and goal for trajectory 2 is 0.9999688091347915
