In [1]:
from beliefPOMDPFunctions import *
import sys
sys.path.append('../')
from SetupTransitionTable import SetupDeterministicTransitionByStateSet
from SetupRewardTable import SetupStateActionRewardWithUserSpecifiedCosts
from visualizations import *
from ValueIteration import *

In [None]:
# Movement vectors in the order E, N, W, S, Stay.
allActions = [(1, 0), (0, 1), (-1, 0), (0, -1), (0, 0)]

# Grid dimensions and every (x, y) cell of the full grid.
gridWidth, gridHeight = 5, 4
gridSet = set(itertools.product(range(gridWidth), range(gridHeight)))

# Cells removed from environment C.
barriersC = {(2, 2), (3, 2), (4, 2)}

# Remaining cells of environment C (list order follows set iteration order).
stateSetC = list(gridSet - barriersC)

# Goal locations.
goalTruck1, goalTruck2 = (0, 0), (4, 3)

# Candidate beliefs: the six distinct one-hot vectors of length 6 ...
beliefSet = list(set(itertools.permutations([1, 0, 0, 0, 0, 0], 6)))

# ... followed by six 50/50 mixtures and one near-uniform vector.
# The near-uniform entries are 0.17 each (sum 1.02); later cells use this exact
# tuple as a starting belief, so its value must not be "normalized" here.
otherHypotheses = [
    (.5, .5, 0, 0, 0, 0),
    (0, 0, .5, .5, 0, 0),
    (0, 0, 0, 0, .5, .5),
    (0, 0, .5, 0, 0.5, 0),
    (.5, 0, 0, 0, 0, 0.5),
    (0, .5, 0, .5, 0, 0),
    (.17, .17, .17, .17, .17, .17),
]
beliefSet += otherHypotheses

In [None]:
# Build the position-level transition table for environment C from its state
# list and the action list.
# NOTE(review): SetupDeterministicTransitionByStateSet is defined outside this
# notebook; the exact table layout should be confirmed there.
getTransition = SetupDeterministicTransitionByStateSet(stateSetC, allActions)
positionTransition = getTransition()

# Build the transition table that also tracks beliefs; `updateBelief` (brought
# in by a star import) is handed over as the belief-update rule.
# NOTE(review): presumably keys combine a position with a belief vector — confirm.
getTransitionC = SetupBeliefTransition(stateSetC, beliefSet, allActions)
beliefTransition_EnvC = getTransitionC(updateBelief)

In [None]:
# Two-letter world codes.
# NOTE(review): presumably the first letter names the truck at goalTruck1 and
# the second the truck at goalTruck2 — confirm against convertBeliefToTruck.
allWorlds = ['KL', 'KM', 'LK', 'LM', 'MK', 'ML']

# Every ordering of the three letters K, L, M, joined into a 3-character string.
allPreferences = list(map(''.join, itertools.permutations("KLM", 3)))
allPreferences

In [None]:
# Reward-table builder for environment C, parameterised by the two goal cells.
getRewardC = SetupStateActionRewardWithUserSpecifiedCosts(positionTransition, allActions, [goalTruck1, goalTruck2])

# Preference orderings, in the order their tables are built below.
preferenceOrder_EnvC = ('KLM', 'KML', 'LKM', 'LMK', 'MKL', 'MLK')

# For each preference ordering: one reward table per world in allWorlds.
rewardTablesByPreference = {
    preference: [getRewardC(constructGoalStateRewards(world, preference)) for world in allWorlds]
    for preference in preferenceOrder_EnvC
}
(rewards_EnvC_PrefKLM, rewards_EnvC_PrefKML, rewards_EnvC_PrefLKM,
 rewards_EnvC_PrefLMK, rewards_EnvC_PrefMKL, rewards_EnvC_PrefMLK) = (
    rewardTablesByPreference[preference] for preference in preferenceOrder_EnvC
)

getRewardBeliefs = SetupRewardBeliefTable(stateSetC, beliefSet, allActions)

# For each preference ordering: the reward table over the belief transition
# table, built from that preference's per-world reward tables.
beliefRewardsByPreference = {
    preference: getRewardBeliefs(beliefTransition_EnvC, rewardTablesByPreference[preference])
    for preference in preferenceOrder_EnvC
}
(beliefReward_EnvC_KLM, beliefReward_EnvC_KML, beliefReward_EnvC_LKM,
 beliefReward_EnvC_LMK, beliefReward_EnvC_MKL, beliefReward_EnvC_MLK) = (
    beliefRewardsByPreference[preference] for preference in preferenceOrder_EnvC
)

# Construct Preference Policies

In [None]:
# Initial value estimate: zero for every key of the belief transition table.
valueTable = {state: 0 for state in beliefTransition_EnvC.keys()}

# Trailing positional arguments shared by every BoltzmannValueIteration call.
# NOTE(review): presumably (convergence tolerance, discount factor, Boltzmann
# beta) — confirm against ValueIteration.py. Also note that 10e-7 == 1e-6.
solverSettings_EnvC = (10e-7, .99, .8)

# Belief-level reward table for each preference ordering, in solve order.
beliefRewardForPolicy_EnvC = {
    'KLM': beliefReward_EnvC_KLM,
    'KML': beliefReward_EnvC_KML,
    'LKM': beliefReward_EnvC_LKM,
    'LMK': beliefReward_EnvC_LMK,
    'MKL': beliefReward_EnvC_MKL,
    'MLK': beliefReward_EnvC_MLK,
}

solvedPolicies_EnvC = {}
for solvedPreference, solvedBeliefReward in beliefRewardForPolicy_EnvC.items():
    # Every solver gets its own copy of the initial table, so a solver that
    # updates its value table in place cannot change the starting point of
    # the solvers that run after it.
    solvePreference = BoltzmannValueIteration(
        beliefTransition_EnvC, solvedBeliefReward, dict(valueTable), *solverSettings_EnvC
    )
    solvedValues, solvedPolicy = solvePreference()
    solvedPolicies_EnvC[solvedPreference] = (solvePreference, solvedValues, solvedPolicy)

# Re-expose the results under the names the rest of the notebook uses.
getPolicy_EnvC_KLM, optimalValues_KLM, policy_EnvC_KLM = solvedPolicies_EnvC['KLM']
getPolicy_EnvC_KML, optimalValues_KML, policy_EnvC_KML = solvedPolicies_EnvC['KML']
getPolicy_EnvC_LKM, optimalValues_LKM, policy_EnvC_LKM = solvedPolicies_EnvC['LKM']
getPolicy_EnvC_LMK, optimalValues_LMK, policy_EnvC_LMK = solvedPolicies_EnvC['LMK']
getPolicy_EnvC_MKL, optimalValues_MKL, policy_EnvC_MKL = solvedPolicies_EnvC['MKL']
getPolicy_EnvC_MLK, optimalValues_MLK, policy_EnvC_MLK = solvedPolicies_EnvC['MLK']

In [None]:
# Show every preference policy under every candidate belief.
# Plain loops replace the original list comprehensions, which were used only
# for their side effects and made the cell echo a list of print() return
# values as its output.
policiesToShow_EnvC = (
    ("KLM", policy_EnvC_KLM),
    ("KML", policy_EnvC_KML),
    ("LKM", policy_EnvC_LKM),
    ("LMK", policy_EnvC_LMK),
    ("MKL", policy_EnvC_MKL),
    ("MLK", policy_EnvC_MLK),
)

for shownPreference, shownPolicy in policiesToShow_EnvC:
    print(shownPreference + " \n")
    for shownBelief in beliefSet:
        # The visualisation call is evaluated before print() emits anything, so
        # whatever it displays precedes the text line; its return value is
        # printed at the end of that line, exactly as before.
        print(
            allWorlds, "\n", list(shownBelief),
            visualizePolicyOfBeliefByState(stateSetC, shownPolicy, shownBelief, goalStates=[goalTruck1, goalTruck2]),
        )

# Example Sampled Trajectories

In [None]:
# Fixed seed for this sample.
# NOTE(review): presumably samplePathToGoal draws from NumPy's global RNG — confirm.
np.random.seed(130)

# Sample one path under the MLK-preference policy, starting at cell (4,1) with
# the near-uniform belief. The goal cells come from the shared goalTruck1 /
# goalTruck2 constants instead of repeated coordinate literals.
path1 = samplePathToGoal((4,1), (.17,.17,.17,.17,.17,.17), policy_EnvC_MLK, beliefTransition_EnvC, [goalTruck1, goalTruck2])
positionTrajectory1 = [position for position, _ in path1]

# Convert the belief stored in the last path step into a world, then map each
# goal cell to the matching entry of that world.
world1 = convertBeliefToTruck(path1[-1][1])
worldNames1 = {goalTruck1: world1[0], goalTruck2: world1[1]}

In [None]:
# Draw environment C with trajectory 1; goal cells come from the shared
# goalTruck1 / goalTruck2 constants and worldNames1 supplies the per-goal names.
visualizeEnvironmentByState(
    states=stateSetC,
    goalStates=[goalTruck1, goalTruck2],
    trajectory=positionTrajectory1,
    goalNameDictionary=worldNames1,
)

In [None]:
# Fixed seed for this sample.
# NOTE(review): presumably samplePathToGoal draws from NumPy's global RNG — confirm.
np.random.seed(3)

# Sample one path under the LKM-preference policy, starting at cell (4,1) with
# the near-uniform belief. The goal cells come from the shared goalTruck1 /
# goalTruck2 constants instead of repeated coordinate literals.
path2 = samplePathToGoal((4,1), (.17,.17,.17,.17,.17,.17), policy_EnvC_LKM, beliefTransition_EnvC, [goalTruck1, goalTruck2])
positionTrajectory2 = [position for position, _ in path2]

# Convert the belief stored in the last path step into a world, then map each
# goal cell to the matching entry of that world.
world2 = convertBeliefToTruck(path2[-1][1])
worldNames2 = {goalTruck1: world2[0], goalTruck2: world2[1]}

In [None]:
# Draw environment C with trajectory 2; goal cells come from the shared
# goalTruck1 / goalTruck2 constants and worldNames2 supplies the per-goal names.
visualizeEnvironmentByState(
    states=stateSetC,
    goalStates=[goalTruck1, goalTruck2],
    trajectory=positionTrajectory2,
    goalNameDictionary=worldNames2,
)

In [None]:
# Fixed seed for this sample.
# NOTE(review): presumably samplePathToGoal draws from NumPy's global RNG — confirm.
np.random.seed(3)

# Sample one path under the KML-preference policy, starting at cell (4,1) with
# the near-uniform belief. The goal cells come from the shared goalTruck1 /
# goalTruck2 constants instead of repeated coordinate literals.
path3 = samplePathToGoal((4,1), (.17,.17,.17,.17,.17,.17), policy_EnvC_KML, beliefTransition_EnvC, [goalTruck1, goalTruck2])
positionTrajectory3 = [position for position, _ in path3]

# Convert the belief stored in the last path step into a world, then map each
# goal cell to the matching entry of that world.
world3 = convertBeliefToTruck(path3[-1][1])
worldNames3 = {goalTruck1: world3[0], goalTruck2: world3[1]}

In [None]:
# Draw environment C with trajectory 3; goal cells come from the shared
# goalTruck1 / goalTruck2 constants and worldNames3 supplies the per-goal names.
visualizeEnvironmentByState(
    states=stateSetC,
    goalStates=[goalTruck1, goalTruck2],
    trajectory=positionTrajectory3,
    goalNameDictionary=worldNames3,
)

# Perform Inference

In [None]:
# Candidate policies, one per preference ordering (KLM, KML, LKM, LMK, MKL, MLK).
preferencePolicies = [
    policy_EnvC_KLM, policy_EnvC_KML, policy_EnvC_LKM,
    policy_EnvC_LMK, policy_EnvC_MKL, policy_EnvC_MLK,
]

# Prior weight for each entry of preferencePolicies, in the same order.
# Alternatives tried earlier:
#   [1/6,1/6,1/6,1/6,1/6,1/6]
#   [.160,.163,.166,.169,.172,.175]
# NOTE(review): the active values sum to 1.0066 rather than 1 — confirm that
# PerformDesireInference normalizes.
desirePriors = [.1666, .167, .1675, .168, .1685, .169]

# One inferred state sequence per sampled trajectory / final world pair.
stateT1, stateT2, stateT3 = [
    inferBelief(observedPositions, observedWorld)
    for observedPositions, observedWorld in (
        (positionTrajectory1, world1),
        (positionTrajectory2, world2),
        (positionTrajectory3, world3),
    )
]

# Build each inference callable and run it straight away, one trajectory at a time.
posteriorRuns = []
for observedStates in (stateT1, stateT2, stateT3):
    runDesireInference = PerformDesireInference(beliefTransition_EnvC, preferencePolicies, desirePriors, observedStates)
    posteriorRuns.append((runDesireInference, runDesireInference()))

(
    (getPreferencePosterior1, posterior1),
    (getPreferencePosterior2, posterior2),
    (getPreferencePosterior3, posterior3),
) = posteriorRuns

In [None]:
def plotPosteriors(posteriors, title="", labels=['KLM', 'KML', 'LKM', 'LMK', 'MKL', 'MLK'], subplotRowNumber = 2, subplotColNumber = 3, figDim = (10,5)):
    """Plot each column of a 2-D posterior array in its own subplot panel.

    Panels are filled row by row: panel k shows ``posteriors[:, k]`` against
    the row index, coloured from the viridis colormap and labelled
    ``labels[k]``.

    Args:
        posteriors: 2-D array exposing ``.shape`` and ``[:, k]`` indexing.
            Presumably rows are observation steps and columns are preference
            hypotheses — confirm against PerformDesireInference.
        title: Figure-level title.
        labels: Legend label for each column; must have at least as many
            entries as there are plotted columns. The default list is only
            read, never modified.
        subplotRowNumber: Number of subplot rows.
        subplotColNumber: Number of subplot columns.
        figDim: Figure size passed to ``plt.subplots`` as ``figsize``.

    Notes:
        Any grid shape is accepted, including a single row, a single column
        or a 1x1 grid. Panels beyond the number of columns are hidden instead
        of raising IndexError. Columns beyond the number of panels are not
        drawn.
    """
    numRows, numColumns = posteriors.shape
    # squeeze=False keeps the axes container 2-D for every grid shape, so the
    # flat iteration below works for 1xN, Nx1 and 1x1 layouts as well.
    fig, axes = plt.subplots(nrows=subplotRowNumber, ncols=subplotColNumber, figsize=figDim, squeeze=False)
    colors = plt.cm.viridis(np.linspace(0, 1, numColumns))
    # `axes.flat` walks the grid in row-major order, matching a row/column loop.
    for plotIndex, panel in enumerate(axes.flat):
        if plotIndex >= numColumns:
            # No data left for this panel: hide it rather than index past the end.
            panel.set_visible(False)
            continue
        panel.plot(range(numRows), posteriors[:, plotIndex], color=colors[plotIndex], label=labels[plotIndex])
        panel.legend()
    fig.suptitle(title)
    plt.show()

In [None]:
# Posterior over preference orderings for trajectory 1 (sampled from the MLK
# policy), one panel per ordering.
plotPosteriors(posterior1)

In [None]:
# Posterior over preference orderings for trajectory 2 (sampled from the LKM
# policy), one panel per ordering.
plotPosteriors(posterior2)

In [None]:
# Posterior over preference orderings for trajectory 3 (sampled from the KML
# policy), one panel per ordering.
plotPosteriors(posterior3)

In [None]:
# Print the raw posterior values for trajectory 3 for numeric inspection.
print(posterior3)