In [1]:
import numpy as np
import itertools
import sys
import pandas as pd

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
from grosseJointPlanner import *
from ValueIteration import BoltzmannValueIteration

In [4]:
gridWidth = 5
gridHeight = 5
states = list(itertools.product(range(gridWidth), range(gridHeight)))
actions = [(-1,0), (0,1), (1,0), (0,-1), (0,0)]

In [5]:
gettransition = SetupDeterministicTransitionByStateSet2Agent(states, actions, (1,1))
transitionTable = gettransition()
getReward = SetupRewardTable2AgentDistanceCost(transitionTable, [(4,2)])

In [6]:
transitionTable = gettransition()
rewardTable = getReward()

In [7]:
convergence = .000001
gamma = .95
valueTable = {state:0 for state in transitionTable.keys()}
beta = 2

performValueIteration = BoltzmannValueIteration(transitionTable, rewardTable, valueTable, convergence, gamma, beta)
optimalValues, policy = performValueIteration()

In [8]:
#Trajectory sampling
def samplePathToGoal(position, policy, transition, goals):
    trajectory = [position]

    while position[0] not in goals and position[1] not in goals:
        
        #take action probabilisitically
        actions = list(policy[position].keys())
        probOfAction = [policy[position][action] for action in actions]
        actionIndex = np.random.choice(len(actions), 1, p = probOfAction)
        sampledAction = actions[int(actionIndex)]       
        
        #get new position
        newPosition = list(transition[position][sampledAction].keys())[0]

        #update to new belief/position and add to trajectory
        position = newPosition
        trajectory.append(position)
    return(trajectory)

In [10]:
trajectory = samplePathToGoal(((0, 0), (4, 4)), policy, transitionTable, [(4,2)])
trajectory

[((0, 0), (4, 4)), ((1, 0), (4, 3)), ((1, 0), (4, 2))]

In [13]:
a = [policy[state] for state in trajectory][0]
a

{((-1, 0), (-1, 0)): 1.6817816027775666e-19,
 ((-1, 0), (0, 1)): 7.655302458420613e-11,
 ((-1, 0), (1, 0)): 7.655302458420613e-11,
 ((-1, 0), (0, -1)): 0.09950596717199087,
 ((-1, 0), (0, 0)): 4.631188110689898e-10,
 ((0, 1), (-1, 0)): 1.6817825683829208e-19,
 ((0, 1), (0, 1)): 7.655306226874469e-11,
 ((0, 1), (1, 0)): 7.655306226874469e-11,
 ((0, 1), (0, -1)): 0.09950601873358794,
 ((0, 1), (0, 0)): 4.631190390471629e-10,
 ((1, 0), (-1, 0)): 1.6817825683829208e-19,
 ((1, 0), (0, 1)): 7.655306226874469e-11,
 ((1, 0), (1, 0)): 7.655306226874469e-11,
 ((1, 0), (0, -1)): 0.09950601873358794,
 ((1, 0), (0, 0)): 4.631190390471629e-10,
 ((0, -1), (-1, 0)): 1.6817816027775666e-19,
 ((0, -1), (0, 1)): 7.655302458420613e-11,
 ((0, -1), (1, 0)): 7.655302458420613e-11,
 ((0, -1), (0, -1)): 0.09950596717199087,
 ((0, -1), (0, 0)): 4.631188110689898e-10,
 ((0, 0), (-1, 0)): 1.0174185808939762e-18,
 ((0, 0), (0, 1)): 4.631188110689898e-10,
 ((0, 0), (1, 0)): 4.631188110689898e-10,
 ((0, 0), (0, -1))

In [18]:
oneStateDist = a
{action: np.sum([oneStateDist[actionPair] for actionPair in oneStateDist.keys() if actionPair[0] == action] )for action in actions}

{(-1, 0): 0.09950596778821573,
 (0, 1): 0.09950601934981311,
 (1, 0): 0.09950601934981311,
 (0, -1): 0.09950596778821573,
 (0, 0): 0.6019760257239425}

In [None]:
trajPolicyDist = [policy[state] for state in trajectory]

In [None]:
for oneStateDist in trajPolicyDist

[((0, 0), (4, 4)), ((0, 0), (4, 3)), ((0, 0), (4, 2))]

In [14]:
b = {((-1, 0), (-1, 0)): 1,
 ((-1, 0), (0, 1)): 2,
 ((-1, 0), (1, 0)): 3,
 ((-1, 0), (0, -1)): 4,
 ((-1, 0), (0, 0)): 6,
 ((0, 1), (-1, 0)): 1 ,
 ((0, 1), (0, 1)): 7 ,
 ((0, 1), (1, 0)): 7 ,
 ((0, 1), (0, -1)): 0 ,
 ((0, 1), (0, 0)): 4 ,
 ((1, 0), (-1, 0)): 1 ,
 ((1, 0), (0, 1)): 7 ,
 ((1, 0), (1, 0)): 7 ,
 ((1, 0), (0, -1)): 0 ,
 ((1, 0), (0, 0)): 4 ,
 ((0, -1), (-1, 0)): 1 ,
 ((0, -1), (0, 1)): 7 ,
 ((0, -1), (1, 0)): 7 ,
 ((0, -1), (0, -1)): 0 ,
 ((0, -1), (0, 0)): 4 ,
 ((0, 0), (-1, 0)): 1 ,
 ((0, 0), (0, 1)): 4 ,
 ((0, 0), (1, 0)): 4 ,
 ((0, 0), (0, -1)): 0,
 ((0, 0), (0, 0)): 2}

In [None]:
actions = [(-1, 0), (0,1), (1,0), (0, -1), (0,0)]

In [15]:
[np.sum([b[key] for key in b.keys() if key[0] == action] )for action in actions] 

[16, 19, 19, 19, 11]