In [31]:
import unittest
import random
from ddt import ddt, data, unpack
import numpy as np

import sys
sys.path.append('../Algorithms/')
import ValueIteration as targetCode
sys.path.append('../Environments/')
from ColoredGridWorld.MDP import MDP

In [39]:

@ddt
class TestValueIteration(unittest.TestCase):
    def setUp(self): 
 
    # set epsilon to 0 and beta to something large, this becomes similar to the max problem of the orifinal
    ## should add a test about the behavior at the terminal site
    ## Add a test for each of the parameters (temp and rationality)
    
        self.convergenceTolerance = .000001
        self.gamma = .9
        self.eps = 0
        self.alpha = 20
    
        dimensions = (3,3)
        self.goalState = (2,2)
        self.actions = [(-1,0),(0,1),(0,-1),(1,0)]
        colourReward = {'green': 10, 'red': -100, 'white': 0}
        stateSpace = {(0,0): 'white', (0,1): 'white', (0,2): 'white', (1,0): 'white', (1,1): 'white', (1,2):'white', (2,0): 'white', (2,1):'red', (2,2):'green'}
        getMDP = MDP(dimensions, stateSpace, colourReward)
        self.deterministicRewardFunction, self.deterministicTransitionFunction = getMDP()
        self.deterministicValueTable = {key:0 for key in stateSpace.keys()}
        
        self.performDeterministicTransitionValueIteration = targetCode.ValueIteration(self.actions, self.deterministicTransitionFunction, self.deterministicRewardFunction, self.deterministicValueTable, self.goalState, self.convergenceTolerance, self.gamma, self.alpha, self.eps, True)


    @data(((1,2), (1,1)), ((1,2), (0,2)), ((0,1), (0,0))) 
    @unpack
    def test_relativeStateValues_DeterministicTransition_FirstStateGreaterValue(self, state1, state2, roundingTolerance = 5):
        optimalValues, _ = self.performDeterministicTransitionValueIteration()

        state1Value = round(optimalValues[state1], roundingTolerance)
        state2Value = round(optimalValues[state2], roundingTolerance)

        calculatedRelativeStateValueSign = np.sign(state1Value - state2Value)
        expectedSign = 1
        self.assertEqual(calculatedRelativeStateValueSign, expectedSign)
    
    @data(((1,1), (0,2)), ((0,0), (2,0)), ((0,1), (1,0)))
    @unpack
    def test_relativeStateValues_DeterministicTransition_EquivalentValueStates(self, state1, state2, roundingTolerance = 5):
        optimalValues, _ = self.performDeterministicTransitionValueIteration()

        state1Value = round(optimalValues[state1], roundingTolerance)
        state2Value = round(optimalValues[state2], roundingTolerance)

        calculatedRelativeStateValueSign = np.sign(state1Value - state2Value)
        expectedSign = 0
        self.assertEqual(calculatedRelativeStateValueSign, expectedSign)

    @data(((2,0), (0,1)), ((1,1), (1,0)), ((2,1),(1,0)))
    @unpack
    def test_isActionNonzeroInProbability_ZeroProb_DeterministicTransition_ToTrapState(self, state, action):
        _, policy = self.performDeterministicTransitionValueIteration()
        probabilityToTrapState = policy[state][action]
        roundedProbabilityToTrapState = round(probabilityToTrapState, 5)
        self.assertEqual(probabilityToTrapState,0)

    
    @data(((0,0), (-1,0)), ((0,0),(0,-1)), ((0,1),(-1,0)))
    @unpack
    def test_isActionNonzeroInProbability_ZeroProb_DeterministicTransition_OffBoard(self, state, action):
        _, policy = self.performDeterministicTransitionValueIteration()
        probabilityOffBoard = policy[state][action]
        roundedProbabilityOffBoard = round(probabilityOffBoard, 5)
        self.assertEqual(roundedProbabilityOffBoard,0) 

    @data(((0,0), (1,0), (0,1)), ((0,1), (0,1), (0,1)))
    @unpack
    def test_nonVital_isActionNonzeroInProbability_NonzeroProb_DeterministicTransition_MultipleOptimalDirections(self, state, action1, action2):
        _, policy = self.performDeterministicTransitionValueIteration()
        probability1 = policy[state][action1]
        probability1Rounded = round(probability1, 5)
        probability2 = policy[state][action2]
        probability2Rounded = round(probability2, 5)
        self.assertEqual(probability1Rounded, probability2Rounded)
      
    #testing for rationality(non-value related randomness) by putting eps=1
    @data((0,0), (1,0), (0,1), (2,1), (1,2))
    def test_Rationality_UniformlyRandom(self, state):
        performRandomValueIteration = targetCode.ValueIteration(self.actions, self.deterministicTransitionFunction, self.deterministicRewardFunction, self.deterministicValueTable, self.goalState, self.convergenceTolerance, self.gamma, self.alpha, 1, True)
        _, policy = performRandomValueIteration()
        probabilityOfAnyAction = random.choice(list(policy[state].values()))
        uniformProbability = 1/len(self.actions)
        self.assertEqual(probabilityOfAnyAction, uniformProbability)
    
    #testing for softmax temp by putting alpha = 0     
    @data((0,0), (1,0), (0,1), (2,1))
    def test_Rationality_UniformlyRandom(self, state):
        performRandomValueIteration = targetCode.ValueIteration(self.actions, self.deterministicTransitionFunction, self.deterministicRewardFunction, self.deterministicValueTable, self.goalState, self.convergenceTolerance, self.gamma, 0, self.eps, True)
        _, policy = performRandomValueIteration()
        probabilityOfAnyAction = random.choice(list(policy[state].values()))
        uniformProbability = 1/len(self.actions)
        self.assertEqual(probabilityOfAnyAction, uniformProbability)
        
    def test_TerminalStateValue(self):
        valueTable,_ = self.performDeterministicTransitionValueIteration()
        TerminalStateValue = valueTable[self.goalState]
        initializedValue = 0
        self.assertEqual(TerminalStateValue, initializedValue)
    
    def tearDown(self):
        pass


if __name__ == '__main__':
    unittest.main(argv=['first-arg-is-ignored'], exit=False)

...................
----------------------------------------------------------------------
Ran 19 tests in 0.052s

OK
