In [None]:
class ComputeStateValue:
  """Finite-horizon state values computed by backward induction.

  The transition table is deterministic and has the shape
  ``{state: {action: (nextState, reward)}}``. Calling the instance as
  ``solver(currentState, numFlightsRemaining)`` returns the largest total
  reward that can be collected from ``currentState`` by taking
  ``numFlightsRemaining`` actions.
  """

  def __init__(self, allStates, transitionTable):
    """Store private copies of the state list and the transition table.

    Args:
      allStates: iterable of every state that appears in the table.
      transitionTable: mapping ``state -> {action: (nextState, reward)}``.
    """
    # list() accepts any iterable (tuple, dict keys, ...), not only objects
    # that happen to provide .copy().
    self.allStates = list(allStates)
    # Copy the per-state action dicts too: a plain .copy() is shallow and would
    # leave them shared with the caller, so later edits to the caller's table
    # would silently change the values computed here.
    self.transitionTable = {state: dict(actions) for state, actions in transitionTable.items()}
    # Values of every state for the horizon processed so far; filled by __call__.
    self.allStateValues = {}

  def computeActionValue(self, state, action):
    """Return the reward of ``action`` in ``state`` plus the stored value of the state it leads to."""
    transition = self.transitionTable[state][action]
    nextState, reward = transition[0], transition[1]
    return reward + self.allStateValues[nextState]

  def updateStateValue(self, state):
    """Return the best action value available in ``state`` under the stored state values.

    Raises:
      ValueError: if ``state`` has no actions (``max`` of an empty sequence).
    """
    return max(self.computeActionValue(state, action) for action in self.transitionTable[state])

  def __call__(self, currentState, numFlightsRemaining):
    """Return the value of ``currentState`` with ``numFlightsRemaining`` actions left.

    Every state starts at 0 (no actions left) and one synchronous sweep over
    all states is performed per remaining action. A horizon of zero or less
    performs no sweep and therefore yields 0.
    """
    self.allStateValues = dict.fromkeys(self.allStates, 0)
    for _ in range(numFlightsRemaining):
      # The comprehension is fully evaluated against the previous sweep's
      # values before the attribute is rebound, so the update is synchronous.
      self.allStateValues = {state: self.updateStateValue(state) for state in self.allStates}

    return self.allStateValues[currentState]

In [None]:
allStates = [
  "Jonesville",
  "Williamsville",
  "Brownsville",
  "Clarksville",
  "Bakersville",
  "Smithsville",
]

# state -> {action index: (next state, reward)}
transitionTable = {
  "Jonesville": {
    0: ("Williamsville", -30),
    1: ("Clarksville", -70),
  },
  "Williamsville": {
    0: ("Brownsville", -30),
    1: ("Bakersville", -70),
  },
  "Brownsville": {
    0: ("Clarksville", -30),
    1: ("Jonesville", 30),
  },
  "Clarksville": {
    0: ("Smithsville", -70),
    1: ("Bakersville", -30),
  },
  "Bakersville": {
    0: ("Smithsville", -30),
    1: ("Williamsville", 30),
  },
  "Smithsville": {
    0: ("Jonesville", 140),
    1: ("Brownsville", 30),
  },
}

# Shared solver used by the incentive helpers defined below.
computeStateValue = ComputeStateValue(allStates, transitionTable)

In [None]:
def computeActionIncentive(currentState, action, numFlightsRemaining,
                           table=None, stateValueFunction=None):
  """Return how much worse ``action`` is than the best action in ``currentState``.

  The incentive is ``reward + V(nextState, n - 1) - V(currentState, n)`` with
  ``n = numFlightsRemaining``.

  Args:
    currentState: state the action is taken from.
    action: key into the current state's action table.
    numFlightsRemaining: number of actions left, including this one.
    table: optional ``{state: {action: (nextState, reward)}}`` mapping;
      defaults to the module-level ``transitionTable``.
    stateValueFunction: optional callable ``(state, numFlightsRemaining) -> value``;
      defaults to the module-level ``computeStateValue``.

  Returns:
    The incentive as a number.
  """
  # Fall back to the notebook-level objects only when nothing was injected, so
  # existing three-argument calls behave exactly as before.
  if table is None:
    table = transitionTable
  if stateValueFunction is None:
    stateValueFunction = computeStateValue

  transition = table[currentState][action]
  nextState, reward = transition[0], transition[1]
  nextStateValue = stateValueFunction(nextState, numFlightsRemaining - 1)
  currentStateValue = stateValueFunction(currentState, numFlightsRemaining)
  return reward + nextStateValue - currentStateValue

def computeAllIncentives(currentState, numFlightsRemaining):
  """Map each state reachable from ``currentState`` to the incentive of the action leading there."""
  incentivesByDestination = {}
  for action, transition in transitionTable[currentState].items():
    destination = transition[0]
    incentivesByDestination[destination] = computeActionIncentive(currentState, action, numFlightsRemaining)
  return incentivesByDestination

In [None]:
# Incentive of every action available in "Smithsville" with 3 flights remaining,
# keyed by the state each action leads to.
print(computeAllIncentives("Smithsville", 3))

{'Jonesville': 0, 'Brownsville': -50}


# **TESTS**

In [None]:
!pip install ddt
import unittest
from ddt import ddt, data, unpack



In [None]:
@ddt
class TestComputeStateValue(unittest.TestCase):
  """Unit tests for ComputeStateValue on the six-state example network."""

  # The fixture lives at class level because the @data arguments below are
  # evaluated while the class body runs, before any setUp. Keeping it here
  # stops the tests from depending on whatever `allStates` happens to be
  # bound to in the notebook's global namespace.
  allStates = ["Jonesville", "Williamsville", "Brownsville", "Clarksville", "Bakersville", "Smithsville"]
  transitionTable = {"Jonesville": {0: ("Williamsville", -30), 1: ("Clarksville", -70)},
                     "Williamsville": {0: ("Brownsville", -30), 1: ("Bakersville", -70)},
                     "Brownsville": {0: ("Clarksville", -30), 1: ("Jonesville", 30)},
                     "Clarksville": {0: ("Smithsville", -70), 1: ("Bakersville", -30)},
                     "Bakersville": {0: ("Smithsville", -30), 1: ("Williamsville", 30)},
                     "Smithsville": {0: ("Jonesville", 140), 1: ("Brownsville", 30)}
                     }


  # Each tuple: (stored state values, state, action, expected action value).
  @data((dict.fromkeys(allStates, 0), "Smithsville", 0, 140),
        (dict.fromkeys(allStates, 50), "Jonesville", 1, -20))
  @unpack
  def testComputeActionValue(self, valueTable, state, action, trueActionValue):
    """Action value is the action's reward plus the stored value of the next state."""
    computeStateValue = ComputeStateValue(self.allStates, self.transitionTable)
    computeStateValue.allStateValues = valueTable

    actionValue = computeStateValue.computeActionValue(state, action)
    self.assertEqual(actionValue, trueActionValue)


  # Each tuple: (stored state values, state, expected updated value).
  @data((dict.fromkeys(allStates, 0), "Williamsville", -30),
        (dict.fromkeys(allStates, 100), "Brownsville", 130))
  @unpack
  def testUpdateStateValue(self, valueTable, state, trueUpdatedStateValue):
    """Updated state value is the maximum action value available in the state."""
    computeStateValue = ComputeStateValue(self.allStates, self.transitionTable)
    computeStateValue.allStateValues = valueTable

    updatedStateValue = computeStateValue.updateStateValue(state)
    self.assertEqual(updatedStateValue, trueUpdatedStateValue)


  # Each tuple: (start state, flights remaining, expected state value).
  @data(("Jonesville", 1, -30),
        ("Smithsville", 2, 110),
        ("Clarksville", 3, 80))
  @unpack
  def testComputeStateValue(self, currentState, numFlightsRemaining, trueStateValue):
    """Calling the solver returns the state's value for the given horizon."""
    computeStateValue = ComputeStateValue(self.allStates, self.transitionTable)
    stateValue = computeStateValue(currentState, numFlightsRemaining)

    self.assertEqual(stateValue, trueStateValue)
    

# Run the tests inside the notebook: argv=[''] supplies a dummy program name so
# unittest does not parse the kernel's own command-line arguments, and
# exit=False keeps unittest from calling sys.exit() once the run finishes.
unittest.main(argv=[''], verbosity=2, exit=False)

testComputeActionValue_1 (__main__.TestComputeStateValue) ... ok
testComputeActionValue_2 (__main__.TestComputeStateValue) ... ok
testComputeStateValue_1___Jonesville___1___30_ (__main__.TestComputeStateValue) ... ok
testComputeStateValue_2___Smithsville___2__110_ (__main__.TestComputeStateValue) ... ok
testComputeStateValue_3___Clarksville___3__80_ (__main__.TestComputeStateValue) ... ok
testUpdateStateValue_1 (__main__.TestComputeStateValue) ... ok
testUpdateStateValue_2 (__main__.TestComputeStateValue) ... ok

----------------------------------------------------------------------
Ran 7 tests in 0.010s

OK


<unittest.main.TestProgram at 0x7f8d1e5144e0>

In [None]:
@ddt
class TestComputeActionIncentive(unittest.TestCase):
  """Unit tests for computeActionIncentive on the six-state example network."""

  def setUp(self):
    self.allStates = ["Jonesville", "Williamsville", "Brownsville", "Clarksville", "Bakersville", "Smithsville"]
    self.transitionTable = {"Jonesville": {0: ("Williamsville", -30), 1: ("Clarksville", -70)},
                   "Williamsville": {0: ("Brownsville", -30), 1: ("Bakersville", -70)},
                   "Brownsville": {0: ("Clarksville", -30), 1: ("Jonesville", 30)},
                   "Clarksville": {0: ("Smithsville", -70), 1: ("Bakersville", -30)},
                   "Bakersville": {0: ("Smithsville", -30), 1: ("Williamsville", 30)},
                   "Smithsville": {0: ("Jonesville", 140), 1: ("Brownsville", 30)}
                   }

    # computeActionIncentive looks up `transitionTable` and `computeStateValue`
    # in its own global namespace, so a solver that is merely constructed inside
    # a test is never used. Bind this fixture to those global names for the
    # duration of each test and put the previous objects back afterwards.
    fixture = {
        "transitionTable": self.transitionTable,
        "computeStateValue": ComputeStateValue(self.allStates, self.transitionTable),
    }
    namespace = computeActionIncentive.__globals__
    previous = {name: namespace[name] for name in fixture}
    namespace.update(fixture)
    self.addCleanup(namespace.update, previous)


  # Each tuple: (state, action, flights remaining, expected incentive).
  @data(("Smithsville", 0, 3, 0),
        ("Smithsville", 1, 3, -50),
        ("Williamsville", 0, 2, 0),
        ("Clarksville", 1, 2, -70))
  @unpack
  def testComputeActionIncentive(self, currentState, action, numFlightsRemaining, trueActionIncentive):
    """The best action has incentive 0; a worse action has a negative incentive."""
    actionIncentive = computeActionIncentive(currentState, action, numFlightsRemaining)
    self.assertEqual(actionIncentive, trueActionIncentive)

# Run the tests inside the notebook (argv=[''] supplies a dummy program name;
# exit=False keeps unittest from calling sys.exit()). As the recorded output
# shows, this run also picks up the TestComputeStateValue cases again.
unittest.main(argv=[''], verbosity=2, exit=False)

testComputeActionIncentive_1___Smithsville___0__3__0_ (__main__.TestComputeActionIncentive) ... ok
testComputeActionIncentive_2___Smithsville___1__3___50_ (__main__.TestComputeActionIncentive) ... ok
testComputeActionIncentive_3___Williamsville___0__2__0_ (__main__.TestComputeActionIncentive) ... ok
testComputeActionIncentive_4___Clarksville___1__2___70_ (__main__.TestComputeActionIncentive) ... ok
testComputeActionValue_1 (__main__.TestComputeStateValue) ... ok
testComputeActionValue_2 (__main__.TestComputeStateValue) ... ok
testComputeStateValue_1___Jonesville___1___30_ (__main__.TestComputeStateValue) ... ok
testComputeStateValue_2___Smithsville___2__110_ (__main__.TestComputeStateValue) ... ok
testComputeStateValue_3___Clarksville___3__80_ (__main__.TestComputeStateValue) ... ok
testUpdateStateValue_1 (__main__.TestComputeStateValue) ... ok
testUpdateStateValue_2 (__main__.TestComputeStateValue) ... ok

----------------------------------------------------------------------
Ran 11 t

<unittest.main.TestProgram at 0x7f8d1e51add8>