In [29]:
from gym import *
import unittest
import numpy as np

In [1]:
class BraessEnv(Env):
    """Traffic environment built on Braess's network (skeleton, not yet implemented).

    Follows the gym ``Env`` interface; see
    https://github.com/openai/gym/blob/master/gym/core.py for more details.

    The two spaces and the reward range below are still placeholder strings,
    and ``step``/``reset`` return placeholder values, so the environment
    cannot be used for learning until they are filled in.
    """
    action_space = "CHANGE THIS" # This will be a box
    observation_space = "CHANGE THIS" # This will be a box
    reward_range = "CHANGE THIS"  # (-float('inf'), 0)

    def __init__(self):
        """Create the environment. There is no state to initialise yet."""

    def step(self, action):
        """Run one timestep of the environment's dynamics.

        Intended behaviour (not implemented yet):
            (1) take the routing distribution from the action,
            (2) calculate the travel time of each path given the flow on each path,
            (3) return the reward, which is the negative of the travel time.

        Args:
            action (array): Distribution of flow over the paths of the o-d pair. The first
                            element is the share for path 1, the second for path 2, etc.

        Returns:
            next_observation (array): the travel times determined for each path
            reward (float): -1 * travel_time_of_agent
            done (boolean): termination signal (semantics not decided yet)
            info (dict): other information needed - not used for now

        The current body is a placeholder: it ignores ``action`` and always
        returns an empty dict, ``inf``, ``True`` and an empty dict.
        """
        return {}, float("inf"), True, {}

    def reset(self):
        """Reset the state of the environment and return an initial observation.

        Intended initial observation: one travel time per path of the Braess
        network, in the order

        state = [<traveltime_ABD>, <traveltime_ACD>, <traveltime_ABCD>] = [2, 2, 0.25]

        NOTE(review): with BraessNetwork's link costs and zero flow the travel
        times would be [3, 3, 2.25], and TestBraessEnv expects a dict keyed by
        path name rather than an array - confirm which format is intended.

        The current body is a placeholder: the state is an empty list and an
        empty numpy array is returned.
        """
        self.state = []
        return np.array(self.state)

    def render(self, mode='human'):
        """Do nothing; there is nothing to draw for this environment.

        ``mode`` is accepted (and ignored) so that callers that pass the
        render mode, as ``gym.Env.render(mode='human')`` allows, keep working.
        """
        return

    def close(self):
        """Do nothing; the environment holds no resources to release."""
        return

NameError: name 'Env' is not defined

In [23]:
class BraessNetwork(object):
    """Braess's network: the cost function of every link and the links that make up each path.

    Turns the number of cars routed on each path into the travel time
    experienced on each path.
    """
    def __init__(self):
        # Link name -> congestion function mapping the total flow on the link
        # to the travel time over that link.
        self.__links = {
            "AB": lambda flow: 1 + (flow/100),
            "AC": lambda _flow: 2,
            "BD": lambda _flow: 2,
            "CD": lambda flow: 1 + (flow/100),
            "BC": lambda _flow: 0.25,
        }
        # Path name -> the links traversed by that path, in travel order.
        self.__paths = {
            "ABD": ("AB", "BD"),
            "ACD": ("AC", "CD"),
            "ABCD": ("AB", "BC", "CD"),
        }

    def paths(self):
        """Return the names of all paths in the network as a tuple.

        The environment can use the position of a path in this tuple as its
        action number.
        """
        return tuple(self.__paths)

    def calculate_ttime(self, flows):
        """Compute the travel time (secs), a.k.a. ttime, of every path in `flows`.

        Args:
            flows (dictionary): path name -> flow routed on that path. A path
                that should appear in the result with nobody on it must be
                listed with a flow of 0.

        Returns:
            travel_times (dictionary): path name -> travel time, with one
                entry per path in `flows`.

        Raises:
            KeyError: if `flows` names a path that is not part of the network.
        """
        # A link shared by several paths carries the sum of their flows.
        link_load = {}
        for path_name in flows:
            for link_name in self.__paths[path_name]:
                link_load[link_name] = link_load.get(link_name, 0) + flows[path_name]

        # A path's travel time is the sum of the congested times of its links.
        return {
            path_name: sum(
                self.__links[link_name](link_load[link_name])
                for link_name in self.__paths[path_name]
            )
            for path_name in flows
        }

In [24]:
class NoLearningAgent(Env):
    """Agent that ignores everything it observes and samples a random action.

    NOTE(review): this class derives from ``Env`` although it is an agent and
    implements none of the environment methods (step/reset) - confirm whether
    the base class is intended.
    """
    def __init__(self, action_space):
        """Remember the action space to draw actions from.

        Args:
            action_space: object exposing a ``sample()`` method that returns
                one action.
        """
        self.__action_space = action_space

    def take_first_action(self, obs):
        """Return a sampled action for the first round; `obs` is ignored."""
        return self.__action_space.sample()

    def take_action(self, new_obs, old_reward):
        """Return a sampled action; `new_obs` and `old_reward` are ignored."""
        return self.__action_space.sample()

    def update_policy(self):
        """Do nothing: this agent has no policy to update."""
        return

In [25]:
num_iter = 100    # Number of rounds of the routing game played by learn(); change as needed

def learn():
    """Executes our learning framework.

    Creates a BraessEnv and a NoLearningAgent acting in its action space,
    then plays `num_iter` rounds: the agent picks an action, the environment
    steps, and the resulting observation and reward are handed to the agent
    when it picks its next action.
    """
    env = BraessEnv()
    # NoLearningAgent is the agent class defined in this notebook that
    # provides take_first_action / take_action.
    agent = NoLearningAgent(env.action_space)
    obs = env.reset()
    rw = None  # no reward exists before the first step
    for i in range(num_iter):
        if i == 0:
            # First round: only the initial observation is available.
            action = agent.take_first_action(obs)
        else:
            action = agent.take_action(obs, rw)
        # The termination signal and the info dict are not used.
        obs, rw, _, _ = env.step(action)
    return

In [26]:
# Tests
network = BraessNetwork()

# Manual checks of calculate_ttime on the Braess network:
#
#                  B
#                / | \
#             /    |    \
#          A       |       D
#             \    |    /
#                \ | /
#                  C
#
# Every scenario routes 100 cars. The dictionary printed for a scenario
# should show the travel times listed in the comment above it.
for flows in (
    # Test 1: ABD = 25, ACD = 25, ABCD = 50 cars.
    # Expected travel times (units): ABD = 3.75, ACD = 3.75, ABCD = 3.75
    {"ABD": 25, "ACD": 25, "ABCD": 50},
    # Test 2: ABD = 50, ACD = 50, ABCD = 0 cars.
    # Expected travel times (units): ABD = 3.5, ACD = 3.5,
    # ABCD = 3.25 - even though no one's using this path
    {"ABD": 50, "ACD": 50, "ABCD": 0},
):
    times = network.calculate_ttime(flows)
    print(times)

{'ABD': 3.75, 'ACD': 3.75, 'ABCD': 3.75}
{'ABD': 3.5, 'ACD': 3.5, 'ABCD': 3.25}


In [33]:
###
# MDP: (All characteristics of this MDP are given in W. Krichene's paper
#                            -- "Learning Nash Equilibria in Congestion Games")
#
#  - Observations/States: Each player observes the cost (travel time) on all of the paths (according to the paper).
#                         If a player only observes the loss she incurs, then it becomes a multi-armed bandit
#                         setting.
#  - Actions: Each player chooses a path using a randomized/mixed strategy.
#             This means we have a stochastic policy.
#  - Reward: The *cost* of each player is the travel time that they've incurred on their path.
#            Each player wants to minimize their travel time. For the reward, we can maximize the negative cost.
#  - Model of the environment: We don't have one in this case.
#
###

# Test 1: Test that reset() returns an initial observation. 
#         The initial observation should be:
#                "ABD": 3
#                "ACD": 3
#                "ABCD": 2.25
class TestBraessEnv(unittest.TestCase):
    def testResetReturnsObservation(self):
        env = BraessEnv()
        init = env.reset()
        expected = {
            "ABD": 3,
            "ACD": 3,
            "ABCD": 2.25
        }
        self.assertDictEqual(expected, init)
        
    def testStepReturnsCorrectInformation(self):
        # Test that step returns correct next observations, reward, and termination signal
        env = BraessEnv()
        _ = env.reset()
        # Note: Currently, it's expected that the single car will specify a number that corresponds to a route.
        #       This will have to change when we switch to the nonatomic setting.
        action = 1  # Representing path "ABD"
        next_obs, reward, done, _ = env.step(action)
        expected_obs = {
            "ABD": 3.01,
            "ACD": 3,
            "ABCD": 2.25
        }
        expected_reward = -3.01
        expected_done = True
        
        self.assertDictEqual(expected_obs, next_obs)
        self.assertEqual(expected_reward, reward)
        self.assertEqual(expected_done, done)
        
    def testStepSavesNoPrevInfo(self):
        env = BraessEnv()
        env.reset()
        action = 1  # Representing path "ABD"
        next_obs, reward, done, _ = env.step(action)
        
        action = 3
        next_obs, reward, done, _ = env.step(action)
        expected_obs = {
            "ABD": 3,
            "ACD": 3,
            "ABCD": 2.27
        }
        expected_reward = -2.27
        expected_done = True
        
        self.assertDictEqual(expected_obs, next_obs)
        self.assertEqual(expected_reward, reward)
        self.assertEqual(expected_done, done)

In [34]:
if __name__ == "__main__":
    # Run the TestCase classes defined in this notebook. The dummy argv keeps
    # unittest from parsing the kernel's own command-line arguments, and
    # exit=False stops it from calling sys.exit() once the run has finished.
    unittest.main(argv=["first-arg-is-ignored"], exit=False)

FFF
FAIL: testResetReturnsObservation (__main__.TestBraessEnv)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "<ipython-input-33-78c470cd245e>", line 30, in testResetReturnsObservation
    self.assertDictEqual(expected, init)
AssertionError: {'ABD': 3, 'ACD': 3, 'ABCD': 2.25} != {}
- {'ABCD': 2.25, 'ABD': 3, 'ACD': 3}
+ {}

FAIL: testStepReturnsCorrectInformation (__main__.TestBraessEnv)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "<ipython-input-33-78c470cd245e>", line 48, in testStepReturnsCorrectInformation
    self.assertDictEqual(expected_obs, next_obs)
AssertionError: {'ABD': 3.01, 'ACD': 3, 'ABCD': 2.25} != {}
- {'ABCD': 2.25, 'ABD': 3.01, 'ACD': 3}
+ {}

FAIL: testStepSavesNoPrevInfo (__main__.TestBraessEnv)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "<ipython-input-33-78c470c