In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch_geometric.nn as gnn

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
# The model cells further down read this module-level global when they
# allocate tensors or move modules.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Edge name -> integer edge index.
# Indices 0-7 are the eight road edges around the junction; indices 8-19 are
# the junction-internal ":center_*" edges.
edges_dict = {
    "b_c": 0,
    "c_r": 1,
    "r_c": 2,
    "c_t": 3,
    "t_c": 4,
    "c_l": 5,
    "l_c": 6,
    "c_b": 7,
    ":center_0": 8,
    ":center_1": 9,
    ":center_2": 10,
    # ":center_12" shares index 10 with ":center_2" -- presumably a second
    # name for the same internal connection; TODO confirm against the SUMO net.
    ":center_12": 10,
    ":center_3": 11,
    ":center_4": 12,
    ":center_5": 13,
    ":center_6": 14,
    ":center_7": 15,
    ":center_8": 16,
    # ":center_13" shares index 16 with ":center_8" (same caveat as above).
    ":center_13": 16,
    ":center_9": 17,
    ":center_10": 18,
    ":center_11": 19,
}

# (inbound edge, outbound edge) -> integer route index, numbered in the order
# the routes are listed here.
routes_dict = {
    route: index
    for index, route in enumerate([
        ('t_c', 'c_l'), ('t_c', 'c_b'), ('t_c', 'c_r'),
        ('r_c', 'c_t'), ('r_c', 'c_l'), ('r_c', 'c_b'),
        ('b_c', 'c_r'), ('b_c', 'c_t'), ('b_c', 'c_l'),
        ('l_c', 'c_b'), ('l_c', 'c_r'), ('l_c', 'c_t'),
    ])
}

# Per-vehicle boolean flag, filled in lazily by myEnv.get_state.
all_vehicles = {}

import numpy as np

# conflicting_routes_matrix[a][b] is 1 when routes a and b (indices taken from
# routes_dict) are flagged as conflicting, and 0 otherwise.
# Entry k of the table below lists the routes flagged against route k.
conflicting_routes_matrix = np.zeros((12, 12))
for route_a, flagged_routes in enumerate((
    (4, 8),                    # route 0
    (4, 5, 8, 9, 10, 11),      # route 1
    (4, 5, 6, 7, 8, 10, 11),   # route 2
    (7, 11),                   # route 3
    (0, 1, 2, 7, 8, 11),       # route 4
    (1, 2, 7, 8, 9, 10, 11),   # route 5
    (2, 10),                   # route 6
    (2, 3, 4, 5, 10, 11),      # route 7
    (0, 1, 2, 4, 5, 10, 11),   # route 8
    (1, 5),                    # route 9
    (1, 2, 5, 6, 7, 8),        # route 10
    (1, 2, 3, 4, 5, 7, 8),     # route 11
)):
    for route_b in flagged_routes:
        conflicting_routes_matrix[route_a][route_b] = 1
                
# routes_edges_matrix[route][edge] encodes where `edge` sits along `route`
# (both are indices from routes_dict / edges_dict):
#   1 -> the edge the route enters the junction on,
#   2 -> the junction-internal edge of the route,
#   3 -> the edge the route leaves the junction on,
#   0 -> the edge is not part of the route.
# Entry k of the table below is the (approach, internal, exit) edge triple of
# route k.
routes_edges_matrix = np.zeros((12, 20))
for route_index, (approach_edge, internal_edge, exit_edge) in enumerate((
    (4, 8, 5),
    (4, 9, 7),
    (4, 10, 1),
    (2, 11, 3),
    (2, 12, 5),
    (2, 13, 7),
    (0, 14, 1),
    (0, 15, 3),
    (0, 16, 5),
    (6, 17, 7),
    (6, 18, 1),
    (6, 19, 3),
)):
    routes_edges_matrix[route_index][approach_edge] = 1
    routes_edges_matrix[route_index][internal_edge] = 2
    routes_edges_matrix[route_index][exit_edge] = 3

In [3]:
import numpy as np
from numpy import pi, sin, cos, linspace

from flow.core.params import InitialConfig
from flow.core.params import TrafficLightParams
from flow.networks.base import Network

# Network parameters that IntersectionNetwork.__init__ requires to be present
# in net_params.additional_params (a missing key raises KeyError there).
ADDITIONAL_NET_PARAMS = {
    # distance from the center node to each of the four outer nodes
    # (used as the x/y offset of the outer nodes in specify_nodes)
    "radius_ring": 30,
    # number of lanes on every edge
    "lanes": 1,
    # speed limit for all edges
    "speed_limit": 30,
    # required by the key check in __init__; this network has no curved
    # edges that would consume it
    "resolution": 40
}

class IntersectionNetwork(Network):
    """Four-way intersection network.

    A single ``center`` junction is connected to four outer nodes
    (``right``, ``top``, ``left``, ``bottom``), each placed ``radius_ring``
    away from the center along an axis. Every arm has one inbound edge
    (``r_c``, ``t_c``, ``l_c``, ``b_c``) and one outbound edge (``c_r``,
    ``c_t``, ``c_l``, ``c_b``).

    Requires from net_params:

    * **radius_ring** : distance from the center node to each outer node;
      also the length given to every edge
    * **lanes** : number of lanes on every edge
    * **speed_limit** : max speed of vehicles on every edge
    * **resolution** : must be supplied (it is part of the key check in
      ``__init__``) but is not otherwise used by this class

    Usage
    -----
    >>> from flow.core.params import NetParams
    >>> from flow.core.params import VehicleParams
    >>>
    >>> network = IntersectionNetwork(
    >>>     name='intersection',
    >>>     vehicles=VehicleParams(),
    >>>     net_params=NetParams(
    >>>         additional_params={
    >>>             'radius_ring': 30,
    >>>             'lanes': 1,
    >>>             'speed_limit': 30,
    >>>             'resolution': 40
    >>>         },
    >>>     )
    >>> )
    """

    def __init__(self,
                 name,
                 vehicles,
                 net_params,
                 initial_config=InitialConfig(),
                 traffic_lights=TrafficLightParams()):
        """Validate the network parameters and build the network.

        Raises
        ------
        KeyError
            If any key of ADDITIONAL_NET_PARAMS is missing from
            ``net_params.additional_params``.
        """
        for p in ADDITIONAL_NET_PARAMS.keys():
            if p not in net_params.additional_params:
                raise KeyError('Network parameter "{}" not supplied'.format(p))

        # Geometry attributes stored on the instance. None of them is read
        # inside this class; they are kept so external code that accesses
        # them keeps working.
        ring_radius = net_params.additional_params["radius_ring"]
        self.ring_edgelen = ring_radius * np.pi / 2.
        self.intersection_len = 2 * ring_radius
        self.junction_len = 2.9 + 3.3 * net_params.additional_params["lanes"]
        self.inner_space_len = 0.28

        # NOTE: net_params.additional_params["length"] is intentionally not
        # set here (the original assignment was disabled).

        super().__init__(name, vehicles, net_params, initial_config,
                         traffic_lights)

    def specify_nodes(self, net_params):
        """See parent class."""
        r = net_params.additional_params["radius_ring"]
        lanes = net_params.additional_params["lanes"]

        # The junction radius uses the same 2.9 + 3.3 * lanes expression as
        # self.junction_len, halved.
        nodes = [{
            "id": "center",
            "x": 0,
            "y": 0,
            "radius": (2.9 + 3.3 * lanes)/2,
            "type": "priority"
        }]

        # Four outer nodes, one per arm, at distance r along each axis.
        outer_nodes = (
            ("right", r, 0),
            ("top", 0, r),
            ("left", -r, 0),
            ("bottom", 0, -r),
        )
        for node_id, x, y in outer_nodes:
            nodes.append({"id": node_id, "x": x, "y": y, "type": "priority"})

        return nodes

    def specify_edges(self, net_params):
        """See parent class."""
        r = net_params.additional_params["radius_ring"]
        intersection_edgelen = 2 * r
        arm_length = intersection_edgelen / 2

        # (edge id, from node, to node, priority or None).
        # Only the inbound edges of the left/right arms carry an explicit
        # priority; the others leave it unset.
        edge_specs = (
            ("b_c", "bottom", "center", None),
            ("c_t", "center", "top", None),
            ("r_c", "right", "center", 78),
            ("c_l", "center", "left", None),
            ("t_c", "top", "center", None),
            ("c_r", "center", "right", None),
            ("l_c", "left", "center", 78),
            ("c_b", "center", "bottom", None),
        )

        edges = []
        for edge_id, from_node, to_node, priority in edge_specs:
            edge = {"id": edge_id, "type": "edgeType"}
            if priority is not None:
                edge["priority"] = priority
            edge["from"] = from_node
            edge["to"] = to_node
            edge["length"] = arm_length
            edges.append(edge)

        return edges

    def specify_types(self, net_params):
        """See parent class."""
        lanes = net_params.additional_params["lanes"]
        speed_limit = net_params.additional_params["speed_limit"]

        # A single edge type shared by all eight edges.
        return [{
            "id": "edgeType",
            "numLanes": lanes,
            "speed": speed_limit
        }]

    def specify_routes(self, net_params):
        """See parent class."""
        rts = {
            # Each inbound edge offers its three possible exits, each listed
            # with weight 1/3.
            "r_c":
                [(["r_c", "c_l"], 1/3), (["r_c", "c_t"], 1/3),
                    (["r_c", "c_b"], 1/3)],
            "b_c":
                [(["b_c", "c_t"], 1/3), (["b_c", "c_l"], 1/3),
                    (["b_c", "c_r"], 1/3)],
            "t_c":
                [(["t_c", "c_b"], 1/3), (["t_c", "c_l"], 1/3),
                    (["t_c", "c_r"], 1/3)],
            "l_c":
                [(["l_c", "c_r"], 1/3), (["l_c", "c_t"], 1/3),
                    (["l_c", "c_b"], 1/3)],
            # Outbound edges route only over themselves.
            "c_r":
                ["c_r"],
            "c_l":
                ["c_l"],
            "c_t":
                ["c_t"],
            "c_b":
                ["c_b"],
            # NOTE(review): these two keys look like per-vehicle routes for
            # vehicles named "human_0" / "human_1" -- confirm against how
            # Flow resolves route keys.
            "human_0":
                ["r_c", "c_l"],
            "human_1":
                ["b_c", "c_l"],
            }

        return rts

In [4]:
from flow.envs.base import Env
from gym.spaces.box import Box
from gym.spaces import Tuple
from gym.spaces import Discrete
import numpy as np
from numpy.linalg import inv

# Extra environment parameters read by myEnv.action_space.
ADDITIONAL_ENV_PARAMS = {
    # upper bound of each acceleration action
    "max_accel": 5,
    # deceleration limit; action_space uses -abs(max_decel) as the lower
    # bound, so the sign of this value does not matter
    "max_decel": -5,
}

class myEnv(Env):
    """Flow environment for the intersection with per-vehicle observations.

    Actions are one acceleration per RL vehicle. The state is a dict keyed by
    vehicle id (see ``get_state``), and the reward combines mean speed, action
    magnitude, an idle penalty and a collision penalty (see
    ``compute_reward``).

    Relies on the module-level ``routes_dict``, ``edges_dict``,
    ``routes_edges_matrix`` and ``all_vehicles`` objects.
    """

    @property
    def action_space(self):
        """One bounded acceleration per RL vehicle."""
        num_actions = self.initial_vehicles.num_rl_vehicles
        accel_ub = self.env_params.additional_params["max_accel"]
        # abs() makes the lower bound negative whatever sign max_decel has.
        accel_lb = - abs(self.env_params.additional_params["max_decel"])

        return Box(low=accel_lb,
                   high=accel_ub,
                   shape=(num_actions,))

    @property
    def observation_space(self):
        """Describe the per-vehicle observation tuple.

        Returns a plain dict mapping each current vehicle id to a tuple of gym
        spaces, in the same order as the tuples built by ``get_state``. Note
        that the return value is a dict, not a gym space object.
        """
        unbounded = float("inf")
        nodes = {}
        for veh_id in self.k.vehicle.get_ids():
            nodes[veh_id] = (
                Box(low=-unbounded, high=unbounded, shape=(1,)),  # POSITION
                Box(low=0, high=unbounded, shape=(1,)),           # VELOCITY
                # Realized accelerations are negative while braking, so the
                # lower bound must not be 0.
                Box(low=-unbounded, high=unbounded, shape=(1,)),  # ACCELERATION
                Discrete(2),                                      # CONTROLLABLE
                Box(low=-unbounded, high=unbounded, shape=(2,)),  # COORDINATES
                Box(low=-unbounded, high=unbounded, shape=(1,)),  # HEADING ANGLE
                Discrete(20),                                     # EDGE
                Discrete(12),                                     # ROUTE
            )

        return nodes

    def _apply_rl_actions(self, rl_actions):
        """Apply ``rl_actions`` as accelerations to the RL vehicles."""
        # the names of all autonomous (RL) vehicles in the network
        rl_ids = self.k.vehicle.get_rl_ids()
        self.k.vehicle.apply_acceleration(rl_ids, rl_actions)

    def get_state(self, **kwargs):
        """Build the per-vehicle state.

        Returns
        -------
        dict
            vehicle id -> ``(pos, vel, acc, contr, coord, angle, edge, route)``
            where ``edge`` / ``route`` are indices from ``edges_dict`` /
            ``routes_dict``, ``contr`` is 0 for vehicles of type ``'human'``
            and 1 otherwise, and ``pos`` is the edge-relative position
            re-mapped as described inline below.

        Side effects: adds unseen vehicle ids to the module-level
        ``all_vehicles`` dict and may flip their flag to True.
        """
        state = {}

        for veh_id in self.k.vehicle.get_ids():
            all_vehicles.setdefault(veh_id, False)

            # ROUTE / EDGE indices; an empty string falls back to route 0 and
            # edge 5 respectively.
            route_key = self.k.vehicle.get_route(veh_id)
            route = 0 if route_key == '' else routes_dict[route_key]
            edge_key = self.k.vehicle.get_edge(veh_id)
            edge = 5 if edge_key == '' else edges_dict[edge_key]

            # POSITION
            # The offsets below (42, 12, 4, 12/7) are hand-tuned values whose
            # geometric meaning is not derivable from this file.
            # NOTE(review): `pos` restarts from -42 on every call, so the
            # `> 3` test below compares against that fixed value rather than
            # against the vehicle's previous position -- confirm this is
            # intended.
            raw_pos = self.k.vehicle.get_position(veh_id)
            segment = routes_edges_matrix[route][edge]
            pos = -42
            if segment == 1:
                # on the approach edge of the route
                pos = raw_pos - 42
            elif segment == 2:
                # on the junction-internal edge of the route
                if route in (1, 4, 7, 10):
                    pos = raw_pos - 12
                elif route in (2, 5, 8, 11):
                    if not all_vehicles[veh_id]:
                        if abs(pos + 12 - raw_pos) > 3:
                            all_vehicles[veh_id] = True
                            pos = pos + raw_pos
                        else:
                            pos = raw_pos - 12
                    else:
                        pos = raw_pos + 4 - 12
                else:
                    old_pos = pos
                    pos = raw_pos - 12
                    ang_coeff = 12/7
                    rel_displ = ang_coeff*(pos-old_pos)
                    pos = old_pos + rel_displ
            else:
                # on the exit edge, or on an edge outside the route
                pos = raw_pos

            # VELOCITY
            vel = self.k.vehicle.get_speed(veh_id)

            # ACCELERATION (treated as 0 when the kernel reports none)
            acc = self.k.vehicle.get_realized_accel(veh_id)
            if acc is None:
                acc = 0

            # CONTROLLABLE
            contr = 0 if self.k.vehicle.get_type(veh_id) == 'human' else 1

            # COORDINATES
            coord = self.k.vehicle.get_2d_position(veh_id)

            # HEADING ANGLE
            angle = self.k.vehicle.get_orientation(veh_id)[2]

            state[veh_id] = (pos, vel, acc, contr, coord, angle, edge, route)

        return state

    def compute_reward(self, rl_actions, state=None, **kwargs):
        """Weighted sum of velocity, action, idle and collision terms.

        * velocity: mean speed of all vehicles, or 0 on a collision or when
          there are no vehicles (weight 0.03)
        * action: minus the mean absolute action, or 0 when there are no
          vehicles or no actions (weight 0.01)
        * idle: -1 when every vehicle is slower than 0.3 (weight 0.01)
        * collision: -1 on a collision (weight 1)

        ``rl_actions`` may be None or empty; the action term is then 0.
        """
        w_v = 0.03
        w_a = 0.01
        w_i = 0.01
        w_c = 1
        # w_r = 0.01  (reluctance weight, on hold for now)

        ids = self.k.vehicle.get_ids()
        crash = self.k.simulation.check_collision()
        speeds = self.k.vehicle.get_speed(ids)
        has_vehicles = len(speeds) > 0

        # VELOCITY TERM
        # A piecewise reward on mean_speed relative to a speed limit of 25
        # was tried here previously and is disabled.
        mean_speed = np.mean(speeds) if has_vehicles else 0
        Rv = 0 if crash else mean_speed

        # ACTION TERM
        if not has_vehicles or rl_actions is None or len(rl_actions) == 0:
            Ra = 0
        else:
            Ra = -np.mean(np.abs(rl_actions))

        # IDLE TERM
        Ri = -1 if has_vehicles and max(speeds) < 0.3 else 0

        # COLLISION TERM
        Rc = -1 if crash else 0

        # RELUCTANCE TERM
        # on hold for now

        R = w_v*Rv + w_a*Ra + w_i*Ri + w_c*Rc

        return R

In [5]:
from flow.core.params import VehicleParams
from flow.controllers import IDMController, ContinuousRouter
from flow.core.params import SumoParams, EnvParams, InitialConfig, NetParams
from flow.controllers import RLController

# Vehicles present at the start of the simulation: two IDM-driven vehicles
# of type "human".
# NOTE(review): the inflows below use veh_type="rl", but no "rl" vehicle type
# is added to `vehicles` in this cell -- confirm it is added elsewhere.
vehicles = VehicleParams()
vehicles.add(
    veh_id="human",
    acceleration_controller=(IDMController, {}),
    routing_controller=(ContinuousRouter, {}),
    num_vehicles=2,
    color='green',
)

from flow.core.params import InFlows

# One "rl" inflow per inbound edge: (edge, per-step spawn probability).
# A depart_speed="random" option was tried on these inflows and is left
# disabled.
inflow = InFlows()
for inbound_edge, spawn_probability in (
        ("b_c", 0.05),
        ("t_c", 0.1),
        ("l_c", 0.1),
        ("r_c", 0.05),
):
    inflow.add(veh_type="rl",
               edge=inbound_edge,
               probability=spawn_probability)

# 0.1 s simulation step with the SUMO GUI enabled.
sim_params = SumoParams(sim_step=0.1, render=True)

initial_config = InitialConfig()

env_params = EnvParams(additional_params=ADDITIONAL_ENV_PARAMS)

# Copy so later edits to additional_net_params do not alter the defaults.
# NOTE(review): `inflow` is not passed to NetParams here -- confirm that a
# later cell attaches it, otherwise the inflows above are never used.
additional_net_params = ADDITIONAL_NET_PARAMS.copy()
net_params = NetParams(additional_params=additional_net_params)

In [6]:
import numpy as np

def _edge_kind(state_i, state_j):
    """Return the relation from vehicle i to vehicle j, or None for no edge.

    * ``'crossing'``: the routes are flagged in ``conflicting_routes_matrix``
      and neither vehicle is on the exit edge of its route (value 3 in
      ``routes_edges_matrix``).
    * ``'same_lane'``: i has the larger position value than j and either
      (a) the routes conflict, at least one vehicle is already on its exit
      edge, and both routes share the same exit edge, (b) both vehicles
      follow the same route, or (c) both vehicles are on the same edge.
    """
    route_i, route_j = state_i[7], state_j[7]
    edge_i, edge_j = state_i[6], state_j[6]
    i_ahead = state_i[0] > state_j[0]

    if conflicting_routes_matrix[route_i][route_j] == 1:
        i_on_exit = routes_edges_matrix[route_i][edge_i] == 3
        j_on_exit = routes_edges_matrix[route_j][edge_j] == 3
        if not i_on_exit and not j_on_exit:
            return 'crossing'
        # argmax of a route row is its exit edge (the only entry equal to 3).
        same_exit = (np.argmax(routes_edges_matrix[route_i])
                     == np.argmax(routes_edges_matrix[route_j]))
        return 'same_lane' if same_exit and i_ahead else None

    if (route_i == route_j or edge_i == edge_j) and i_ahead:
        return 'same_lane'
    return None


def _pair_features(state_i, state_j):
    """Return ``(d_ij, chi_ij)`` for the ordered vehicle pair (i, j).

    ``d_ij`` is the inverse of the distance between the two vehicles measured
    in a metric stretched by diag(25/4, 1) and rotated by vehicle i's heading.
    ``chi_ij`` is ``arctan(dy / dx)`` of the offset from j to i, minus vehicle
    j's heading.
    """
    # NOTE(review): the heading comes from get_orientation() and is fed to
    # np.cos / np.sin as radians -- confirm the kernel does not report
    # degrees.
    heading_i = state_i[5]
    coord_i = np.array(state_i[4])
    coord_j = np.array(state_j[4])

    # DISTANCE
    dimensions_matrix = np.array([[25/4, 0], [0, 1]])
    rotation_matrix = np.array([[np.cos(heading_i), -np.sin(heading_i)],
                               [np.sin(heading_i), np.cos(heading_i)]])
    sigma_matrix = rotation_matrix @ dimensions_matrix @ rotation_matrix.T
    offset = coord_j - coord_i
    d_ij = 1/np.sqrt(offset @ np.linalg.inv(sigma_matrix) @ offset)

    # BEARING
    py = coord_i[1] - coord_j[1]
    # x offset: subtract j's x coordinate. The previous code subtracted j's
    # y coordinate here, mixing the two axes.
    px = coord_i[0] - coord_j[0]
    # Vehicles with equal x give dy / 0 = +-inf, which arctan maps to
    # +-pi/2; silence only that expected divide warning.
    # NOTE(review): arctan cannot tell opposite directions apart; consider
    # np.arctan2(py, px) if a full-circle bearing is intended.
    with np.errstate(divide="ignore"):
        chi_ij = np.arctan(py/px) - state_j[5]

    # PRIORITY
    # on hold for now

    return (d_ij, chi_ij)


def compute_edges(env, state):
    """Build the directed interaction edges between all vehicle pairs.

    Parameters
    ----------
    env : environment exposing ``env.k.vehicle.get_ids()``
    state : dict
        vehicle id -> state tuple as produced by ``myEnv.get_state``
        (index 0 position, 4 coordinates, 5 heading, 6 edge index,
        7 route index).

    Returns
    -------
    edges : dict
        ``(i, j)`` -> ``(d_ij, chi_ij)``, see ``_pair_features``.
    edges_type : dict
        ``(i, j)`` -> ``'crossing'`` or ``'same_lane'``, with the same keys in
        the same insertion order as ``edges``.
    """
    edges = {}
    edges_type = {}
    veh_ids = env.k.vehicle.get_ids()

    for i in veh_ids:
        for j in veh_ids:
            kind = _edge_kind(state[i], state[j])
            if kind is None:
                continue
            edges_type[(i, j)] = kind
            edges[(i, j)] = _pair_features(state[i], state[j])

    return edges, edges_type

def compute_rp(edges):
    """Return minus the reciprocal of the smallest ``1 / d_ij`` over all edges.

    Parameters
    ----------
    edges : dict
        Edge key -> feature tuple whose first element is ``d_ij`` (as produced
        by ``compute_edges``).

    Returns
    -------
    float
        ``-1 / min(1 / d_ij)``; ``-0.0`` when ``edges`` is empty, because the
        minimum then stays at infinity.
    """
    smallest = np.inf
    # Single pass over the values; the strict `<` keeps the running minimum
    # unchanged when a candidate is NaN.
    for features in edges.values():
        candidate = 1/features[0]
        if candidate < smallest:
            smallest = candidate

    return -1/smallest

In [7]:
class Graph:
    """Integer-indexed graph built from node ids and (source, target) id pairs.

    Attributes
    ----------
    nodes : list of int
        ``0 .. len(nodes_list) - 1``.
    edges : list of (int, int)
        Each input edge with both endpoints replaced by their position in
        ``nodes_list``.
    sparse_adj : torch.Tensor
        int64 tensor of shape ``(2, num_edges)`` on the module-level
        ``device``; row 0 holds source indices, row 1 target indices.
    ndata, edata : dict
        Feature stores filled by ``insert_node_features`` /
        ``insert_edge_features``.
    """

    def __init__(self, nodes_list, edges_list):
        self.ndata = {}
        self.edata = {}
        self.nodes = list(range(len(nodes_list)))
        # Translate every endpoint id into its index in nodes_list.
        self.edges = [
            (nodes_list.index(endpoints[0]), nodes_list.index(endpoints[1]))
            for endpoints in edges_list
        ]

        self.sparse_adj = torch.zeros([2, len(self.edges)], dtype=torch.int64, device=device)
        for column, index_pair in enumerate(self.edges):
            self.sparse_adj[0][column] = index_pair[0]
            self.sparse_adj[1][column] = index_pair[1]

    def num_nodes(self):
        """Number of nodes."""
        return len(self.nodes)

    def num_edges(self):
        """Number of edges."""
        return len(self.edges)

    def insert_node_features(self, nodes_feat):
        """Store the node features under ``ndata['x']``."""
        self.ndata['x'] = nodes_feat

    def insert_edge_features(self, edges_feat, edges_types):
        """Store edge features and the relation name of every edge.

        ``edges_types`` is a dict; only its values are kept, in iteration
        order, so that order has to match the order of ``self.edges``.
        """
        self.edata['type'] = list(edges_types.values())
        self.edata['x'] = edges_feat

In [8]:
class RGCNLayer(nn.Module):
    """Relational graph layer with per-relation max aggregation.

    For every node ``i`` the output row is::

        W_0 @ x_i  +  sum over relations r of
                      max over incoming edges (j -> i) of type r of
                      W_r @ concat(x_j, e_ji)

    where the max is element-wise and a relation without incoming edges
    contributes nothing.

    Parameters
    ----------
    in_feat : int
        Size of ``concat(x_j, e_ji)``, i.e. node feature size plus edge
        feature size.
    out_feat : int
        Output size per node. ``W_0`` is ``(out_feat, out_feat)``, so the
        incoming node features must also have size ``out_feat``.
    num_rels : int
        Number of relation weight matrices; must cover ``REL_TYPES``.
    """

    # Relation names, in the order of the first axis of ``self.weight``.
    REL_TYPES = ('same_lane', 'crossing')

    def __init__(self, in_feat, out_feat, num_rels):
        super(RGCNLayer, self).__init__()
        self.in_feat = in_feat
        self.out_feat = out_feat
        self.num_rels = num_rels

        # weight tensors: one (out_feat, in_feat) matrix per relation plus
        # the self-connection matrix
        self.weight = nn.Parameter(torch.empty(self.num_rels, self.out_feat,
                                               self.in_feat))
        self.weight_0 = nn.Parameter(torch.empty(self.out_feat, self.out_feat))

        # initialize trainable parameters
        nn.init.xavier_uniform_(self.weight,
                                gain=nn.init.calculate_gain('relu'))
        nn.init.xavier_uniform_(self.weight_0,
                                gain=nn.init.calculate_gain('relu'))

    def forward(self, g):
        """Compute the layer output for graph ``g``.

        ``g`` must provide ``num_nodes()``, ``edges`` (list of
        ``(source, target)`` index pairs), ``ndata['x']``, ``edata['x']`` and
        ``edata['type']`` (one relation name per edge, aligned with
        ``edges``).

        Returns a ``(num_nodes, out_feat)`` tensor.
        """
        num_nodes = g.num_nodes()
        if num_nodes == 0:
            return self.weight.new_zeros((0, self.out_feat))

        node_feat = g.ndata['x']
        edge_feat = g.edata['x']
        edge_types = g.edata['type']

        # incoming[target][relation] -> messages arriving at that node.
        # Edges are taken from the edge list itself, so an edge whose
        # features happen to be all zero is still counted.
        incoming = [[[] for _ in self.REL_TYPES] for _ in range(num_nodes)]
        seen = set()
        for k, (source, target) in enumerate(g.edges):
            if (source, target) in seen:
                # keep only the first occurrence of a repeated edge
                continue
            seen.add((source, target))
            if edge_types[k] not in self.REL_TYPES:
                continue
            rel = self.REL_TYPES.index(edge_types[k])
            enhanced = torch.cat((node_feat[source], edge_feat[k]))
            incoming[target][rel].append(torch.matmul(self.weight[rel], enhanced))

        rows = []
        for i in range(num_nodes):
            message = self.weight.new_zeros(self.out_feat)
            for messages in incoming[i]:
                if messages:
                    message = message + torch.stack(messages).max(dim=0).values
            rows.append(message + torch.matmul(self.weight_0, node_feat[i]))

        return torch.stack(rows)

In [9]:
import copy
import numpy as np

class Actor(nn.Module):
    """Graph policy network producing one bounded action vector per node.

    Pipeline: linear node encoder (to 64) and edge encoder (to 32), each
    followed by ReLU; RGCN layer -> GAT layer -> RGCN layer, each followed by
    ReLU; linear decoder; ``max_action * tanh``.

    Parameters
    ----------
    node_dim : int
        Number of input features per node.
    edge_dim : int
        Number of input features per edge.
    action_dim : int
        Number of outputs per node.
    max_action : float
        Outputs lie in ``[-max_action, max_action]``.
    """

    def __init__(self, node_dim=4, edge_dim=2, action_dim=1, max_action=5):
        super(Actor, self).__init__()

        # node encoder
        self.n_enc = nn.Linear(node_dim, 64)
        # edge encoder
        self.e_enc = nn.Linear(edge_dim, 32)

        # first RGCN layer (input: 64 node + 32 edge features)
        self.RGCN1 = RGCNLayer(96, 64, 2)
        # GAT layer
        self.GAT = gnn.GATConv(64, 64, add_self_loops=False, edge_dim=32)
        # second RGCN layer
        self.RGCN2 = RGCNLayer(96, 64, 2)

        # node decoder
        self.n_dec = nn.Linear(64, action_dim)

        self.max_action = max_action

        self.to(device)

    def forward(self, nodes, edges, edges_type):
        """Compute the actions for every node.

        Parameters
        ----------
        nodes : dict
            node id -> feature sequence (assumed to have ``node_dim``
            entries -- confirm against the caller).
        edges : dict
            ``(source id, target id)`` -> feature sequence of length
            ``edge_dim``.
        edges_type : dict
            Relation name per edge; its values must be in the same order as
            the entries of ``edges``.

        Returns
        -------
        torch.Tensor of shape ``(num_nodes, action_dim)``.
        """
        n_feat = torch.as_tensor(list(nodes.values()), dtype=torch.float32, device=device)
        e_feat = torch.as_tensor(list(edges.values()), dtype=torch.float32, device=device)

        # node encoding
        if n_feat.size()[0] == 0:
            # Keep the encoded column count for an empty graph instead of
            # passing on a 1-D (0,) tensor.
            n = n_feat.new_zeros((0, self.n_enc.out_features))
        else:
            n = F.relu(self.n_enc(n_feat))  # num_nodes x 64

        # edge encoding
        if e_feat.size()[0] == 0:
            # No edges (e.g. a single vehicle): the GAT layer is built with
            # edge_dim=32, so hand it a (0, 32) tensor rather than a 1-D
            # (0,) tensor, which its edge projection is not expected to
            # accept.
            e = e_feat.new_zeros((0, self.e_enc.out_features))
        else:
            e = F.relu(self.e_enc(e_feat))  # num_edges x 32

        # graph embedding
        g = Graph(list(nodes.keys()), list(edges.keys()))
        g.insert_node_features(n)
        g.insert_edge_features(e, edges_type)

        # first RGCN layer
        h = F.relu(self.RGCN1(g))

        # GAT layer
        h = F.relu(self.GAT(h, g.sparse_adj, e))

        # The topology and edge features are unchanged, so the same graph is
        # reused with the updated node features instead of being rebuilt.
        g.insert_node_features(h)

        # second RGCN layer
        h = F.relu(self.RGCN2(g))

        # decoding
        out = self.n_dec(h)
        out = self.max_action*torch.tanh(out)

        return out
    
class Critic(nn.Module):
    """Graph Q-network scoring a joint (state, action) pair.

    Same pipeline as the actor (encoders -> RGCN -> GAT -> RGCN -> linear
    decoder), except that each node's action is concatenated to its features
    before encoding and, with ``aggr_func='mean'``, node embeddings are
    averaged before decoding so a single value is returned.

    Parameters
    ----------
    node_dim : int
        Number of input features per node (without the action).
    edge_dim : int
        Number of input features per edge.
    action_dim : int
        Number of action entries per node.
    max_action : float
        Stored on the instance; not used in ``forward``.
    aggr_func : str
        ``'mean'`` averages over nodes; any other value skips aggregation
        and yields one value per node.
    """

    def __init__(self, node_dim=4, edge_dim=2, action_dim=1, max_action=5, aggr_func='mean'):
        super(Critic, self).__init__()

        # node encoder (node features plus that node's action)
        self.n_enc = nn.Linear(node_dim+action_dim, 64)
        # edge encoder
        self.e_enc = nn.Linear(edge_dim, 32)

        # first RGCN layer (input: 64 node + 32 edge features)
        self.RGCN1 = RGCNLayer(96, 64, 2)
        # GAT layer
        self.GAT = gnn.GATConv(64, 64, add_self_loops=False, edge_dim=32)
        # second RGCN layer
        self.RGCN2 = RGCNLayer(96, 64, 2)

        # node decoder
        self.n_dec = nn.Linear(64, 1)

        self.max_action = max_action
        self.aggr_func = aggr_func

        self.to(device)

    def forward(self, nodes, edges, edges_type, actions):
        """Compute the value of ``actions`` in the given graph state.

        Parameters
        ----------
        nodes : dict
            node id -> feature sequence (assumed to have ``node_dim``
            entries -- confirm against the caller).
        edges : dict
            ``(source id, target id)`` -> feature sequence of length
            ``edge_dim``.
        edges_type : dict
            Relation name per edge; its values must be in the same order as
            the entries of ``edges``.
        actions : torch.Tensor
            Concatenated to the node features along dim 1, so it must be a
            2-D tensor with one row per node. Ignored when there are no
            nodes.

        Returns
        -------
        torch.Tensor of shape ``(1,)`` with ``aggr_func='mean'``, otherwise
        ``(num_nodes, 1)``.
        """
        n_feat = torch.as_tensor(list(nodes.values()), dtype=torch.float32, device=device)
        e_feat = torch.as_tensor(list(edges.values()), dtype=torch.float32, device=device)

        # node encoding
        if n_feat.size()[0] == 0:
            # Keep the encoded column count for an empty graph instead of
            # passing on a 1-D (0,) tensor.
            n = n_feat.new_zeros((0, self.n_enc.out_features))
        else:
            n = torch.cat((n_feat, actions), 1)
            n = F.relu(self.n_enc(n))  # num_nodes x 64

        # edge encoding
        if e_feat.size()[0] == 0:
            # No edges (e.g. a single vehicle): the GAT layer is built with
            # edge_dim=32, so hand it a (0, 32) tensor rather than a 1-D
            # (0,) tensor, which its edge projection is not expected to
            # accept.
            e = e_feat.new_zeros((0, self.e_enc.out_features))
        else:
            e = F.relu(self.e_enc(e_feat))  # num_edges x 32

        # graph embedding
        g = Graph(list(nodes.keys()), list(edges.keys()))
        g.insert_node_features(n)
        g.insert_edge_features(e, edges_type)

        # first RGCN layer
        h = F.relu(self.RGCN1(g))

        # GAT layer
        h = F.relu(self.GAT(h, g.sparse_adj, e))

        # The topology and edge features are unchanged, so the same graph is
        # reused with the updated node features instead of being rebuilt.
        g.insert_node_features(h)

        # second RGCN layer
        h = F.relu(self.RGCN2(g))

        # decoding
        if self.aggr_func == 'mean':
            if g.num_nodes() > 0:
                h = torch.sum(h, dim=0)/g.num_nodes()
            else:
                # no nodes: the sum over an empty dim is a zero vector
                h = torch.sum(h, dim=0)
        out = self.n_dec(h)

        return out

In [10]:
import numpy as np

class ReplayBuffer:
    """Fixed-capacity circular store of transitions.

    Once ``size`` transitions have been written, each new one overwrites
    the slot at the current write position.
    """

    def __init__(self, size):
        """Create an empty buffer able to hold at most ``size`` transitions."""
        self.size = size
        self.buffer = []
        self.index = 0   # next slot to write
        self.length = 0  # number of valid transitions, capped at ``size``

    def add(self, nodes, edges, edges_type, action, reward, nodes_, edges_, edges_type_, done):
        """Store one transition as a 9-tuple, in the order of the arguments."""
        transition = (nodes, edges, edges_type, action, reward,
                      nodes_, edges_, edges_type_, done)
        slot = self.index

        if slot < len(self.buffer):
            # Slot already exists: overwrite it.
            self.buffer[slot] = transition
        else:
            # Buffer is still growing towards its capacity.
            self.buffer.append(transition)

        self.index = (slot + 1) % self.size
        self.length = min(self.length + 1, self.size)

    def sample(self, batch_size, n_steps=1):
        """Draw ``batch_size`` distinct start positions without replacement.

        Returns:
            A pair ``(samples, sample_data)``. ``samples`` is a dict with
            unit ``'weights'`` (float32) and the drawn ``'indexes'``.
            ``sample_data`` lists, per index, the stored transition when
            ``n_steps == 1`` or the slice of ``n_steps`` consecutive
            transitions starting there otherwise.
        """
        weights = np.ones(shape=batch_size, dtype=np.float32)
        starts = np.random.choice(self.length - n_steps + 1, batch_size, replace=False)
        samples = {'weights': weights, 'indexes': starts}

        if n_steps == 1:
            sample_data = [self.buffer[start] for start in starts]
        else:
            sample_data = [self.buffer[start: start + n_steps] for start in starts]

        return samples, sample_data

In [11]:
import collections
import copy

class AIM():
    """TD3-style actor-critic agent operating on graph observations.

    The agent keeps one actor, two critics, a target copy of each, and a
    replay buffer. Critics are updated on every learning step, the actor
    only every ``update_interval_actor`` steps, and the target networks are
    moved towards the online networks with a soft update.
    """

    # Std-dev of the Gaussian used to draw random actions during warmup.
    WARMUP_ACTION_STD = 2
    # Std-dev and clipping range of the target-policy smoothing noise.
    TARGET_NOISE_STD = 0.1
    TARGET_NOISE_CLIP = 0.5

    def __init__(self, actor_model, actor_optimizer, critic_model_1, critic_optimizer_1,
                 critic_model_2, critic_optimizer_2, explore_noise, warmup, replay_buffer,
                 batch_size, update_interval, update_interval_actor, target_update_interval,
                 soft_update_tau, n_steps, gamma, model_name, evaluate):
        """Store the models, optimizers and hyper-parameters.

        Args:
            actor_model: policy; called as ``forward(nodes, edges, edges_type)``
                and expected to expose ``max_action``.
            actor_optimizer: optimizer bound to the actor parameters.
            critic_model_1, critic_model_2: twin critics; called as
                ``forward(nodes, edges, edges_type, action)``.
            critic_optimizer_1, critic_optimizer_2: their optimizers.
            explore_noise: std-dev of the Gaussian exploration noise.
            warmup: number of ``learn`` calls during which actions are random
                and no update is performed.
            replay_buffer: object providing ``add(...)`` and
                ``sample(batch_size, n_steps)``.
            batch_size: number of transitions per update.
            update_interval: critics are updated when
                ``time_counter % update_interval == 0``.
            update_interval_actor: same rule for the actor.
            target_update_interval: same rule for the target soft update.
            soft_update_tau: interpolation factor of the soft update, in (0, 1].
            n_steps: passed to the buffer; only ``1`` triggers learning.
            gamma: discount factor.
            model_name: file-name prefix used by ``save_model``/``load_model``.
            evaluate: when true, ``choose_action`` adds no exploration noise.
        """
        self.actor_model = actor_model
        self.actor_optimizer = actor_optimizer
        self.critic_model_1 = critic_model_1
        self.critic_optimizer_1 = critic_optimizer_1
        self.critic_model_2 = critic_model_2
        self.critic_optimizer_2 = critic_optimizer_2

        self.explore_noise = explore_noise
        self.warmup = warmup
        self.replay_buffer = replay_buffer
        self.batch_size = batch_size
        self.update_interval = update_interval
        self.update_interval_actor = update_interval_actor
        self.target_update_interval = target_update_interval
        self.soft_update_tau = soft_update_tau
        self.n_steps = n_steps
        self.gamma = gamma
        self.model_name = model_name

        # Target networks start as exact copies of the online networks.
        self.actor_model_target = copy.deepcopy(self.actor_model)
        self.critic_model_target_1 = copy.deepcopy(self.critic_model_1)
        self.critic_model_target_2 = copy.deepcopy(self.critic_model_2)

        # Incremented on every call to ``learn``.
        self.time_counter = 0

        # Rolling window of the summed critic + actor losses.
        self.loss_record = collections.deque(maxlen=100)

        self.device = device
        self.eval = evaluate

    def _gaussian_like(self, reference, scale):
        """Return zero-mean Gaussian noise with one independent draw per element of ``reference``."""
        noise = np.random.normal(scale=scale, size=tuple(reference.shape))
        return torch.as_tensor(noise, dtype=reference.dtype, device=reference.device)

    def _soft_update(self, target_model, source_model):
        """Move ``target_model`` parameters a fraction ``soft_update_tau`` towards ``source_model``."""
        tau = self.soft_update_tau
        with torch.no_grad():
            for target_param, source_param in zip(target_model.parameters(),
                                                  source_model.parameters()):
                target_param.copy_((1 - tau) * target_param + tau * source_param)

    def _model_paths(self, directory):
        """Return the (actor, critic_1, critic_2) checkpoint paths inside ``directory``."""
        prefix = directory + "/" + self.model_name
        return (prefix + "_actor" + ".pt",
                prefix + "_critic_1" + ".pt",
                prefix + "_critic_2" + ".pt")

    def store_transition(self, nodes, edges, edges_type, action, reward, nodes_, edges_, edges_type_, done):
        """Append one transition to the replay buffer."""
        self.replay_buffer.add(nodes, edges, edges_type, action, reward, nodes_, edges_, edges_type_, done)

    def sample_memory(self):
        """Sample ``batch_size`` transitions; returns ``(samples, data_sample)`` from the buffer."""
        samples, data_sample = self.replay_buffer.sample(self.batch_size, self.n_steps)

        return samples, data_sample

    def choose_action(self, nodes, edges, edges_type):
        """Select the (clamped) action for every node of the current graph.

        While ``time_counter < warmup`` the actions are random Gaussian
        draws of shape ``(len(nodes), 1)``. Afterwards the actor is queried
        and, unless ``self.eval`` is true, independent Gaussian exploration
        noise is added to every action element.

        The returned tensor carries no autograd history, so storing it in
        the replay buffer does not keep the actor's computation graph alive.
        """
        if self.time_counter < self.warmup:
            action = np.random.normal(scale=self.WARMUP_ACTION_STD,
                                      size=(len(nodes), 1))
            action = torch.as_tensor(action, dtype=torch.float32).to(self.device)
        else:
            with torch.no_grad():
                action = self.actor_model.forward(nodes, edges, edges_type)
            if not self.eval:
                action = action + self._gaussian_like(action, self.explore_noise)

        action = torch.clamp(action, -self.actor_model.max_action, self.actor_model.max_action)

        return action

    def test_action(self, nodes, edges, edges_type):
        """Return the raw actor output: no warmup, no noise, no clamping."""
        action = self.actor_model.forward(nodes, edges, edges_type)

        return action

    def loss_process(self, loss, weight):
        """Reduce per-sample losses to their mean.

        ``weight`` is accepted for interface compatibility but ignored: the
        importance weighting that used it is no longer applied.
        """
        return torch.mean(loss)

    def learn_onestep(self, info_batch, data_batch):
        """Run one update on a batch of single-step transitions.

        Both critics are regressed towards the clipped double-Q target
        computed with the target networks. The actor is updated afterwards
        only when ``time_counter`` is a multiple of ``update_interval_actor``;
        ``loss_record`` is appended to only on those steps.

        Args:
            info_batch: dict with a ``'weights'`` entry (forwarded to
                ``loss_process``, which ignores it).
            data_batch: iterable of 9-tuples as stored by ``store_transition``.
        """
        def as_device_float(value):
            # Detached float32 copy on the agent device.
            return torch.as_tensor(value, dtype=torch.float32).detach().to(self.device)

        max_action = self.actor_model.max_action
        critic_loss_1 = []
        critic_loss_2 = []
        self.critic_optimizer_1.zero_grad()
        self.critic_optimizer_2.zero_grad()

        for elem in data_batch:
            nodes, edges, edges_type, action, reward, nodes_, edges_, edges_type_, done = elem
            action, reward, done = [as_device_float(el) for el in (action, reward, done)]

            with torch.no_grad():
                action_target = self.actor_model_target.forward(nodes_, edges_, edges_type_)
                # Target policy smoothing: clipped noise, one draw per action element.
                smoothing = torch.clamp(
                    self._gaussian_like(action_target, self.TARGET_NOISE_STD),
                    -self.TARGET_NOISE_CLIP, self.TARGET_NOISE_CLIP)
                action_target = torch.clamp(action_target + smoothing, -max_action, max_action)

                q1_next = self.critic_model_target_1.forward(nodes_, edges_, edges_type_, action_target)
                q2_next = self.critic_model_target_2.forward(nodes_, edges_, edges_type_, action_target)
                critic_value_next = torch.min(q1_next, q2_next)

                critic_target = reward + self.gamma * critic_value_next * (1 - done)

            q1 = self.critic_model_1.forward(nodes, edges, edges_type, action)
            q2 = self.critic_model_2.forward(nodes, edges, edges_type, action)

            # smooth_l1_loss takes (input, target): prediction first.
            critic_loss_1.append(F.smooth_l1_loss(q1, critic_target))
            critic_loss_2.append(F.smooth_l1_loss(q2, critic_target))

        critic_loss_total_1 = self.loss_process(torch.stack(critic_loss_1), info_batch['weights'])
        critic_loss_total_2 = self.loss_process(torch.stack(critic_loss_2), info_batch['weights'])

        # The actor update below runs its own forward passes, so the critic
        # graph does not need to be retained.
        (critic_loss_total_1 + critic_loss_total_2).backward()
        self.critic_optimizer_1.step()
        self.critic_optimizer_2.step()

        # Delayed policy update.
        if self.time_counter % self.update_interval_actor != 0:
            return

        actor_loss = []
        for elem in data_batch:
            nodes, edges, edges_type = elem[0], elem[1], elem[2]

            mu = self.actor_model.forward(nodes, edges, edges_type)
            actor_loss_sample = -1 * self.critic_model_1.forward(nodes, edges, edges_type, mu)
            actor_loss.append(actor_loss_sample.mean())

        actor_loss_total = self.loss_process(torch.stack(actor_loss), info_batch['weights'])
        self.actor_optimizer.zero_grad()
        actor_loss_total.backward()
        self.actor_optimizer.step()

        self.loss_record.append(float((critic_loss_total_1 +
                                       critic_loss_total_2 +
                                       actor_loss_total).detach().cpu().item()))

    def synchronize_target(self):
        """Soft-update the three target networks towards their online counterparts."""
        assert 0.0 < self.soft_update_tau <= 1.0

        self._soft_update(self.critic_model_target_1, self.critic_model_1)
        self._soft_update(self.critic_model_target_2, self.critic_model_2)
        self._soft_update(self.actor_model_target, self.actor_model)

    def learn(self):
        """Advance the step counter and, when due, perform one learning step.

        Nothing is learned while ``time_counter <= warmup``, on steps that
        are not a multiple of ``update_interval``, or while the replay
        buffer cannot yet provide ``batch_size`` distinct samples.
        """
        if self.time_counter <= self.warmup or \
                (self.time_counter % self.update_interval != 0):
            self.time_counter += 1
            return

        # Sampling without replacement needs at least batch_size candidates;
        # skip the update instead of crashing inside the buffer.
        stored = getattr(self.replay_buffer, 'length', None)
        if stored is not None and stored - self.n_steps + 1 < self.batch_size:
            self.time_counter += 1
            return

        samples, data_sample = self.sample_memory()

        if self.n_steps == 1:  # currently the only supported setting
            self.learn_onestep(samples, data_sample)

        if self.time_counter % self.target_update_interval == 0:
            self.synchronize_target()

        self.time_counter += 1

    def get_statistics(self):
        """Return ``[mean of loss_record]``, or ``[nan]`` when nothing was recorded."""
        loss_statistics = np.mean(self.loss_record) if self.loss_record else np.nan
        return [loss_statistics]

    def save_model(self, save_path):
        """Save the actor and both critics (whole modules) into ``save_path``.

        The directory is created when it does not exist yet.
        """
        import os

        if save_path:
            os.makedirs(save_path, exist_ok=True)

        save_path_actor, save_path_critic_1, save_path_critic_2 = self._model_paths(save_path)
        torch.save(self.actor_model, save_path_actor)
        torch.save(self.critic_model_1, save_path_critic_1)
        torch.save(self.critic_model_2, save_path_critic_2)

    def load_model(self, load_path):
        """Load the weights written by ``save_model`` into the current models.

        Weights are copied into the existing modules rather than rebinding
        the attributes, so the optimizers keep pointing at the parameters
        that are actually used; attributes that are not part of the state
        dict keep their current values. The target networks are then reset
        to the loaded weights.

        NOTE: ``torch.load`` unpickles whole modules; only load checkpoints
        from a trusted source.
        """
        online_models = (self.actor_model, self.critic_model_1, self.critic_model_2)
        for model, path in zip(online_models, self._model_paths(load_path)):
            loaded = torch.load(path, map_location=self.device)
            model.load_state_dict(loaded.state_dict())

        self.actor_model_target.load_state_dict(self.actor_model.state_dict())
        self.critic_model_target_1.load_state_dict(self.critic_model_1.state_dict())
        self.critic_model_target_2.load_state_dict(self.critic_model_2.state_dict())

In [12]:
from flow.utils.registry import make_create_env
from datetime import datetime
import logging
import time
import numpy as np

def evaluate(aim):
    """Run one evaluation episode in a fresh environment and print its statistics.

    The agent's ``eval`` flag is switched on for the duration of the episode
    (so ``choose_action`` adds no exploration noise) and restored afterwards,
    so that training resumes with the same exploration setting it had before.
    The environment is always terminated, even if the episode raises.

    NOTE: ``aim.choose_action`` still returns random actions while
    ``aim.time_counter < aim.warmup``, so an evaluation triggered during
    warmup does not measure the learned policy.

    Args:
        aim: the agent to evaluate; must provide ``choose_action`` and ``eval``.
    """
    flow_params = dict(
        exp_tag='test_network',
        env_name=myEnv,
        network=IntersectionNetwork,
        simulator='traci',
        sim=sim_params,
        env=env_params,
        net=net_params,
        veh=vehicles,
        initial=initial_config,
    )

    # number of time steps
    flow_params['env'].horizon = 1000

    # Get the env name and a creator for the environment.
    create_env, _ = make_create_env(flow_params)

    # Create the environment.
    env = create_env()
    num_steps = env.env_params.horizon

    # Weight of the proximity reward added to the environment reward.
    w_p = 0.2

    previous_eval = aim.eval
    aim.eval = True
    try:
        ret = 0
        eval_steps = 0
        state = env.reset()

        edges, edges_type = compute_edges(env, state)
        # Only the first four entries of each node state are used as features.
        nodes = {node: state[node][:4] for node in list(state.keys())}

        for _step in range(num_steps):
            actions = aim.choose_action(nodes, edges, edges_type)

            state_, reward, done, _info = env.step(rl_actions=actions.cpu().detach().numpy())

            edges_, edges_type_ = compute_edges(env, state_)
            nodes_ = {node: state_[node][:4] for node in list(state_.keys())}

            # Proximity reward is computed on the edges observed before the step.
            proximity_reward = compute_rp(edges)
            reward += proximity_reward*w_p

            nodes = nodes_
            edges = edges_
            edges_type = edges_type_

            ret += reward
            eval_steps += 1

            if done:
                break

        outflow = env.k.vehicle.get_outflow_rate(int(eval_steps))

        # max(..., 1) only matters for a zero-length horizon.
        print("Return: {0}".format(ret/max(eval_steps, 1)))
        print("Duration of the episode: {0}".format(eval_steps))
        print("Outflow: {0}".format(outflow))
        print('-----------------------')
    finally:
        # Give the agent back in the mode it was in and release the simulator.
        aim.eval = previous_eval
        env.terminate()

In [13]:
from flow.utils.registry import make_create_env
from datetime import datetime
import logging
import time
import numpy as np

# Training cell: builds the environment and the TD3 agent, then alternates
# environment steps and learning steps until 500000 total steps are reached.
flow_params = dict(
    exp_tag='test_network',
    env_name=myEnv,
    network=IntersectionNetwork,
    simulator='traci',
    sim=sim_params,
    env=env_params,
    net=net_params,
    veh=vehicles,
    initial=initial_config,
)

# number of time steps
flow_params['env'].horizon = 1000

# Get the env name and a creator for the environment.
create_env, _ = make_create_env(flow_params)

# Create the environment.
env = create_env()

logging.info(" Starting experiment {} at {}".format(
    env.network.name, str(datetime.utcnow())))

logging.info("Initializing environment.")

finished = False

num_steps = env.env_params.horizon

# raise an error if convert_to_csv is set to True but no emission
# file will be generated, to avoid getting an error at the end of the
# simulation
convert_to_csv = False
if convert_to_csv and env.sim_params.emission_path is None:
    raise ValueError(
        'The experiment was run with convert_to_csv set '
        'to True, but no emission file will be generated. If you wish '
        'to generate an emission file, you should set the parameter '
        'emission_path in the simulation parameters (SumoParams or '
        'AimsunParams) to the path of the folder where emissions '
        'output should be generated. If you do not wish to generate '
        'emissions, set the convert_to_csv parameter to False.')

# per-episode statistics
outflows = []
returns = []

# time profiling information
t = time.time()
times = []

# RL agent initialization - start
actor = Actor()
critic_1 = Critic()
critic_2 = Critic()

lr = 3e-4
actor_optimizer = torch.optim.Adam(actor.parameters(), lr=lr)
critic_optimizer_1 = torch.optim.Adam(critic_1.parameters(), lr=lr)
critic_optimizer_2 = torch.optim.Adam(critic_2.parameters(), lr=lr)

explore_noise = 0.1

replay_buffer = ReplayBuffer(size=10**6)

gamma = 0.99

warmup = 25000
# RL agent initialization - end

aim = AIM(actor,
          actor_optimizer,
          critic_1,
          critic_optimizer_1,
          critic_2,
          critic_optimizer_2,
          explore_noise,
          warmup,
          replay_buffer,
          batch_size=256,            # previously 32
          update_interval=1,         # previously 100
          update_interval_actor=2,   # previously 500
          target_update_interval=2,  # previously 5000
          soft_update_tau=0.005,
          n_steps=1,
          gamma=gamma,
          model_name='AIM_model',
          evaluate=False)

# total number of environment steps across all episodes
st = 0
# index of the current episode, used as run id for the emission files
episode = 0

try:
    while not finished:
        ep_steps = 0
        ret = 0
        state = env.reset()

        # Kept as a module-level name: helpers defined in other cells may read it.
        veh_ids = env.k.vehicle.get_ids()
        edges, edges_type = compute_edges(env, state)
        nodes = {}
        for node in list(state.keys()):
            nodes[node] = state[node][:4]

        for j in range(num_steps):

            actions = aim.choose_action(nodes, edges, edges_type)

            t0 = time.time()

            state_, reward, done, _ = env.step(rl_actions=actions.cpu().detach().numpy())

            veh_ids = env.k.vehicle.get_ids()
            edges_, edges_type_ = compute_edges(env, state_)
            nodes_ = {}
            for node in list(state_.keys()):
                nodes_[node] = state_[node][:4]

            # Proximity reward, computed on the edges observed before the step.
            Rp = compute_rp(edges)
            w_p = 0.2
            reward += Rp*w_p

            # Transitions starting from an empty graph carry no action and are not stored.
            if nodes != {}:
                aim.store_transition(nodes, edges, edges_type, actions, reward, nodes_, edges_, edges_type_, done)
            aim.learn()

            nodes = nodes_
            edges = edges_
            edges_type = edges_type_

            t1 = time.time()
            times.append(1 / (t1 - t0))

            ret += reward
            st += 1
            ep_steps += 1

            if done:
                break
            if st == 500000:
                finished = True
                break
            if st % 5000 == 0:
                # Periodic evaluation; the training episode is cut short afterwards.
                print('EVALUATION RUN')
                evaluate(aim)
                print('END EVALUATION')
                break

        # Store the statistics of the episode.
        outflow = env.k.vehicle.get_outflow_rate(int(ep_steps))
        outflows.append(outflow)
        returns.append(ret)

        print("Weighted return: {0}".format(ret/ep_steps))
        print("Duration of the episode: {0}".format(ep_steps))
        print("Outflow: {0}".format(outflow))
        print('-----------------------')

        aim.save_model('../TrainedModels/TD3')

        # Save emission data at the end of every rollout. This is skipped
        # by the internal method if no emission path was specified.
        if env.simulator == "traci":
            env.k.simulation.save_emission(run_id=episode)

        episode += 1

    # Overall timing summary.
    print("Total time:", time.time() - t)
    print("steps/second:", np.mean(times))
finally:
    # Always release the simulator, including on interruption.
    env.terminate()

Traceback (most recent call last):
  File "/home/matteo/.local/bin/sumo-gui", line 8, in <module>
    sys.exit(sumo_gui())
  File "/home/matteo/.local/lib/python3.8/site-packages/sumo/__init__.py", line 29, in <lambda>
    return lambda: sys.exit(subprocess.call([os.path.join(SUMO_HOME, 'bin', app)] + sys.argv[1:], env=ENV))
  File "/usr/lib/python3.8/subprocess.py", line 342, in call
    return p.wait(timeout=timeout)
  File "/usr/lib/python3.8/subprocess.py", line 1083, in wait
    return self._wait(timeout=timeout)
  File "/usr/lib/python3.8/subprocess.py", line 1806, in _wait
    (pid, sts) = self._try_wait(0)
  File "/usr/lib/python3.8/subprocess.py", line 1764, in _try_wait
    (pid, sts) = os.waitpid(self.pid, wait_flags)
KeyboardInterrupt


KeyboardInterrupt: 