# In [1]: (notebook cell marker left over from the Jupyter export)
import os
import sys
import time
import optparse
import random
import numpy as np
import torch
import torch.optim as optim
import torch.nn.functional as F
import torch.nn as nn
import matplotlib.pyplot as plt


from sumolib import checkBinary  
import traci 

def tally_vehicles_in_lanes(road_lanes):
    """Count, for every lane, the vehicles whose lane position is above 10.

    Args:
        road_lanes: iterable of lane IDs to inspect.

    Returns:
        dict mapping each lane ID to the number of vehicles reported by
        ``traci.lane.getLastStepVehicleIDs`` whose
        ``traci.vehicle.getLanePosition`` is strictly greater than 10.
        A lane ID that appears several times yields a single entry.
    """
    def _count_past_threshold(lane_id):
        # Only vehicles beyond position 10 on the lane are counted.
        vehicle_ids = traci.lane.getLastStepVehicleIDs(lane_id)
        return len(
            [vid for vid in vehicle_ids if traci.vehicle.getLanePosition(vid) > 10]
        )

    return {lane_id: _count_past_threshold(lane_id) for lane_id in road_lanes}

def compute_total_waiting_time(road_lanes):
    """Return the sum of ``traci.lane.getWaitingTime`` over ``road_lanes``.

    Every entry of ``road_lanes`` contributes, so a lane ID listed twice is
    counted twice. An empty iterable yields 0.
    """
    accumulated = 0
    for lane_id in road_lanes:
        accumulated += traci.lane.getWaitingTime(lane_id)
    return accumulated

def configure_trafficlight_phase(junction_id, phase_duration, phase_config):
    """Apply a signal state to a traffic light and set its phase duration.

    Args:
        junction_id: ID of the traffic light to control.
        phase_duration: value passed to ``setPhaseDuration``.
        phase_config: signal-state string passed to
            ``setRedYellowGreenState``.
    """
    signals = traci.trafficlight
    # The state is applied first, then the duration, as two separate commands.
    signals.setRedYellowGreenState(junction_id, phase_config)
    signals.setPhaseDuration(junction_id, phase_duration)




class NeuralNetwork(nn.Module):
    """Three-layer fully connected network that owns its optimizer and loss.

    The network maps an input of ``input_dimensions`` features through two
    ReLU-activated linear layers to ``action_count`` outputs. It creates an
    Adam optimizer over its own parameters, an MSE loss, and moves itself to
    CUDA when available (CPU otherwise).
    """

    def __init__(self, learning_rate, input_dimensions, layer1_dimensions, layer2_dimensions, action_count):
        super().__init__()

        # Keep the construction hyper-parameters available on the instance.
        self.learning_rate = learning_rate
        self.action_count = action_count
        self.input_dimensions = input_dimensions
        self.layer1_dimensions = layer1_dimensions
        self.layer2_dimensions = layer2_dimensions

        # Layers are created in input -> output order.
        self.layer1 = nn.Linear(input_dimensions, layer1_dimensions)
        self.layer2 = nn.Linear(layer1_dimensions, layer2_dimensions)
        self.output_layer = nn.Linear(layer2_dimensions, action_count)

        self.optimizer = optim.Adam(self.parameters(), lr=learning_rate)
        self.loss_function = nn.MSELoss()

        device_name = "cpu"
        if torch.cuda.is_available():
            device_name = "cuda"
        self.device = torch.device(device_name)
        self.to(self.device)

    def forward(self, input_state):
        """Return the output-layer values for ``input_state``."""
        hidden = torch.relu(self.layer1(input_state))
        hidden = torch.relu(self.layer2(hidden))
        return self.output_layer(hidden)


class RLAgent:
    """Q-learning agent with one replay buffer per junction and one shared network.

    Every junction key in ``junctions`` gets its own fixed-size ring buffer of
    transitions; all junctions train and query the same ``eval_net``.

    Args:
        discount_factor: multiplier applied to the bootstrapped next-state value.
        exploration_rate: initial probability of picking a random action.
        learning_rate: learning rate handed to the network's optimizer.
        input_dimensions: length of a state vector.
        layer1_dimensions: width of the first hidden layer.
        layer2_dimensions: width of the second hidden layer.
        batch_size: stored on the agent; ``learn`` currently trains on the
            whole filled part of the buffer and does not read it.
        action_count: number of discrete actions.
        junctions: iterable of hashable junction keys.
        max_memory_size: capacity of each per-junction ring buffer.
        exploration_rate_decay: amount subtracted from the exploration rate
            on every ``learn`` call.
        exploration_rate_end: lower bound of the exploration rate.
    """

    def __init__(
        self,
        discount_factor,
        exploration_rate,
        learning_rate,
        input_dimensions,
        layer1_dimensions,
        layer2_dimensions,
        batch_size,
        action_count,
        junctions,
        max_memory_size=100000,
        exploration_rate_decay=5e-4,
        exploration_rate_end=0.05,
    ):
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.input_dimensions = input_dimensions
        self.layer1_dimensions = layer1_dimensions
        self.layer2_dimensions = layer2_dimensions
        self.action_count = action_count
        self.action_space = list(range(action_count))
        self.junctions = junctions
        self.max_memory = max_memory_size
        self.exploration_rate_decay = exploration_rate_decay
        self.exploration_rate_end = exploration_rate_end
        self.memory_counter = 0
        self.iteration_counter = 0
        self.target_replace = 100

        self.eval_net = NeuralNetwork(
            self.learning_rate,
            self.input_dimensions,
            self.layer1_dimensions,
            self.layer2_dimensions,
            self.action_count,
        )
        self.memory = {junction: self._new_buffer() for junction in junctions}

    def _new_buffer(self):
        """Return an empty, pre-allocated replay buffer for one junction."""
        state_shape = (self.max_memory, self.input_dimensions)
        return {
            "state_memory": np.zeros(state_shape, dtype=np.float32),
            "new_state_memory": np.zeros(state_shape, dtype=np.float32),
            "reward_memory": np.zeros(self.max_memory, dtype=np.float32),
            "action_memory": np.zeros(self.max_memory, dtype=np.int32),
            "terminal_memory": np.zeros(self.max_memory, dtype=bool),
            "memory_counter": 0,
            "iteration_counter": 0,
        }

    def store_transition(self, state, new_state, action, reward, done, junction):
        """Write one transition into ``junction``'s ring buffer.

        The write position is ``memory_counter % max_memory``, so the oldest
        entries are overwritten once the buffer is full.
        """
        buffer = self.memory[junction]
        index = buffer["memory_counter"] % self.max_memory
        buffer["state_memory"][index] = state
        buffer["new_state_memory"][index] = new_state
        buffer["reward_memory"][index] = reward
        buffer["terminal_memory"][index] = done
        buffer["action_memory"][index] = action
        buffer["memory_counter"] += 1

    def select_action(self, observation):
        """Pick an action epsilon-greedily for a single observation.

        With probability ``exploration_rate`` a uniformly random action is
        returned; otherwise the index of the largest network output.

        Returns:
            int: the chosen action index.
        """
        if np.random.random() > self.exploration_rate:
            state = torch.tensor([observation], dtype=torch.float).to(
                self.eval_net.device
            )
            # Inference only: no autograd graph is needed here.
            with torch.no_grad():
                actions = self.eval_net(state)
            return torch.argmax(actions).item()
        return int(np.random.choice(self.action_space))

    def reset(self, junction_numbers):
        """Mark the buffers of the given junctions as empty.

        Only the write counter is reset; the stored arrays are left as they
        are and get overwritten by later transitions.
        """
        for junction_number in junction_numbers:
            self.memory[junction_number]['memory_counter'] = 0

    def save(self, model_name):
        """Save the network weights to ``trained_model/<model_name>.bin``."""
        path = f'trained_model/{model_name}.bin'
        # torch.save does not create missing directories.
        os.makedirs(os.path.dirname(path), exist_ok=True)
        torch.save(self.eval_net.state_dict(), path)

    def learn(self, junction):
        """Run one gradient step on every stored transition of ``junction``.

        Does nothing when the junction's buffer is empty. After a step the
        iteration counter is incremented and the exploration rate is decayed,
        never dropping below ``exploration_rate_end``.
        """
        buffer = self.memory[junction]
        # memory_counter keeps growing after the ring buffer wraps; only the
        # first max_memory slots exist, so clamp to avoid out-of-range reads.
        filled = min(buffer["memory_counter"], self.max_memory)
        if filled == 0:
            # An empty batch would yield a NaN loss.
            return

        device = self.eval_net.device
        state_batch = torch.tensor(buffer["state_memory"][:filled]).to(device)
        new_state_batch = torch.tensor(buffer["new_state_memory"][:filled]).to(device)
        reward_batch = torch.tensor(buffer["reward_memory"][:filled]).to(device)
        terminal_batch = torch.tensor(buffer["terminal_memory"][:filled]).to(device)
        action_batch = torch.tensor(
            buffer["action_memory"][:filled], dtype=torch.long
        ).to(device)

        self.eval_net.optimizer.zero_grad()

        # Q-value of the action that was actually taken in each transition.
        q_eval = self.eval_net(state_batch).gather(1, action_batch.unsqueeze(1)).squeeze(1)

        # The bootstrap target is treated as a constant: no gradient flows
        # through the next-state evaluation.
        with torch.no_grad():
            q_next = self.eval_net(new_state_batch)
            q_next[terminal_batch] = 0.0
            q_target = reward_batch + self.discount_factor * torch.max(q_next, dim=1)[0]

        loss = self.eval_net.loss_function(q_eval, q_target)
        loss.backward()
        self.eval_net.optimizer.step()

        self.iteration_counter += 1
        # Decay epsilon but never undershoot the configured floor.
        self.exploration_rate = max(
            self.exploration_rate - self.exploration_rate_decay,
            self.exploration_rate_end,
        )



def _run_episode(rl_agent, all_junctions, select_lane, steps, green_duration, train):
    """Run one episode on the already-open TraCI connection; return the summed waiting time."""
    traffic_lights_time = {junction: 0 for junction in all_junctions}
    prev_action = {number: 0 for number in range(len(all_junctions))}
    # NOTE(review): the initial state is hard-coded to 4 entries, which
    # presumably matches the agent's input_dimensions -- confirm.
    prev_vehicles_per_lane = {number: [0] * 4 for number in range(len(all_junctions))}

    # Queried once per episode instead of on every step for every junction.
    controlled_lanes = {
        junction: traci.trafficlight.getControlledLanes(junction)
        for junction in all_junctions
    }

    total_time = 0
    step = 0
    while step <= steps:
        traci.simulationStep()
        for junction_number, junction in enumerate(all_junctions):
            lanes = controlled_lanes[junction]
            waiting_time = compute_total_waiting_time(lanes)
            total_time += waiting_time

            if traffic_lights_time[junction] != 0:
                # Current phase is still running; just count it down.
                traffic_lights_time[junction] -= 1
                continue

            vehicles_per_lane = tally_vehicles_in_lanes(lanes)

            # Store the transition that ended with the current observation.
            reward = -1 * waiting_time
            state_ = list(vehicles_per_lane.values())
            state = prev_vehicles_per_lane[junction_number]
            prev_vehicles_per_lane[junction_number] = state_
            rl_agent.store_transition(
                state, state_, prev_action[junction_number], reward, (step == steps), junction_number
            )

            # Select the next action from the current observation.
            lane = rl_agent.select_action(state_)
            prev_action[junction_number] = lane
            # NOTE(review): both states are applied within the same
            # simulation step, so the second call replaces the first before
            # SUMO advances -- confirm whether a yellow interval was intended.
            configure_trafficlight_phase(junction, 6, select_lane[lane][0])
            configure_trafficlight_phase(junction, green_duration, select_lane[lane][1])

            traffic_lights_time[junction] = green_duration
            if train:
                rl_agent.learn(junction_number)
        step += 1
    return total_time


def run_simulation(train=True, model_name="model", epochs=50, steps=500):
    """Train or evaluate the RL traffic-light controller in SUMO.

    Args:
        train: when True, runs ``epochs`` headless episodes, trains the agent
            after every decision, saves the weights whenever an episode
            achieves a new lowest total waiting time, and plots the totals at
            the end. When False, loads ``trained_model/<model_name>.bin`` and
            runs a single episode in ``sumo-gui``.
        model_name: base name of the weights file.
        epochs: maximum number of episodes.
        steps: episodes run for ``steps + 1`` simulation steps.

    The TraCI connection is closed even when an episode raises, so a failed
    run does not leave a connection open for the next ``traci.start``.
    """
    # One [yellow, green] pair of 16-character signal states per action.
    select_lane = [
        ["yyyyrrrrrrrrrrrr", "GGGGrrrrrrrrrrrr"],
        ["rrrryyyyrrrrrrrr", "rrrrGGGGrrrrrrrr"],
        ["rrrrrrrryyyyrrrr", "rrrrrrrrGGGGrrrr"],
        ["rrrrrrrrrrrryyyy", "rrrrrrrrrrrrGGGG"],
    ]
    min_duration = 5
    green_duration = min_duration + 10

    best_time = np.inf
    total_time_list = []

    # Short-lived connection used only to discover the traffic-light IDs.
    traci.start(
        [checkBinary("sumo"), "-c", "configuration.sumocfg", "--tripinfo-output", "network/tripinfo.xml"]
    )
    try:
        all_junctions = traci.trafficlight.getIDList()
    finally:
        traci.close()
    junction_numbers = list(range(len(all_junctions)))

    rl_agent = RLAgent(
        discount_factor=0.99,
        exploration_rate=0.0,
        learning_rate=0.1,
        input_dimensions=4,
        layer1_dimensions=256,
        layer2_dimensions=256,
        batch_size=1024,
        action_count=4,
        junctions=junction_numbers,
    )

    if not train:
        rl_agent.eval_net.load_state_dict(
            torch.load(f'trained_model/{model_name}.bin', map_location=rl_agent.eval_net.device)
        )

    # Training runs headless; evaluation opens the GUI.
    sumo_binary = "sumo" if train else "sumo-gui"

    for e in range(epochs):
        traci.start(
            [checkBinary(sumo_binary), "-c", "configuration.sumocfg", "--tripinfo-output", "tripinfo.xml"]
        )
        try:
            print(f"Simulating epoch number: {e}")
            total_time = _run_episode(
                rl_agent, all_junctions, select_lane, steps, green_duration, train
            )
        finally:
            traci.close()

        print("Total time every cars waited at junction for: ",total_time)
        total_time_list.append(total_time)

        if total_time < best_time:
            best_time = total_time
            if train:
                rl_agent.save(model_name)

        sys.stdout.flush()
        if not train:
            break

    if train:
        plt.title("Waiting time of cars per lane vs Episodes")
        plt.plot(list(range(len(total_time_list))),total_time_list, linestyle='--', color='red', marker='o')
        plt.xlabel("Episode")
        plt.ylabel("Cars Waiting Time at each lane")
        plt.grid(True)
        plt.show()
        


if __name__ == "__main__":
    # Script entry point: run a training session with the settings below.
    training, model_name = True, 'model'
    num_epochs, num_steps = 200, 500
    run_simulation(
        train=training,
        model_name=model_name,
        epochs=num_epochs,
        steps=num_steps,
    )


# Notebook output from the last run: FatalTraCIError: connection closed by SUMO