<a href="https://colab.research.google.com/github/nyculescu/phd_speed_harmo_v3/blob/simplified_project_ipynb/SpeedHarmoDRL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Shell escape: runs main.py in a subprocess using whichever `python` is on PATH.
# NOTE(review): this cell sits above the setup cell that installs dependencies — confirm the intended run order.
!python main.py

# Intro

# Traffic Flow Optimization Using Reinforcement Learning
This notebook demonstrates how to use SUMO (Simulation of Urban MObility) for traffic simulation and Stable-Baselines3 (SB3) to train a Deep Q-Network (DQN) agent that optimizes Variable Speed Limits (VSL) in a Joint Lane Merge scenario.

# Setup

In [3]:
# Install into the running kernel's environment (%pip rather than !pip) and pin the
# version so a fresh run resolves the same release recorded in this cell's output.
%pip install -q stable-baselines3==2.4.0

# Import necessary libraries
import logging
import os
import sys

import gymnasium as gym
import numpy as np
from gymnasium.wrappers import TimeLimit
from stable_baselines3 import DQN
from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnNoModelImprovement, CheckpointCallback
from stable_baselines3.common.logger import configure
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import SubprocVecEnv

# Configure logging
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')

# Add $SUMO_HOME/tools to the import path; fail early when SUMO_HOME is not set
if 'SUMO_HOME' not in os.environ:
    raise EnvironmentError("Please set the SUMO_HOME environment variable.")
tools = os.path.join(os.environ['SUMO_HOME'], 'tools')
# Guard so that re-running this cell does not stack duplicate entries on sys.path
if tools not in sys.path:
    sys.path.append(tools)

# Paths for output files
output_dir = "./traffic_environment/sumo"
os.makedirs(output_dir, exist_ok=True)


Collecting stable-baselines3
  Downloading stable_baselines3-2.4.0-py3-none-any.whl.metadata (4.5 kB)
Collecting gymnasium<1.1.0,>=0.29.1 (from stable-baselines3)
  Downloading gymnasium-1.0.0-py3-none-any.whl.metadata (9.5 kB)
Collecting farama-notifications>=0.0.1 (from gymnasium<1.1.0,>=0.29.1->stable-baselines3)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)
Downloading stable_baselines3-2.4.0-py3-none-any.whl (183 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 183.9/183.9 kB 15.9 MB/s eta 0:00:00
Downloading gymnasium-1.0.0-py3-none-any.whl (958 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 958.1/958.1 kB 55.5 MB/s eta 0:00:00
Downloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Installing collected packages: farama-notifications, gymnasium, stable-baselines3
Successfully installed farama-notifications-0.0.4 gymnasium-1.0.0 stable-baselines3-2.4.0

OSError: Please set the SUMO_HOME environment variable.

# SUMO Configuration

In [1]:
# Generate SUMO configuration files dynamically
def create_sumocfg(model, num_envs=None, out_dir=None):
    """Write one SUMO configuration file per environment for ``model``.

    Files are named ``3_2_merge_{model}_{i}.sumocfg`` and each one references
    the matching ``generated_flows_{model}_{i}.rou.xml`` route file.

    Args:
        model: Label inserted into the config and route file names.
        num_envs: Number of config files to write. When omitted, falls back to
            the notebook-level ``num_envs_per_model``.
        out_dir: Directory to write into (created if missing). When omitted,
            falls back to the notebook-level ``output_dir``.

    Returns:
        List of the written file paths, in index order.
    """
    sumocfg_template = """<?xml version="1.0" encoding="UTF-8"?>
    <configuration xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://sumo.dlr.de/xsd/sumoConfiguration.xsd">
        <input>
            <net-file value="3_2_merge.net.xml"/>
            <route-files value="generated_flows_{model}_{index}.rou.xml"/>
            <additional-files value="loops_detectors.add.xml"/>
            <gui-settings-file value="colored.view.xml"/>
        </input>
    </configuration>
    """
    # Explicit arguments win; the globals are only a fallback so existing
    # one-argument calls keep working.
    if num_envs is None:
        num_envs = num_envs_per_model
    if out_dir is None:
        out_dir = output_dir
    os.makedirs(out_dir, exist_ok=True)

    written = []
    for i in range(num_envs):
        filename = f"3_2_merge_{model}_{i}.sumocfg"
        filepath = os.path.join(out_dir, filename)
        content = sumocfg_template.format(model=model, index=i)
        # The XML declaration says UTF-8, so do not rely on the platform default encoding
        with open(filepath, 'w', encoding='utf-8') as file:
            file.write(content)
        written.append(filepath)
    return written


# Traffic Simulation

In [None]:
# Visualize vehicle distributions (bimodal pattern)
def plot_vehicle_distributions():
    """Draw the output of ``bimodal_distribution_24h()`` against the hours 0..23.

    NOTE(review): ``plt`` and ``bimodal_distribution_24h`` are not defined in
    the visible cells — confirm both are available before this cell runs.
    The helper is assumed to return one value per hour (24 values) — TODO confirm.
    """
    hourly_values = bimodal_distribution_24h()
    hours = np.arange(0, 24, 1)

    # Explicit figure/axes rather than the implicit pyplot state
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(hours, hourly_values, marker='o')
    ax.set_title('Bimodal Distribution Over 24 Hours')
    ax.set_xlabel('Hour of the Day')
    ax.set_ylabel('Vehicle Density')
    ax.grid(True)
    plt.show()


# Reinforcement Learning Framework

In [None]:
# Define TrafficEnv class (simplified for clarity)
class TrafficEnv(gym.Env):
    """Skeleton Gymnasium environment for variable-speed-limit control.

    An action is an index into ``speed_limits`` (50 to 130 in steps of 5).
    The observation is a single value bounded by 0 and 130/3.6
    (presumably a speed in m/s — TODO confirm).

    The SUMO interaction is elided in this simplified version (see the ``...``
    placeholders): ``obs``, ``reward`` and ``done`` must be produced there
    before ``step``/``reset`` can actually run.
    """

    def __init__(self, port, model_idx=0):
        """Set up the action and observation spaces.

        Args:
            port: Port number for this environment's simulation instance
                (stored only; the connection code is elided here).
            model_idx: Index identifying the model this environment belongs
                to. Defaults to 0 so that callers passing only ``port`` work.
        """
        super().__init__()
        # Keep the constructor arguments so the elided SUMO code can use them
        self.port = port
        self.model_idx = model_idx
        self.speed_limits = np.arange(50, 135, 5)  # Discrete action space: [50 km/h to 130 km/h]
        self.action_space = gym.spaces.Discrete(len(self.speed_limits))
        self.observation_space = gym.spaces.Box(low=np.array([0]), high=np.array([130/3.6]), dtype=np.float64)

    def step(self, action):
        """Apply the chosen speed limit and advance the simulation one step.

        Returns:
            The Gymnasium 5-tuple ``(obs, reward, terminated, truncated, info)``.
        """
        # Apply speed limit based on action
        self.speed_limit = self.speed_limits[action]
        # Simulate one step in SUMO and collect metrics (occupancy, speed, etc.)
        # ...
        # `done` is reported as `terminated`; this environment never signals
        # truncation itself, so `truncated` is always False.
        return obs, reward, done, False, {}

    def reset(self, *, seed=None, options=None):
        """Reset the simulation and return ``(obs, info)`` as Gymnasium requires.

        Args:
            seed: Optional seed forwarded to ``gym.Env.reset`` for the env's RNG.
            options: Accepted for API compatibility; unused.
        """
        super().reset(seed=seed)
        # Reset simulation environment
        # ...
        return obs, {}

# Instantiate training and evaluation environments
def _make_env(port, model_idx=0):
    """Return a zero-argument factory that builds one monitored TrafficEnv on ``port``.

    A factory function binds ``port`` at creation time. A ``lambda`` written
    inside the list comprehension would read the loop variable only when it is
    finally called, so every environment would end up with the last port.
    """
    def _init():
        # NOTE(review): TrafficEnv.__init__ declares model_idx, which the original
        # call omitted; 0 is a placeholder — confirm the intended value.
        return Monitor(TrafficEnv(port=port, model_idx=model_idx))
    return _init


# NOTE(review): num_train_envs_per_model is not defined in the visible cells — confirm it is set before this runs.
train_env = SubprocVecEnv([_make_env(8000 + i) for i in range(num_train_envs_per_model)])
eval_env = SubprocVecEnv([_make_env(9000)])


# Rule-Based Benchmark

In [None]:
# Implement rule-based logic for VSL control
def rule_based_vsl(occupancy):
    """Pick a fixed speed limit from an occupancy reading.

    Bands are tested from most to least congested using strict ``>``:
    above 80 gives 50, above 40 gives 80, anything else gives 130.
    """
    # (occupancy threshold, speed limit) pairs, most severe first
    congestion_bands = (
        (80, 50),  # Severe congestion
        (40, 80),  # Moderate congestion
    )
    for threshold, limit in congestion_bands:
        if occupancy > threshold:
            return limit
    return 130  # Free-flow conditions


# Training and Evaluation

In [None]:
# Train the DQN model (no evaluation or checkpoint callbacks are attached in this cell)
TOTAL_TIMESTEPS = 100_000
model = DQN("MlpPolicy", train_env, verbose=1)
model.learn(total_timesteps=TOTAL_TIMESTEPS)

# Evaluate the trained model on test scenarios
obs = eval_env.reset()
done = False
while not done:
    # deterministic=True: evaluate the learned policy rather than sampling exploratory actions
    action, _states = model.predict(obs, deterministic=True)
    # A vectorized env returns one done flag per sub-environment; eval_env wraps a
    # single environment, so read its flag explicitly instead of testing the array.
    obs, reward, dones, info = eval_env.step(action)
    done = bool(dones[0])


# Visualization

In [None]:
# Visualize results: rewards over time, emissions reduction, etc.
# Explicit figure/axes rather than the implicit pyplot state.
# NOTE(review): `rewards` and `plt` are not defined in the visible cells — confirm they exist before this cell runs.
fig, ax = plt.subplots()
ax.plot(rewards)
ax.set_title('Reward Over Time')
ax.set_xlabel('Episode')
ax.set_ylabel('Reward')
plt.show()
