In [6]:
!pip install stable-baselines3

Collecting stable-baselines3
  Using cached stable_baselines3-2.5.0-py3-none-any.whl.metadata (4.8 kB)
Using cached stable_baselines3-2.5.0-py3-none-any.whl (183 kB)
Installing collected packages: stable-baselines3
Successfully installed stable-baselines3-2.5.0


In [10]:
!pip3 install torch torchvision torchaudio



In [11]:
import numpy as np
import random
from gymnasium import Env
from gymnasium.spaces import Box, MultiDiscrete
from stable_baselines3 import PPO
import os

OSError: [WinError 127] The specified procedure could not be found. Error loading "C:\ProgramData\anaconda3\Lib\site-packages\torch\lib\shm.dll" or one of its dependencies.

In [3]:
class TrafficSimulation(Env):
    """Gymnasium environment wrapping a NetLogo four-way junction traffic model.

    Observation
        A float32 vector with one density per sensor ``S1``..``S8``. A density
        is the ``cars-passed`` count of the sensor's light divided by the timer
        value reported for the side the sensor belongs to.

    Action
        ``MultiDiscrete([7, 36, 36, 36, 36])``: the index of the route
        combination (mapped to ``com1``..``com7``) followed by four green-light
        offsets (mapped to durations 5..40) for ``side1``..``side4``.

    Reward
        +1 when the standard deviation of the densities is lower than on the
        previous step, -1 when it is higher, 0 when unchanged, and
        ``DEADLOCK_PENALTY`` when every density is zero.
    """

    # Routes served by each selectable combination, listed in side order
    # (first route -> side1, second -> side2, ...).
    ROUTE_COMBINATIONS = {
        "com1": ('R12', 'R34', 'R56', 'R78'),
        "com2": ('R12', 'R37', 'R48', 'R56'),
        "com3": ('R14', 'R26', 'R37', 'R58'),
        "com4": ('R14', 'R27', 'R36', 'R58'),
        "com5": ('R15', 'R26', 'R34', 'R78'),
        "com6": ('R15', 'R26', 'R37', 'R48'),
        "com7": ('R15', 'R27', 'R36', 'R48'),
    }

    # Sensors (lights) attached to each route.
    ROUTE_SENSORS = {
        'R12': ('S1', 'S2'),
        'R14': ('S1', 'S4'),
        'R15': ('S1', 'S5'),
        'R26': ('S2', 'S6'),
        'R27': ('S2', 'S7'),
        'R34': ('S3', 'S4'),
        'R36': ('S3', 'S6'),
        'R37': ('S3', 'S7'),
        'R48': ('S4', 'S8'),
        'R56': ('S5', 'S6'),
        'R58': ('S5', 'S8'),
        'R78': ('S7', 'S8'),
    }

    # Fixed sensor order used to lay out the observation vector.
    SENSORS = ('S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8')

    # Lower bound applied to the value written to NetLogo's `min-density`.
    MIN_DENSITY_FLOOR = 0.8

    # Reward returned when all densities are zero (treated as a deadlock).
    DEADLOCK_PENALTY = -10

    def __init__(
        self,
        model_path=r'4Way-Junction-Traffic-Simulation-SriLanka.nlogo',
        jvm_path=r"C:\Program Files\Java\jdk-19\bin\server\jvm.dll",
        gui=True,
        netlogo_home=None,
        episode_length=15,
    ):
        """
        Open a NetLogo link, load the model and run its ``setup`` procedure.

        Args:
            model_path: Path of the ``.nlogo`` model to load.
            jvm_path: Path of the JVM library handed to pynetlogo.
            gui: Whether NetLogo is started with its GUI.
            netlogo_home: Optional NetLogo installation directory; only
                forwarded to pynetlogo when given.
            episode_length: Number of ``step`` calls after which the episode
                is truncated.

        All defaults reproduce the values that used to be hard-coded, so
        ``TrafficSimulation()`` behaves as before.
        """
        super().__init__()

        # One non-negative density per sensor.
        self.observation_space = Box(
            low=0, high=np.inf, shape=(len(self.SENSORS),), dtype=np.float32
        )

        # [combination index (0-6 -> com1..com7), four timer offsets (0-35 -> 5..40)]
        self.action_space = MultiDiscrete([7, 36, 36, 36, 36])

        # Episode bookkeeping
        self.episode_length = episode_length
        self.current_step = 0

        # Standard deviation of the previous observation, used by the reward.
        self.previous_density_stdev = 0

        # Imported lazily so the class can be defined without pynetlogo installed.
        import pynetlogo

        link_kwargs = {"jvm_path": jvm_path, "gui": gui}
        if netlogo_home is not None:
            link_kwargs["netlogo_home"] = netlogo_home
        self.netlogo = pynetlogo.NetLogoLink(**link_kwargs)

        self.netlogo.load_model(model_path)
        self.netlogo.command('setup')  # Initialize NetLogo simulation

    def reset(self, seed=None, options=None):
        """
        Reset the environment state.

        Re-runs NetLogo's ``setup`` and returns an all-zero observation together
        with an empty info dict.
        """
        super().reset(seed=seed)  # Properly handle seeding

        self.current_step = 0

        # Reset NetLogo simulation
        self.netlogo.command('setup')

        # No cycle has run yet, so every density starts at zero.
        observation = np.zeros(len(self.SENSORS), dtype=np.float32)

        # NOTE(review): because this baseline is 0, the first step of every
        # episode is rewarded -1 whenever the densities are not all equal.
        self.previous_density_stdev = 0

        return observation, {}

    def step(self, action):
        """
        Perform one step in the environment.

        Applies the chosen route combination and green-light durations, runs one
        full NetLogo cycle via ``go-cycle`` and measures the resulting densities.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
            ``terminated`` is True only for a deadlock (all densities zero);
            ``truncated`` is True once ``episode_length`` steps have been taken.
        """
        # Decode the action
        selected_combination = int(action[0]) + 1  # 'com' index, 1 to 7
        green_light_durations = [int(offset) + 5 for offset in action[1:]]  # 5-40 range

        # Update NetLogo with the selected combination and timers
        self.netlogo.command(f'set routes-combinations "com{selected_combination}"')
        for side_number, duration in enumerate(green_light_durations, start=1):
            self.netlogo.command(f'set side{side_number} {duration}')

        # Run the NetLogo model until the cycle completes
        self.netlogo.command('go-cycle')

        # Get the new observation (densities)
        observation = self.get_current_densities()

        # All-zero densities are treated as a deadlock: a terminal state of the
        # simulation itself, so it is reported as termination, not truncation.
        if np.all(observation == 0):
            print("Deadlock detected: All densities are 0. Ending episode early.")
            return observation, self.DEADLOCK_PENALTY, True, False, {"reason": "deadlock"}

        # Feed the smallest density (bounded below) back into NetLogo.
        # Rounding undoes float32 representation noise so the NetLogo command
        # receives the same two-decimal value that was measured.
        min_density = max(round(float(np.min(observation)), 2), self.MIN_DENSITY_FLOOR)
        print(f"Setting min-density to {min_density}")
        self.netlogo.command(f'set min-density {min_density}')

        # Reward the direction of change of the density spread.
        new_density_stdev = np.std(observation)
        reward = 0
        if new_density_stdev < self.previous_density_stdev:
            reward = 1
        elif new_density_stdev > self.previous_density_stdev:
            reward = -1
        self.previous_density_stdev = new_density_stdev

        # Reaching the fixed horizon is a time limit, i.e. a truncation.
        self.current_step += 1
        terminated = False
        truncated = self.current_step >= self.episode_length

        return observation, reward, terminated, truncated, {}

    def get_current_densities(self):
        """
        Get the current traffic densities from NetLogo using `check_density()`.

        Returns:
            A float32 array ordered ``S1``..``S8``, matching ``observation_space``.
        """
        densities = self.check_density()
        return np.array([densities[sensor] for sensor in self.SENSORS], dtype=np.float32)

    def check_density(self):
        """
        Calculate and return traffic densities based on the current route combination.

        Returns:
            Dict mapping each sensor name ``S1``..``S8`` to its density, rounded
            to two decimals. A sensor whose side timer is 0 gets 0.0.

        Raises:
            ValueError: If NetLogo reports a route combination that is not one
                of ``com1``..``com7``.
        """
        current_com = self.netlogo.report('routes-combinations')
        combination_routes = self.ROUTE_COMBINATIONS.get(current_com)
        if combination_routes is None:
            raise ValueError(f"Unknown route combination reported by NetLogo: {current_com!r}")

        # Start from numeric zeros so the result is always a valid observation.
        densities = dict.fromkeys(self.SENSORS, 0.0)

        for side_number, route in enumerate(combination_routes, start=1):
            # Current value of the side timer in NetLogo
            timer = int(self.netlogo.report(f'side{side_number}'))
            for sensor in self.ROUTE_SENSORS[route]:
                vehicle_count = self._cars_passed(sensor)
                # Avoid division by zero
                densities[sensor] = round(vehicle_count / timer, 2) if timer != 0 else 0.0

        return densities

    def _cars_passed(self, name):
        """Report `cars-passed` of the light called `name`, or 0 if no such light exists."""
        return int(self.netlogo.report(
            f'ifelse-value (any? lights with [name = "{name}"]) [ [cars-passed] of one-of lights with [name = "{name}"] ] [ 0 ]'
        ))

    def render(self):
        """No-op: visualization is handled within NetLogo itself."""
        pass

    def close(self):
        """Kill the NetLogo workspace; safe to call more than once."""
        netlogo = getattr(self, "netlogo", None)
        if netlogo is not None:
            netlogo.kill_workspace()
            self.netlogo = None


In [3]:
# Initialize the environment. The constructor opens the NetLogo link, loads the
# junction model and runs its `setup` procedure.
env = TrafficSimulation()

In [4]:
# Define the paths for the TensorBoard logs and for the saved model
log_path = os.path.join('Training', 'Logs')
model_save_path = os.path.join('Training', 'PPO_Traffic_Model')

# Create the PPO model with the 'MlpPolicy' (the environment's observation is a
# flat Box vector, not a Dict)
model = PPO(
    "MlpPolicy", 
    env, 
    verbose=1, 
    tensorboard_log=log_path
)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [None]:
# Train the model for a specific number of timesteps
model.learn(total_timesteps=30000)

# Persist the trained policy right away so the testing/evaluation cells, which
# load it from `model_save_path`, work on a fresh kernel run.
model.save(model_save_path)

Logging to Training/Logs/PPO_3
side1-counter 3 : S1 2  S4 12
side2-counter 4 : S2 22  S7 7
side3-counter 4 : S3 8  S6 25
side4-counter 4 : S5 3  S8 28
Cycle Finished at 16 in com4
Setting min-density to 0.8
side1-counter 4 : S1 1  S2 15
side2-counter 4 : S3 8  S4 21
side3-counter 4 : S5 5  S6 15
side4-counter 4 : S7 10  S8 16
Cycle Finished at 32 in com1
Setting min-density to 0.8
side1-counter 4 : S1 2  S4 11
side2-counter 4 : S2 13  S7 4
side3-counter 4 : S3 8  S6 17
side4-counter 4 : S5 3  S8 11
Cycle Finished at 48 in com4
Setting min-density to 0.8
side1-counter 4 : S1 2  S5 4
side2-counter 4 : S2 8  S6 13
side3-counter 4 : S3 5  S4 14
side4-counter 4 : S7 4  S8 8
Cycle Finished at 64 in com5
Setting min-density to 0.8
side1-counter 4 : S1 1  S2 8
side2-counter 4 : S3 11  S4 11
side3-counter 4 : S5 3  S6 16
side4-counter 4 : S7 10  S8 10
Cycle Finished at 80 in com1
Setting min-density to 0.8
side1-counter 4 : S1 1  S5 2
side2-counter 4 : S2 13  S7 6
side3-counter 4 : S3 5  S6 10


In [6]:
# Shut down the NetLogo workspace held by the training environment
env.close()

### PPO Testing

In [4]:
import os
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
import numpy as np

# Initialize the environment
env = TrafficSimulation()
env = Monitor(env)  # Wrapping the environment for logging

# Define the path where your PPO model is saved
model_save_path = os.path.join('Training', 'PPO_Traffic_Model')

# Initialize variables to keep track of cumulative reward
cumulative_reward = 0
num_steps = 100  # Number of steps you want to run the simulation

# The environment owns a NetLogo workspace; release it even when loading the
# model or stepping the simulation fails or is interrupted.
try:
    # Load the saved model
    model = PPO.load(model_save_path, env=env)

    # Reset the environment to start a new episode
    observation, _ = env.reset()

    for step in range(num_steps):
        # Predict the action to take based on the current observation
        action, _ = model.predict(observation, deterministic=True)

        # Take the action in the environment
        observation, reward, done, truncated, info = env.step(action)

        # Accumulate the reward
        cumulative_reward += reward

        # Print the results for each step
        print(f"Step {step + 1}:")
        print(f"Action Taken: {action}")
        print(f"Observation: {observation}")
        print(f"Reward: {reward}")
        print(f"Cumulative Reward: {cumulative_reward}")
        print("-" * 30)

        # If the episode is done, reset the environment and print the cumulative reward
        if done or truncated:
            print(f"Episode finished. Cumulative Reward: {cumulative_reward}")
            # Reset the environment and cumulative reward for the next episode
            observation, _ = env.reset()
            cumulative_reward = 0
finally:
    # Close the environment after the simulation
    env.close()


Exception: Can't get attribute '_function_setstate' on <module 'cloudpickle.cloudpickle' from 'C:\\ProgramData\\anaconda3\\Lib\\site-packages\\cloudpickle\\cloudpickle.py'>
Exception: Can't get attribute '_function_setstate' on <module 'cloudpickle.cloudpickle' from 'C:\\ProgramData\\anaconda3\\Lib\\site-packages\\cloudpickle\\cloudpickle.py'>


Wrapping the env in a DummyVecEnv.
Setting min-density to 0.8
Step 1:
Action Taken: [ 3 19  0 17  7]
Observation: [0.75 6.4  0.6  3.   2.   6.4  1.   7.  ]
Reward: -1
Cumulative Reward: -1
------------------------------
Setting min-density to 0.8
Step 2:
Action Taken: [ 3 27  0 17  7]
Observation: [1.  4.  1.  3.8 2.2 3.  0.6 4.6]
Reward: 1
Cumulative Reward: 0
------------------------------
Setting min-density to 0.8
Step 3:
Action Taken: [ 0 12 20 17 14]
Observation: [1.2 1.4 1.  3.2 2.  2.8 0.8 2.4]
Reward: 1
Cumulative Reward: 1
------------------------------
Setting min-density to 0.8
Step 4:
Action Taken: [ 0 14 17  0 20]
Observation: [2.6 2.8 1.2 2.8 2.  3.6 0.4 2.8]
Reward: -1
Cumulative Reward: 0
------------------------------
Setting min-density to 0.8
Step 5:
Action Taken: [ 0 14  7 17 29]
Observation: [1.2 2.6 0.8 2.8 1.8 2.6 0.6 2.8]
Reward: 1
Cumulative Reward: 1
------------------------------
Setting min-density to 0.8
Step 6:
Action Taken: [ 0 12 17 17 14]
Observation: 

In [7]:
# Save the current model to `model_save_path` (defined in an earlier cell)
model.save(model_save_path)

In [8]:
# Drop the kernel's reference to the in-memory model
del model

In [9]:
# Close the environment held in `env`
env.close()

In [13]:
import os
from stable_baselines3 import A2C
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_vec_env

In [11]:
# A2C is trained on the same custom TrafficSimulation environment class that
# was used for PPO.

# Create the environment
env = TrafficSimulation()

In [14]:
# Define the paths for the TensorBoard logs and for the saved model
log_path = os.path.join('Training', 'Logs')
model_save_path = os.path.join('Training', 'A2C_Traffic_Model')

# Create the A2C model with an MLP policy
model = A2C(
    "MlpPolicy", 
    env, 
    verbose=1, 
    tensorboard_log=log_path,
    learning_rate=0.001,  # Learning rate for the optimizer
    n_steps=5,  # Number of steps to run for each environment per update
)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [15]:
# Train the model for a specific number of timesteps
model.learn(total_timesteps=10000)

# Save the model
model.save(model_save_path)

# Evaluate the model over 10 episodes
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
print(f"Mean reward: {mean_reward}, Std reward: {std_reward}")

# Optionally, you can load the model later for further training or evaluation
# loaded_model = A2C.load(model_save_path, env=env)

Logging to Training/Logs/A2C_1
side1-counter 3 : S1 0  S4 6
side2-counter 4 : S2 22  S7 6
side3-counter 4 : S3 5  S6 23
side4-counter 4 : S5 3  S8 24
Cycle Finished at 16 in com4
Setting min-density to 0.8
side1-counter 4 : S1 3  S4 19
side2-counter 4 : S2 13  S7 5
side3-counter 4 : S3 6  S6 10
side4-counter 4 : S5 3  S8 19
Cycle Finished at 32 in com4
Setting min-density to 0.8
side1-counter 4 : S1 4  S4 13
side2-counter 4 : S2 7  S7 8
side3-counter 4 : S3 5  S6 8
side4-counter 4 : S5 4  S8 10
Cycle Finished at 48 in com4
Setting min-density to 1.0
side1-counter 4 : S1 4  S5 2
side2-counter 4 : S2 4  S7 6
side3-counter 4 : S3 5  S6 11
side4-counter 4 : S4 9  S8 13
Cycle Finished at 64 in com7
Setting min-density to 0.8
side1-counter 4 : S1 4  S5 4
side2-counter 4 : S2 9  S6 14
side3-counter 4 : S3 4  S4 15
side4-counter 4 : S7 4  S8 12
Cycle Finished at 80 in com5
Setting min-density to 1.0
side1-counter 4 : S1 4  S4 19
side2-counter 4 : S2 12  S6 14
side3-counter 4 : S3 5  S7 8
side4

KeyboardInterrupt: 

In [150]:
from stable_baselines3 import A2C

In [151]:
# TrafficSimulation exposes a flat Box observation space, so the MLP policy is
# the matching choice; "MultiInputPolicy" is meant for Dict observation spaces.
model2 = A2C("MlpPolicy", env, verbose=1, tensorboard_log=log_path)
# Train the model
model2.learn(total_timesteps=20000)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to TestTraining/Logs/A2C_1
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 100      |
|    ep_rew_mean        | 80.4     |
| time/                 |          |
|    fps                | 341      |
|    iterations         | 100      |
|    time_elapsed       | 1        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -13.7    |
|    explained_variance | 0.0354   |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | 33.5     |
|    value_loss         | 7.78     |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 100      |
|    ep_rew_mean        | 87.4     |
| time/                 |          |
|    fps                | 366      |
|    iterations     

<stable_baselines3.a2c.a2c.A2C at 0x7ce04d098dd0>

## Evaluate Policy

In [7]:
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

In [9]:
# Define the path where your PPO model is saved
model_save_path = os.path.join('Training', 'PPO_Traffic_Model')

# Initialize the environment (starts a new NetLogo link and runs `setup`)
env = TrafficSimulation()

# Load the saved model
model = PPO.load(model_save_path, env=env)

# Run 10 evaluation episodes; the displayed result is (mean_reward, std_reward)
evaluate_policy(model, env, n_eval_episodes=10)

Exception: Can't get attribute '_function_setstate' on <module 'cloudpickle.cloudpickle' from 'C:\\ProgramData\\anaconda3\\Lib\\site-packages\\cloudpickle\\cloudpickle.py'>
Exception: Can't get attribute '_function_setstate' on <module 'cloudpickle.cloudpickle' from 'C:\\ProgramData\\anaconda3\\Lib\\site-packages\\cloudpickle\\cloudpickle.py'>


Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




Setting min-density to 1.5
Setting min-density to 1.6
Setting min-density to 1.8
Setting min-density to 1.4
Setting min-density to 1.4
Setting min-density to 1.6
Setting min-density to 1.6
Setting min-density to 1.4
Setting min-density to 1.4
Setting min-density to 1.2
Setting min-density to 1.4
Setting min-density to 1.4
Setting min-density to 1.4
Setting min-density to 1.2
Setting min-density to 1.6
Setting min-density to 0.8
Setting min-density to 1.0
Setting min-density to 1.0
Setting min-density to 0.8
Setting min-density to 1.4
Setting min-density to 1.2
Setting min-density to 1.2
Setting min-density to 0.8
Setting min-density to 0.8
Setting min-density to 1.0
Setting min-density to 0.8
Setting min-density to 0.8
Setting min-density to 0.8
Setting min-density to 0.8
Setting min-density to 0.8
Setting min-density to 1.75
Setting min-density to 1.2
Setting min-density to 1.0
Setting min-density to 1.6
Setting min-density to 1.2
Setting min-density to 1.4
Setting min-density to 1.2


(0.8, 2.2715633383201097)

In [29]:
# Start a fresh episode; reset() returns the initial observation and an info dict
obs, info = env.reset()

In [53]:
# Predict the action using only the observation.
# The action holds [combination index, four green-light timer offsets].
action, _states = model.predict(obs)

print(action)

[ 4 14 22  0 32]
