<a href="https://colab.research.google.com/github/kritisinghh/capstone/blob/main/maddpg.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install gymnasium stable-baselines3 numpy pandas scikit-learn


Collecting stable-baselines3
  Downloading stable_baselines3-2.5.0-py3-none-any.whl.metadata (4.8 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (

In [3]:
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder


def preprocess_data(df):
    categorical_cols = ["eclass", "route", "type"]
    encoder = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
    encoded_data = encoder.fit_transform(df[categorical_cols])

    encoded_df = pd.DataFrame(
        encoded_data,
        columns=encoder.get_feature_names_out(categorical_cols)
    )

    df = df.drop(columns=categorical_cols)
    df = pd.concat([df, encoded_df], axis=1)

    return df


class TrafficSignalEnv(gym.Env):
    def __init__(self, df):
        super(TrafficSignalEnv, self).__init__()
        self.df = df
        self.current_step = 0
        self.max_steps = len(df) - 1

        self.action_space = gym.spaces.Discrete(3)
        self.observation_space = gym.spaces.Box(
            low=-np.inf, high=np.inf, shape=(len(df.columns) - 1,), dtype=np.float32
        )

    def reset(self, **kwargs):
        self.current_step = 0
        obs = np.array(self.df.iloc[self.current_step, 1:].values, dtype=np.float32)
        return obs, {}  # Add an empty dictionary as the second return value


    def step(self, action):
        state = np.array(self.df.iloc[self.current_step, 1:].values, dtype=np.float32)

        # Define action mapping
        signal_type = ["red", "yellow", "green"][action]

        # Simulated logic (adjust based on real data)
        co2 = self.df.at[self.current_step, "CO2"]
        fuel = self.df.at[self.current_step, "fuel"]
        reward = - (co2 + fuel)  # Negative reward to minimize emissions

        # Move to next step
        self.current_step += 1
        done = self.current_step >= self.max_steps
        truncated = False  # Add this (for early termination cases)

        # Get next state
        next_state = np.array(self.df.iloc[self.current_step, 1:].values, dtype=np.float32)

        return next_state, reward, done, truncated, {}  # Corrected (5 values)


# Load dataset
df = pd.read_csv("traffic_flow_dataset.csv")

# Preprocess the dataset
df = preprocess_data(df)

# Create environment
env = DummyVecEnv([lambda: TrafficSignalEnv(df)])

# Train RL model
model = PPO("MlpPolicy", env, verbose=1)

# Number of episodes
num_episodes = 10

# Training loop
for episode in range(num_episodes):
    obs = env.reset()
    done = False
    episode_co2 = 0
    episode_fuel = 0

    while not done:
        action, _ = model.predict(obs)
        obs, reward, done, _ = env.step(action)

        if env.envs[0].current_step > 0:
            episode_co2 += env.envs[0].df.at[env.envs[0].current_step - 1, "CO2"]
            episode_fuel += env.envs[0].df.at[env.envs[0].current_step - 1, "fuel"]

    print(f"Episode {episode}: CO2 Reduction = {-episode_co2:.2f}, Fuel Reduction = {-episode_fuel:.2f}")

print("Training complete!")


Using cpu device
Episode 0: CO2 Reduction = -124425.25, Fuel Reduction = -5464.99
Episode 1: CO2 Reduction = -124425.25, Fuel Reduction = -5464.99
Episode 2: CO2 Reduction = -124425.25, Fuel Reduction = -5464.99
Episode 3: CO2 Reduction = -124425.25, Fuel Reduction = -5464.99
Episode 4: CO2 Reduction = -124425.25, Fuel Reduction = -5464.99
Episode 5: CO2 Reduction = -124425.25, Fuel Reduction = -5464.99
Episode 6: CO2 Reduction = -124425.25, Fuel Reduction = -5464.99
Episode 7: CO2 Reduction = -124425.25, Fuel Reduction = -5464.99
Episode 8: CO2 Reduction = -124425.25, Fuel Reduction = -5464.99
Episode 9: CO2 Reduction = -124425.25, Fuel Reduction = -5464.99
Training complete!
