<a href="https://colab.research.google.com/github/kritisinghh/capstone/blob/main/ppo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install gymnasium stable-baselines3 numpy pandas scikit-learn


Collecting stable-baselines3
  Downloading stable_baselines3-2.5.0-py3-none-any.whl.metadata (4.8 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (

In [3]:
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# Preprocessing function
def preprocess_data(df):
    categorical_cols = ["eclass", "route", "type"]
    encoder = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
    encoded_data = encoder.fit_transform(df[categorical_cols])

    encoded_df = pd.DataFrame(
        encoded_data,
        columns=encoder.get_feature_names_out(categorical_cols)
    )

    df = df.drop(columns=categorical_cols)
    df = pd.concat([df, encoded_df], axis=1)

    return df

# Environment class
class TrafficSignalEnv(gym.Env):
    def __init__(self, df):
        super(TrafficSignalEnv, self).__init__()
        self.df = df
        self.current_step = 0
        self.max_steps = len(df) - 1

        self.action_space = gym.spaces.Discrete(3)  # 0 = red, 1 = yellow, 2 = green
        self.observation_space = gym.spaces.Box(
            low=-np.inf, high=np.inf, shape=(len(df.columns) - 1,), dtype=np.float32
        )

    def reset(self, **kwargs):
        self.current_step = np.random.randint(0, self.max_steps)  # Random start point
        obs = np.array(self.df.iloc[self.current_step, 1:].values, dtype=np.float32)
        return obs, {}

    def step(self, action):
        co2 = self.df.at[self.current_step, "CO2"]
        fuel = self.df.at[self.current_step, "fuel"]

        if self.current_step > 0:
            prev_co2 = self.df.at[self.current_step - 1, "CO2"]
            prev_fuel = self.df.at[self.current_step - 1, "fuel"]
            co2_reduction = prev_co2 - co2
            fuel_reduction = prev_fuel - fuel
        else:
            co2_reduction = 0
            fuel_reduction = 0

        reward = np.clip((co2_reduction + fuel_reduction) / 10, -5, 5)
        self.current_step += 1
        done = self.current_step >= self.max_steps
        truncated = False

        next_state = np.array(self.df.iloc[self.current_step, 1:].values, dtype=np.float32)
        return next_state, reward, done, truncated, {}

# Load and preprocess data
df = pd.read_csv("traffic_flow_dataset.csv")
df = preprocess_data(df)

env = DummyVecEnv([lambda: TrafficSignalEnv(df)])

# Define and train the model
model = PPO("MlpPolicy", env, verbose=1)


num_episodes = 100

for episode in range(num_episodes):
    obs = env.reset()
    done = False
    initial_co2 = env.envs[0].df.at[0, "CO2"]
    initial_fuel = env.envs[0].df.at[0, "fuel"]
    final_co2 = initial_co2
    final_fuel = initial_fuel

    while not done:
        action, _ = model.predict(obs, deterministic=False)
        obs, reward, done, _ = env.step(action)

        if env.envs[0].current_step > 0:
            final_co2 = env.envs[0].df.at[env.envs[0].current_step, "CO2"]
            final_fuel = env.envs[0].df.at[env.envs[0].current_step, "fuel"]

    co2_reduction = initial_co2 - final_co2
    fuel_reduction = initial_fuel - final_fuel

    print(f"Episode {episode + 1}: CO2 Reduction = {co2_reduction:.2f}, Fuel Reduction = {fuel_reduction:.2f}")

print("Training complete!")


Using cpu device
Episode 1: CO2 Reduction = -32.94, Fuel Reduction = -3.63
Episode 2: CO2 Reduction = 1.75, Fuel Reduction = 4.48
Episode 3: CO2 Reduction = -37.92, Fuel Reduction = -2.18
Episode 4: CO2 Reduction = 0.92, Fuel Reduction = -0.46
Episode 5: CO2 Reduction = -96.23, Fuel Reduction = 2.60
Episode 6: CO2 Reduction = -80.69, Fuel Reduction = -2.58
Episode 7: CO2 Reduction = -114.90, Fuel Reduction = 0.18
Episode 8: CO2 Reduction = 15.11, Fuel Reduction = -1.42
Episode 9: CO2 Reduction = -41.79, Fuel Reduction = -3.37
Episode 10: CO2 Reduction = -62.00, Fuel Reduction = -3.66
Episode 11: CO2 Reduction = -98.76, Fuel Reduction = 0.18
Episode 12: CO2 Reduction = -115.82, Fuel Reduction = -2.37
Episode 13: CO2 Reduction = -35.89, Fuel Reduction = -0.27
Episode 14: CO2 Reduction = 26.13, Fuel Reduction = 3.09
Episode 15: CO2 Reduction = -116.35, Fuel Reduction = 3.05
Episode 16: CO2 Reduction = -47.79, Fuel Reduction = 0.05
Episode 17: CO2 Reduction = -95.72, Fuel Reduction = 0.01
