<a href="https://colab.research.google.com/github/kritisinghh/capstone/blob/main/lstm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install gymnasium stable-baselines3 sb3-contrib numpy pandas scikit-learn

Collecting stable-baselines3
  Downloading stable_baselines3-2.5.0-py3-none-any.whl.metadata (4.8 kB)
Collecting sb3-contrib
  Downloading sb3_contrib-2.5.0-py3-none-any.whl.metadata (4.1 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3.0,>=2.3->stable-baseline

In [5]:
import gymnasium as gym
from sb3_contrib import RecurrentPPO
from stable_baselines3.common.vec_env import DummyVecEnv
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

def preprocess_data(df):
    categorical_cols = ["eclass", "route", "type"]
    encoder = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
    encoded_data = encoder.fit_transform(df[categorical_cols])

    encoded_df = pd.DataFrame(
        encoded_data,
        columns=encoder.get_feature_names_out(categorical_cols)
    )

    df = df.drop(columns=categorical_cols)
    df = pd.concat([df, encoded_df], axis=1)

    return df

class TrafficSignalEnv(gym.Env):
    def __init__(self, df, sequence_length=5):
        super().__init__()
        self.df = df
        self.current_step = 0
        self.max_steps = len(df) - 1
        self.sequence_length = sequence_length

        self.action_space = gym.spaces.Discrete(3)
        self.observation_space = gym.spaces.Box(
            low=-np.inf, high=np.inf, shape=(len(df.columns) - 1,), dtype=np.float32
        )

    def reset(self, **kwargs):
        self.current_step = np.random.randint(0, self.max_steps)
        obs = np.array(self.df.iloc[self.current_step, 1:].values, dtype=np.float32).flatten()
        return obs, {}

    def step(self, action):
        co2 = self.df.at[self.current_step, "CO2"]
        fuel = self.df.at[self.current_step, "fuel"]

        if self.current_step > 0:
            prev_co2 = self.df.at[self.current_step - 1, "CO2"]
            prev_fuel = self.df.at[self.current_step - 1, "fuel"]
            co2_reduction = prev_co2 - co2
            fuel_reduction = prev_fuel - fuel
        else:
            co2_reduction = 0
            fuel_reduction = 0

        reward = co2_reduction + fuel_reduction

        self.current_step += 1
        done = self.current_step >= self.max_steps
        truncated = False

        next_state = np.array(self.df.iloc[self.current_step, 1:].values, dtype=np.float32).flatten()
        return next_state, reward, done, truncated, {}

df = pd.read_csv("traffic_flow_dataset.csv")
df = preprocess_data(df)

env = DummyVecEnv([lambda: TrafficSignalEnv(df)])

model = RecurrentPPO("MlpLstmPolicy", env, verbose=1)

num_episodes = 500

for episode in range(num_episodes):
    obs = env.reset()
    done = False
    lstm_states = None
    initial_co2 = env.envs[0].df.at[env.envs[0].current_step, "CO2"]
    initial_fuel = env.envs[0].df.at[env.envs[0].current_step, "fuel"]

    final_co2 = initial_co2
    final_fuel = initial_fuel
    episode_starts = np.ones((1,), dtype=bool)

    while not done:
        action, lstm_states = model.predict(obs, state=lstm_states, episode_start=episode_starts, deterministic=False)
        obs, reward, done,  _ = env.step(action)
        episode_starts = np.array([done], dtype=bool)
        if env.envs[0].current_step > 0:
            final_co2 = env.envs[0].df.at[env.envs[0].current_step, "CO2"]
            final_fuel = env.envs[0].df.at[env.envs[0].current_step, "fuel"]

    co2_reduction = initial_co2 - final_co2
    fuel_reduction = initial_fuel - final_fuel

    print(f"Episode {episode + 1}: CO2 Reduction = {co2_reduction:.2f}, Fuel Reduction = {fuel_reduction:.2f}")

print("Training complete!")

Using cpu device
Episode 1: CO2 Reduction = -21.73, Fuel Reduction = -0.06
Episode 2: CO2 Reduction = 74.44, Fuel Reduction = -1.02
Episode 3: CO2 Reduction = -12.03, Fuel Reduction = 3.42
Episode 4: CO2 Reduction = -36.30, Fuel Reduction = -0.66
Episode 5: CO2 Reduction = 23.27, Fuel Reduction = 0.41
Episode 6: CO2 Reduction = -85.62, Fuel Reduction = 4.11
Episode 7: CO2 Reduction = 90.31, Fuel Reduction = -7.06
Episode 8: CO2 Reduction = -57.40, Fuel Reduction = 2.24
Episode 9: CO2 Reduction = 69.06, Fuel Reduction = -1.15
Episode 10: CO2 Reduction = 144.71, Fuel Reduction = 5.12
Episode 11: CO2 Reduction = -76.68, Fuel Reduction = 0.36
Episode 12: CO2 Reduction = 83.27, Fuel Reduction = 4.33
Episode 13: CO2 Reduction = 46.13, Fuel Reduction = -5.44
Episode 14: CO2 Reduction = 102.81, Fuel Reduction = -2.33
Episode 15: CO2 Reduction = 50.04, Fuel Reduction = -5.22
Episode 16: CO2 Reduction = -68.20, Fuel Reduction = -6.54
Episode 17: CO2 Reduction = 20.24, Fuel Reduction = 5.03
Episo