In [27]:
# # Install environment and agent
!pip install highway-env
# TODO: we use the bleeding edge version because the current stable version does not support the latest gym>=0.21 versions. Revert back to stable at the next SB3 release.
!pip install git+https://github.com/DLR-RM/stable-baselines3

# # Environment
import gymnasium as gym
import highway_env

gym.register_envs(highway_env)

# Agent
from stable_baselines3 import DQN


import sys
from tqdm.notebook import trange
# !pip install tensorboardx gym pyvirtualdisplay
# !apt-get install -y xvfb ffmpeg

  from pkg_resources import load_entry_point
Collecting gymnasium>=1.0.0a2
  Using cached gymnasium-1.0.0a2-py3-none-any.whl (954 kB)
[31mERROR: stable-baselines3 2.4.0a9 has requirement gymnasium<0.30,>=0.28.1, but you'll have gymnasium 1.0.0a2 which is incompatible.[0m
Installing collected packages: gymnasium
  Attempting uninstall: gymnasium
    Found existing installation: gymnasium 0.29.1
    Uninstalling gymnasium-0.29.1:
      Successfully uninstalled gymnasium-0.29.1
Successfully installed gymnasium-1.0.0a2
  from pkg_resources import load_entry_point
Collecting git+https://github.com/DLR-RM/stable-baselines3
  Cloning https://github.com/DLR-RM/stable-baselines3 to /tmp/pip-req-build-h9pyv2wb
  Running command git clone -q https://github.com/DLR-RM/stable-baselines3 /tmp/pip-req-build-h9pyv2wb
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h    Preparing wheel metadata ... [?25ldone
Collecting gymnasium<0.30,>=0

In [16]:
import os
import requests
import time

# Set your Claude.ai API key as an environment variable for security
# For example, in your terminal:
# export CLAUDE_API_KEY='your_claude_api_key_here'

CLAUDE_API_KEY = os.getenv('CLAUDE_API_KEY')
CLAUDE_API_URL = 'https://api.anthropic.com/v1/complete'

def claude_action(prompt1, assist1, prompt2, model='claude-v1', max_tokens_to_sample=50, temperature=0.7):
    """
    Sends prompts to Claude.ai and retrieves the recommended action.

    Parameters:
        prompt1 (str): The initial prompt.
        assist1 (str): The assistant prompt.
        prompt2 (str): The detailed scenario prompt.
        model (str): The Claude model to use.
        max_tokens_to_sample (int): Maximum number of tokens to generate.
        temperature (float): Sampling temperature.

    Returns:
        str: The action recommended by Claude.ai.
    """
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {CLAUDE_API_KEY}',
    }

    # Combine the prompts into a single input as per Claude's API requirements
    # Adjust the separators based on Claude's API specifications
    full_prompt = f"{prompt1}\n\n{assist1}\n\n{prompt2}"

    data = {
        'prompt': full_prompt,
        'model': model,
        'max_tokens_to_sample': max_tokens_to_sample,
        'temperature': temperature,
        'stop_sequences': ['\n'],
    }

    try:
        response = requests.post(CLAUDE_API_URL, headers=headers, json=data)
        response.raise_for_status()
        response_json = response.json()
        # Extract the text from the response
        action_text = response_json.get('completion', '').strip()
        # Assume the action is in the format "Final decision: ACTION"
        if 'Final decision:' in action_text:
            action = action_text.split('Final decision:')[-1].strip().upper()
            return action
        else:
            # Fallback or handle unexpected format
            print(f"Unexpected response format: {action_text}")
            return 'IDLE'  # Default action
    except requests.exceptions.RequestException as e:
        print(f"Error communicating with Claude.ai: {e}")
        # Implement retry logic or return a default action
        time.sleep(1)  # Wait before retrying
        return 'IDLE'  # Default action


In [17]:
import random

def randomize_env_config(env, base_config,
                         vehicleCount_range=(3, 10),
                         vehicles_density_range=(1, 5), spacing = (1,3),
                         duration_range=(30, 60)):
    """
    Randomizes the environment configuration for diversity.

    Parameters:
        env (gym.Env): The environment instance to configure.
        base_config (dict): The base configuration dictionary.
        vehicleCount_range (tuple): Range for the number of vehicles.
        vehicles_density_range (tuple): Range for vehicle density.
        duration_range (tuple): Range for simulation duration in seconds.

    Returns:
        None
    """
    # Randomize the number of vehicles
    vehicleCount = random.randint(*vehicleCount_range)
    env.config['observation']['vehicles_count'] = vehicleCount

    # Randomize vehicle density
    vehicles_density = random.uniform(*vehicles_density_range)
    env.config['vehicles_density'] = vehicles_density

    # Randomize duration
    duration = random.randint(*duration_range)
    env.config['duration'] = duration
    
    ##rndomize spacing
    spacing = random.randint(*spacing)
    env.config["initial_spacing"] = spacing
    # Optionally, randomize other parameters like lane change distances, etc.
    # For example, you can add randomness to target speeds or other dynamics

    # Reconfigure the environment
    env.env.configure(env.config)


In [18]:
def map_llm_action_to_label(llm_act):
    """
    Maps the LLM-recommended action string to a numerical label.

    Parameters:
        llm_act (str): The action string recommended by the LLM.

    Returns:
        int: Numerical label corresponding to the action.
    """
    if 'LANE_LEFT' in llm_act.upper():
        return 0
    elif 'IDLE' in llm_act.upper():
        return 1
    elif 'LANE_RIGHT' in llm_act.upper():
        return 2
    elif 'FASTER' in llm_act.upper():
        return 3
    elif 'SLOWER' in llm_act.upper():
        return 4
    else:
        return 1  # Default to IDLE if action is unrecognized

In [19]:
import gym  # Ensure you have gym installed
import numpy as np
import pandas as pd
from tqdm import trange
import os

def generate_dataset_with_claude(env, file_name, episodes=500, samples_per_episode=10,
                                 vehicleCount_range=(3, 10),
                                 vehicles_density_range=(1, 5),
                                 duration_range=(30, 60)):
    """
    Generates a labeled dataset by randomizing environment configurations,
    capturing observations, using Claude.ai for action recommendations,
    labeling actions, and saving the dataset.

    Parameters:
        env (gym.Env): The Gym environment instance.
        file_name (str): The name of the CSV file to save the dataset.
        episodes (int): Number of episodes to run.
        samples_per_episode (int): Number of samples (configurations) per episode.
        vehicleCount_range (tuple): Range for the number of vehicles.
        vehicles_density_range (tuple): Range for vehicle density.
        duration_range (tuple): Range for simulation duration in seconds.

    Returns:
        None
    """
    observations = []
    actions = []

    base_config = env.config.copy()  # Preserve the base configuration

    for episode in trange(episodes, desc="Dataset Generation"):
        for sample in range(samples_per_episode):
            # Randomize environment configuration for diversity
            randomize_env_config(env, base_config,
                                 vehicleCount_range,
                                 vehicles_density_range,
                                 duration_range)

            # Reset the environment with the new configuration
            obs = env.reset()
            done, truncated = False, False

            # Capture the initial observation
            if isinstance(obs, tuple):
                obs, info = obs  # If reset returns (obs, info)
            else:
                info = {}

            # Generate prompts for Claude.ai
            prompt1, assist1, prompt2 = env.prompt_design(obs)
            llm_act = claude_action(prompt1, assist1, prompt2)

            # Convert LLM action to a numerical label
            action_label = map_llm_action_to_label(llm_act)

            # Store observation and corresponding LLM action
            observations.append(obs.flatten())
            actions.append(action_label)

            # Optionally, you can advance the environment by one step with a dummy action
            # to simulate state transitions, but since the goal is to capture diverse
            # configurations without relying on previous actions, it's not necessary.

    # Convert to numpy arrays
    observations = np.array(observations)
    actions = np.array(actions)

    # Save the dataset as a CSV file
    data = pd.DataFrame(observations)
    data['action'] = actions

    # Create a directory to save the dataset if it doesn't exist
    dataset_dir = 'datasets'
    if not os.path.exists(dataset_dir):
        os.makedirs(dataset_dir)

    # Save the dataset
    dataset_path = os.path.join(dataset_dir, file_name)
    data.to_csv(dataset_path, index=False)

    print(f"Dataset saved to {dataset_path}")


groq


In [20]:
# !pip3 install groq

from groq import *

client = Groq(api_key = "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")
     

In [21]:
def groq_action(prompt1, assist1, prompt2, last_act='FASTER'):

    chat_completion = client.chat.completions.create(messages=[{"role": "user", "content": prompt1},
                                                           {"role": "assistant", "content": assist1},
                                                           {"role": "user", "content": prompt2}], model="llama3-groq-70b-8192-tool-use-preview")
    
    try:
        action = chat_completion.choices[0].message.content.strip().split('Final decision: ')[1].strip().split('\'')[0]
    except:
        action = last_act

    return action

In [22]:
import gym  # Ensure you have gym installed
import numpy as np
import pandas as pd
from tqdm import trange
import os

def save_and_go(observations, actions,file_name):
        # Convert to numpy arrays
        observations = np.array(observations)
        actions = np.array(actions)

        # Save the dataset as a CSV file
        data = pd.DataFrame(observations)
        data['action'] = actions

        # Create a directory to save the dataset if it doesn't exist
        dataset_dir = 'datasets'
        if not os.path.exists(dataset_dir):
            os.makedirs(dataset_dir)

        # Save the dataset
        dataset_path = os.path.join(dataset_dir, file_name)
        data.to_csv(dataset_path, index=False)

        print(f"Dataset saved to {dataset_path}")


def generate_dataset_with_groq(env, file_name, episodes=500, samples_per_episode=10,
                                 vehicleCount_range=(3, 10),
                                 vehicles_density_range=(1, 5),
                                 duration_range=(30, 60)):
    """
    Generates a labeled dataset by randomizing environment configurations,
    capturing observations, using Claude.ai for action recommendations,
    labeling actions, and saving the dataset.

    Parameters:
        env (gym.Env): The Gym environment instance.
        file_name (str): The name of the CSV file to save the dataset.
        episodes (int): Number of episodes to run.
        samples_per_episode (int): Number of samples (configurations) per episode.
        vehicleCount_range (tuple): Range for the number of vehicles.
        vehicles_density_range (tuple): Range for vehicle density.
        duration_range (tuple): Range for simulation duration in seconds.

    Returns:
        None
    """
    observations = []
    actions = []

    base_config = env.config.copy()  # Preserve the base configuration

    for episode in trange(episodes, desc="Dataset Generation"):
        for sample in range(samples_per_episode):
            # Randomize environment configuration for diversity
            randomize_env_config(env, base_config,
                                 vehicleCount_range,
                                 vehicles_density_range,
                                 duration_range)

            # Reset the environment with the new configuration
            obs = env.reset()
            done, truncated = False, False

            # Capture the initial observation
            if isinstance(obs, tuple):
                obs, info = obs  # If reset returns (obs, info)
            else:
                info = {}

            # Generate prompts for Claude.ai
            prompt1, assist1, prompt2 = env.prompt_design(obs)
            llm_act = groq_action(prompt1, assist1, prompt2)

            # Convert LLM action to a numerical label
            action_label = map_llm_action_to_label(llm_act)

            # Store observation and corresponding LLM action
            observations.append(obs.flatten())
            actions.append(action_label)

            # Optionally, you can advance the environment by one step with a dummy action
            # to simulate state transitions, but since the goal is to capture diverse
            # configurations without relying on previous actions, it's not necessary.
    save_and_go(observations, actions, "test_rn_groq")
            
    

In [23]:
import gym
from stable_baselines3 import DQN
import pprint
from matplotlib import pyplot as plt
import numpy as np

class MyHighwayEnv_llm(gym.Env):
    def __init__(self, vehicleCount=5):
        super(MyHighwayEnv_llm, self).__init__()
        # base setting
        self.vehicleCount = vehicleCount
        self.prev_action  = 'FASTER'

        # environment setting
        self.config = {
            "observation": {
                "type": "Kinematics",
                "features": ["presence", "x", "y", "vx", "vy"],
                "absolute": True,
                "normalize": False,
                "vehicles_count": vehicleCount,
                "see_behind": True,
            },
            "action": {
                "type": "DiscreteMetaAction",
                "target_speeds": np.linspace(0, 32, 9),
            },
            "duration": 40,
            "vehicles_density": 2,
            "show_trajectories": True,
            "render_agent": True,
        }
        self.env = gym.make("highway-fast-v0")
        self.env.configure(self.config)
        self.action_space = self.env.action_space
        self.observation_space = gym.spaces.Box(
            low=-np.inf, high=np.inf, shape=(10,5), dtype=np.float32
        )

    def find_smallest_positive(self, arr):
        smallest_positive = float('inf')
        index = -1

        for i, value in enumerate(arr):
            if 0 < value < smallest_positive:
                smallest_positive = value
                index = i

        return smallest_positive, index

    def prompt_design(self, obs_):

        prompt1 = 'You are a smart driving assistant. You, the \'ego\' car, are now driving on a highway. You need to recommend ONLY ONE best action among the following set of actions based on the current scenario: \n \
        \t1. IDLE -- maintain the current speed in the current lane \n \
        \t2. FASTER -- accelerate the ego vehicle \n \
        \t3. SLOWER -- decelerate the ego vehicle \n \
        \t4. LANE_LEFT -- change to the adjacent left lane \n \
        \t5. LANE_RIGHT -- change to the adjacent right lane\n'

        assist1 = 'Understood. Please provide the current scenario or conditions, such as traffic density, speed of surrounding vehicles, your current speed, and any other relevant information, so I can recommend the best action.'

        prompt2 = 'Here is the current scenario:\n \
        There are four lanes on the highway: Lane-1 (left most), Lane-2, Lane-3, Lane-4 (right most). \n\n'

        x, y, vx, vy = obs_[:,1], obs_[:,2], obs_[:,3], obs_[:,4]

        ego_x, ego_y   = x[0], y[0]
        ego_vx, ego_vy = vx[0], vy[0]

        veh_x, veh_y   = x[1:] - ego_x, y[1:] - ego_y
        veh_vx, veh_vy = vx[1:], vy[1:]

        lanes          = y//4+1
        ego_lane       = lanes[0]
        veh_lanes      = lanes[1:]

        if ego_lane == 1:
            ego_left_lane  = 'Left lane: Not available\n'
            ego_right_lane = 'Right lane: Lane-' + str(ego_lane+1) + '\n'
        elif ego_lane == 4:
            ego_left_lane  = 'Left lane: Lane-' + str(ego_lane-1) + '\n'
            ego_right_lane = 'Right lane: Not available\n'
        else:
            ego_left_lane  = 'Left lane: Lane-' + str(ego_lane-1) + '\n'
            ego_right_lane = 'Right lane: Lane-' + str(ego_lane+1) + '\n'

        prompt2 += 'Ego vehicle:\n \
        \tCurrent lane: Lane-' + str(ego_lane) + '\n' + '\t' + ego_left_lane + '\t' + ego_right_lane + '\tCurrent speed: ' + str(ego_vx) + ' m/s \n\n'

        lane_info = 'Lane info:\n'
        for i in range(4):
            inds     = np.where(veh_lanes == i+1)[0]
            num_v    = len(inds)
            if num_v > 0:
                val, ind = self.find_smallest_positive(veh_x[inds])
                true_ind = inds[ind]
                lane_info += '\tLane-' + str(i+1) + ': There are ' + str(num_v) + ' vehicle(s) in this lane ahead of ego vehicle, closest being ' + str(veh_x[true_ind]) + ' m ahead traveling at ' + str(veh_vx[true_ind]) + ' m\/s. \n'
            else:
                lane_info += '\tLane-' + str(i+1) + ' No other vehicle ahead of ego vehicle.\n'

        prompt2 += lane_info

        att_info = '\nAttention points:\n \
        \t1. SLOWER has least priority and should be used only when no other action is safe.\n \
        \t2. DO NOT change lanes frequently.\n \
        \t3. Safety is priority, but do not forget efficiency.\n \
        \t4. Your suggested action has to be one from one of the above five listed actions - IDLE, SLOWER, FASTER, LANE_LEFT, LANE_RIGHT. \n \
        Your last action was ' + self.prev_action + '.Please recommend action for the current scenario only in this format and DONT propound anything else other than \'Final decision: <final decision>\'.\n'

        prompt2 += att_info

        return prompt1, assist1, prompt2

    def step(self, action):
        """
        Steps the environment with the given action.
        """
        # Define a mapping from action labels to action strings if needed
        action_dict = {
            0: 'LANE_LEFT',
            1: 'IDLE',
            2: 'LANE_RIGHT',
            3: 'FASTER',
            4: 'SLOWER'
        }

        # Step the wrapped environment and capture all returned values
        obs, dqn_reward, done, truncated, info = self.env.step(action)

        self.prev_action = action_dict.get(action, 'IDLE')
        
        Reward = 1 / (1 + np.exp(-dqn_reward))

        return obs, Reward, done, truncated, info

    def reset(self, **kwargs):
        """
        Resets the environment.
        """
        obs = self.env.reset(**kwargs)
        return obs  # Ensure to return the observation


In [28]:
 

env = MyHighwayEnv_llm(vehicleCount=5)
gym.register_envs(highway_env)

    # Optionally, verify environment setup
print(f"Observation space: {env.observation_space}")
print(f"Action space: {env.action_space}")

# # Generate the dataset
# generate_dataset_with_claude(
#     env=env,
#     file_name='highway_dataset_claude.csv',
#     episodes=500,               # Number of episodes
#     samples_per_episode=10,     # Number of samples per episode
#     vehicleCount_range=(3, 10), # Range for number of vehicles
#     vehicles_density_range=(1, 5), # Range for vehicle density
#     duration_range=(30, 60) 
#     )# Range for simulation duration in seconds)

generate_dataset_with_groq(
    env=env,
    file_name='highway_dataset_groq.csv',
    episodes=50,               # Number of episodes
    samples_per_episode=10,     # Number of samples per episode
    vehicleCount_range=(3, 10), # Range for number of vehicles
    vehicles_density_range=(1, 5), # Range for vehicle density
    duration_range=(30, 60) 
    )# Range for simulation duration in seconds)

Observation space: Box(-inf, inf, (10, 5), float32)
Action space: Discrete(5)


  logger.warn(


Dataset Generation:   0%|          | 0/50 [00:00<?, ?it/s]

  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.w

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (500,) + inhomogeneous part.