In [None]:
import gymnasium as gym
import stable_baselines3 as sb3

env = gym.make("CarRacing-v3", render_mode="rgb_array") # For now rgb_array to allow manual play
env.reset()

# Test the environment with random actions
#for _ in range(100):
#    action = env.action_space.sample()
#    observation, reward, done, truncated, info = env.step(action)
#    env.render()
#    if done or truncated:
#        env.reset()

  from pkg_resources import resource_stream, resource_exists


(array([[[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        ...,
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]],
 
        [[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         ...,
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]]], shape=(96, 96, 3), dtype=uint8),
 {})

: 

## Week 1: Customizing the environment

### Environment understanding
First things first, we took the time to understand the underlying forces at play in this environment. The following are the collected insights:
- **Action Space - Discrete vs Continuous**: This environment has, by default, a continuous action space (p.e.: steering $s \in [-1, 1]$). However, it can be converted to a discrete action space (p.e. by only allowing full left or full right steering).

- **Friction**: As per the environment implementation, friction is a vector applied in the oposite direction of the moving car and proportional to its current speed.

- **Grip**: Describes the adherence of the car to the track. If the rear wheels angle to the cars current moving direction is too great, especially at high speeds, it will lose grip, and enter a **drifting motion**.

Moreover, by reading the environment source code (available under [car_racing.py](https://github.com/Farama-Foundation/Gymnasium/blob/main/gymnasium/envs/box2d/car_racing.py) and [car_dynamics.py](https://github.com/Farama-Foundation/Gymnasium/blob/main/gymnasium/envs/box2d/car_dynamics.py)), we found that:

- The track itself is made out of tiles, which are squares with coordinates and rotation variables. They are held in a list in the `CarRacing` class, under `self.track`. Knowing this may make it possible to calculate the line with minimum curvature (ideal trajectory).

- Each wheel in `car.wheels` *knows* if it is over one or more road tiles. This is done by checking `len(wheel.tiles)`

### Reward modifications
We have noticed that the reward function (as described [here](https://gymnasium.farama.org/environments/box2d/car_racing/#rewards)) is very shallow, in the sense that it encourages process (by rewarding per tile completed) but does not encourage any type of behaviour to acheive that progress. As such, we plan to implement the following modifications:

- **Gas Bias**: Pushing forward must be encouraged. As such, and to counteract the early training association<br>
`moving = crashing`, a small reward will be provided for pushing Gas.

- **Wiggle Protection**: Often a model trained on these sorts of environments will perform what is called **intentional wiggling**. That is when, in a curve, for example, the car switches sharply and repeatedly between right-steering and left-steering. This is a technique learnt by models to ensure car grip. It also is, however, a strategy that exploits simple environments, including `CarRacing-v3`, in which steering does not lose speed. To prevent this, sharply changing steering direction will have a moderate penalty.

- **Off-road Penalty**: As described below under *Aditional modifications* the simulation will be truncated shortly after the car leaves the track, if it does not return quickly. However, giving a truncation a flat penalty does not seem appropriate, since it gives no context or warning

### Aditional modifications

- **Early stopping**: Prevents the simulation from going on when the car has deviated too far out of the track, leaving it aimlessly wandering around. Aims to quicken the training speed.

In [1]:
import config as cfg
import gymnasium as gym
from gymnasium.envs.box2d.car_racing import CarRacing
from gymnasium.envs.box2d.car_dynamics import Car
import numpy as np
import cv2

class CustomEnvironment(gym.Wrapper):
    def __init__(self,
        env,
        gas_reward=cfg.GAS_REWARD,
        wiggle_penalty=cfg.WIGGLE_PENALTY,
        wiggle_tolerance=cfg.WIGGLE_TOLERANCE,
        off_road_wheel_penalty=cfg.OFF_ROAD_WHEEL_PENALTY,
        verbose=False
    ):
        super().__init__(env)
        self.gas_reward = gas_reward
        self.wiggle_penalty = wiggle_penalty
        self.wiggle_tolerance = wiggle_tolerance
        self.off_road_wheel_penalty = off_road_wheel_penalty
        self.verbose = verbose
        
        self.info = {}
        self.info["wheels_on_road"] = 4
        self.info["consecutive_off_road"] = 0
    
    def check_early_stop(self, env):
        car : Car = env.unwrapped.car
        
        wheels_on_road = 0
        for wheel in car.wheels:
            if len(wheel.tiles) > 0:
                wheels_on_road += 1
        if self.verbose:
            if wheels_on_road == 0: print("Car off road!")
            else: print("Wheels on road:", wheels_on_road)
        
        return wheels_on_road
    
    def get_optimal_path(self, iterations=100):
        # 1. Extract the raw center line (x, y) coordinates
        # env.track is a list of (alpha, beta, x, y)
        raw_track = self.env.unwrapped.track
        path = np.array([[p[2], p[3]] for p in raw_track])

        # 2. Track Parameters
        # In CarRacing, track width is roughly constant. 
        # We define a "Safe Width" slightly smaller than the real width so the agent doesn't clip the grass.
        TRACK_WIDTH = 40.0 
        SAFE_MARGIN = 5.0  # Stay away from the absolute edge
        max_displacement = (TRACK_WIDTH / 2) - SAFE_MARGIN

        # Copy the path to modify it
        optimized_path = np.copy(path)
        num_points = len(path)

        # 3. The "Rubber Band" Iteration
        for _ in range(iterations):
            for i in range(num_points):
                # Get indices for previous and next points (handling the loop wrap-around)
                prev_idx = (i - 1) % num_points
                next_idx = (i + 1) % num_points

                # A. Calculate the midpoint between neighbors
                # The straightest line between Prev and Next passes through this midpoint
                midpoint = (optimized_path[prev_idx] + optimized_path[next_idx]) / 2

                # B. Move the current point towards that midpoint (Smoothing)
                # This creates the "Shortest Path" effect
                # 0.5 means we move halfway there. 
                optimized_path[i] = optimized_path[i] * 0.2 + midpoint * 0.8

                # C. Constrain to Track Width (The "Walls")
                # We cannot let the point leave the road.
                # Calculate distance from the original center line (path[i])
                center = path[i]
                current = optimized_path[i]

                diff = current - center
                dist = np.linalg.norm(diff)

                if dist > max_displacement:
                    # If we pulled too tight and hit the wall, clamp it back to the edge
                    diff = diff / dist * max_displacement
                    optimized_path[i] = center + diff

        return optimized_path
    
    def plot_optimal_line(self, observation, optimal_path=None, color=(0, 255, 0), thickness=2):
        """
        Plots the optimal racing line on the rendered observation.
        
        Args:
            observation: The RGB array from env.render() or env.step()
            optimal_path: List of (x, y) coordinates in world space. If None, computes it.
            color: RGB color tuple for the line (default: green)
            thickness: Line thickness in pixels
            
        Returns:
            Modified observation with the optimal line drawn
        """
        # Get optimal path if not provided
        if optimal_path is None:
            optimal_path = self.get_optimal_path(self.env)
        
        # Make a copy to avoid modifying the original
        img = observation.copy()
        
        # Get the car's position for reference
        car = self.env.unwrapped.car
        
        # CarRacing uses a specific coordinate transformation
        # We need to convert world coordinates to screen coordinates
        # The environment uses WINDOW_W=600, WINDOW_H=400, SCALE=6.0, VIDEO_W=600, VIDEO_H=400
        WINDOW_W = 600
        WINDOW_H = 400
        SCALE = 6.0
        PLAYFIELD = 2000 / SCALE
        
        # Convert world coordinates to screen coordinates
        screen_points = []
        for point in optimal_path:
            x, y = point
            # Transform to screen space (same as CarRacing's _render method)
            screen_x = int(WINDOW_W / 2 + (x - car.hull.position.x) * SCALE)
            screen_y = int(WINDOW_H / 2 + (y - car.hull.position.y) * SCALE)
            
            # Only add points that are visible on screen
            if 0 <= screen_x < WINDOW_W and 0 <= screen_y < WINDOW_H:
                screen_points.append((screen_x, screen_y))
        
        # Draw lines between consecutive points
        for i in range(len(screen_points) - 1):
            cv2.line(img, screen_points[i], screen_points[i + 1], color, thickness)
        
        # Connect the last point to the first (closed loop)
        if len(screen_points) > 1:
            cv2.line(img, screen_points[-1], screen_points[0], color, thickness)
        
        return img
    
    
    def step(self, action, last_action=None):
        observation, reward, done, truncated, info = self.env.step(action)
        
        if hasattr(info, "lap_finished"):
            self.info["lap_finished"] = info["lap_finished"]

        # ------------------
        #      GAS BIAS
        # ------------------
        gas = action[1]
        reward += gas * self.gas_reward
        
        # ------------------
        # WIGGLE PROTECTION
        # ------------------
        current_steering = action[0]  # Assuming action[0] is the steering
        if last_action is not None:
            # Additional reward for maintaining gas
            last_steering = last_action[0]
            wiggle = current_steering - last_steering
            if abs(wiggle) > self.wiggle_tolerance:
                reward -= abs(wiggle) * self.wiggle_penalty
        
        # ------------------
        # EARLY STOP LOGIC
        # ------------------
        wheels_on_road = self.check_early_stop(self.env)
        self.info["wheels_on_road"] = wheels_on_road
        if wheels_on_road == 0:
            self.info["consecutive_off_road"] += 1
        else:
            self.info["consecutive_off_road"] = 0
        
        
        if self.info["consecutive_off_road"] > cfg.MAX_OFF_ROAD_STEPS:
            truncated = True
            if self.verbose: print("Episode truncated due to excessive off-road time.")
        else: pass
        
        reward -= (4 - wheels_on_road) * self.off_road_wheel_penalty
        
        return observation, reward, done, truncated, self.info



  from pkg_resources import resource_stream, resource_exists


In [7]:
import gymnasium as gym
import stable_baselines3 as sb3
import numpy as np

env = gym.make("CarRacing-v3", render_mode="rgb_array") # For now rgb_array to allow manual play
env.reset()

env = CustomEnvironment(env)

for _ in range(1000):
    action = np.array([0.0, 1.0, 0.0])  # Example action: straight steering, full gas, no brake
    observation, reward, done, truncated, info = env.step(action)
    env.render()
    if done or truncated:
        env.reset()
        break

Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on road: 4
Wheels on 

In [10]:
import gymnasium as gym

# 1. Create the environment
env = gym.make("CarRacing-v3", render_mode="rgb_array")
env.reset()

# 2. Peel back the wrappers
raw_env = env.unwrapped

# 3. Print all attributes (variables and functions)
print("--- TOP LEVEL ATTRIBUTES ---")
print(dir(raw_env))

# 4. Investigate the 'car' object specifically
print("\n--- CAR ATTRIBUTES ---")
# I found 'car' in the list above, so now let's look inside it
print(dir(raw_env.car))

# 5. Investigate a wheel
print("\n--- WHEEL ATTRIBUTES ---")
# I found 'wheels' in the car list, let's look at the first wheel
print(dir(raw_env.car.wheels[0]))

--- TOP LEVEL ATTRIBUTES ---
['__annotations__', '__class__', '__class_getitem__', '__delattr__', '__dict__', '__dir__', '__doc__', '__enter__', '__eq__', '__exit__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__orig_bases__', '__parameters__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_create_image_array', '_create_track', '_destroy', '_draw_colored_polygon', '_ezpickle_args', '_ezpickle_kwargs', '_init_colors', '_np_random', '_np_random_seed', '_reinit_colors', '_render', '_render_indicators', '_render_road', 'action_space', 'bg_color', 'car', 'clock', 'close', 'contactListener_keepref', 'continuous', 'domain_randomize', 'fd_tile', 'get_wrapper_attr', 'grass_color', 'has_wrapper_attr', 'invisible_state_window', 'invisible_video_window', 'isopen', 'lap_complete_percen

In [2]:
import gymnasium as gym
import stable_baselines3 as sb3
import numpy as np

env = gym.make("CarRacing-v3", render_mode="rgb_array") # For now rgb_array to allow manual play
env.reset()

env = CustomEnvironment(env)
optimal_path = env.get_optimal_path()

for _ in range(1000):
    action = env.action_space.sample()
    observation, reward, done, truncated, info = env.step(action)
    # Plot the optimal racing line on the observation with much thicker line
    observation = env.plot_optimal_line(observation, optimal_path, color=(0, 255, 0), thickness=8)
    
    # Resize the image to make it much larger
    observation_large = cv2.resize(observation, (600, 400), interpolation=cv2.INTER_LINEAR)
    
    cv2.imshow("CarRacing with Optimal Line", observation_large)
    cv2.waitKey(1)
