**TESTING ENV RENDER AND IMAGE**

In [18]:
import gymnasium as gym
from stable_baselines3 import DDPG
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise
import numpy as np
import base64
from PIL import Image
import io


env = gym.make("parking-v0", render_mode="rgb_array")
obs = env.reset()
print(obs[1])
rendered = env.render()
print(rendered)

# Convert numpy array to PIL Image
image = Image.fromarray(rendered)

# Save the image to a BytesIO object
buffer = io.BytesIO()
image.save(buffer, format='JPEG')

# Get the content of the BytesIO object
image_bytes = buffer.getvalue()

# Encode the bytes to base64
image_base64 = base64.b64encode(image_bytes)

# If you need it as a string
image_base64_str = image_base64.decode('utf-8')
print(image_base64)

{'speed': 0, 'crashed': False, 'action': array([ 0.13548748, -0.39294848], dtype=float32), 'is_success': False}
[[[100 100 100]
  [100 100 100]
  [100 100 100]
  ...
  [100 100 100]
  [100 100 100]
  [100 100 100]]

 [[100 100 100]
  [100 100 100]
  [100 100 100]
  ...
  [100 100 100]
  [100 100 100]
  [100 100 100]]

 [[100 100 100]
  [100 100 100]
  [100 100 100]
  ...
  [100 100 100]
  [100 100 100]
  [100 100 100]]

 ...

 [[100 100 100]
  [100 100 100]
  [100 100 100]
  ...
  [100 100 100]
  [100 100 100]
  [100 100 100]]

 [[100 100 100]
  [100 100 100]
  [100 100 100]
  ...
  [100 100 100]
  [100 100 100]
  [100 100 100]]

 [[100 100 100]
  [100 100 100]
  [100 100 100]
  ...
  [100 100 100]
  [100 100 100]
  [100 100 100]]]
b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAEsAlgDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEA

: 

In [10]:
import os
from openai import OpenAI
from openaikey import OPENAI_API_KEY

client = OpenAI(api_key=OPENAI_API_KEY)


env_description = """
Here is an image of a top-down view of a parking lot. 
The parking area is surrounded by a yellow border.
The parking lot has numerous empty parking spots marked by white lines on a gray surface. 
There is a green car which can be controlled via throttle and steering angle. 
There is a blue object which represents the goal parking spot where we would like the car to end up within the neighboring white lines. 
Please describe a reward function that would incentivize the car to park in the blue spot at the right angle.
"""

prompt = """
You are an assistant tasked with turning language subgoals into machine readable code. You will be given a list of text subgoals, and you must translate these subgoals into code that takes in an observation of the format:

[POSITION_X, POSITION_Y, VELOCITY_X, VELOCITY_Y, cos(CAR_ANGLE), sin(CAR_ANGLE)]

Each subgoal will use some of these constants, so you can directly take them and use them in the reward function to try to attain the desired subgoal.

Output each subgoal as a python function that takes in the observation and returns a reward function that prioritizes the specific subgoal. This reward function should be dense; it should make the agent want to move closer to the specific subgoal. You can do this by using a distance metric or similar function.

Please respond only with Yes or No followed by only the code for the reward function which is defined by def reward_function(obs).
"""

response = client.chat.completions.create(
  model="gpt-4-turbo",
  messages = [
    {
      "role": "user",
      "content": [
        {
          "type": "text",
          "text": env_description,
        },
        {
          "type": "image_url",
          "image_url": {
            "url": f"data:image/jpeg;base64,{image_base64_str}",
            "detail": "low"
          },
        },
        {
          "type": "text",
          "text": prompt,
        },
      ],
    }
  ],
  max_tokens=300,
)
print(response.choices[0].message.content)

Yes

```python
import numpy as np

def reward_function(obs):
    # Constants for the goal's position
    GOAL_X = 10  # Example values, you need to set these according to the scenario
    GOAL_Y = 10
    
    # Extract car's position and angle from observation
    pos_x, pos_y, _, _, cos_angle, sin_angle = obs
    
    # Calculate the distance to the goal
    distance_to_goal = np.sqrt((pos_x - GOAL_X)**2 + (pos_y - GOAL_Y)**2)
    
    # Angle of the car, arctan2 handles the quadrants correctly
    car_angle = np.arctan2(sin_angle, cos_angle)
    
    # Desired angle for the car when parked correctly, assuming the goal is aligning with the y-axis.
    desired_angle = np.pi / 2  # Update if different alignment is required e.g., parallel to x or y axis
    
    # Calculate the angular difference to the desired angle
    angular_difference = np.abs((car_angle - desired_angle) % (2 * np.pi))
    
    # Penalize both distance to the goal and the angular difference
    # Negative exponentia