In [None]:
!pip install "stable-baselines3[extra]>=2.0.0a4"

Collecting stable-baselines3[extra]>=2.0.0a4
  Downloading stable_baselines3-2.2.0a11-py3-none-any.whl (181 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m181.7/181.7 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting gymnasium<0.30,>=0.28.1 (from stable-baselines3[extra]>=2.0.0a4)
  Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.9/953.9 kB[0m [31m26.0 MB/s[0m eta [36m0:00:00[0m
Collecting shimmy[atari]~=1.3.0 (from stable-baselines3[extra]>=2.0.0a4)
  Downloading Shimmy-1.3.0-py3-none-any.whl (37 kB)
Collecting autorom[accept-rom-license]~=0.6.1 (from stable-baselines3[extra]>=2.0.0a4)
  Downloading AutoROM-0.6.1-py3-none-any.whl (9.4 kB)
Collecting AutoROM.accept-rom-license (from autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra]>=2.0.0a4)
  Downloading AutoROM.accept-rom-license-0.6.1.tar.gz (434 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

In [None]:
def point_in_polygon(point, polygon):
    """Return True if ``point`` lies inside ``polygon`` (ray-casting test).

    A horizontal ray is cast from the point and every polygon edge it crosses
    flips the inside/outside state.

    Args:
        point: ``(x, y)`` pair to test.
        polygon: Sequence of ``(x, y)`` vertices in order. The last vertex is
            implicitly joined back to the first.

    Returns:
        bool: True when the ray crosses an odd number of edges. An empty
        polygon contains no points, so the result is False.
    """
    # Guard against an empty vertex list, which has no first vertex to read.
    if len(polygon) == 0:
        return False

    x, y = point
    inside = False

    # Start with the closing edge (last vertex -> first vertex) and walk the
    # remaining edges in order; the set of edges tested is the full outline.
    prev_x, prev_y = polygon[-1]
    for cur_x, cur_y in polygon:
        # min < y <= max also guarantees the edge is not horizontal, so the
        # division below cannot be by zero.
        if min(prev_y, cur_y) < y <= max(prev_y, cur_y) and x <= max(prev_x, cur_x):
            x_inters = (y - prev_y) * (cur_x - prev_x) / (cur_y - prev_y) + prev_x
            if prev_x == cur_x or x <= x_inters:
                inside = not inside
        prev_x, prev_y = cur_x, cur_y
    return inside

In [None]:
import numpy as np
import math
import gymnasium as gym
from gymnasium import spaces

class GolfEnv(gym.Env):
  """Single-hole golf environment played on an integer grid.

  The ball starts at (50, 0). Each step the agent supplies a direction in
  degrees and a club index; the club index is looked up in ``club_distances``
  to get the shot length and the ball is moved that far along the direction.
  The episode terminates when the ball lands on the green (the 20x20 square
  in the far corner of the grid) and is truncated after ``MAX_SHOTS`` shots.

  Reward per step: ``-OUT_OF_BOUNDS_PENALTY`` for every axis on which the shot
  left the grid (the ball is placed back on the nearest edge cell),
  ``-BUNKER_PENALTY`` for landing inside the bunker polygon, and
  ``GREEN_REWARD`` for landing on the green (this replaces any penalty
  collected on the same shot).
  """

  OUT_OF_BOUNDS_PENALTY = 10
  BUNKER_PENALTY = 10
  GREEN_REWARD = 10
  MAX_SHOTS = 10

  def __init__(self):
    super().__init__()

    #Define the size of the grid
    self.grid_width = 200
    self.grid_height = 300
    self.shot_counter = 0

    #Define the action space as a Box space: [direction in degrees, club-type]
    # Bounds are created as float32 so they match the declared dtype exactly.
    self.action_space = spaces.Box(
        low=np.array([0, 0], dtype=np.float32),
        high=np.array([180, 12], dtype=np.float32),
        dtype=np.float32
    )

    #Define the observation space: valid cells are 0..width-1 and 0..height-1
    self.observation_space = spaces.Dict({
          'x':spaces.Discrete(self.grid_width),
          'y':spaces.Discrete(self.grid_height)
      })

    #ball position
    self.ball_position = {
      'x': 50,
      'y': 0,
    }
    #green position
    self.green_left_corner = [self.grid_width-20, self.grid_height-20]
    self.green_right_corner = [self.grid_width, self.grid_height]

    #creating a dictionary to convert club to distance
    self.club_distances = {
            0: 200,
            1: 180,
            2: 170,
            3: 160,
            4: 150,
            5: 145,
            6: 138,
            7: 127,
            8: 120,
            9: 110,
            10: 97,
            11: 85,
            12: 55,
        }

    #bunker outline, built once here instead of on every step
    self.bunker = [(25, 53), (26, 42), (36, 34), (49, 33), (56, 44), (78, 95), (86, 138), (86, 45), (78, 147), (67, 142), (62, 148), (60, 176), (42, 186), (31, 176), (29, 168), (30, 150), (57, 180), (58, 111), (29, 60)]

  def _get_obs(self):
    """Return the current ball position as an observation dict."""
    return {
        'x': self.ball_position['x'],
        'y': self.ball_position['y']
    }

  @staticmethod
  def _clamp(value, upper):
    """Clamp ``value`` into ``[0, upper]``; also report whether it was outside."""
    clamped = min(max(value, 0), upper)
    return clamped, clamped != value

  def step(self, action):
    """Play one shot.

    Args:
      action: Two-element sequence ``[direction_degrees, club_type]``. The
        club value is truncated to an int and clamped to the known clubs.

    Returns:
      ``(observation, reward, terminated, truncated, info)`` where
      ``observation`` is a dict with integer ``'x'`` and ``'y'``.
    """
    # Get the direction and club type from the action
    direction = float(action[0])
    # Clamp so an action slightly outside the Box cannot raise KeyError.
    club_type = min(max(int(action[1]), 0), max(self.club_distances))
    distance = self.club_distances[club_type]

    # Use direction to calculate the new (rounded) position
    angle_rad = math.radians(direction)
    new_x = round(self.ball_position['x'] + distance * math.cos(angle_rad))
    new_y = round(self.ball_position['y'] + distance * math.sin(angle_rad))

    reward = 0
    terminated = False

    # Keep the ball inside the observation space. The largest valid Discrete
    # value is n - 1, so that is the edge cell the ball is moved back to.
    new_x, x_out = self._clamp(new_x, self.grid_width - 1)
    new_y, y_out = self._clamp(new_y, self.grid_height - 1)
    if x_out:
      reward -= self.OUT_OF_BOUNDS_PENALTY
    if y_out:
      reward -= self.OUT_OF_BOUNDS_PENALTY

    #sets the ball position to the new position
    self.ball_position['x'] = new_x
    self.ball_position['y'] = new_y

    #checks if the ball position is on the green (inside the rectangle
    #spanned by the two corners on both axes)
    on_green = (
        self.green_left_corner[0] <= new_x <= self.green_right_corner[0]
        and self.green_left_corner[1] <= new_y <= self.green_right_corner[1]
    )
    if on_green:
      reward = self.GREEN_REWARD
      terminated = True

    # NOTE(review): the bunker previously assigned the same +10 as the green,
    # which rewards hitting the hazard; treated here as a penalty. Confirm the
    # intended magnitude.
    if point_in_polygon((new_x, new_y), self.bunker):
      reward -= self.BUNKER_PENALTY

    #adds a shot to the shot counter
    self.shot_counter += 1

    #limit the number of shots per round
    truncated = self.shot_counter >= self.MAX_SHOTS

    return self._get_obs(), reward, terminated, truncated, {}

  def reset(self, seed=None, options=None):
    """Start a new round: seed the RNG, clear the shot count, re-tee the ball.

    Returns:
      ``(observation, info)`` with the starting position and an empty info dict.
    """
    # Lets gymnasium seed self.np_random as the Env API expects.
    super().reset(seed=seed)

    # Without this every episode after the first is truncated immediately.
    self.shot_counter = 0

    #sets ball position to the middle of the start of the hole
    self.ball_position = {
        'x': 50,
        'y': 0,
    }

    return self._get_obs(), {}









In [None]:
from stable_baselines3.common.env_checker import check_env

  from tensorflow.tsl.python.lib.core import pywrap_ml_dtypes


In [None]:
# Create the environment in this cell: `env` is otherwise only defined further
# down the notebook, so running top to bottom raised NameError here.
env = GolfEnv()
check_env(env, warn=True)

NameError: ignored

In [None]:
# Smoke test: a single shot at 90 degrees (along +y) with club 12.
env = GolfEnv()
# reset() returns an (observation, info) pair, so unpack both.
observation, info = env.reset()
action = np.array([90, 12])

observation, reward, terminated, truncated, info = env.step(action)

print(observation, reward)
print(action)

{'x': 50, 'y': 55} 10
[90 12]
