In [None]:
%pip list

In [10]:
import pygame
import gymnasium as gym
import random
import numpy as np
from gymnasium import Env, spaces
from typing import Optional

class ParkingEnv(Env):
    """Top-down parking toy environment drawn with pygame.

    A square car is placed at a random position on a 500x500 screen together
    with randomly placed obstacles and parking destinations. Each step moves
    the car by ``car_speed`` pixels in one of four directions.

    Reward per step: +1 base, -10 added when the car overlaps an obstacle,
    and +100 added (ending the episode) when at least half of the car's
    area lies inside a destination square.

    NOTE: ``reset`` returns only the observation and ``step`` returns the
    4-tuple ``(state, reward, done, info)``. This is the older Gym-style
    contract rather than the Gymnasium 5-tuple; it is kept because the
    driver script in this file unpacks it that way.
    """

    metadata = {"render_modes": ["human"], "render_fps": 20}

    # Side length (pixels) of the square obstacles and destinations.
    OBSTACLE_SIZE = 50
    DESTINATION_SIZE = 50

    def __init__(self, render_mode: Optional[str] = None):
        """Set up geometry, spaces and an initial random layout.

        Args:
            render_mode: ``"human"`` to open a pygame window on ``render``;
                ``None`` makes ``render`` a no-op.
        """
        super().__init__()
        self.SCREEN_WIDTH = 500
        self.SCREEN_HEIGHT = 500
        self.WHITE = (255, 255, 255)
        self.BLACK = (0, 0, 0)
        self.RED = (255, 0, 0)
        self.GREEN = (0, 255, 0)

        self.car_width = 50
        self.car_height = 50
        self.car_speed = 20

        self.action_space = spaces.Discrete(4)  # [left, right, up, down]
        self.observation_space = spaces.Box(
            low=0,
            high=max(self.SCREEN_WIDTH, self.SCREEN_HEIGHT),
            shape=(2,),  # Car's x, y coordinates
            dtype=np.float32
        )

        self.render_mode = render_mode
        self.screen = None
        self.clock = None

        self.obstacles = []
        self.destinations = []

        # Counter used only to number the collision/parking log lines.
        self.times = 0
        self.reset()

    def reset(self):
        """Start a new episode with a fresh random layout.

        Returns:
            The initial observation ``[car_x, car_y]`` as float32.
        """
        self.car_x = random.randint(0, self.SCREEN_WIDTH - self.car_width)
        self.car_y = random.randint(0, self.SCREEN_HEIGHT - self.car_height)
        # Forget the previous episode's obstacles first: they are about to be
        # regenerated and must not constrain where new destinations may go.
        self.obstacles = []
        self.destinations = self._create_destinations(5)
        self.obstacles = self._create_obstacles(10)
        self.game_over = False

        return self._get_state()

    def step(self, action):
        """Move the car one step and score the resulting position.

        Args:
            action: 0 = left, 1 = right, 2 = up, 3 = down. Any other value
                leaves the car where it is.

        Returns:
            ``(state, reward, done, info)`` where ``info`` is an empty dict.
        """
        if action == 0:  # Left
            self.car_x -= self.car_speed
        elif action == 1:  # Right
            self.car_x += self.car_speed
        elif action == 2:  # Up
            self.car_y -= self.car_speed
        elif action == 3:  # Down
            self.car_y += self.car_speed

        # Clamp after moving: the start position is not a multiple of
        # car_speed, so a pre-move bounds check alone would let the car (and
        # the observation) leave the screen by up to one step.
        self.car_x = max(0, min(self.car_x, self.SCREEN_WIDTH - self.car_width))
        self.car_y = max(0, min(self.car_y, self.SCREEN_HEIGHT - self.car_height))

        reward = 1  # Default reward for moving into the white area
        done = False

        if self._check_collision():
            reward -= 10
            self.times += 1
            print(f"{self.times}: -10")  # Penalty for hitting an obstacle

        for dest_x, dest_y in self.destinations:
            if self._is_car_parked(dest_x, dest_y):
                reward += 100
                self.times += 1
                print(f"{self.times}: +100")  # Reward for parking
                done = True
                # Overlapping destinations must not pay the bonus twice.
                break

        return self._get_state(), reward, done, {}

    def render(self):
        """Draw the current frame; does nothing when ``render_mode`` is None."""
        if self.render_mode is None:
            return

        if self.screen is None:
            # Lazily create the window so headless use never touches pygame's
            # display module.
            pygame.init()
            self.screen = pygame.display.set_mode((self.SCREEN_WIDTH, self.SCREEN_HEIGHT))
            self.clock = pygame.time.Clock()

        self.screen.fill(self.WHITE)
        self._draw_car()
        self._draw_obstacles()
        self._draw_destinations()

        pygame.display.flip()
        self.clock.tick(self.metadata["render_fps"])

    def close(self):
        """Shut pygame down if a window was opened."""
        if self.screen is not None:
            pygame.quit()
            self.screen = None
            self.clock = None

    def _get_state(self):
        """Return the observation ``[car_x, car_y]`` as a float32 array."""
        return np.array([self.car_x, self.car_y], dtype=np.float32)

    def _car_rect(self):
        """Return the car's current bounding rectangle."""
        return pygame.Rect(self.car_x, self.car_y, self.car_width, self.car_height)

    def _destination_rect(self, dest_x, dest_y):
        """Return the square for a destination stored by its centre point."""
        size = self.DESTINATION_SIZE
        half = size // 2
        return pygame.Rect(dest_x - half, dest_y - half, size, size)

    def _create_obstacles(self, num_obstacles):
        """Place obstacles that overlap neither the car nor any destination.

        Returns:
            A list of ``[x, y, width, height]`` lists.
        """
        size = self.OBSTACLE_SIZE
        car_rect = self._car_rect()
        # Compare against the squares that are actually drawn and scored,
        # i.e. centred on the stored destination coordinates.
        dest_rects = [self._destination_rect(x, y) for x, y in self.destinations]

        obstacles = []
        for _ in range(num_obstacles):
            while True:
                obstacle_x = random.randint(0, self.SCREEN_WIDTH - size)
                obstacle_y = random.randint(0, self.SCREEN_HEIGHT - size)
                obstacle_rect = pygame.Rect(obstacle_x, obstacle_y, size, size)
                if obstacle_rect.colliderect(car_rect):
                    continue
                if obstacle_rect.collidelist(dest_rects) != -1:
                    continue
                obstacles.append([obstacle_x, obstacle_y, size, size])
                break
        return obstacles

    def _create_destinations(self, num_destinations):
        """Place destinations that overlap neither the car nor any obstacle.

        Returns:
            A list of ``(x, y)`` centre points.
        """
        size = self.DESTINATION_SIZE
        car_rect = self._car_rect()
        obstacle_rects = [pygame.Rect(*obstacle) for obstacle in self.obstacles]

        destinations = []
        for _ in range(num_destinations):
            while True:
                dest_x = random.randint(0, self.SCREEN_WIDTH - size)
                dest_y = random.randint(0, self.SCREEN_HEIGHT - size)
                dest_rect = self._destination_rect(dest_x, dest_y)
                if dest_rect.colliderect(car_rect):
                    continue
                if dest_rect.collidelist(obstacle_rects) != -1:
                    continue
                destinations.append((dest_x, dest_y))
                break
        return destinations

    def _draw_car(self):
        """Draw the car as a red rectangle."""
        pygame.draw.rect(self.screen, self.RED, self._car_rect())

    def _draw_obstacles(self):
        """Draw every obstacle as a black rectangle."""
        for obstacle in self.obstacles:
            pygame.draw.rect(self.screen, self.BLACK, obstacle)

    def _draw_destinations(self):
        """Draw every destination as a green square."""
        for dest_x, dest_y in self.destinations:
            pygame.draw.rect(self.screen, self.GREEN, self._destination_rect(dest_x, dest_y))

    def _check_collision(self):
        """Return True when the car overlaps at least one obstacle."""
        car_rect = self._car_rect()
        return any(car_rect.colliderect(pygame.Rect(*obstacle)) for obstacle in self.obstacles)

    def _is_car_parked(self, dest_x, dest_y):
        """Return True when at least 50% of the car's area lies in the destination."""
        overlap = self._car_rect().clip(self._destination_rect(dest_x, dest_y))
        return overlap.width * overlap.height >= 0.50 * self.car_width * self.car_height

    def _check_rect_collision(self, x, y, rect):
        """Return True when a car-sized rectangle at ``(x, y)`` overlaps ``rect``."""
        temp_rect = pygame.Rect(x, y, self.car_width, self.car_height)
        return temp_rect.colliderect(rect)
    


if __name__ == "__main__":
    # Random-agent demo: drive the car with sampled actions until the window
    # is closed or ESC is pressed.
    env = ParkingEnv(render_mode="human")
    env.reset()

    try:
        # The first render() call initialises pygame and opens the window, so
        # the event queue below is valid from here on.
        env.render()

        running = True
        while running:
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    running = False
                elif event.type == pygame.KEYDOWN and event.key == pygame.K_ESCAPE:
                    running = False

            action = env.action_space.sample()  # Random action
            _, _, done, _ = env.step(action)
            if done:
                # The episode ended (car parked): start a new layout instead
                # of continuing to step a finished episode.
                env.reset()
            env.render()
    finally:
        # Also runs on KeyboardInterrupt so the pygame window is not leaked.
        env.close()

1: +100
2: -10
3: -10
4: -10
5: -10
6: -10
7: -10
8: -10
9: -10
10: -10
11: -10
12: +100
13: -10
14: -10
15: -10
16: -10
17: -10
18: -10
19: -10
20: -10
21: -10
22: -10
23: -10
24: -10
25: -10
26: -10
27: -10
28: -10
29: -10
30: -10
31: -10
32: -10
33: -10
34: -10
35: -10
36: -10
37: -10
38: -10
39: -10
40: -10
41: -10
42: -10
43: -10
44: -10
45: -10
46: -10
47: -10
48: -10
49: -10
50: -10
51: -10
52: -10
53: -10
54: -10
55: -10
56: -10
57: -10
58: -10
59: -10
60: -10
61: -10
62: -10
63: -10
64: -10
65: -10
66: -10


KeyboardInterrupt: 

In [4]:
import pygame
import random
import numpy as np
from gymnasium import Env, spaces
from typing import Optional
from stable_baselines3 import PPO
import math

class ParkingEnv(Env):
    """Figure-eight road environment drawn with pygame.

    The car starts at the bottom centre of a 400x600 screen and moves
    ``car_speed`` pixels per step. The road is a vertical strip through the
    screen centre plus two ring-shaped loops stacked to form an "8".

    Reward per step: +1 while the car's centre is on the road, -10
    otherwise, plus +100 (ending the episode) when the car reaches the top
    of the vertical strip. ``human_feedback`` is added on top of every
    step's reward.

    NOTE(review): ``human_feedback`` is a running total changed by the
    UP/DOWN keys in ``render`` and is never reset, so one key press biases
    every later step — confirm this persistence is intended.
    """

    metadata = {"render_modes": ["human"], "render_fps": 240}

    def __init__(self, render_mode: Optional[str] = None):
        """Set up geometry and spaces, then place the car at its start.

        Args:
            render_mode: ``"human"`` to open a pygame window on ``render``;
                ``None`` makes ``render`` a no-op.
        """
        super().__init__()
        self.SCREEN_WIDTH = 400
        self.SCREEN_HEIGHT = 600
        self.DARK_GREEN = (0, 100, 0)
        self.BLACK = (0, 0, 0)
        self.RED = (255, 0, 0)

        self.road_width = 100
        self.car_size = 40
        self.car_speed = 50

        self.action_space = spaces.Discrete(4)  # [left, right, up, down]
        self.observation_space = spaces.Box(
            low=0,
            high=max(self.SCREEN_WIDTH, self.SCREEN_HEIGHT),
            shape=(2,),  # Car's x, y coordinates
            dtype=np.float32
        )

        self.render_mode = render_mode
        self.screen = None
        self.clock = None

        # Counter used only to number the "Parked" log lines.
        self.times = 0
        # Running total of key-press feedback, added to every step reward.
        self.human_feedback = 0

        self.reset()

    def reset(self, seed=None, options=None):
        """Put the car back at the bottom centre of the screen.

        Args:
            seed: Forwarded to ``Env.reset``.
            options: Accepted for API compatibility; not used.

        Returns:
            ``(state, info)`` with ``state = [car_x, car_y]`` (float32) and
            an empty info dict.
        """
        super().reset(seed=seed)
        self.car_x = self.SCREEN_WIDTH // 2 - self.car_size // 2
        self.car_y = self.SCREEN_HEIGHT - self.car_size - 10
        self.game_over = False

        return self._get_state(), {}  # Return state and an empty info dict

    def step(self, action):
        """Move the car one step and score the resulting position.

        Args:
            action: 0 = left, 1 = right, 2 = up, 3 = down.

        Returns:
            ``(state, reward, terminated, truncated, info)``. ``truncated``
            is always False and ``info`` is an empty dict.
        """
        if action == 0:  # Left
            self.car_x -= self.car_speed
        elif action == 1:  # Right
            self.car_x += self.car_speed
        elif action == 2:  # Up
            self.car_y -= self.car_speed
        elif action == 3:  # Down
            self.car_y += self.car_speed

        # Ensure car stays within screen bounds
        self.car_x = max(0, min(self.car_x, self.SCREEN_WIDTH - self.car_size))
        self.car_y = max(0, min(self.car_y, self.SCREEN_HEIGHT - self.car_size))

        reward = self._calculate_reward()
        done = False

        # Goal: top of the vertical strip. Measured from the car's centre so
        # the test agrees with _is_on_road, which is also centre-based.
        car_center_x, _ = self._car_center()
        in_strip = abs(car_center_x - self.SCREEN_WIDTH // 2) < self.road_width // 2
        if self.car_y <= 10 and in_strip:
            reward += 100
            done = True
            self.times += 1
            print(f"{self.times}: Parked, reward = {reward}")

        return self._get_state(), reward + self.human_feedback, done, False, {}

    def _get_state(self):
        """Return the observation ``[car_x, car_y]`` as a float32 array."""
        return np.array([self.car_x, self.car_y], dtype=np.float32)

    def _car_center(self):
        """Return the pixel coordinates of the car's centre as ``(x, y)``."""
        half = self.car_size // 2
        return self.car_x + half, self.car_y + half

    def _calculate_reward(self):
        """Return +1 when the car is on the road and -10 when it is not."""
        if self._is_on_road():
            return 1  # Positive reward for staying on the road
        return -10  # Negative reward for being off the road

    def _is_on_road(self):
        """Return True when the car's centre lies on the drawn road.

        The road is the vertical strip plus the two rings drawn by
        ``_draw_road``.
        """
        center_x, center_y = self.SCREEN_WIDTH // 2, self.SCREEN_HEIGHT // 2
        car_center_x, car_center_y = self._car_center()

        # Check if car is on the vertical part of the 8
        if abs(car_center_x - center_x) < self.road_width // 2:
            return True

        # Check if car is on the curved part of the 8. _draw_road strokes
        # each circle with thickness road_width, and pygame grows that stroke
        # inward from the radius, so the road is the ring between
        # (radius - road_width) and radius -- not the disc around the centre.
        outer_radius = self.SCREEN_HEIGHT // 4
        inner_radius = max(0, outer_radius - self.road_width)
        for loop_center_y in (center_y - outer_radius, center_y + outer_radius):
            dist = math.hypot(car_center_x - center_x, car_center_y - loop_center_y)
            if inner_radius <= dist <= outer_radius:
                return True
        return False

    def render(self):
        """Draw one frame and process window/keyboard events.

        UP/DOWN key presses raise/lower ``human_feedback`` by one.

        Returns:
            True when the user asked to quit (window closed or ESC pressed;
            the window has then already been closed), False otherwise,
            including when ``render_mode`` is None.
        """
        if self.render_mode is None:
            return False

        if self.screen is None:
            # Lazily create the window so training without rendering never
            # opens a display.
            pygame.init()
            self.screen = pygame.display.set_mode((self.SCREEN_WIDTH, self.SCREEN_HEIGHT))
            self.clock = pygame.time.Clock()

        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                self.close()
                return True
            if event.type == pygame.KEYDOWN:
                if event.key == pygame.K_ESCAPE:
                    self.close()
                    return True
                elif event.key == pygame.K_UP:
                    self.human_feedback += 1
                    print("Positive feedback given")
                elif event.key == pygame.K_DOWN:
                    self.human_feedback -= 1
                    print("Negative feedback given")

        self.screen.fill(self.DARK_GREEN)
        self._draw_road()
        self._draw_car()

        pygame.display.flip()
        self.clock.tick(self.metadata["render_fps"])
        return False

    def close(self):
        """Shut pygame down if a window was opened."""
        if self.screen is not None:
            pygame.quit()
            self.screen = None
            self.clock = None

    def _draw_road(self):
        """Draw the figure-eight road: a vertical strip plus two rings."""
        center_x, center_y = self.SCREEN_WIDTH // 2, self.SCREEN_HEIGHT // 2
        radius = self.SCREEN_HEIGHT // 4

        # Draw vertical part of 8
        pygame.draw.rect(self.screen, self.BLACK, [center_x - self.road_width // 2, 0, self.road_width, self.SCREEN_HEIGHT])

        # Draw upper circle of 8
        pygame.draw.circle(self.screen, self.BLACK, (center_x, center_y - radius), radius, self.road_width)

        # Draw lower circle of 8
        pygame.draw.circle(self.screen, self.BLACK, (center_x, center_y + radius), radius, self.road_width)

    def _draw_car(self):
        """Draw the car as a red square."""
        pygame.draw.rect(self.screen, self.RED, [self.car_x, self.car_y, self.car_size, self.car_size])

if __name__ == "__main__":
    # Train a PPO agent on the environment, then replay it with rendering.
    from stable_baselines3 import PPO

    # Initialize the environment
    env = ParkingEnv(render_mode="human")

    try:
        # Define the PPO model
        model = PPO("MlpPolicy", env, verbose=1)

        # Train the model
        model.learn(total_timesteps=10000)  # Increased training time

        # Test the model
        num_runs = 10000
        user_quit = False
        for run in range(num_runs):
            obs, _ = env.reset()
            terminated = truncated = False
            total_reward = 0
            steps = 0

            # Stop on either end-of-episode signal, not only on termination.
            while not (terminated or truncated):
                action, _states = model.predict(obs)
                obs, reward, terminated, truncated, _info = env.step(action)
                total_reward += reward
                steps += 1
                if env.render():
                    # render() has already closed the window; leave both
                    # loops cleanly rather than calling the interactive-only
                    # exit() helper.
                    user_quit = True
                    break

            if user_quit:
                break

            print(f"Run {run + 1}/{num_runs} - Steps: {steps}, Total Reward: {total_reward}")

            # Small delay between runs to make it easier to see
            pygame.time.wait(1000)
    finally:
        # Also runs on KeyboardInterrupt so the pygame window is not leaked.
        env.close()

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
1: Parked, reward = 101
2: Parked, reward = 101
3: Parked, reward = 101
4: Parked, reward = 101
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 462       |
|    ep_rew_mean     | -2.98e+03 |
| time/              |           |
|    fps             | 1609      |
|    iterations      | 1         |
|    time_elapsed    | 1         |
|    total_timesteps | 2048      |
----------------------------------
5: Parked, reward = 101
6: Parked, reward = 101
7: Parked, reward = 101
8: Parked, reward = 101
9: Parked, reward = 101
10: Parked, reward = 101
11: Parked, reward = 101
12: Parked, reward = 101
13: Parked, reward = 101
14: Parked, reward = 101
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 266          |
|    ep_rew_mean          | -1.71e+03    |
| time/                   |              |

KeyboardInterrupt: 

: 