In [None]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import pygame
import random
import os
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor
import time
import pandas as pd
import matplotlib.pyplot as plt
from stable_baselines3.common.callbacks import EvalCallback


# Grid dimensions, in cells, read by CrossyRoadEnv.__init__ to size the board.
width_global, height_global = 20, 20

class CrossyRoadEnv(gym.Env):
    """Grid-world "Crossy Road" environment exposing the Gymnasium API.

    The agent starts in the middle of the bottom row and must reach row 0
    while avoiding cars that move horizontally along road rows. Rocks are
    static cells the agent cannot enter.

    Observation: flattened ``height * width`` int32 grid with cell codes
        0 = empty, 1 = agent, 2 = rock, 3 = car, 4 = road (car row).
    Actions (``Discrete(5)``):
        0 = up (y - 1), 1 = down (y + 1), 2 = left, 3 = right, 4 = stay.
    """

    metadata = {"render_modes": ["human"], "render_fps": 12}

    # Side length, in pixels, of one grid cell in the pygame window.
    CELL_SIZE = 40

    def __init__(self, render_mode="human"):
        """Create the environment.

        Args:
            render_mode: ``"human"`` opens a pygame window; any other value
                (e.g. ``None``) disables rendering entirely.
        """
        super().__init__()

        self.width = width_global
        self.height = height_global
        # An episode is cut off after one step per grid cell.
        self.max_steps = self.width * self.height
        self.render_mode = render_mode

        self.observation_space = spaces.Box(
            low=0, high=4, shape=(self.height * self.width,), dtype=np.int32
        )
        self.action_space = spaces.Discrete(5)

        # Number of episodes that reached row 0; persists across resets.
        self.success_count = 0
        self.last_positions = []

        if self.render_mode == "human":
            pygame.init()
            self.screen = pygame.display.set_mode(
                (self.width * self.CELL_SIZE, self.height * self.CELL_SIZE)
            )
            self.clock = pygame.time.Clock()
            self.font = pygame.font.SysFont(None, 24)

    def seed(self, seed=None):
        """Seed the env RNG plus the global ``random`` and ``np.random`` RNGs."""
        self.np_random, _ = gym.utils.seeding.np_random(seed)
        random.seed(seed)
        np.random.seed(seed)

    def _get_obs(self):
        """Return the current board as a flat int32 array of cell codes."""
        grid = np.zeros((self.height, self.width), dtype=np.int32)
        # Paint background layers first so that moving entities stay visible.
        for row in self.car_rows:
            grid[row, :] = 4
        for rock_x, rock_y in self.rocks:
            grid[rock_y, rock_x] = 2
        for car_x, car_y in self.cars:
            grid[car_y, car_x] = 3
        # The agent is drawn last: previously the road layer overwrote it, so
        # the policy could not see its own position while standing on a road.
        agent_x, agent_y = self.agent_pos
        grid[agent_y, agent_x] = 1
        return grid.flatten()

    def reset(self, seed=None, options=None):
        """Start a new episode with a freshly generated board.

        Args:
            seed: Optional seed; when given, all RNGs used for board
                generation are reseeded so the layout is reproducible.
            options: Unused; accepted for Gymnasium API compatibility.

        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        # Only reseed when a seed is supplied. Reseeding with None on every
        # reset would discard the RNG state set by an earlier seeded reset.
        if seed is not None:
            self.seed(seed)

        self.agent_pos = [self.width // 2, self.height - 1]
        self.done = False
        self.score = 0
        self.steps = 0
        self.last_positions = []

        self.rocks = []
        self.cars = []
        self.car_rows = []
        self.car_directions = {}

        self.passed_top_bonus = False

        # Goal row and the two bottom rows stay free of rocks and roads.
        forbidden_rows = [0, self.height - 1, self.height - 2]

        # Rocks cover roughly 3-4% of the board. Bounds are cast to int
        # (randint expects integers) and the upper bound is kept strictly
        # above the lower one so tiny boards do not raise ValueError.
        rocks_low = int(0.03 * self.width * self.height)
        rocks_high = max(int(0.04 * self.width * self.height), rocks_low + 1)
        rocks_count = int(np.random.randint(rocks_low, rocks_high))

        attempts = 0
        while len(self.rocks) < rocks_count and attempts < 500:
            rx = random.randint(1, self.width - 2)
            ry = random.randint(1, self.height - 3)
            if (
                ry not in forbidden_rows
                and [rx, ry] not in self.rocks
                and [rx, ry] != self.agent_pos
            ):
                self.rocks.append([rx, ry])
            attempts += 1

        # Roads are placed only on rows that contain no rocks.
        rock_rows = {rock[1] for rock in self.rocks}
        potential_car_rows = [
            y
            for y in range(1, self.height - 2)
            if y not in forbidden_rows and y not in rock_rows
        ]
        random.shuffle(potential_car_rows)
        self.car_rows = potential_car_rows[:3]

        for row in self.car_rows:
            self.car_directions[row] = random.choice(["left", "right"])
            for _ in range(random.randint(1, 2)):
                cx = random.randint(0, self.width - 1)
                self.cars.append([cx, row])

        return self._get_obs(), {}

    def step(self, action):
        """Advance the simulation by one tick.

        The agent moves first (blocked by board edges and rocks), then every
        car moves one cell in its row's direction, wrapping around the board.

        Args:
            action: Integer in ``[0, 4]``; see the class docstring.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
            ``truncated`` is always ``False``; hitting the step limit is
            reported through ``terminated``. ``info["success_count"]`` is the
            running number of successful episodes.
        """
        if self.done:
            return self._get_obs(), 0, True, False, {"success_count": self.success_count}

        reward = 0.0
        self.steps += 1

        # Remember where the agent was; prev_y drives the progress shaping.
        prev_x, prev_y = self.agent_pos
        new_x, new_y = prev_x, prev_y

        if action == 0 and prev_y > 0:
            new_y -= 1
        elif action == 1 and prev_y < self.height - 1:
            new_y += 1
        elif action == 2 and prev_x > 0:
            new_x -= 1
        elif action == 3 and prev_x < self.width - 1:
            new_x += 1
        elif action == 4:
            reward -= 0.05  # small penalty for standing still

        if [new_x, new_y] in self.rocks:
            reward -= 0.2  # bumped into a rock; the agent stays put
        else:
            self.agent_pos = [new_x, new_y]

        # Move the cars. Dedicated loop names are used on purpose: reusing
        # x/y here previously clobbered the agent's previous row, which made
        # the progress reward below compare against the last car's row.
        moved_cars = []
        for car_x, car_row in self.cars:
            if self.car_directions[car_row] == "left":
                car_x = (car_x - 1) % self.width
            else:
                car_x = (car_x + 1) % self.width
            moved_cars.append([car_x, car_row])
        self.cars = moved_cars

        if self.agent_pos in self.cars:
            reward -= 0.5
            self.done = True
            return self._get_obs(), reward, self.done, False, {"success_count": self.success_count}

        # Progress shaping: moving down is penalised, moving up is rewarded.
        if self.agent_pos[1] > prev_y:
            reward -= 0.3
        elif self.agent_pos[1] < prev_y:
            reward += 0.3

        # One-off bonus the first time the agent enters the top 20% of rows.
        if not self.passed_top_bonus and self.agent_pos[1] <= int(0.2 * self.height):
            reward += 2.0
            self.passed_top_bonus = True

        # Checkpoint rows; granted on every step spent on them. The row
        # numbers are hard-coded and are not scaled with self.height.
        if self.agent_pos[1] in [15, 10, 5]:
            reward += 1.0

        # Penalise staying on one cell for the whole 10-step history window.
        self.last_positions.append(tuple(self.agent_pos))
        if len(self.last_positions) > 10:
            self.last_positions.pop(0)
            if all(pos == tuple(self.agent_pos) for pos in self.last_positions):
                reward -= 0.5

        if self.agent_pos[1] == 0:
            reward += 10.0
            self.done = True
            self.success_count += 1
        elif self.agent_pos[1] == self.height // 2:
            reward += 5.0

        if self.steps >= self.max_steps:
            self.done = True

        return self._get_obs(), reward, self.done, False, {"success_count": self.success_count}

    def render(self):
        """Draw the board in the pygame window; no-op unless in human mode."""
        if self.render_mode != "human":
            return

        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                exit()

        cell = self.CELL_SIZE
        self.screen.fill((170, 220, 60))

        for row in self.car_rows:
            for x in range(self.width):
                pygame.draw.rect(self.screen, (50, 50, 50), (x * cell, row * cell, cell, cell))

        for rock in self.rocks:
            pygame.draw.rect(
                self.screen, (100, 100, 100), (rock[0] * cell, rock[1] * cell, cell, cell)
            )

        for car in self.cars:
            pygame.draw.rect(
                self.screen, (255, 0, 0), (car[0] * cell, car[1] * cell, cell, cell)
            )

        pygame.draw.circle(
            self.screen,
            (255, 200, 0),
            (self.agent_pos[0] * cell + cell // 2, self.agent_pos[1] * cell + cell // 2),
            cell // 2,
        )

        text = self.font.render(
            f"Steps: {self.steps}  Successes: {self.success_count}", True, (0, 0, 0)
        )
        self.screen.blit(text, (10, 10))

        pygame.display.flip()
        # Cap the frame rate at the advertised render_fps.
        self.clock.tick(self.metadata["render_fps"])

    def close(self):
        """Shut pygame down if this environment opened a window."""
        if self.render_mode == "human":
            pygame.quit()


In [None]:
# Output directories for checkpoints, monitor logs and evaluation results.
os.makedirs("models", exist_ok=True)
os.makedirs("models/best_model", exist_ok=True)
os.makedirs("logs", exist_ok=True)


def make_env(monitor_name="monitor.csv"):
    """Build a headless CrossyRoadEnv wrapped in a Monitor.

    Args:
        monitor_name: File name (inside ``logs/``) the Monitor writes episode
            statistics to. Each environment needs its own file, otherwise
            several Monitors overwrite and interleave each other's rows.

    Returns:
        The Monitor-wrapped environment.
    """
    env = CrossyRoadEnv(render_mode=None)
    return Monitor(env, filename=os.path.join("logs", monitor_name))


train_env = DummyVecEnv([make_env])
# The evaluation env logs to a separate file so that evaluation episodes do
# not end up in the training curve plotted below.
eval_env = DummyVecEnv([lambda: make_env("eval_monitor.csv")])

# Sanity-check the environment against the API expected by Stable-Baselines3.
check_env(CrossyRoadEnv(render_mode=None), warn=True)

model = PPO(
    "MlpPolicy",
    train_env,
    verbose=1,
    n_steps=2048,
    batch_size=64,
    learning_rate=2.5e-4,
    gamma=0.99,
    device="cpu"
)

# Periodically evaluate the policy and keep the best-scoring checkpoint.
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path="./models/best_model",
    log_path="./logs",
    eval_freq=100_000,
    n_eval_episodes=10,
    deterministic=True,
    render=False
)

model.learn(total_timesteps=1_000_000, callback=eval_callback)

model.save("models/crossy_ppo")

print("✅ Trening zakończony. Model końcowy oraz najlepszy model zapisane.")

# Plot the per-episode training reward from the training Monitor log.
monitor_file = os.path.join("logs", "monitor.csv")
# The first line of a Monitor file is a metadata header, hence skiprows=1.
df = pd.read_csv(monitor_file, skiprows=1)

# Column "l" holds episode lengths; their running sum is the timestep count.
df["timestep"] = df["l"].cumsum()

plt.figure(figsize=(10, 5))
plt.plot(df["timestep"], df["r"], label="Reward per episode")
plt.xlabel("Timesteps")
plt.ylabel("Reward")
plt.title("Postęp treningu agenta PPO")
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.savefig("logs/training_plot.png")
plt.show()


In [None]:
# Evaluate the best saved checkpoint: run `tests` timed rounds and report the
# share of episodes in which the agent reached the top row.
env = CrossyRoadEnv(render_mode=None)
model = PPO.load("models/best_model/best_model", device="cpu")

timeout_seconds = 24
all_successes = 0
all_episodes = 0
tests = 5

try:
    for i in range(tests):

        success_count = 0
        total_episodes = 0
        start_time = time.time()

        # Play full episodes until the time budget for this round is spent;
        # an episode that is in progress at the deadline is still finished.
        while (time.time() - start_time) < timeout_seconds:
            obs, _ = env.reset()
            done = False

            while not done:
                # NOTE: predict() is called without deterministic=True, so
                # actions are presumably sampled from the policy distribution.
                action, _ = model.predict(obs)
                obs, reward, terminated, truncated, _ = env.step(int(action))
                # Honour both end-of-episode signals of the Gymnasium API.
                done = terminated or truncated
                env.render()

            total_episodes += 1
            # Success means the agent finished the episode on the goal row.
            if env.agent_pos[1] == 0:
                success_count += 1

        # Guard against a round that completed no episodes.
        success_pct = (success_count / total_episodes) * 100 if total_episodes else 0.0
        print(f"✔️ Test nr {i+1} zakończony. Procent sukcesów: {success_pct:.2f}%")
        all_successes += success_count
        all_episodes += total_episodes

    overall_pct = (all_successes / all_episodes) * 100 if all_episodes else 0.0
    print(f"Łączna liczba sukcesów we wszystkich testach: {all_successes}")
    print(f"Średnia liczba sukcesów na test: {all_successes / tests:.2f}")
    print(f"Średni procent sukcesów: {overall_pct:.2f}%")
finally:
    # Release the environment even if evaluation is interrupted.
    env.close()
