## Snake Modell

In [1]:
import gymnasium as gym
import numpy as np
import Snake
from gymnasium.spaces import Discrete, Box
from typing import Optional

In [6]:
class SnakePlayer(gym.Env):
    """Gymnasium environment that drives a ``Snake.SnakeGame`` instance.

    Observation: the game's grid, declared as a 2-D ``Box`` with values in
    ``[0, 100]`` and the shape given by ``SnakeGame.gridSize``.
    Action: one of four discrete values, written to ``SnakeGame.nextDirection``
    (the mapping of values to directions is defined by the ``Snake`` module).
    Reward: ``FOOD_REWARD`` on every step in which the game's score increased,
    otherwise 0.
    """

    # Reward handed out whenever the game's score goes up during a step.
    FOOD_REWARD = 5

    def __init__(self):
        self.game = Snake.SnakeGame()
        self.action_space = Discrete(4)
        self.observation_space = Box(0, 100, shape=(self.game.gridSize[0], self.game.gridSize[1]), dtype=int)
        self.state = self.game.grid
        self.lastScore = 0

    def _observation(self):
        """Return the current grid as an array with the declared observation dtype."""
        return np.asarray(self.state, dtype=self.observation_space.dtype)

    def _destroy_window(self):
        """Best-effort teardown of the game's UI window, if one was created."""
        window = getattr(self.game, "window", None)
        if window is None:
            return
        try:
            window.destroy()
        except Exception:
            # The window may already be gone (e.g. closed by the user); this is
            # deliberately best-effort, but KeyboardInterrupt/SystemExit must
            # still propagate, hence no bare ``except``.
            pass

    def step(self, action):
        """Advance the game by one tick using ``action`` as the next direction.

        Returns:
            ``(observation, reward, terminated, truncated, info)`` where
            ``terminated`` mirrors ``game.death``, ``truncated`` is always
            ``False`` and ``info`` carries the game's step count and score.
        """
        # Policies hand back NumPy scalars/0-d arrays; give the game a plain int.
        self.game.nextDirection = int(action)

        self.game.step()

        reward = 0
        if self.game.score > self.lastScore:
            reward = self.FOOD_REWARD
            self.lastScore = self.game.score

        self.state = self.game.grid

        info = {"Steps": self.game.steps, "Score": self.game.score}
        return self._observation(), reward, bool(self.game.death), False, info

    def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None,):
        """Start a new game and return ``(observation, info)``.

        ``seed`` is forwarded to ``gym.Env.reset`` so that ``self.np_random`` is
        seeded as the Gymnasium API requires.
        NOTE(review): ``SnakeGame`` is constructed without a seed here, so the
        game's own randomness is presumably not controlled by it - confirm.
        """
        super().reset(seed=seed)

        # Drop the previous game's window (if any) before replacing the game.
        self._destroy_window()

        self.game = Snake.SnakeGame()
        self.state = self.game.grid
        self.lastScore = 0
        return (self._observation(), {})

    def render(self):
        """Set up the game's UI and process pending window events."""
        # NOTE(review): setupUI() runs on every render call; whether it creates
        # a new window each time or redraws an existing one depends on Snake.
        self.game.setupUI()
        self.game.window.update()

    def close(self):
        """Release UI resources held by the current game."""
        self._destroy_window()

In [7]:
# Instantiate the custom Snake environment for the random-action smoke test.
env = SnakePlayer()

In [11]:
durchgänge = 5  # number of episodes to play
for i in range(durchgänge):  # run the environment that many times
    # reset() returns an (observation, info) pair; unpack it instead of
    # binding the whole tuple to `state`.
    state, _reset_info = env.reset()
    done = False
    score = 0

    while not done:
        # env.render()  # uncomment to draw the environment while it runs
        action = env.action_space.sample()  # random action from the Discrete(4) action space
        # Gymnasium step() yields: observation, reward, terminated, truncated, info
        state, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated  # an episode ends on either signal
        score += reward
    print(f"Durchgang {i}, Score: {score}, Info: {info}")
env.close()

Durchgang 0, Score: 0, Info: {'Steps': 294, 'Score': 0}
Durchgang 1, Score: 0, Info: {'Steps': 53, 'Score': 0}
Durchgang 2, Score: 0, Info: {'Steps': 45, 'Score': 0}
Durchgang 3, Score: 0, Info: {'Steps': 38, 'Score': 0}
Durchgang 4, Score: 0, Info: {'Steps': 56, 'Score': 0}
Durchgang 5, Score: 5, Info: {'Steps': 32, 'Score': 1}
Durchgang 6, Score: 0, Info: {'Steps': 65, 'Score': 0}
Durchgang 7, Score: 0, Info: {'Steps': 17, 'Score': 0}
Durchgang 8, Score: 0, Info: {'Steps': 43, 'Score': 0}
Durchgang 9, Score: 0, Info: {'Steps': 42, 'Score': 0}
Durchgang 10, Score: 0, Info: {'Steps': 57, 'Score': 0}
Durchgang 11, Score: 0, Info: {'Steps': 19, 'Score': 0}
Durchgang 12, Score: 0, Info: {'Steps': 40, 'Score': 0}
Durchgang 13, Score: 0, Info: {'Steps': 38, 'Score': 0}
Durchgang 14, Score: 0, Info: {'Steps': 41, 'Score': 0}
Durchgang 15, Score: 0, Info: {'Steps': 18, 'Score': 0}
Durchgang 16, Score: 0, Info: {'Steps': 50, 'Score': 0}
Durchgang 17, Score: 5, Info: {'Steps': 38, 'Score': 1}
D

In [12]:
from stable_baselines3.common.env_checker import check_env

In [13]:
# Validate the custom environment with Stable-Baselines3's environment checker;
# warn=True also reports non-fatal recommendations.
check_env(env, warn=True)



In [14]:
from stable_baselines3 import PPO

In [15]:
# Fresh environment instance to train on.
env = SnakePlayer()

In [16]:
# Build a PPO agent with the MLP policy on the Snake environment;
# verbose=1 enables the training log output.
model = PPO("MlpPolicy", env, verbose=1)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [17]:
# Train the agent for 100000 environment steps.
model.learn(total_timesteps=100000)

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 57.5     |
|    ep_rew_mean     | 0.714    |
| time/              |          |
|    fps             | 433      |
|    iterations      | 1        |
|    time_elapsed    | 4        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 54.3        |
|    ep_rew_mean          | 0.667       |
| time/                   |             |
|    fps                  | 275         |
|    iterations           | 2           |
|    time_elapsed         | 14          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.020251282 |
|    clip_fraction        | 0.149       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | -0.871      |
|    learning_rate        | 0.

<stable_baselines3.ppo.ppo.PPO at 0x15b885a64a0>

In [19]:
# Persist the trained model under the relative path "Models/Snake".
model.save("Models/Snake")

In [22]:
# Fresh environment instance for evaluating a saved model.
env = SnakePlayer()

In [25]:
# `load` is a classmethod that RETURNS a new model; calling it on an instance
# and discarding the result leaves `model` unchanged, so rebind the name.
# NOTE(review): this loads "Models/SnakeDemo", while the save cell writes
# "Models/Snake" - confirm the demo checkpoint is the one intended here.
model = PPO.load("Models/SnakeDemo", env=env)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


<stable_baselines3.ppo.ppo.PPO at 0x15b89a67970>

In [27]:
durchgaenge = 10  # number of evaluation episodes
for d in range(durchgaenge):  # run the environment that many times
    # reset() returns (observation, info). Unpack it here rather than probing
    # len(observation) == 2 inside the loop, which would also match any real
    # observation whose first dimension happens to be 2.
    observation, _reset_info = env.reset()
    done = False
    score = 0

    while not done:
        env.render()  # draw the environment
        # predict() returns (action, next_state); the state only matters for recurrent policies
        action, _next_state = model.predict(observation)
        observation, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated  # an episode ends on either signal
        score += reward
    print(f"Durchgang: {d}, Score: {score}, Steps: {info}")
env.close()

Durchgang: 0, Score: 0, Steps: {'Steps': 107, 'Score': 0}
Durchgang: 1, Score: 0, Steps: {'Steps': 46, 'Score': 0}


KeyboardInterrupt: 