## Doodle Jump Modell

In [106]:
import gymnasium as gym
import numpy as np
from DoodleJump import DoodleJumpGame

from typing import Optional
from gymnasium.spaces import Discrete, Box, Dict, Sequence
from stable_baselines3.common.env_checker import check_env

In [197]:
class DoodleJumpPlayer(gym.Env):
    def __init__(self):
        self.action_space = Discrete(2)
        self.observation_space = Dict({ "Platforms": Sequence(Box(0, 800, shape=(3,), dtype=np.int16), stack=True), "PlayerPosX": Box(0, 770, dtype=np.int16), "JumpTicks": Box(0, 150, dtype=np.int16) })
        self.game = DoodleJumpGame()
        self.state = self.getState()

    def step(self, action):
        self.game.step()
        
        previousPlayerPosX = self.game.playerPosX
        if action == 0:
            self.game.playerPosX -= 3
        elif action == 1:
            self.game.playerPosX += 3

        # näheste Plattform: in X Richtung erreichbar, mit den JumpTicks erreichbar
        leastDistance = 9999999
        closestPlatform = None
        for p in self.game.platforms:
            if p[1] - (self.game.playerPosY + (self.game.jumpTicks * 3)) < 0:
                continue
            if p[0] < self.game.playerPosX:
                distance = p[0] + p[2] - self.game.playerPosX
            elif p[0] > self.game.playerPosX:
                distance = p[0] - self.game.playerPosX
            else:
                distance = 0
                
            if distance < leastDistance:
                leastDistance = distance
                closestPlatform = p

        reward = 0
        if closestPlatform != None:
            if action == 0:
                targetX = closestPlatform[0] + closestPlatform[2]
                if self.game.playerPosX - targetX < previousPlayerPosX - targetX:
                    reward = 1
            if action == 1:
                targetX = closestPlatform[0]
                if targetX - self.game.playerPosX < targetX - previousPlayerPosX:
                    reward = 1
        
        return self.getState(), reward, self.game.death, self.game.death, { "ingame_score": self.game.highestHeight }

    def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
        self.close()
        self.game = DoodleJumpGame()
        self.state = self.getState()
        return (self.state, {})

    def render(self):
        self.game.setupUI()
        self.game.window.update()

    def close(self):
        if hasattr(self.game, "window"):
            try:
                self.game.window.destroy()
            except: pass

    def getState(self) -> dict:
        print(np.array(self.game.platforms))
        return dict({ "Platforms": np.array(self.game.platforms), "PlayerPosX": np.array(self.game.playerPosX, ndmin=1, dtype=np.int16), "JumpTicks": np.array(self.game.jumpTicks, ndmin=1, dtype=np.int16) })

In [198]:
env = DoodleJumpPlayer()

[[541  85 100]
 [203 170 117]
 [499 255 101]
 [558 340 112]
 [324 425 120]
 [226 510 107]
 [355 595 113]]


In [199]:
env.observation_space.sample()

OrderedDict([('JumpTicks', array([87], dtype=int16)),
             ('Platforms',
              array([[141, 440, 598],
                     [ 58, 532, 200],
                     [631, 178, 791],
                     [128, 755,  50],
                     [320, 680, 398],
                     [793, 208,  90],
                     [152, 386, 583],
                     [333,  33, 329],
                     [201, 675, 150]], dtype=int16)),
             ('PlayerPosX', array([452], dtype=int16))])

In [200]:
check_env(env, warn=True)

[[ 85  85 100]
 [227 170 115]
 [362 255 112]
 [360 340 114]
 [540 425 112]
 [140 510 109]
 [192 595 111]]
[[174  85 113]
 [323 170 119]
 [  6 255 100]
 [385 340 118]
 [145 425 104]
 [553 510 100]
 [ 37 595 115]]


AssertionError: Error while checking key=Platforms: The observation returned by the `reset()` method does not match the shape of the given observation space Sequence(Box(0, 800, (3,), int16), stack=True). Expected: None, actual shape: (7, 3)

In [201]:
durchgaenge = 10
for x in range(durchgaenge):
    state = env.reset()
    score = 0
    done = False

    while not done:
        # env.render()
        action = env.action_space.sample()
        state, reward, done, term, info = env.step(action)
        score += reward
        # print(f"\tDurchgang {x + 1}, Geschwindigkeit: {state}")

    print(f"Durchgang {x + 1}, Reward: {score}, Peak: {info["ingame_score"]}")

[[ 47  85 119]
 [574 170 101]
 [588 255 109]
 [667 340 107]
 [261 425 120]
 [ 84 510 111]
 [375 595 109]]
[[ 47  88 119]
 [574 173 101]
 [588 258 109]
 [667 343 107]
 [261 428 120]
 [ 84 513 111]
 [375 598 109]]
[[ 47  91 119]
 [574 176 101]
 [588 261 109]
 [667 346 107]
 [261 431 120]
 [ 84 516 111]
 [375 601 109]]
[[ 47  94 119]
 [574 179 101]
 [588 264 109]
 [667 349 107]
 [261 434 120]
 [ 84 519 111]
 [375 604 109]]
[[ 47  97 119]
 [574 182 101]
 [588 267 109]
 [667 352 107]
 [261 437 120]
 [ 84 522 111]
 [375 607 109]]
[[ 47 100 119]
 [574 185 101]
 [588 270 109]
 [667 355 107]
 [261 440 120]
 [ 84 525 111]
 [375 610 109]]
[[ 47 103 119]
 [574 188 101]
 [588 273 109]
 [667 358 107]
 [261 443 120]
 [ 84 528 111]
 [375 613 109]]
[[ 47 106 119]
 [574 191 101]
 [588 276 109]
 [667 361 107]
 [261 446 120]
 [ 84 531 111]
 [375 616 109]]
[[ 47 109 119]
 [574 194 101]
 [588 279 109]
 [667 364 107]
 [261 449 120]
 [ 84 534 111]
 [375 619 109]]
[[ 47 112 119]
 [574 197 101]
 [588 282 109]
 

In [5]:
from stable_baselines3 import PPO

In [8]:
env = DoodleJumpPlayer()

model = PPO("MlpPolicy", env, verbose=1)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [9]:
model.learn(total_timesteps=100_000)

-----------------------------
| time/              |      |
|    fps             | 1735 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3e+03        |
|    ep_rew_mean          | 3e+03        |
| time/                   |              |
|    fps                  | 907          |
|    iterations           | 2            |
|    time_elapsed         | 4            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0022882232 |
|    clip_fraction        | 0.0488       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.684       |
|    explained_variance   | -0.1         |
|    learning_rate        | 0.0003       |
|    loss                 | 9.81         |
|    n_updates            | 10           |
|    policy_grad

<stable_baselines3.ppo.ppo.PPO at 0x219e42149e0>

In [11]:
model.save("Models/DoodleJump")

In [6]:
env = DoodleJumpPlayer()
model = PPO.load("Models/DoodleJump", env)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [7]:
durchgaenge = 10
for x in range(durchgaenge):
    observation = env.reset()  # Umgebung auf den Anfangszustand zurücksetzen
    score = 0  # Variable um die Performance des Algorithmus' zu speichern
    done = False  # Beschreibt, ob der Agent noch "im Spiel" ist
    observation = observation[0]

    while not done:
        env.render()
        
        # Jetzt nehmen wir anstatt einer Random Action die Vorhersage des Models
        action, _ = model.predict(observation)
        if action.ndim == 1:
            action = action[0]
        
        observation, reward, done, term, info = env.step(action)
        score += reward  # reward: Gibt dem Agenten für korrekte Aktionen +Punkte, oder keine Punkte

    print(f"Durchgang {x + 1}, Score: {score}")

ValueError: Error: Unexpected observation shape (26,) for Box environment, please use (23,) or (n_env, 23) for the observation shape.