In [3]:
# -*- encoding: utf-8 -*-
'''
@File    :   game.py
@Time    :   2024/09/22 21:33:33
@Author  :   junewluo 
'''

import os
import numpy as np
from gym import Env, spaces
from ple import PLE
from ple.games import FlappyBird

# os.putenv('SDL_VIDEODRIVER', 'fbcon')
# os.environ["SDL_VIDEODRIVER"] = "dummy"

class FlappyBirdWrapper(Env):
    """Gym-style environment around the PLE ``FlappyBird`` game.

    Observations are the 3-element float32 vector built by ``_get_obs``;
    actions are indices into the action set reported by PLE.
    """

    # NOTE(review): gym looks up 'render_modes' ('render.modes' in older
    # releases); 'render.mode' is probably a typo. The key is left untouched
    # here in case other code reads it — confirm before renaming.
    metadata = {
        'render.mode':['human','rgb_array'],
    }

    def __init__(self, **kwargs):
        """Create the game and its PLE driver.

        Args:
            **kwargs: Forwarded to ``PLE``. ``display_screen`` defaults to
                True (the previous hard-coded value) but may now be passed
                explicitly, e.g. ``display_screen=False``.
        """
        self.game = FlappyBird()
        # Previously display_screen=True was passed alongside **kwargs, so a
        # caller supplying display_screen hit "TypeError: got multiple values
        # for keyword argument". setdefault keeps the old default instead.
        kwargs.setdefault('display_screen', True)
        self.p = PLE(self.game, **kwargs)
        self.action_set = self.p.getActionSet()

        # Three observation components: see _get_obs.
        self.observation_space = spaces.Box(
            low=np.finfo(np.float32).min,
            high=np.finfo(np.float32).max,
            shape=(3,),
            dtype=np.float32,
        )
        # One discrete action per entry of the PLE action set
        # (presumably "flap" and "do nothing" — TODO confirm).
        self.action_space = spaces.Discrete(len(self.action_set))

    def _get_obs(self):
        """Build the observation vector from the current game state.

        Returns:
            np.ndarray: float32 array ``[dist_to_pipe_horz,
            dist_to_pipe_bottom, velocity]``, matching the dtype declared in
            ``observation_space``.
        """
        state = self.game.getGameState()
        # Horizontal distance from the player to the next pipe.
        dist_to_pipe_horz = state["next_pipe_dist_to_player"]
        # Vertical offset of the player from the game's `next_pipe_top_y`.
        # NOTE(review): the original comment described this as the distance to
        # the top of the lower pipe — confirm which edge this key refers to.
        dist_to_pipe_bottom = state["player_y"] - state["next_pipe_top_y"]
        # Player velocity as reported by the game (`player_vel`).
        # NOTE(review): the original comment called it horizontal — confirm the axis.
        velocity = state['player_vel']
        # Cast explicitly: np.array would otherwise produce float64, which
        # disagrees with the float32 observation_space.
        return np.array(
            [dist_to_pipe_horz, dist_to_pipe_bottom, velocity],
            dtype=np.float32,
        )

    def reset(self, *, seed=None, options=None):
        """Restart the game and return the first observation.

        Args:
            seed: Optional seed; forwarded to ``self.seed`` when given
                (which is currently a no-op).
            options: Accepted for gym API compatibility; unused.

        Returns:
            tuple: ``(observation, info)`` where ``info`` is an empty dict.
        """
        if seed is not None:
            self.seed(seed)
        self.p.reset_game()
        return self._get_obs(), dict()

    def step(self, action):
        """Apply one action and advance the game.

        Args:
            action: Index into ``self.action_set``.

        Returns:
            tuple: ``(obs, reward, done, truncation, info)``; ``truncation``
            is always False and ``info`` is an empty dict.
        """
        reward = self.p.act(self.action_set[action])
        obs = self._get_obs()
        done = self.p.game_over()
        truncation = False
        return obs, reward, done, truncation, dict()

    def seed(self, *args, **kwargs):
        """Accept and ignore seeding requests (no-op)."""
        pass

    def render(self, *args, **kwargs):
        """Return the current frame from ``self.p.getScreenRGB()``.

        All arguments are ignored.
        """
        rgb = self.p.getScreenRGB()
        return rgb

couldn't import doomish
Couldn't import doom


In [4]:
# Smoke test: build the wrapper with no extra PLE kwargs and restart the game.
env = FlappyBirdWrapper()
env.reset()
# Last expression, so the notebook displays what render() returns; the
# recorded output is a uint8 array in which every visible entry is 0.
env.render()



array([[[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       ...,

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]],

       [[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        ...,
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]]], dtype=uint8)

In [1]:
from ple.games import Pixelcopter, MonsterKong, Pong, PuckWorld, RaycastMaze, Snake, WaterWorld
from ple import PLE
# Here `env` is the raw PLE game object (not a gym-style wrapper) and `p` is
# the PLE driver around it. Of the imported games only PuckWorld is used.
env = PuckWorld()
p = PLE(env)

pygame 2.6.0 (SDL 2.28.4, Python 3.9.19)
Hello from the pygame community. https://www.pygame.org/contribute.html
couldn't import doomish
Couldn't import doom


In [2]:
# Show the actions PLE exposes for this game. The recorded output is four
# integers (119/97/100/115, the ASCII codes of w/a/d/s) plus None
# (presumably the no-op action — TODO confirm).
p.getActionSet()

[119, 97, 100, 115, None]

In [3]:
# Restart the game through the PLE driver before inspecting its state.
p.reset_game()

In [4]:
# Display the game's state dict; the recorded output holds player position
# and velocity plus the positions of the "good" and "bad" creeps.
env.getGameState()

{'player_x': 4.5,
 'player_y': 4.5,
 'player_velocity_x': 0,
 'player_velocity_y': 0,
 'good_creep_x': 56.49369639963426,
 'good_creep_y': 19.45319673968546,
 'bad_creep_x': 64,
 'bad_creep_y': 64}

In [1]:
import configparser

# Load the INI-style settings from configure.conf (path relative to the
# notebook's working directory).
config = configparser.ConfigParser()
# read() returns the list of files it parsed and silently skips files it
# cannot open, so an empty list here would mean the config was not loaded.
config.read("configure.conf")

['configure.conf']

In [2]:
# Read the 'wandb' option of the [MONITOR] section as a boolean.
config.getboolean('MONITOR','wandb')

False

In [1]:
# Command line for a PPO training run, assembled token by token. Each tuple
# below is one option followed by its value(s); the tuples are flattened in
# order after the interpreter and script name.
args = ['python', 'ppo_mp_main.py'] + [
    token
    for option in (
        ('--env_name', 'LunarLander-v2'),
        ('--env_num', '20'),
        ('--layers', '3'),
        ('--wandb', 'False'),
        ('--tensorboard', 'False'),
        ('--max_train_steps', '500'),
        ('--per_batch_steps', '500'),
        ('--evaluate_freq', '20'),
        ('--save_freq', '20'),
        ('--batch_size', '4096'),
        ('--mini_batch_size', '512'),
        ('--hidden_width', '64'),
        ('--lr_a', '0.005'),
        ('--lr_c', '0.0002'),
        ('--gamma', '0.98'),
        ('--lamda', '0.95'),
        ('--epsilon', '0.2'),
        ('--use_gae', 'True'),
        ('--grad_clip_param', '0.5'),
        ('--use_adv_norm', 'True'),
        ('--use_state_norm', 'False'),
        ('--use_reward_norm', 'False'),
        ('--use_reward_scaling', 'False'),
        ('--entropy_coef', '0.05'),
        ('--use_lr_decay', 'True'),
        ('--use_grad_clip', 'True'),
        ('--use_orthogonal_init', 'True'),
        ('--use_ppo_clip', 'True'),
        ('--hidden_dims', '128', '128'),
    )
    for token in option
]
# Display the single-line shell form of the command.
' '.join(args)

'python ppo_mp_main.py --env_name LunarLander-v2 --env_num 20 --layers 3 --wandb False --tensorboard False --max_train_steps 500 --per_batch_steps 500 --evaluate_freq 20 --save_freq 20 --batch_size 4096 --mini_batch_size 512 --hidden_width 64 --lr_a 0.005 --lr_c 0.0002 --gamma 0.98 --lamda 0.95 --epsilon 0.2 --use_gae True --grad_clip_param 0.5 --use_adv_norm True --use_state_norm False --use_reward_norm False --use_reward_scaling False --entropy_coef 0.05 --use_lr_decay True --use_grad_clip True --use_orthogonal_init True --use_ppo_clip True --hidden_dims 128 128'

In [20]:
# Split the comma-separated 'hidden_dims' option into a list of strings.
# The fallback is an empty string rather than a list, and blank entries are
# dropped, so a missing section/option yields [] instead of ['']. Entries are
# stripped so "128, 128" parses the same as "128,128".
[dim.strip() for dim in config.get('NETWORK', 'hidden_dims', fallback='').split(',') if dim.strip()]

['128', '128']

In [22]:
# Same option without a fallback: config.get raises NoSectionError or
# NoOptionError when [NETWORK] or 'hidden_dims' is missing.
config.get('NETWORK', 'hidden_dims').split(",")

['128', '128']

: 

In [4]:
import datetime
# Current local date rendered as a YYYY-MM-DD string.
now_time = f"{datetime.datetime.now():%Y-%m-%d}"

In [5]:
# Gotcha check: bool() of any non-empty string is True, so bool("False") is
# True as well — string flags such as '--wandb False' need real parsing.
# NOTE(review): the recorded output below this cell is a date string, not
# True; it looks stale — re-run the cell.
bool("True")

'2024-09-28'