In [3]:
from enum import Enum

import numpy as np
import pygame

import gymnasium as gym
from gymnasium import spaces


class Actions(Enum):
    RIGHT = 0
    UP = 1
    LEFT = 2
    DOWN = 3


class GridWorldEnv(gym.Env):
    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4}

    def __init__(self, render_mode=None, size=5):
        self.size = size  # The size of the square grid
        self.window_size = 512  # The size of the PyGame window

        # Observations are dictionaries with the agent's and the target's location.
        # Each location is encoded as an element of {0, ..., `size`}^2, i.e. MultiDiscrete([size, size]).
        self.observation_space = spaces.Dict(
            {
                "agent": spaces.Box(0, size - 1, shape=(2,), dtype=int),
                "target": spaces.Box(0, size - 1, shape=(2,), dtype=int),
            }
        )
        self._agent_location = np.array([-1, -1], dtype=int)
        self._target_location = np.array([-1, -1], dtype=int)

        # We have 4 actions, corresponding to "right", "up", "left", "down"
        self.action_space = spaces.Discrete(4)

        """
        The following dictionary maps abstract actions from `self.action_space` to
        the direction we will walk in if that action is taken.
        i.e. 0 corresponds to "right", 1 to "up" etc.
        """
        self._action_to_direction = {
            Actions.RIGHT.value: np.array([1, 0]),
            Actions.UP.value: np.array([0, 1]),
            Actions.LEFT.value: np.array([-1, 0]),
            Actions.DOWN.value: np.array([0, -1]),
        }

        assert render_mode is None or render_mode in self.metadata["render_modes"]
        self.render_mode = render_mode

        """
        If human-rendering is used, `self.window` will be a reference
        to the window that we draw to. `self.clock` will be a clock that is used
        to ensure that the environment is rendered at the correct framerate in
        human-mode. They will remain `None` until human-mode is used for the
        first time.
        """
        self.window = None
        self.clock = None

    def _get_obs(self):
        return {"agent": self._agent_location, "target": self._target_location}

    # %%
    # We can also implement a similar method for the auxiliary information
    # that is returned by ``step`` and ``reset``. In our case, we would like
    # to provide the manhattan distance between the agent and the target:

    def _get_info(self):
        return {"distance": np.linalg.norm(self._agent_location - self._target_location, ord=1)}

    def reset(self, seed=None, options=None):
        # We need the following line to seed self.np_random
        super().reset(seed=seed)

        # Choose the agent's location uniformly at random
        self._agent_location = self.np_random.integers(0, self.size, size=2, dtype=int)

        # We will sample the target's location randomly until it does not coincide with the agent's location
        self._target_location = self._agent_location
        while np.array_equal(self._target_location, self._agent_location):
            self._target_location = self.np_random.integers(0, self.size, size=2, dtype=int)

        observation = self._get_obs()
        info = self._get_info()

        if self.render_mode == "human":
            self._render_frame()

        return observation, info

    # %%
    # Step
    # ~~~~
    #
    # The ``step`` method usually contains most of the logic of your
    # environment. It accepts an ``action``, computes the state of the
    # environment after applying that action and returns the 5-tuple
    # ``(observation, reward, terminated, truncated, info)``. See
    # :meth:`gymnasium.Env.step`. Once the new state of the environment has
    # been computed, we can check whether it is a terminal state and we set
    # ``done`` accordingly. Since we are using sparse binary rewards in
    # ``GridWorldEnv``, computing ``reward`` is trivial once we know
    # ``done``.To gather ``observation`` and ``info``, we can again make
    # use of ``_get_obs`` and ``_get_info``:

    def step(self, action):
        # Map the action (element of {0,1,2,3}) to the direction we walk in
        direction = self._action_to_direction[action]
        # We use `np.clip` to make sure we don't leave the grid
        self._agent_location = np.clip(self._agent_location + direction, 0, self.size - 1)
        # An episode is done iff the agent has reached the target
        terminated = np.array_equal(self._agent_location, self._target_location)
        reward = 1 if terminated else 0  # Binary sparse rewards
        observation = self._get_obs()
        info = self._get_info()

        if self.render_mode == "human":
            self._render_frame()

        return observation, reward, terminated, False, info

    # %%
    # Rendering
    # ~~~~~~~~~
    #
    # Here, we are using PyGame for rendering. A similar approach to rendering
    # is used in many environments that are included with Gymnasium and you
    # can use it as a skeleton for your own environments:

    def render(self):
        if self.render_mode == "rgb_array":
            return self._render_frame()

    def _render_frame(self):
        if self.window is None and self.render_mode == "human":
            pygame.init()
            pygame.display.init()
            self.window = pygame.display.set_mode((self.window_size, self.window_size))
        if self.clock is None and self.render_mode == "human":
            self.clock = pygame.time.Clock()

        canvas = pygame.Surface((self.window_size, self.window_size))
        canvas.fill((255, 255, 255))
        pix_square_size = self.window_size / self.size  # The size of a single grid square in pixels

        # First we draw the target
        pygame.draw.rect(
            canvas,
            (255, 0, 0),
            pygame.Rect(
                pix_square_size * self._target_location,
                (pix_square_size, pix_square_size),
            ),
        )
        # Now we draw the agent
        pygame.draw.circle(
            canvas,
            (0, 0, 255),
            (self._agent_location + 0.5) * pix_square_size,
            pix_square_size / 3,
        )

        # Finally, add some gridlines
        for x in range(self.size + 1):
            pygame.draw.line(
                canvas,
                0,
                (0, pix_square_size * x),
                (self.window_size, pix_square_size * x),
                width=3,
            )
            pygame.draw.line(
                canvas,
                0,
                (pix_square_size * x, 0),
                (pix_square_size * x, self.window_size),
                width=3,
            )

        if self.render_mode == "human":
            # The following line copies our drawings from `canvas` to the visible window
            self.window.blit(canvas, canvas.get_rect())
            pygame.event.pump()
            pygame.display.update()

            # We need to ensure that human-rendering occurs at the predefined framerate.
            # The following line will automatically add a delay to keep the framerate stable.
            self.clock.tick(self.metadata["render_fps"])
        else:  # rgb_array
            return np.transpose(np.array(pygame.surfarray.pixels3d(canvas)), axes=(1, 0, 2))

    # %%
    # Close
    # ~~~~~
    #
    # The ``close`` method should close any open resources that were used by
    # the environment. In many cases, you don’t actually have to bother to
    # implement this method. However, in our example ``render_mode`` may be
    # ``"human"`` and we might need to close the window that has been opened:

    def close(self):
        if self.window is not None:
            pygame.display.quit()
            pygame.quit()

In [10]:
env = GridWorldEnv(render_mode="rgb_array")
env.observation_space

Dict('agent': Box(0, 4, (2,), int64), 'target': Box(0, 4, (2,), int64))

In [12]:
from gymnasium.wrappers import RecordEpisodeStatistics, RecordVideo

num_eval_episodes = 10

env = RecordVideo(env, video_folder="cartpole-agent", name_prefix="eval", episode_trigger=lambda x: True)
env = RecordEpisodeStatistics(env, buffer_length=num_eval_episodes)

for episode_num in range(num_eval_episodes):
    obs, info = env.reset()

    episode_over = False
    while not episode_over:
        action = env.action_space.sample()  # replace with actual agent
        obs, reward, terminated, truncated, info = env.step(action)

        episode_over = terminated or truncated
env.close()

print(f"Episode time taken: {env.time_queue}")
print(f"Episode total rewards: {env.return_queue}")
print(f"Episode lengths: {env.length_queue}")

  logger.warn(
Exception ignored in: <function RecordVideo.__del__ at 0x75615d72f5b0>
Traceback (most recent call last):
  File "/home/showpiece/Desktop/Ed/CardGame/.venv/lib/python3.10/site-packages/gymnasium/wrappers/rendering.py", line 415, in __del__
    if len(self.recorded_frames) > 0:
AttributeError: 'RecordVideo' object has no attribute 'recorded_frames'
Exception ignored in: <function RecordVideo.__del__ at 0x75615d72f5b0>
Traceback (most recent call last):
  File "/home/showpiece/Desktop/Ed/CardGame/.venv/lib/python3.10/site-packages/gymnasium/wrappers/rendering.py", line 415, in __del__
    if len(self.recorded_frames) > 0:
AttributeError: 'RecordVideo' object has no attribute 'recorded_frames'


Episode time taken: deque([0.051061, 0.037788, 0.084512, 0.031572, 0.096664, 0.024848, 0.039326, 0.038904, 0.156592, 0.104778], maxlen=10)
Episode total rewards: deque([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], maxlen=10)
Episode lengths: deque([40, 27, 49, 13, 63, 12, 20, 22, 95, 67], maxlen=10)


In [16]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import random

# Определение карт и мастей
SUITS = ["hearts", "diamonds", "clubs", "spades"]
RANKS = ["6", "7", "8", "9", "10", "J", "Q", "K", "A"]

# Создание колоды
DECK = [(rank, suit) for rank in RANKS for suit in SUITS]

# Определение значений карт
RANK_VALUES = {rank: value for value, rank in enumerate(RANKS)}


class DurakEnv(gym.Env):
    def __init__(self, num_players=2):
        super(DurakEnv, self).__init__()

        # Количество игроков
        self.num_players = num_players

        # Определение пространства действий
        self.action_space = spaces.Discrete(len(DECK))  # Действия: выбор карты из руки

        # Определение пространства наблюдений
        self.observation_space = spaces.Dict(
            {
                "hand": spaces.MultiDiscrete([len(DECK)] * len(DECK)),  # Карты в руке
                "table": spaces.MultiDiscrete([len(DECK)] * len(DECK)),  # Карты на столе
                "trump": spaces.Discrete(len(SUITS)),  # Козырь
                "turn": spaces.Discrete(num_players),  # Очередь хода
            }
        )

        # Инициализация состояния игры
        self.reset()

    def reset(self, seed=None, options=None):
        # Сброс игры
        self.deck = DECK.copy()
        random.shuffle(self.deck)

        # Раздача карт
        self.hands = [[] for _ in range(self.num_players)]
        for _ in range(6):  # Каждый игрок получает 6 карт
            for player in range(self.num_players):
                if self.deck:
                    self.hands[player].append(self.deck.pop())

        # Определение козыря
        self.trump = random.choice(SUITS)

        # Инициализация стола
        self.table = []

        # Очередь хода
        self.turn = 0

        return self._get_observation(), {}

    def step(self, action):
        # Выполнение действия
        player = self.turn
        card = self.hands[player][action]

        # Проверка корректности действия
        if not self._is_valid_action(card):
            return self._get_observation(), -1, False, False, {}  # Штраф за некорректное действие

        # Добавление карты на стол
        self.table.append(card)
        self.hands[player].remove(card)

        # Переход хода
        self.turn = (self.turn + 1) % self.num_players

        # Проверка завершения игры
        done = self._is_game_over()
        reward = 1 if not done else -1  # Награда за корректное действие

        return self._get_observation(), reward, done, False, {}

    def _get_observation(self):
        # Возвращает текущее состояние игры
        return {
            "hand": self.hands[self.turn],
            "table": self.table,
            "trump": self.trump,
            "turn": self.turn,
        }

    def _is_valid_action(self, card):
        # Проверка корректности действия
        if not self.table:
            return True  # Первый ход
        # Проверка соответствия масти или козыря
        return card[1] == self.table[-1][1] or card[1] == self.trump

    def _is_game_over(self):
        # Проверка завершения игры
        return all(len(hand) == 0 for hand in self.hands)

    def render(self, mode="human"):
        # Визуализация игры (опционально)
        print(f"Player {self.turn}'s turn")
        print(f"Hand: {self.hands[self.turn]}")
        print(f"Table: {self.table}")
        print(f"Trump: {self.trump}")


env = DurakEnv(num_players=2)
observation, info = env.reset()

for _ in range(100):
    action = env.action_space.sample()  # Случайное действие
    observation, reward, done, truncated, info = env.step(action)

    if done:
        observation, info = env.reset()

env.close()

IndexError: list index out of range

In [17]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import random

# Определение карт и мастей
SUITS = ["hearts", "diamonds", "clubs", "spades"]
RANKS = ["6", "7", "8", "9", "10", "J", "Q", "K", "A"]

# Создание колоды
DECK = [(rank, suit) for rank in RANKS for suit in SUITS]

# Определение значений карт
RANK_VALUES = {rank: value for value, rank in enumerate(RANKS)}


class DurakEnv(gym.Env):
    def __init__(self, num_players=2):
        super(DurakEnv, self).__init__()

        # Количество игроков
        self.num_players = num_players

        # Определение пространства действий
        self.action_space = spaces.Discrete(len(DECK))  # Действия: выбор карты из руки

        # Определение пространства наблюдений
        self.observation_space = spaces.Dict(
            {
                "hand": spaces.MultiDiscrete([len(DECK)] * len(DECK)),  # Карты в руке
                "table": spaces.MultiDiscrete([len(DECK)] * len(DECK)),  # Карты на столе
                "trump": spaces.Discrete(len(SUITS)),  # Козырь
                "turn": spaces.Discrete(num_players),  # Очередь хода
            }
        )

        # Инициализация состояния игры
        self.reset()

    def reset(self, seed=None, options=None):
        # Сброс игры
        self.deck = DECK.copy()
        random.shuffle(self.deck)

        # Раздача карт
        self.hands = [[] for _ in range(self.num_players)]
        for _ in range(6):  # Каждый игрок получает 6 карт
            for player in range(self.num_players):
                if self.deck:
                    self.hands[player].append(self.deck.pop())

        # Определение козыря
        self.trump = random.choice(SUITS)

        # Инициализация стола
        self.table = []

        # Очередь хода
        self.turn = 0

        return self._get_observation(), {}

    def step(self, action):
        # Выполнение действия
        player = self.turn

        # Проверка, что действие находится в пределах карт игрока
        if action >= len(self.hands[player]):
            return self._get_observation(), -1, False, False, {}  # Штраф за некорректное действие

        card = self.hands[player][action]

        # Проверка корректности действия
        if not self._is_valid_action(card):
            return self._get_observation(), -1, False, False, {}  # Штраф за некорректное действие

        # Добавление карты на стол
        self.table.append(card)
        self.hands[player].remove(card)

        # Переход хода
        self.turn = (self.turn + 1) % self.num_players

        # Проверка завершения игры
        done = self._is_game_over()
        reward = 1 if not done else -1  # Награда за корректное действие

        return self._get_observation(), reward, done, False, {}

    def _get_observation(self):
        # Возвращает текущее состояние игры
        return {
            "hand": self.hands[self.turn],
            "table": self.table,
            "trump": self.trump,
            "turn": self.turn,
        }

    def _is_valid_action(self, card):
        # Проверка корректности действия
        if not self.table:
            return True  # Первый ход
        # Проверка соответствия масти или козыря
        return card[1] == self.table[-1][1] or card[1] == self.trump

    def _is_game_over(self):
        # Проверка завершения игры
        return all(len(hand) == 0 for hand in self.hands)

    def render(self, mode="human"):
        # Визуализация игры (опционально)
        print(f"Player {self.turn}'s turn")
        print(f"Hand: {self.hands[self.turn]}")
        print(f"Table: {self.table}")
        print(f"Trump: {self.trump}")


env = DurakEnv(num_players=2)
observation, info = env.reset()

for _ in range(100):
    action = env.action_space.sample()  # Случайное действие
    observation, reward, done, truncated, info = env.step(action)

    if done:
        observation, info = env.reset()

env.close()

In [None]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import random

# Определение карт и мастей
SUITS = ["♤", "♧", "♢", "♡"]  # Символы для мастей
RANKS = ["6", "7", "8", "9", "10", "J", "Q", "K", "A"]

# Создание колоды
DECK = [(rank, suit) for rank in RANKS for suit in SUITS]

# Определение значений карт
RANK_VALUES = {rank: value for value, rank in enumerate(RANKS)}


class DurakEnv(gym.Env):
    def __init__(self, num_players=2):
        super(DurakEnv, self).__init__()

        # Количество игроков
        self.num_players = num_players

        # Определение пространства действий
        self.action_space = spaces.Discrete(len(DECK))  # Действия: выбор карты из руки

        # Определение пространства наблюдений
        self.observation_space = spaces.Dict(
            {
                "hand": spaces.MultiDiscrete([len(DECK)] * len(DECK)),  # Карты в руке
                "table": spaces.MultiDiscrete([len(DECK)] * len(DECK)),  # Карты на столе
                "trump": spaces.Discrete(len(SUITS)),  # Козырь
                "turn": spaces.Discrete(num_players),  # Очередь хода
            }
        )

        # Инициализация состояния игры
        self.reset()

    def reset(self, seed=None, options=None):
        # Сброс игры
        self.deck = DECK.copy()
        random.shuffle(self.deck)

        # Раздача карт
        self.hands = [[] for _ in range(self.num_players)]
        for _ in range(6):  # Каждый игрок получает 6 карт
            for player in range(self.num_players):
                if self.deck:
                    self.hands[player].append(self.deck.pop())

        # Определение козыря
        self.trump = random.choice(SUITS)

        # Инициализация стола
        self.table = []

        # Очередь хода
        self.turn = 0

        return self._get_observation(), {}

    def step(self, action):
        # Выполнение действия
        player = self.turn

        # Проверка, что действие находится в пределах карт игрока
        if action in self.hands[player]:
            return self._get_observation(), -1, False, False, {}  # Штраф за некорректное действие

        card = self.hands[player][action]

        # Проверка корректности действия
        if not self._is_valid_action(card):
            return self._get_observation(), -1, False, False, {}  # Штраф за некорректное действие

        # Добавление карты на стол
        self.table.append(card)
        self.hands[player].remove(card)

        # Переход хода
        self.turn = (self.turn + 1) % self.num_players

        # Проверка завершения игры
        done = self._is_game_over()
        reward = 1 if not done else -1  # Награда за корректное действие

        return self._get_observation(), reward, done, False, {}

    def _get_observation(self):
        # Возвращает текущее состояние игры
        return {
            "hand": self.hands[self.turn],
            "table": self.table,
            "trump": self.trump,
            "turn": self.turn,
        }

    def _is_valid_action(self, card):
        # Проверка корректности действия
        if not self.table:
            return True  # Первый ход
        # Проверка соответствия масти или козыря
        return card[1] == self.table[-1][1] or card[1] == self.trump

    def _is_game_over(self):
        # Проверка завершения игры
        return all(len(hand) == 0 for hand in self.hands)

    def render(self, mode="human"):
        # Визуализация игры
        print("\n" + "=" * 40)
        print(f"Player {self.turn}'s turn")
        print("=" * 40)

        # Отображение карт на столе
        print("Table: ", end="")
        for card in self.table:
            print(f"{card[0]}{card[1]}", end=" ")
        print()

        # Отображение карт в руках игроков
        for player in range(self.num_players):
            print(f"Player {player} hand: ", end="")
            for card in self.hands[player]:
                print(f"{card[0]}{card[1]}", end=" ")
            print()

        # Отображение козыря
        print(f"Trump: {self.trump}")
        print("=" * 40 + "\n")


env = DurakEnv(num_players=2)
observation, info = env.reset()

for _ in range(100):
    # Выбор случайного действия из доступных карт в руке
    hand_size = len(observation["hand"])
    action = np.random.choice(hand_size)  # Выбираем случайное действие из руки

    # Визуализация текущего состояния
    env.render()

    # Выполнение действия
    observation, reward, done, truncated, info = env.step(action)

    if done:
        print("Game over!")
        observation, info = env.reset()

env.close()


Player 0's turn
Table: 
Player 0 hand: Q♢ K♤ 7♧ A♢ 9♧ A♧ 
Player 1 hand: J♤ 8♡ K♡ 10♧ 6♡ J♡ 
Trump: ♡


Player 1's turn
Table: 7♧ 
Player 0 hand: Q♢ K♤ A♢ 9♧ A♧ 
Player 1 hand: J♤ 8♡ K♡ 10♧ 6♡ J♡ 
Trump: ♡


Player 1's turn
Table: 7♧ 
Player 0 hand: Q♢ K♤ A♢ 9♧ A♧ 
Player 1 hand: J♤ 8♡ K♡ 10♧ 6♡ J♡ 
Trump: ♡


Player 0's turn
Table: 7♧ 10♧ 
Player 0 hand: Q♢ K♤ A♢ 9♧ A♧ 
Player 1 hand: J♤ 8♡ K♡ 6♡ J♡ 
Trump: ♡


Player 0's turn
Table: 7♧ 10♧ 
Player 0 hand: Q♢ K♤ A♢ 9♧ A♧ 
Player 1 hand: J♤ 8♡ K♡ 6♡ J♡ 
Trump: ♡


Player 0's turn
Table: 7♧ 10♧ 
Player 0 hand: Q♢ K♤ A♢ 9♧ A♧ 
Player 1 hand: J♤ 8♡ K♡ 6♡ J♡ 
Trump: ♡


Player 0's turn
Table: 7♧ 10♧ 
Player 0 hand: Q♢ K♤ A♢ 9♧ A♧ 
Player 1 hand: J♤ 8♡ K♡ 6♡ J♡ 
Trump: ♡


Player 0's turn
Table: 7♧ 10♧ 
Player 0 hand: Q♢ K♤ A♢ 9♧ A♧ 
Player 1 hand: J♤ 8♡ K♡ 6♡ J♡ 
Trump: ♡


Player 0's turn
Table: 7♧ 10♧ 
Player 0 hand: Q♢ K♤ A♢ 9♧ A♧ 
Player 1 hand: J♤ 8♡ K♡ 6♡ J♡ 
Trump: ♡


Player 1's turn
Table: 7♧ 10♧ A♧ 
Player 0 hand: Q♢ K♤ A♢ 9♧ 
P