In [1]:
import datetime
import gymnasium as gym
# import importlib
import numpy as np

from time import sleep

import backtothecode_gym
import backtothecode_gym.envs.lib.board as board_lib
import backtothecode_gym.envs.lib.renderer as renderer_lib
import backtothecode_players as players_lib

from backtothecode_gym.envs import BackToTheCodeEnvParams

def create_game(params):
    """Build a BackToTheCode environment together with its two players.

    Args:
        params: Object exposing ``HERO_ID`` and ``OPPONENT_ID`` attributes
            (the notebook passes ``BackToTheCodeEnvParams``).

    Returns:
        A ``(bttc, players)`` tuple: the environment returned by
        ``gym.make`` and the list ``[hero, opponent]`` that was handed to it.
    """
    hero_player = players_lib.MultiFeatureAIPlayer(id=params.HERO_ID, name='Hero')
    opponent_player = players_lib.RandomPlayer(
        id=params.OPPONENT_ID,
        name='Opponent',
        momentum=0.5,
    )
    # Hero first, opponent second: callers unpack the list in that order.
    roster = [hero_player, opponent_player]

    env = gym.make(
        id='BackToTheCode',
        players=roster,
        renderer=renderer_lib.CanvasRenderer(),
    )
    return env, roster


def play_game(bttc, players, is_training=False, verbose=False, frame_delay=0.1):
    """Play one game on ``bttc`` until it is terminated or truncated.

    The hero is taken from ``players[0]`` (``create_game`` puts the hero
    first) instead of from a notebook-level global, so the function works on
    a fresh kernel and always drives the players it was actually given.

    Args:
        bttc: Environment exposing ``reset(is_training=...)``, ``step``,
            ``render`` and ``unwrapped.board``.
        players: Sequence of players; the first entry is the hero, which must
            provide ``move`` and (when training) ``feedback``.
        is_training: When true, every transition is reported to the hero via
            ``feedback``; otherwise each step is rendered.
        verbose: Forwarded to ``hero.move``.
        frame_delay: Seconds to pause after each rendered frame (only used
            when ``is_training`` is false).

    Raises:
        IndexError: If ``players`` is empty.
    """
    hero = players[0]

    bttc.reset(is_training=is_training)
    terminated = False
    truncated = False
    while not (terminated or truncated):
        # Snapshot the board before and after the step so the hero can learn
        # from the (state, action, reward, next_state) transition.
        old_board = bttc.unwrapped.board.copy()
        action = hero.move(old_board, verbose=verbose)
        _, reward, terminated, truncated, _ = bttc.step(action)
        new_board = bttc.unwrapped.board.copy()
        if is_training:
            # Send feedback to the hero
            hero.feedback(old_board, action, reward, new_board, terminated or truncated)
        else:
            bttc.render()
            sleep(frame_delay)

In [2]:
bttc, players = create_game(BackToTheCodeEnvParams)
hero, opponent = players

# Timestamp-based identifier under which the hero is saved after training.
run_name = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
print(run_name)
num_training_games = 20
try:
    for game_id in range(num_training_games):
        print(f"Game {game_id}... ", end='')
        play_game(bttc, players, is_training=True)
        for player in players:
            if player.trainable:
                player.train()
        # Read env state through `unwrapped`, as play_game does: attribute
        # forwarding through gym wrappers is deprecated/removed in Gymnasium.
        print(f"Rounds: {bttc.unwrapped.round_number} Score: {hero.score}/{opponent.score} Random move prob: {hero._get_random_move_prob()}", hero.rewards)

    hero.save(run_name)

    print("Showtime!")
    play_game(bttc, players, is_training=False, verbose=True)
finally:
    # Always release the environment, even if the run is interrupted
    # (e.g. KeyboardInterrupt during the showtime game).
    bttc.close()

20230827-130122
Game 0... 

  state = torch.tensor(state, dtype=torch.float)
  logger.warn(


Rounds: 350 Score: 156/136 Random move prob: 0.9235056600070435 [1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 2, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1,

  logger.warn(


Canvas(height=696, sync_image_data=True, width=1050)

[0 1 0 0 0 1 0 0] -> tensor([ 0.1995, -0.0106,  0.1745,  0.1623], grad_fn=<AddBackward0>)
[0 1 0 0 0 1 0 0] -> tensor([ 0.1995, -0.0106,  0.1745,  0.1623], grad_fn=<AddBackward0>)
[0 1 0 0 0 1 0 0] -> tensor([ 0.1995, -0.0106,  0.1745,  0.1623], grad_fn=<AddBackward0>)
[0 1 0 0 0 1 0 0] -> tensor([ 0.1995, -0.0106,  0.1745,  0.1623], grad_fn=<AddBackward0>)
[0 1 0 0 0 1 0 0] -> tensor([ 0.1995, -0.0106,  0.1745,  0.1623], grad_fn=<AddBackward0>)
[0 1 0 0 0 1 0 0] -> tensor([ 0.1995, -0.0106,  0.1745,  0.1623], grad_fn=<AddBackward0>)
[0 1 0 0 0 1 0 0] -> tensor([ 0.1995, -0.0106,  0.1745,  0.1623], grad_fn=<AddBackward0>)
[100   1   0   0   0   1   0   0] -> tensor([-13.6085,  -0.3799,  -2.9492,  -1.4080], grad_fn=<AddBackward0>)
[1 1 0 0 1 0 0 0] -> tensor([-0.0706,  0.0725,  0.0804,  0.0832], grad_fn=<AddBackward0>)
[0 0 1 0 0 0 1 0] -> tensor([ 0.1554,  0.0074, -0.0997,  0.1516], grad_fn=<AddBackward0>)
[100   1   1   0   0   1   0   0] -> tensor([-13.7003,  -0.3804,  -2.9839,  -1.4

KeyboardInterrupt: 

In [None]:
bttc, players = create_game(BackToTheCodeEnvParams)
hero, opponent = players

# Identifier of a previously saved run to replay; this overrides any
# `run_name` set by an earlier cell.
run_name = '20230827-112056'
try:
    hero.load(run_name)

    print("Showtime!")
    play_game(bttc, players, is_training=False, verbose=True)
finally:
    # Always release the environment, even if loading fails or the game is
    # interrupted.
    bttc.close()

In [None]:
# (stray scratch cell: the bare name `z` was undefined and raised NameError on Restart & Run All)