In [1]:
%load_ext autoreload
%autoreload 2

In [10]:
from functools import partial

from darts_pro.base import DartBoard, compute_probability_lookup
from darts_pro.dqn import (
    Agent,
    PureGreedyStrategy,
    LinearNetwork,
    EpisodeEndPayload,
    AgentPlayer,
    TrainingPlayer
)
from darts_pro.games.o_one import (
    X01GameState,
    X01TrainingLoop,
    X01TrainingConfig,
    X01Game,
    create_seeded_x01_memory
)

In [3]:
# --- Training run -----------------------------------------------------------
# Episode count handed to X01TrainingLoop.
N_EPISODES = 500
# Shape argument given to LinearNetwork for both the policy and target networks.
NETWORK_SHAPE = [128, 128]
# Score each game starts from; also used as the config's max_target_score.
TARGET_SCORE = 20
# Turn limit passed to the test games played from the episode-end callback.
MAX_TURNS = 10

# --- Periodic test games ----------------------------------------------------
# No test game is played for episode numbers below this threshold.
MIN_EPISODES_BEFORE_TEST_GAME = 300
# A test game is played only on episode numbers divisible by this value.
PLAY_TEST_GAME_FREQUENCY = 100

In [4]:
def _get_starting_scores(target_score: int, _: int) -> tuple[dict[int, int], int]:
    return {0: target_score}, 0

def _play_game(
    agent_player: AgentPlayer,
    board: DartBoard,
    target_score: int,
    max_turns: int,
    episode_number: int
):
    """Play one verbose X01 game with ``agent_player`` as the only player.

    Args:
        agent_player: Player placed alone on team ``0``.
        board: Dartboard handed to ``X01Game``.
        target_score: Target score for the game; also the starting score.
        max_turns: Turn limit handed to ``X01Game``.
        episode_number: Forwarded to ``_get_starting_scores``.

    Returns:
        None. The game is played with ``verbose=True``.
    """
    # Only the score table is needed; the second element of the pair is unused.
    starting_scores, _unused = _get_starting_scores(target_score, episode_number)
    single_team_game = X01Game(
        {0: [agent_player]},
        board,
        target_score,
        max_turns,
        starting_scores,
    )

    single_team_game.play_to_completion(verbose=True)
    

def play_game_callback(
    agent_player: AgentPlayer,
    board: DartBoard,
    target_score: int,
    max_turns: int,
    payload: EpisodeEndPayload
):
    """Episode-end hook: print progress and occasionally play a test game.

    A progress line is printed on every 50th episode. A verbose test game is
    played only when the episode number is non-zero, is at least
    ``MIN_EPISODES_BEFORE_TEST_GAME``, and is a multiple of
    ``PLAY_TEST_GAME_FREQUENCY``.

    Args:
        agent_player: Player used for the test game.
        board: Dartboard used for the test game.
        target_score: Target score of the test game.
        max_turns: Turn limit of the test game.
        payload: Episode-end payload; only ``episode_number`` is read.
    """
    episode = payload.episode_number
    separator = "----------------------------"

    if episode % 50 == 0:
        print(f"Completed episode {episode}")

    # Guard clauses: bail out unless this episode qualifies for a test game.
    if episode == 0 or episode < MIN_EPISODES_BEFORE_TEST_GAME:
        return
    if episode % PLAY_TEST_GAME_FREQUENCY != 0:
        return

    print(separator)
    print(f"Playing test game for episode {episode}")
    _play_game(agent_player, board, target_score, max_turns, episode)
    print(f"Finished test game for episode {episode}")
    print(separator)


In [5]:
# Dartboard shared by training and by the test games.
# NOTE(review): the meaning of the positional False is not visible here — confirm against DartBoard.
board = DartBoard.get_default_dartboard(False)
# Network input width: first dimension of the tensor produced by a placeholder
# game state. Only the tensor's shape is used, not its values.
input_size = X01GameState({0: 0}, 301, 0, 0, '', 0).to_tensor().shape[0]
# Policy and target networks are built with identical arguments; the second
# argument is the number of indexed targets on the board (presumably the
# output/action count — confirm against LinearNetwork).
policy_network = LinearNetwork(input_size, len(board.indexed_targets), NETWORK_SHAPE)
target_network = LinearNetwork(input_size, len(board.indexed_targets), NETWORK_SHAPE)
# Start the target network from the policy network's state, then call eval() on it.
target_network.load_state_dict(policy_network.state_dict())
target_network.eval()

config = X01TrainingConfig(max_target_score=TARGET_SCORE, min_win_turns=1)

# Probability lookups computed from the config's accuracy sigmas and the number
# of radial targets; passed to the inference agent below. Seeding the memory
# here is disabled (the call is left commented out).
lookups = compute_probability_lookup(config.accuracy_sigma_x, config.accuracy_sigma_y, len(board.radial_targets))
# seeded_memory = create_seeded_x01_memory(board, lookups)

# Inference agent: a PureGreedyStrategy agent over the board's indexed-target
# keys, driven by the policy network being trained.
inference_agent = AgentPlayer(
    Agent(PureGreedyStrategy(), list(board.indexed_targets.keys())),
    policy_network,
    0,
    'InferenceAgent',
    lookups
)
# Pre-bind everything except the payload so the training loop can call
# game_cb(payload) at the end of each episode.
game_cb = partial(play_game_callback, inference_agent, board, TARGET_SCORE, MAX_TURNS)

In [6]:
# Bind TARGET_SCORE so the helper can be handed to the loop as initialize_game.
# NOTE(review): presumably the loop supplies the remaining (ignored) argument —
# confirm the expected initialize_game signature.
get_starting_scores_for_loop = partial(_get_starting_scores, TARGET_SCORE)
loop = X01TrainingLoop(
    N_EPISODES,
    policy_network,
    target_network,
    config,
    board=board,
    initialize_game=get_starting_scores_for_loop,
    # memory=seeded_memory
)
# Register the progress / test-game callback to run at the end of each episode.
loop.add_on_episode_end_callback(game_cb)

In [7]:
# Run training. The registered episode-end callback prints the progress lines
# and the verbose test-game logs shown in this cell's output.
loop.run()

Completed episode 0
Completed episode 50
Completed episode 100
Completed episode 150
Completed episode 200
Completed episode 250
Completed episode 300
Completed episode 350
Completed episode 400
----------------------------
Playing test game for episode 400
Playing throw 1, turn 1 for player 0__InferenceAgent
Current scores: {'scores': {0: 20}, 'target_score': 20, 'turn_number': 0, 'current_throwing_team': 0, 'current_throwing_player': '0__InferenceAgent', 'current_thrower_dart_number': 0}
Throwing at Target(value=15, multiplier=3, is_bullseye=False)
Hit target Target(value=0, multiplier=1, is_bullseye=False)
Current scores: {'scores': {0: 20}, 'target_score': 20, 'turn_number': 0, 'current_throwing_team': 0, 'current_throwing_player': '0__InferenceAgent', 'current_thrower_dart_number': 0}
Game is currently done: False

Playing throw 2, turn 1 for player 0__InferenceAgent
Current scores: {'scores': {0: 20}, 'target_score': 20, 'turn_number': 0, 'current_throwing_team': 0, 'current_thro

## Testing After Training

In [None]:
# board = DartBoard.get_default_dartboard(False)
# all_actions = list(board.indexed_targets.keys())

# new_agent = Agent(PureGreedyStrategy(), all_actions)
# dummy_state = X01GameState({0: 5}, 301, 17, 0, '', 0).to_tensor()[None]
# act = new_agent.select_action(dummy_state, policy_network)
# print(board.indexed_targets[act])

In [None]:
# Play one verbose game with the inference agent: target score 301, max_turns 5.
# The last argument (episode_number) is forwarded to _get_starting_scores, which ignores it.
# NOTE(review): the training config used max_target_score=TARGET_SCORE (20);
# 301 is well above that — confirm this is the intended evaluation.
_play_game(inference_agent, board, 301, 5, 1)

In [11]:
# Build a seeded memory using a TrainingPlayer constructed from the lookups.
# NOTE(review): the meaning of the positional arguments (301, 10, 5000) is not
# visible here — confirm against create_seeded_x01_memory.
# NOTE(review): this call shape differs from the commented-out
# `create_seeded_x01_memory(board, lookups)` in the setup cell, which looks stale.
seeded_memory = create_seeded_x01_memory(TrainingPlayer(0, 'seed', lookups), board, 301, 10, 5000)

In [14]:
# Number of entries held by the seeded memory.
# NOTE(review): this reads the private `_memory` attribute directly; prefer a
# public size accessor if the memory class offers one.
len(seeded_memory._memory)

6100

In [15]:
# Display the full probability lookup table.
# NOTE(review): the captured output shows a negative miss_percentage
# (about -0.113) for the BULLSEYE aim point. A probability should not be
# negative — check compute_probability_lookup.
lookups

{<AimPoints.BULLSEYE: 1>: ProbabilityComputationResult(sigma_x=0.25, sigma_y=0.25, double_bull_percentage=0.025055662959059075, single_bull_percentage=0.12373780476822326, miss_percentage=-0.11313549760482067, radial_target_percentages={0: RadialProbabiltyResult(inner_single_percentage=0.038048929334225205, triple_percentage=0.002443341082735139, outer_single_percentage=0.007449860497931826, double_percentage=0.00027497057898474934), 1: RadialProbabiltyResult(inner_single_percentage=0.038048929334225205, triple_percentage=0.002443341082735139, outer_single_percentage=0.007449860497931827, double_percentage=0.00027497057898474944), 2: RadialProbabiltyResult(inner_single_percentage=0.038048929334225205, triple_percentage=0.0024433410827351385, outer_single_percentage=0.007449860497931827, double_percentage=0.00027497057898474944), 3: RadialProbabiltyResult(inner_single_percentage=0.038048929334225205, triple_percentage=0.002443341082735139, outer_single_percentage=0.007449860497931827, d