# Darkchess Brain AI Playground
> Choose two agents from the list below.  
> e.g. Human (you) vs. AlphaBeta (CPU)

In [None]:
from brain.arena import Battle
from brain.agent import (
    Human,
    Random,
    MinMax,
    AlphaBeta
)

# Pair a human player (player 1) with an AlphaBeta agent built with depth=4
# (player 2); verbose=True is passed through to Battle.
battle = Battle(player1=Human(), player2=AlphaBeta(depth=4), verbose=True)

# Start the battle
battle.play_games()

# Log in to Hugging Face (only needed once)

In [None]:
from huggingface_hub import interpreter_login
# Interactive login to the Hugging Face Hub from the interpreter
# (see the huggingface_hub documentation for where the token is stored).
interpreter_login()

# Train the agent
> Before training or running a test battle, you can first load a Q-table or model from a local file or from Hugging Face using the other code cells in this notebook.
- Learning Agent List for training
- Plot training results

# Q-Learning training (small 3x4 board)

In [None]:
from brain.agent import QL, Random, AlphaBeta

# Train a Q-learning agent in small 3x4 mode. Per the logged output, each
# iteration plays the training epochs against itself and then updates the
# Q-table; a q-table.npz transfer bar follows (presumably the upload to the
# repository named by hub_model_id).
ql = QL(small3x4_mode=True)
ql.train(
    iterations=10_000,
    epochs=10_000,
    evaluate_epochs=20,
    # Opponents: Random plus AlphaBeta(1), AlphaBeta(2), AlphaBeta(4), AlphaBeta(6).
    evaluate_agents=[Random(), *(AlphaBeta(level) for level in (1, 2, 4, 6))],
    evaluate_interval=1_000,
    ignore_draw=True,
    hub_model_id="ryanlinjui/darkchess-robot-brain-QL-small3x4",
)
# ql.plot()

  from .autonotebook import tqdm as notebook_tqdm


Iteration 1/10000


Training epochs playing against itself: 100%|██████████| 10000/10000 [00:14<00:00, 691.95it/s]
Updating Q-table: 100%|██████████| 10000/10000 [00:02<00:00, 4219.80it/s]
q-table.npz: 100%|██████████| 3.89M/3.89M [00:01<00:00, 2.47MB/s]


Iteration 2/10000


Training epochs playing against itself: 100%|██████████| 10000/10000 [00:14<00:00, 711.94it/s]
Updating Q-table: 100%|██████████| 10000/10000 [00:02<00:00, 4242.34it/s]
q-table.npz: 100%|██████████| 7.67M/7.67M [00:02<00:00, 3.44MB/s]


Iteration 3/10000


Training epochs playing against itself: 100%|██████████| 10000/10000 [00:14<00:00, 700.91it/s]
Updating Q-table: 100%|██████████| 10000/10000 [00:02<00:00, 4182.45it/s]
q-table.npz: 100%|██████████| 11.4M/11.4M [00:02<00:00, 4.20MB/s]


Iteration 4/10000


Training epochs playing against itself: 100%|██████████| 10000/10000 [00:14<00:00, 680.95it/s]
Updating Q-table: 100%|██████████| 10000/10000 [00:02<00:00, 4210.58it/s]
q-table.npz: 100%|██████████| 15.2M/15.2M [00:03<00:00, 4.34MB/s]


Iteration 5/10000


Training epochs playing against itself: 100%|██████████| 10000/10000 [00:14<00:00, 691.53it/s]
Updating Q-table: 100%|██████████| 10000/10000 [00:02<00:00, 4322.04it/s]
q-table.npz: 100%|██████████| 18.9M/18.9M [00:04<00:00, 3.93MB/s]


Iteration 6/10000


Training epochs playing against itself: 100%|██████████| 10000/10000 [00:14<00:00, 677.86it/s]
Updating Q-table: 100%|██████████| 10000/10000 [00:02<00:00, 4291.04it/s]
q-table.npz: 100%|██████████| 22.6M/22.6M [00:05<00:00, 4.42MB/s]


Iteration 7/10000


Training epochs playing against itself: 100%|██████████| 10000/10000 [00:14<00:00, 674.85it/s]
Updating Q-table: 100%|██████████| 10000/10000 [00:02<00:00, 4256.94it/s]
q-table.npz: 100%|██████████| 26.3M/26.3M [00:05<00:00, 4.67MB/s]


Iteration 8/10000


Training epochs playing against itself: 100%|██████████| 10000/10000 [00:15<00:00, 663.74it/s]
Updating Q-table: 100%|██████████| 10000/10000 [00:02<00:00, 4068.51it/s]
q-table.npz: 100%|██████████| 30.0M/30.0M [00:06<00:00, 4.58MB/s]


Iteration 9/10000


Training epochs playing against itself: 100%|██████████| 10000/10000 [00:15<00:00, 634.82it/s]
Updating Q-table: 100%|██████████| 10000/10000 [00:02<00:00, 3937.30it/s]
q-table.npz: 100%|██████████| 33.7M/33.7M [00:06<00:00, 4.84MB/s]


Iteration 10/10000


Training epochs playing against itself: 100%|██████████| 10000/10000 [00:15<00:00, 656.45it/s]
Updating Q-table: 100%|██████████| 10000/10000 [00:02<00:00, 4149.37it/s]


In [None]:
from brain.agent import QL, Random, AlphaBeta

# Create a fresh Q-learning agent in small 3x4 mode, then load its saved data
# (presumably the Q-table) from the given Hugging Face Hub repository.
ql = QL(small3x4_mode=True)
ql.load_from_hub(repo_id="ryanlinjui/darkchess-robot-brain-QL-small3x4")

In [None]:
# Evaluate the current `ql` agent against Random and AlphaBeta(1), AlphaBeta(2),
# AlphaBeta(4), AlphaBeta(6), with evaluate_epochs=5 and ignore_draw=True.
ql.evaluate(
    evaluate_agents=[Random(), *(AlphaBeta(level) for level in (1, 2, 4, 6))],
    evaluate_epochs=5,
    ignore_draw=True,
)

In [None]:
from brain.agent import QL, Random, AlphaBeta
# Create a fresh Q-learning agent in small 3x4 mode and load it from "tmp.npz".
# That path is the one written by the save_to_local cells in this notebook, so
# the file must already exist when this cell runs.
ql = QL(small3x4_mode=True)
ql.load_from_local(path="tmp.npz")

In [None]:
# Save the current `ql` agent to "tmp.npz" (the path the load_from_local cell reads).
ql.save_to_local("tmp.npz")

In [None]:
from brain.arena import Battle
from brain.agent import (
    Human,
    Random,
    MinMax,
    AlphaBeta
)

# Play the existing `ql` agent (player 1) against a Random agent (player 2)
# in small 3x4 mode with verbose=True.
battle = Battle(player1=ql, player2=Random(), verbose=True, small3x4_mode=True)
battle.play_games()

In [None]:
# Save the current `ql` agent to "tmp.npz" again, overwriting the same path.
ql.save_to_local("tmp.npz")

In [None]:

import numpy as np
# Keep only the Q-table entries that hold at least one value not exactly equal
# to 0.0 (i.e. drop entries whose values are all zero).
non_zero_entries = {
    key: row
    for key, row in ql.q_table.items()
    if not np.all(row == 0.0)
}

In [None]:
# Count the Q-table entries whose values are not all close to zero.
# NOTE: np.allclose applies a tolerance, so this count can differ from
# len(non_zero_entries), which uses an exact comparison against 0.0.
count = sum(
    not np.allclose(row, 0.0)
    for row in ql.q_table.values()
)

In [None]:
# Display the filtered Q-table entries as the cell's output.
non_zero_entries

In [None]:
# Display the number of entries counted as non-zero.
count
