In [1]:
import chess
import chess.engine
import math
from typing import List
import torch
import time
import random


In [2]:
from pathlib import Path
import sys
sys.path.append(str(Path.cwd() / ".." / "src"))
from mcts import Node, Edge, MCTS
from model_wrapper import ModelWrapper
from leela_cnn import LeelaCNN, board_to_tensor
import matplotlib.pyplot as plt
from matplotlib import animation
import matplotlib.patches as patches

In [None]:
# init logic
model_name = "leela_cnn.pth"
model_path = "../models/" + model_name
# NOTE(review): 10 and 128 are presumably the architecture sizes the checkpoint
# was trained with — confirm against leela_cnn.py before changing them.
model = LeelaCNN(10, 128)
# map_location="cpu" lets a checkpoint that was saved on a GPU/MPS device
# deserialize on any machine; load_state_dict then copies the values into the
# parameters of `model`, so the model's own device is unaffected.
model.load_state_dict(torch.load(model_path, map_location="cpu", weights_only=True))
# The model is only used for inference below, so switch off training-time
# behaviour (dropout, batch-norm statistic updates). Harmless if ModelWrapper
# already does this.
model.eval()

# wrap the model
wrapped_model = ModelWrapper(model)

# create mcts with the model
mcts = MCTS(wrapped_model)


In [4]:
# Path to the Stockfish binary, relative to the notebook's working directory.
# Used both for the opponent engine and by configure_bot_engine().
BOT_STOCKFISH_PATH = "../../stockfish/stockfish-macos-m1-apple-silicon"

# Stockfish "Skill Level" for the Stockfish-backed bot variant (valid range 0–20).
BOT_SKILL_LEVEL = 2       # <<< YOU SET THIS (0–20)

# Per-move time budgets in seconds: BOT_TIME for the bot under test,
# ORACLE_TIME for the Stockfish opponent.
BOT_TIME = 0.001
ORACLE_TIME = 0.001

# Lazily created by configure_bot_engine(); stays None until first use.
bot_engine = None


In [5]:

# Approximate Elo rating for each Stockfish "Skill Level", used as the baseline
# rating when converting a match score into an Elo estimate.
# NOTE(review): values were read off a plot — source not recorded here; confirm.
# Only levels 0–18 are mapped; levels 19 and 20 have no entry.
SKILL_TO_ELO = {
    0: 1347,
    1: 1444,
    2: 1566,
    3: 1729,
    4: 1953,
    5: 2197,
    6: 2383,
    7: 2518,
    8: 2624,
    9: 2711,
    10: 2786,
    11: 2851,
    12: 2910,
    13: 2963,
    14: 3012,
    15: 3057,
    16: 3099,
    17: 3139,
    18: 3176,
}


In [6]:
def estimate_elo(score: float, baseline_elo: int) -> float:
    """
    Convert a match score against a rated opponent into an Elo estimate.

    Args:
        score: Average points per game (0.0 = lost all, 1.0 = won all).
            Values outside [0.01, 0.99] are clamped to that interval so the
            logarithm below stays finite.
        baseline_elo: Rating of the opponent the score was achieved against.

    Returns:
        baseline_elo plus the rating difference implied by the logistic Elo
        model: 400 * log10(score / (1 - score)).
    """
    capped = min(0.99, score)
    clamped = max(0.01, capped)
    odds = clamped / (1 - clamped)
    return baseline_elo + 400 * math.log10(odds)

In [7]:
def configure_engine_skill(engine, time, skill_level: int) -> chess.engine.Limit:
    """
    Switch an engine to "Skill Level" mode and build its per-move search limit.

    Args:
        engine: Engine object exposing `configure(options)`.
        time: Time budget passed to `chess.engine.Limit(time=...)`.
            Note that this parameter shadows the stdlib `time` module
            inside the function.
        skill_level: Value for the engine's "Skill Level" option.

    Returns:
        A chess.engine.Limit carrying the given time budget.
    """
    # UCI_LimitStrength is turned off explicitly so strength is governed by
    # "Skill Level" alone.
    skill_options = {"UCI_LimitStrength": False}
    skill_options["Skill Level"] = skill_level
    engine.configure(skill_options)

    per_move_limit = chess.engine.Limit(time=time)
    return per_move_limit


In [8]:
def configure_bot_engine():
    """
    Start the bot's Stockfish process on first use and cache it globally.

    Later calls are no-ops: the engine stored in the module-level
    `bot_engine` is reused. The process is launched from BOT_STOCKFISH_PATH.
    """
    global bot_engine
    if bot_engine is not None:
        # Already running; keep the existing process.
        return
    bot_engine = chess.engine.SimpleEngine.popen_uci(BOT_STOCKFISH_PATH)



In [9]:

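# Disabled alternative: a Stockfish-backed bot playing at BOT_SKILL_LEVEL.
# Kept for reference; the active my_bot_move defined below uses MCTS instead.
# def my_bot_move(board: chess.Board) -> chess.Move:
#     configure_bot_engine()
#     limit = configure_engine_skill(bot_engine, BOT_TIME, BOT_SKILL_LEVEL)
#     result = bot_engine.play(board, limit)
#     return result.move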

In [None]:
def my_bot_move(board: chess.Board) -> chess.Move:
    """
    Choose the bot's move for `board` using the module-level MCTS searcher.

    BOT_TIME (seconds) is converted to whole nanoseconds and handed to
    `mcts.ponder_time` as the search budget; whatever that call returns is
    passed straight back to the caller (presumably the selected move —
    confirm against mcts.py).
    """
    nanoseconds_per_second = 1e9
    budget_ns = int(BOT_TIME * nanoseconds_per_second)
    return mcts.ponder_time(board=board, time_ns=budget_ns)


In [11]:
def play_game_vs_stockfish(
    engine: chess.engine.SimpleEngine,
    opponent_skill: int,
    my_is_white: bool,
) -> float:
    """
    Play one game from the starting position between the bot and Stockfish.

    The bot's moves come from `my_bot_move`; the opponent's moves come from
    `engine`, configured to Skill Level `opponent_skill` with an ORACLE_TIME
    budget per move.

    Args:
        engine: Running Stockfish engine used as the opponent.
        opponent_skill: Skill Level to configure on `engine`.
        my_is_white: True if the bot plays White, False if it plays Black.

    Returns:
        The bot's score: 1.0 for a win, 0.0 for a loss, 0.5 otherwise.
    """
    game = chess.Board()
    opponent_limit = configure_engine_skill(engine, ORACLE_TIME, opponent_skill)
    bot_plays_white = bool(my_is_white)

    while not game.is_game_over():
        white_to_move = game.turn == chess.WHITE
        # The bot moves exactly when the side to move is the bot's colour.
        if white_to_move == bot_plays_white:
            next_move = my_bot_move(game)
        else:
            next_move = engine.play(game, opponent_limit).move
        game.push(next_move)

    outcome = game.result()  # "1-0", "0-1", "1/2-1/2"
    if outcome not in ("1-0", "0-1"):
        return 0.5

    white_won = outcome == "1-0"
    return 1.0 if white_won == bot_plays_white else 0.0


In [12]:
def run_skill_baseline_experiment(
    engine_path: str,
    games_per_baseline: int = 10,
    skills_lower: int = 0,
    skills_upper: int = 5
) -> float:
    """
    Estimate the bot's Elo by playing it against a range of Stockfish Skill Levels.

    For every Skill Level from `skills_lower` to `skills_upper` (both
    inclusive) the bot plays `games_per_baseline` games, taking White in the
    even-numbered games and Black in the odd-numbered ones. Each level's
    average score is converted to an Elo estimate with `estimate_elo`, using
    SKILL_TO_ELO[level] as the opponent rating. Levels where the bot scored
    exactly 0.0 or 1.0 are skipped as uninformative. The result is the
    unweighted mean of the remaining estimates.

    Args:
        engine_path: Path to the Stockfish binary used as the opponent.
        games_per_baseline: Games to play per Skill Level; must be >= 1.
        skills_lower: Lowest Skill Level to test (inclusive).
        skills_upper: Highest Skill Level to test (inclusive).

    Returns:
        The mean estimated Elo, or NaN when no Skill Level produced an
        informative score (including when the requested range is empty).

    Raises:
        ValueError: If `games_per_baseline` is below 1, or if a requested
            Skill Level has no entry in SKILL_TO_ELO.
    """
    # Validate before launching the engine or playing any games, so a bad
    # argument cannot waste a long run or discard results already collected.
    if games_per_baseline < 1:
        raise ValueError(
            f"games_per_baseline must be at least 1, got {games_per_baseline}"
        )

    baseline_skills = list(range(skills_lower, skills_upper + 1))

    unmapped = [skill for skill in baseline_skills if skill not in SKILL_TO_ELO]
    if unmapped:
        raise ValueError(
            f"No Elo mapping in SKILL_TO_ELO for skill level(s) {unmapped}"
        )

    # Rough estimate only: assumes about 80 plies per game, each costing one
    # bot move or one opponent move at its configured time budget.
    print(f"Estimated running time: {80 * games_per_baseline * len(baseline_skills) * (BOT_TIME + ORACLE_TIME):.2f} seconds")

    engine = chess.engine.SimpleEngine.popen_uci(engine_path)

    elo_estimates = []
    used_baselines = []

    try:
        for skill in baseline_skills:
            total_score = 0.0

            base_elo = SKILL_TO_ELO[skill]

            print(f"\n=== Testing vs Skill {skill} (≈ {base_elo} Elo) ===")
            print("Progress:", end="", flush=True)

            for g in range(games_per_baseline):
                # Alternate colours so neither side's first-move advantage
                # biases the score.
                my_is_white = (g % 2 == 0)
                score = play_game_vs_stockfish(
                    engine,
                    opponent_skill=skill,
                    my_is_white=my_is_white,
                )

                total_score += score

                # progress bar, redrawn in place via the leading "\r"
                done = g + 1
                bar_len = 20
                filled = int(bar_len * done / games_per_baseline)
                bar = "[" + "#" * filled + "-" * (bar_len - filled) + "]"
                print(f"\rProgress: {bar} {done}/{games_per_baseline}", end="", flush=True)

            print()

            avg_score = total_score / games_per_baseline

            # A clean sweep either way only bounds the rating difference, so
            # it cannot be turned into a point estimate.
            if avg_score == 0.0 or avg_score == 1.0:
                print(f"SKIPPED — score={avg_score:.3f} (uninformative)")
                continue

            # Elo formula
            est = estimate_elo(avg_score, base_elo)

            used_baselines.append(skill)
            elo_estimates.append(est)

            print(f"Skill {skill} result: score={avg_score:.3f} → est Elo={est:.1f}")

    finally:
        # Always shut the engine process down, even if a game raised.
        engine.close()

    print("\n==============================")
    if not elo_estimates:
        print("No informative baselines. Cannot estimate Elo.")
        return float("nan")

    final_elo = sum(elo_estimates) / len(elo_estimates)
    # NOTE(review): BOT_SKILL_LEVEL only applies to a Stockfish-backed bot;
    # confirm it is still meaningful for the bot my_bot_move currently uses.
    print(f"Bot Skill Level: {BOT_SKILL_LEVEL}")
    print(f"Used baseline skills: {used_baselines}")
    print(f"Estimated Bot Elo: {final_elo:.1f}")
    print("==============================")

    return final_elo


In [13]:
# Run the experiment: 20 games against each Stockfish Skill Level from 4 to 10
# (inclusive), using the binary at BOT_STOCKFISH_PATH as the opponent engine.
estimated = run_skill_baseline_experiment(
    engine_path=BOT_STOCKFISH_PATH,
    games_per_baseline=20,
    skills_lower=4,
    skills_upper=10
)


Estimated running time: 22.40 seconds

=== Testing vs Skill 4 (≈ 1953 Elo) ===
Progress: [####################] 20/20
SKIPPED — score=0.000 (uninformative)

=== Testing vs Skill 5 (≈ 2197 Elo) ===
Progress: [####################] 20/20
SKIPPED — score=0.000 (uninformative)

=== Testing vs Skill 6 (≈ 2383 Elo) ===
Progress: [####################] 20/20
SKIPPED — score=0.000 (uninformative)

=== Testing vs Skill 7 (≈ 2518 Elo) ===
Progress: [####################] 20/20
SKIPPED — score=0.000 (uninformative)

=== Testing vs Skill 8 (≈ 2624 Elo) ===
Progress: [####################] 20/20
SKIPPED — score=0.000 (uninformative)

=== Testing vs Skill 9 (≈ 2711 Elo) ===
Progress: [####################] 20/20
SKIPPED — score=0.000 (uninformative)

=== Testing vs Skill 10 (≈ 2786 Elo) ===
Progress: [####################] 20/20
SKIPPED — score=0.000 (uninformative)

No informative baselines. Cannot estimate Elo.
