# Mate-in-One Dataset Validation (Flipped)

This notebook validates a random sample from `data/mate_in_1_flipped.json` and visualizes the initial board and the board after the labeled move.

In [None]:
import json
import random
import sys
from pathlib import Path
from typing import Any, Dict, List, Tuple

import chess
import chess.svg
from IPython.display import SVG, display
from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn

# Put the parent of the current working directory on sys.path (once) so the
# project-local import that follows can be resolved; presumably
# train_supervised lives at that repository root -- confirm if the notebook moves.
REPO_ROOT = Path("..").resolve()
if str(REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(REPO_ROOT))

from train_supervised import iter_json_array

In [None]:
# Sampling configuration: dataset location, number of entries to keep, and the
# seed that makes the draw reproducible.
DATASET_PATH = "../data/mate_in_1_flipped.json"
SAMPLE_SIZE = 10000
SEED = 42

rng = random.Random(SEED)
sample_entries = []
stream_total = 0

# Single-pass reservoir sample over the entries yielded by iter_json_array.
# Entries are stored as (0-based stream index, entry). Once the reservoir is
# full, the entry at position idx overwrites a uniformly chosen slot with
# probability SAMPLE_SIZE / (idx + 1); randint is inclusive on both ends.
for idx, entry in enumerate(iter_json_array(DATASET_PATH)):
    stream_total = idx + 1
    if len(sample_entries) >= SAMPLE_SIZE:
        j = rng.randint(0, idx)
        if j < SAMPLE_SIZE:
            sample_entries[j] = (idx, entry)
        continue
    sample_entries.append((idx, entry))

# Cell output: (number of sampled entries, number of entries streamed).
len(sample_entries), stream_total

In [None]:
def _get_field(item: Dict[str, Any], *keys: str) -> Any:
    for key in keys:
        if key in item:
            return item[key]
    return None


def _parse_labeled_move(item: Dict[str, Any]) -> chess.Move:
    """Extract the labeled move of a dataset entry as a ``chess.Move``.

    The raw value is taken from the first present key among ``"Moves"``,
    ``"move"`` and ``"moves"``. A list contributes its first element; any
    other value is converted to a string and its first whitespace-separated
    token is used.

    Args:
        item: One dataset entry.

    Returns:
        The move parsed from the selected UCI string.

    Raises:
        ValueError: If the move field is missing or ``None``, the list is
            empty, or the string contains no move token. Errors raised by
            ``chess.Move.from_uci`` for a malformed UCI string propagate
            unchanged.
    """
    moves_raw = _get_field(item, "Moves", "move", "moves")
    if moves_raw is None:
        raise ValueError("Missing move/Moves field")
    if isinstance(moves_raw, list):
        if not moves_raw:
            raise ValueError("Empty moves list")
        uci = str(moves_raw[0]).strip()
    else:
        # split() with no arguments already ignores surrounding whitespace and
        # yields [] for a blank string; report that case explicitly instead of
        # letting indexing fail with a bare IndexError.
        tokens = str(moves_raw).split()
        if not tokens:
            raise ValueError("Empty moves string")
        uci = tokens[0]
    return chess.Move.from_uci(uci)


def _parse_fen(item: Dict[str, Any]) -> str:
    """Return the entry's FEN string with surrounding whitespace removed.

    The value is read from ``"FEN"`` or, failing that, ``"fen"``.

    Raises:
        ValueError: If neither key is present or the value found is falsy
            (for example ``None`` or an empty string).
    """
    raw_fen = _get_field(item, "FEN", "fen")
    if raw_fen:
        return str(raw_fen).strip()
    raise ValueError("Missing FEN/fen field")


def _collect_alternative_mates(board: chess.Board, labeled_move: chess.Move) -> List[chess.Move]:
    """List every legal move, other than ``labeled_move``, that gives checkmate.

    Each legal move is played on ``board``, tested with ``is_checkmate()`` and
    then taken back, so every push is paired with a pop and the board is left
    in the position it was passed in.
    """
    other_mates: List[chess.Move] = []
    for candidate in board.legal_moves:
        board.push(candidate)
        delivers_mate = board.is_checkmate()
        board.pop()
        if delivers_mate and candidate != labeled_move:
            other_mates.append(candidate)
    return other_mates


def validate_sample(
    sample_entries: List[Tuple[int, Dict[str, Any]]],
) -> Tuple[int, int, int]:
    """Validate each sampled entry, printing a line for every problem found.

    For every ``(stream_index, item)`` pair the entry is parsed into a board
    and a labeled move, then checked in this order:

    * a parse failure counts as an error and ends the checks for that entry;
    * an illegal labeled move counts as an error and ends the checks;
    * a legal labeled move that does not give checkmate counts as an error;
    * any other mating move in the position counts as a warning. This check
      also runs when the labeled move itself failed the checkmate test.

    Args:
        sample_entries: Pairs of stream index and dataset entry.

    Returns:
        ``(entries checked, error count, warning count)``; ``(0, 0, 0)`` when
        ``sample_entries`` is empty.
    """
    if not sample_entries:
        return 0, 0, 0

    error_count = 0
    warning_count = 0
    columns = (
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        TextColumn("{task.completed}/{task.total}"),
        TimeElapsedColumn(),
    )

    with Progress(*columns) as progress:
        task_id = progress.add_task("Validating sample", total=len(sample_entries))
        for stream_index, item in sample_entries:
            try:
                board = chess.Board(_parse_fen(item))
                labeled_move = _parse_labeled_move(item)
            except Exception as exc:
                # Any parsing problem is reported per entry rather than
                # aborting the whole validation run.
                error_count += 1
                print(f"ERROR: Position {stream_index} - parse failed: {exc}")
            else:
                if labeled_move in board.legal_moves:
                    # Play the move only long enough to test for mate, then
                    # restore the position for the alternatives scan.
                    board.push(labeled_move)
                    gives_mate = board.is_checkmate()
                    board.pop()
                    if not gives_mate:
                        error_count += 1
                        print(f"ERROR: Position {stream_index} - not mate!")

                    alternatives = _collect_alternative_mates(board, labeled_move)
                    if alternatives:
                        warning_count += 1
                        alt_moves = " ".join(m.uci() for m in alternatives)
                        print(f"WARNING: Position {stream_index} has multiple mates: {alt_moves}")
                else:
                    error_count += 1
                    print(
                        f"ERROR: Position {stream_index} - labeled move is illegal: {labeled_move.uci()}"
                    )
            # Exactly one progress tick per entry, whatever the outcome.
            progress.advance(task_id)

    return len(sample_entries), error_count, warning_count

In [None]:
# Run the validation pass over the sampled entries and report the totals.
total_checked, errors, warnings = validate_sample(sample_entries)

print(
    f"Checked {total_checked} positions.",
    f"Errors: {errors}",
    f"Warnings (multiple mates): {warnings}",
    sep="\n",
)

In [None]:
# Visualize a random position from the sample.
# Seeding the module-level RNG makes the pick reproducible across runs.
random.seed(SEED)
stream_index, item = random.choice(sample_entries)

board = chess.Board(_parse_fen(item))
labeled_move = _parse_labeled_move(item)

# Raw fields as stored in the entry, shown alongside the parsed values.
moves_field = _get_field(item, "Moves", "moves", "move")
themes_field = _get_field(item, "Themes", "themes")

print(f"Index: {stream_index}")
print(f"PuzzleId: {item.get('PuzzleId')}")
print(f"GameId: {item.get('GameId')}")
print(f"FEN: {board.fen()}")
print(f"Labeled move: {labeled_move.uci()}")
print(f"All moves: {moves_field}")
print(f"Themes: {themes_field}")
print(f"Rating: {item.get('Rating')} (RD: {item.get('RatingDeviation')})")

# Initial board
print("Initial position")
display(SVG(chess.svg.board(board=board, size=400)))

# Board after labeled move. Board.push() does not check legality, and the
# sample can contain entries whose labeled move validate_sample reports as
# illegal, so only play the move when it is legal in this position.
if labeled_move in board.legal_moves:
    board.push(labeled_move)
    print("After labeled move")
    display(SVG(chess.svg.board(board=board, size=400, lastmove=labeled_move)))
else:
    print(
        f"Labeled move {labeled_move.uci()} is illegal in this position; "
        "skipping the post-move board."
    )