# Chess Position Recognizer - Colab Training
Train on the full 200k-image 3D chess dataset with GPU acceleration.

Dataset: https://zenodo.org/records/10607059

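**Important:** This notebook keeps its working directory (the cloned repository, the generated tiles and the trained model) on Google Drive to avoid running out of Colab disk space. The raw dataset itself is downloaded to the VM's local `/content` disk and is deleted when the runtime disconnects.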

In [None]:
# Mount Google Drive FIRST
# The later cells use paths relative to a working directory inside the mounted
# Drive, so this cell has to run before any of them.
from google.colab import drive
drive.mount('/content/drive')

# Create working directory in Drive
# (-p keeps the command quiet when the directory already exists from an
# earlier session.)
!mkdir -p /content/drive/MyDrive/chess_training
# Make it the notebook's current directory; the clone, tile and model paths
# used further down are all resolved relative to it.
%cd /content/drive/MyDrive/chess_training

In [None]:
# Install dependencies
!pip install torch torchvision pillow

In [None]:
# Clone your fork
# Replace YOUR_USERNAME with the GitHub account that owns the fork before
# running this cell.
# The clone lands in the current working directory, which is the Drive folder
# selected by the mount cell.
# NOTE(review): on a re-run the target directory may already exist from an
# earlier session; git clone is then expected to report an error while the
# %cd below still enters the existing checkout - confirm this is the intended
# workflow.
!git clone https://github.com/YOUR_USERNAME/chessimg2pos.git
# Later cells use paths relative to the repository root ('src', 'images/...',
# 'models/...'), so switch into it.
%cd chessimg2pos

In [None]:
# Download dataset to local (faster processing)
# Note: Dataset stays in /content (will be deleted on disconnect)
# But tiles will be saved to Drive
!wget -O /content/dataset.tgz 'https://zenodo.org/records/10607059/files/Dataset.tgz?download=1'
!tar -xzf /content/dataset.tgz -C /content/
!ln -s /content/Dataset ./data
!rm /content/dataset.tgz
print("Dataset downloaded!")

In [None]:
# Generate tiles from full dataset (200k images)
#
# For every entry of the FEN label file, the matching board image is cut into
# tiles by get_chessboard_tiles and each tile is written to
#     <tiles_dir>/<img_id>/<square>_<label>.png
# where <label> is the FEN piece letter, or "1" for an empty square.
import os
import json
import sys
from PIL import Image

sys.path.insert(0, 'src')
from chessimg2pos.chessboard_image import get_chessboard_tiles

train_dir = "data/train"
fen_file = "data/labels/train_fen.json"
tiles_dir = "images/tiles_full"

# File letters of the board, in the order FEN lists the squares of a rank.
# Deliberately not called `files`: the training cell imports
# `google.colab.files` under that name.
BOARD_FILES = "abcdefgh"
# How often (in visited label entries) a progress line is printed.
PROGRESS_EVERY = 1000
# Only the first few failures are printed in detail so that a systematic
# problem is visible without flooding the output.
MAX_REPORTED_ERRORS = 10


def fen_board_to_labels(fen_board):
    """Expand a FEN piece-placement field into 64 (square, label) pairs.

    Args:
        fen_board: The first field of a FEN string, e.g.
            "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR".

    Returns:
        A list of 64 ``(square, label)`` tuples in FEN order (rank 8 first,
        file a first within a rank). ``square`` is e.g. "e4"; ``label`` is
        the piece letter, or "1" for an empty square.

    Raises:
        ValueError: If the field does not describe 8 ranks of 8 squares.
            Validating up front prevents writing a partial or misaligned
            tile set for a malformed label.
    """
    ranks = fen_board.split("/")
    if len(ranks) != 8:
        raise ValueError(f"expected 8 ranks, got {len(ranks)}: {fen_board!r}")

    labels = []
    for row_idx, rank in enumerate(ranks):
        rank_labels = []
        for char in rank:
            if char.isdigit():
                # A digit n stands for n consecutive empty squares.
                rank_labels.extend("1" * int(char))
            else:
                rank_labels.append(char)
        if len(rank_labels) != 8:
            raise ValueError(
                f"rank {8 - row_idx} has {len(rank_labels)} squares: {fen_board!r}"
            )
        for col_idx, label in enumerate(rank_labels):
            labels.append((f"{BOARD_FILES[col_idx]}{8 - row_idx}", label))
    return labels


os.makedirs(tiles_dir, exist_ok=True)

with open(fen_file) as f:
    fens = json.load(f)

print(f"Processing {len(fens)} images...")

success = 0
failed = 0
reported_errors = 0

for seen, (img_id, fen_full) in enumerate(fens.items()):
    # Progress is keyed on the number of entries visited, not on `success`;
    # otherwise the same line is printed on every iteration while images fail.
    if seen % PROGRESS_EVERY == 0:
        print(f"Processed {seen}...")

    img_path = os.path.join(train_dir, f"CV_{img_id.zfill(7)}.jpg")
    if not os.path.exists(img_path):
        failed += 1
        continue

    try:
        tiles = get_chessboard_tiles(img_path, use_grayscale=True)
        if len(tiles) != 64:
            failed += 1
            continue

        # Only the piece-placement field of the FEN is needed for labelling.
        labels = fen_board_to_labels(fen_full.split()[0])

        sub_dir = os.path.join(tiles_dir, img_id)
        os.makedirs(sub_dir, exist_ok=True)

        # NOTE(review): assumes get_chessboard_tiles returns the tiles in the
        # same order as FEN (rank 8 to 1, file a to h) - confirm.
        for tile, (sqr, label) in zip(tiles, labels):
            tile.save(os.path.join(sub_dir, f"{sqr}_{label}.png"))

        success += 1
    except Exception as e:
        # Best effort: one bad image must not abort a 200k-image run, but the
        # first few errors are surfaced instead of being silently discarded.
        failed += 1
        if reported_errors < MAX_REPORTED_ERRORS:
            print(f"Failed on image {img_id}: {e!r}")
            reported_errors += 1

print(f"\nSuccess: {success}")
print(f"Failed: {failed}")

In [None]:
# Train with GPU
from chessimg2pos import ChessRecognitionTrainer

trainer = ChessRecognitionTrainer(
    images_dir="images/dummy",
    model_path="models/model_full_200k.pt",
    generate_tiles=False,
    epochs=20,
    overwrite=True
)

# Temporarily rename tiles
!mv images/tiles_full images/tiles

model, device, accuracy = trainer.train(classifier="ultra")

print(f"\nFinal accuracy: {accuracy:.2%}")
print(f"Model saved to: models/model_full_200k.pt")

# Download the model
from google.colab import files
files.download('models/model_full_200k.pt')