<h1>
<hr style=" border:none; height:3px;">
<center>Computer Vision - Photo to Chess Board Project</center>
<hr style=" border:none; height:3px;">
</h1>

<center><img src='https://netacad.centralesupelec.fr/img/cs.jpg' width=200></center>

<h4><center>Louis LHOTTE | Paul-Alexandre MARENGHI</center></h4>

# 0 - Imports

In [28]:
from transformers import AutoProcessor, AutoModel
import torch
from PIL import Image
import requests
import os
import torch
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image
import cv2
from collections import defaultdict

# Hugging Face checkpoint used as the image feature extractor.
model_name = "facebook/dinov2-base"

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Load the checkpoint's processor and model; the model is moved onto `device`.
# NOTE(review): `processor` is not referenced by the cells visible below —
# preprocessing is done by the hand-written `transform` instead.
processor = AutoProcessor.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
model = model.to(device)

# I - Model

In [18]:
# Per-channel normalisation constants (ImageNet stats).
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Preprocessing applied to every PIL image before it is fed to the model:
# resize to 224x224, convert to a tensor, then normalise each channel.
_preprocess_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
]
transform = transforms.Compose(_preprocess_steps)

def embed_image(pil_img):
    """Embed a single PIL image with the module-level model.

    The image goes through the module-level ``transform``, gets a leading
    batch dimension, is moved to ``device`` and is passed to ``model`` with
    gradient tracking disabled.

    Args:
        pil_img: Image accepted by ``transform`` (callers in this notebook
            pass RGB PIL images).

    Returns:
        The model's ``pooler_output`` with the leading batch dimension
        squeezed away (presumably a 1-D feature vector — depends on the
        loaded model).
    """
    batch = transform(pil_img).unsqueeze(0)
    batch = batch.to(device)
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        pooled = model(batch).pooler_output
    return pooled.squeeze(0)


def load_reference_embeddings(ref_dir='./data/chess_pieces'):
    """Build one mean embedding per piece class from a folder of examples.

    ``ref_dir`` is expected to contain one sub-directory per class; the
    sub-directory name becomes the class label and every regular file inside
    it is opened as an image and embedded with ``embed_image``.

    Hidden entries (names starting with ".", e.g. ``.DS_Store`` or
    ``.ipynb_checkpoints``) and anything that is not a directory / regular
    file are skipped, so stray files no longer raise ``NotADirectoryError``
    or turn into bogus classes. Entries are visited in sorted order so the
    result does not depend on the filesystem's listing order.

    Args:
        ref_dir: Path of the directory holding the per-class sub-directories.

    Returns:
        dict mapping class label -> mean of that class's embeddings.
        Classes without any usable file are omitted.

    Raises:
        OSError: If ``ref_dir`` cannot be listed.
        Any exception raised by ``Image.open`` for a file that is not a
        readable image is propagated unchanged.
    """
    embeddings = defaultdict(list)
    for piece_type in sorted(os.listdir(ref_dir)):
        piece_folder = os.path.join(ref_dir, piece_type)
        # Only real, non-hidden sub-directories are piece classes.
        if piece_type.startswith(".") or not os.path.isdir(piece_folder):
            continue
        for fname in sorted(os.listdir(piece_folder)):
            fpath = os.path.join(piece_folder, fname)
            if fname.startswith(".") or not os.path.isfile(fpath):
                continue
            # convert() returns an independent RGB copy, so the file handle
            # can be released as soon as the `with` block exits.
            with Image.open(fpath) as raw:
                img = raw.convert("RGB")
            embeddings[piece_type].append(embed_image(img))
    return {k: torch.stack(v).mean(dim=0) for k, v in embeddings.items()}

def extract_board_squares(image_path):
    """Cut a board image into an 8x8 grid of square crops.

    The image is read with OpenCV, converted from BGR to RGB and sliced into
    a uniform 8x8 grid. No board detection or perspective correction happens
    here, so the image is assumed to already be cropped to the board.

    The cell height and width are derived separately from the image height
    and width, so non-square images are still divided into 8 rows and 8
    columns (for square images this matches the previous behaviour, which
    used the width for both axes). Pixels left over when a dimension is not
    a multiple of 8 are dropped from the bottom/right edge.

    Args:
        image_path: Path of the board image.

    Returns:
        List of 64 ``((row, col), PIL.Image)`` tuples in row-major order,
        where row 0 is the top strip of the image and col 0 the left strip.

    Raises:
        FileNotFoundError: If ``image_path`` does not point to a file.
        ValueError: If the file cannot be decoded as an image, or the image
            is smaller than 8 pixels along either axis.
    """
    img = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        if not os.path.isfile(image_path):
            raise FileNotFoundError(f"Board image not found: {image_path!r}")
        raise ValueError(f"Could not decode board image: {image_path!r}")

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    h, w = img.shape[:2]
    cell_h, cell_w = h // 8, w // 8
    if cell_h == 0 or cell_w == 0:
        raise ValueError(
            f"Board image is too small to split into 8x8 squares: {w}x{h}"
        )

    squares = []
    for row in range(8):
        y1 = row * cell_h
        for col in range(8):
            x1 = col * cell_w
            square_img = img[y1:y1 + cell_h, x1:x1 + cell_w]
            squares.append(((row, col), Image.fromarray(square_img)))
    return squares

def classify_board(image_path, ref_embeddings):
    """Label every square of a board image with its most similar class.

    Each square returned by ``extract_board_squares`` is embedded with
    ``embed_image`` and compared, by cosine similarity, against every
    reference embedding; the label with the highest similarity wins (the
    first one encountered wins ties).

    Args:
        image_path: Path of the board image.
        ref_embeddings: Mapping of class label -> reference embedding, as
            produced by ``load_reference_embeddings``.

    Returns:
        dict mapping ``(row, col)`` -> best-matching label.

    Raises:
        ValueError: If ``ref_embeddings`` is empty. Previously this silently
            produced a board full of ``None`` labels.
    """
    if not ref_embeddings:
        raise ValueError("ref_embeddings is empty; nothing to classify against")

    board_state = {}
    for (row, col), pil_img in extract_board_squares(image_path):
        square_emb = embed_image(pil_img)
        # Start below every possible similarity: cosine similarity can be
        # exactly -1, so -1 itself is not a safe sentinel.
        best_label, best_score = None, float("-inf")
        for label, ref_emb in ref_embeddings.items():
            sim = F.cosine_similarity(square_emb, ref_emb, dim=0).item()
            if sim > best_score:
                best_label, best_score = label, sim
        board_state[(row, col)] = best_label
    return board_state

In [26]:
# Piece name -> one-letter board symbol; built once instead of on every call.
_PIECE_SYMBOLS = {
    "pawn": "p",
    "rook": "r",
    "knight": "n",
    "bishop": "b",
    "queen": "q",
    "king": "k",
}


def piece_to_symbol(piece_label):
    """Translate a class label into a one-character board symbol.

    The label is matched case-insensitively. ``"empty"`` maps to ``"."``;
    otherwise the text before the first underscore is looked up as a piece
    name. Anything after the first underscore is ignored, so any extra
    information carried by the label (e.g. a colour suffix, if present) is
    not reflected in the symbol.

    Args:
        piece_label: Class label such as ``"pawn"`` or ``"rook_xyz"``. May be
            ``None`` or another non-string value, in which case the label is
            treated as unknown instead of raising ``AttributeError``.

    Returns:
        ``"."`` for an empty square, a lowercase piece letter for a known
        piece, or ``"?"`` for an unknown / missing label.
    """
    if not isinstance(piece_label, str):
        return "?"

    normalized = piece_label.strip().lower()
    if normalized == "empty":
        return "."

    name = normalized.split("_")[0]
    return _PIECE_SYMBOLS.get(name, "?")

def print_board_pretty(board_dict):
    """Print an 8x8 text rendering of a classified board.

    Every ``(row, col)`` key of ``board_dict`` is converted to a symbol with
    ``piece_to_symbol``; squares that are absent from the dict are shown as
    ``"."``. Lines are printed from row index 7 down to row index 0, each
    prefixed with ``row + 1``, followed by the file letters ``a``-``h``.

    NOTE(review): row index 0 ends up on the bottom line (labelled 1). If the
    rows come from an image scanned top-to-bottom, the printout is vertically
    mirrored with respect to that image — confirm the intended orientation.

    Args:
        board_dict: Mapping of ``(row, col)`` (both in 0..7) -> class label.
    """
    grid = [["."] * 8 for _ in range(8)]
    for (r, c), label in board_dict.items():
        grid[r][c] = piece_to_symbol(label)

    print()
    for rank in reversed(range(8)):
        # Each symbol is followed by a single space, including the last one.
        cells = "".join(f"{sym} " for sym in grid[rank])
        print(f"{rank+1} {cells}")
    print("  a b c d e f g h\n")

# II - Detection

In [27]:
# Build one reference embedding per piece class, then label every square of
# the sample board image and print the result as text.
REFERENCE_DIR = './data/chess_pieces'
BOARD_IMAGE = './data/chessboard/chessboard_1.jpg'

ref_embeddings = load_reference_embeddings(REFERENCE_DIR)
board = classify_board(BOARD_IMAGE, ref_embeddings)

print_board_pretty(board)


8 p r q q q p p r 
7 p q q q q q q r 
6 n q q q q q p r 
5 p q q q q q q r 
4 r q q q q q p r 
3 r r p q p p q n 
2 n q r n n n n n 
1 n r n r r r r n 
  a b c d e f g h

