YOU SHOULD PROBABLY RUN THIS ON GOOGLE COLAB!!!

In [1]:
!pip install torch transformers datasets pandas peft

Collecting datasets
  Downloading datasets-3.3.2-py3-none-any.whl.metadata (19 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting 

In [2]:
import pandas as pd
import torch
import ast
from peft import LoraConfig, get_peft_model, PeftModel
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from datasets import load_dataset, Dataset
import random

In [3]:


df = pd.read_csv("chess_challenges_train_10k.csv")
df["Move"] = df["Move"].apply(ast.literal_eval)  # Ensures it's a real list
df["Win Probability"] = df["Win Probability"].apply(ast.literal_eval)  # Convert string to list of floats

# Now convert Win Probability list to a list of strings
df["Win Probability"] = df["Win Probability"].apply(lambda x: [str(prob) for prob in x])
dataset = Dataset.from_pandas(df)

df = pd.read_csv("chess_challenges_test_2k.csv")
df["Move"] = df["Move"].apply(ast.literal_eval)  # Ensures it's a real list
df["Win Probability"] = df["Win Probability"].apply(ast.literal_eval)  # Convert string to list of floats

# Now convert Win Probability list to a list of strings
df["Win Probability"] = df["Win Probability"].apply(lambda x: [str(prob) for prob in x])
dataset_eval = Dataset.from_pandas(df)


In [4]:
print(dataset_eval[4])

{'FEN': '8/5k2/1pp3pp/3p4/1P2bP1P/P1R3P1/5K2/8 b - - 1 32', 'Move': ['d5d4', 'b6b5', 'c6c5', 'f7e7', 'g6g5', 'e4b1', 'e4d3', 'f7g8', 'h6h5', 'f7e8', 'f7f8', 'f7g7', 'e4f5', 'e4h1', 'f7f6', 'f7e6', 'e4g2', 'e4f3', 'e4c2'], 'Win Probability': ['0.14358734593333494', '0.11965646568318677', '0.11507973387664883', '0.10814488465274963', '0.10779026169080513', '0.10638197729176951', '0.1056839310673574', '0.10533642481155803', '0.10464443579540594', '0.10361398030487234', '0.10157996710955025', '0.10057629832289083', '0.09958144717646433', '0.09697119469294446', '0.09285968753405982', '0.09224121164974286', '0.07688776954513865', '0.0718211393064348', '0.06822522266416714']}


In [5]:
# DATASET construction
from collections import defaultdict

def fen_to_description(fen: str) -> str:
    """
    Converts a FEN (Forsyth-Edwards Notation) string into a human-readable chessboard description.

    Args:
        fen (str): The FEN string representing the board state.

    Returns:
        str: A formatted description of the board state.
    """
    piece_map = {
        "K": "King",
        "Q": "Queen",
        "R": "Rook",
        "B": "Bishop",
        "N": "Knight",
        "P": "Pawn",
        "k": "King",
        "q": "Queen",
        "r": "Rook",
        "b": "Bishop",
        "n": "Knight",
        "p": "Pawn",
    }

    try:
        fen_parts = fen.split()
        if len(fen_parts) < 2:
            raise ValueError(
                "Invalid FEN format. Ensure it has at least a board position and turn information."
            )

        ranks = fen_parts[0].split("/")
        if len(ranks) != 8:
            raise ValueError("Invalid FEN format. The board should have 8 ranks.")

        turn = "White to move." if fen_parts[1] == "w" else "Black to move."
        board = []

        for r, rank in enumerate(ranks):
            row = []
            file = 0
            for char in rank:
                if char.isdigit():
                    file += int(char)
                elif char in piece_map:
                    row.append((char, file, 8 - r))  # (Piece, File, Rank)
                    file += 1
                else:
                    raise ValueError(f"Invalid character '{char}' in FEN notation.")
            board.extend(row)

        piece_positions = defaultdict(list)

        for piece, file, rank in board:
            color = "White" if piece.isupper() else "Black"
            piece_type = piece_map[piece]
            position = f"{chr(file + 97)}{rank}"
            piece_positions[(color, piece_type)].append(position)

        description = [turn]

        for (color, piece_type), positions in sorted(
            piece_positions.items(), key=lambda x: (x[0][0], x[0][1])
        ):
            position_text = ", ".join(positions)
            description.append(
                f"{color} {piece_type}{'s' if len(positions) > 1 else ''} on {position_text}."
            )

        return "\n".join(description)

    except ValueError as e:
        return f"Error processing FEN: {e}"
    except Exception as e:
        return f"Unexpected error: {str(e)}"

def extract_best_move(example):
    # Tokenize the question
    moves = example["Move"][0]
    random.shuffle(example["Move"])
    board = fen_to_description(example["FEN"]) + "\nThe best move is: "

    # Tokenize the label (treat as text)
    # label_tokens = tokenizer(example["label"], padding=False, truncation=True)

    return {
        "Board": board,
        "Move": moves,
    }

processed_dataset = dataset.map(extract_best_move)
processed_dataset_eval = dataset_eval.map(extract_best_move)

Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

In [6]:
print(processed_dataset[100]["Board"])

White to move.
Black Bishops on b8, f5.
Black King on e8.
Black Knights on c6, f6.
Black Pawns on b7, f7, g7, h7, a6, e6, d5.
Black Queen on d8.
Black Rooks on a8, h8.
White Bishops on b2, e2.
White King on g1.
White Knights on b5, f3.
White Pawns on c4, d4, b3, a2, f2, g2, h2.
White Queen on d1.
White Rooks on a1, f1.
The best move is: 


In [7]:
# from transformers import DataCollatorForLanguageModeling

# data_collator = DataCollatorForLanguageModeling(
#     tokenizer=tokenizer,
#     mlm=False  # No masked language modeling for causal LMs
# )

# train_dataloader = DataLoader(tokenized_dataset, batch_size=1, shuffle=True, collate_fn=data_collator)


In [7]:
def preprocess_function(example):
    prompt = f"<answer> {example['Move']} </answer>"
    #prompt = example["Move"]
    input_text = example["Board"]

    tokenized = tokenizer(
        input_text + "\n" + prompt,
        truncation=True,
        padding="max_length",
        max_length=512
    )

    labels = tokenized["input_ids"].copy()
    answer_tokens = tokenizer.encode("<answer>", add_special_tokens=False)

    if answer_tokens[0] in tokenized["input_ids"]:
        answer_start = tokenized["input_ids"].index(answer_tokens[0])
        labels[:answer_start] = [-100] * answer_start

    tokenized["labels"] = labels
    assert len(tokenized["labels"]) == len(tokenized["input_ids"]), "Mismatch in tokenized label length!"

    return tokenized

In [9]:
# ---- 1. Load Model & Tokenizer ----
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B")
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-1.5B")

lora_config = LoraConfig(
        r=128,
        lora_alpha=32,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM"
    )

model = get_peft_model(model, lora_config)

# ---- 2. Prepare Dataset ----
tokenized_dataset = processed_dataset.map(preprocess_function, batched=False)
tokenized_dataset_eval = processed_dataset_eval.map(preprocess_function, batched=False)

# ---- 3. Training Arguments ----
training_args = TrainingArguments(
    output_dir="./chess_llm_finetuned",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=8,
    num_train_epochs=3,
    optim="adamw_torch",
    learning_rate=2e-5,
    weight_decay=0.01,
    save_strategy="epoch",
    eval_strategy="epoch",
    logging_steps=10,
    bf16=True,  # Use bfloat16 for memory efficiency
    push_to_hub=False,  # Set to True if using Hugging Face Hub
    report_to="wandb",  # Log to Weights & Biases
    fp16=False,  # bfloat16 preferred over fp16 for stability
)

# ---- 4. Trainer API ----
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    eval_dataset=tokenized_dataset_eval,
    tokenizer=tokenizer,
)

# ---- 5. Train the Model ----
trainer.train()

# ---- 6. Save Model ----
trainer.save_model("./chess_llm_finetuned")
tokenizer.save_pretrained("./chess_llm_finetuned")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/7.23k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/684 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/138 [00:00<?, ?B/s]

Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

  trainer = Trainer(
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mlmitbo[0m ([33mlukemitbo[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss
1,0.0875,0.078076
2,0.0348,0.03474


('./chess_llm_finetuned/tokenizer_config.json',
 './chess_llm_finetuned/special_tokens_map.json',
 './chess_llm_finetuned/vocab.json',
 './chess_llm_finetuned/merges.txt',
 './chess_llm_finetuned/added_tokens.json',
 './chess_llm_finetuned/tokenizer.json')

The purpose of the next few cells is to save the model locally (allowing you to disconnect from A100 and reconnect to a T4 so that you don't burn credits) and then you can upload the model once you reconnect

In [10]:
!zip -r chess_llm_finetuned.zip ./chess_llm_finetuned

  adding: chess_llm_finetuned/ (stored 0%)
  adding: chess_llm_finetuned/added_tokens.json (deflated 67%)
  adding: chess_llm_finetuned/checkpoint-157/ (stored 0%)
  adding: chess_llm_finetuned/checkpoint-157/added_tokens.json (deflated 67%)
  adding: chess_llm_finetuned/checkpoint-157/trainer_state.json (deflated 72%)
  adding: chess_llm_finetuned/checkpoint-157/README.md (deflated 66%)
  adding: chess_llm_finetuned/checkpoint-157/training_args.bin (deflated 51%)
  adding: chess_llm_finetuned/checkpoint-157/vocab.json (deflated 61%)
  adding: chess_llm_finetuned/checkpoint-157/tokenizer.json (deflated 81%)
  adding: chess_llm_finetuned/checkpoint-157/tokenizer_config.json (deflated 83%)
  adding: chess_llm_finetuned/checkpoint-157/optimizer.pt (deflated 7%)
  adding: chess_llm_finetuned/checkpoint-157/adapter_model.safetensors (deflated 9%)
  adding: chess_llm_finetuned/checkpoint-157/rng_state.pth (deflated 25%)
  adding: chess_llm_finetuned/checkpoint-157/scheduler.pt (deflated 56%)

In [11]:
from google.colab import files
files.download("chess_llm_finetuned.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [8]:
!unzip 'chess_llm_finetuned (1).zip' -d ./chess_llm_finetuned

Archive:  chess_llm_finetuned (1).zip
   creating: ./chess_llm_finetuned/chess_llm_finetuned/
  inflating: ./chess_llm_finetuned/chess_llm_finetuned/added_tokens.json  
   creating: ./chess_llm_finetuned/chess_llm_finetuned/checkpoint-157/
  inflating: ./chess_llm_finetuned/chess_llm_finetuned/checkpoint-157/added_tokens.json  
  inflating: ./chess_llm_finetuned/chess_llm_finetuned/checkpoint-157/trainer_state.json  
  inflating: ./chess_llm_finetuned/chess_llm_finetuned/checkpoint-157/README.md  
  inflating: ./chess_llm_finetuned/chess_llm_finetuned/checkpoint-157/training_args.bin  
  inflating: ./chess_llm_finetuned/chess_llm_finetuned/checkpoint-157/vocab.json  
  inflating: ./chess_llm_finetuned/chess_llm_finetuned/checkpoint-157/tokenizer.json  
  inflating: ./chess_llm_finetuned/chess_llm_finetuned/checkpoint-157/tokenizer_config.json  
  inflating: ./chess_llm_finetuned/chess_llm_finetuned/checkpoint-157/optimizer.pt  
  inflating: ./chess_llm_finetuned/chess_llm_finetuned/che

In [9]:
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-1.5B")# Load fine-tuned LoRA weights
model = PeftModel.from_pretrained(model, "./chess_llm_finetuned/chess_llm_finetuned")
model = model.merge_and_unload()  # Only if you want to merge weights permanently
tokenizer = AutoTokenizer.from_pretrained("./chess_llm_finetuned/chess_llm_finetuned")

def generate_move_samples(
    english_description: str,
    model,
    tokenizer,
    k: int = 5
) -> list[str]:
    """
    Generates UCI-formatted moves by enforcing character patterns:
    - First 4 characters: [a-h][1-8][a-h][1-8]
    - Optional 5th character: promotion piece [q,r,b,n]
    """
    # Add the prompt template used during training
    prompt = f"{english_description}\n<answer>"
    input_ids = tokenizer.encode(prompt, return_tensors="pt")

    samples = []
    for _ in range(k):
        current_ids = input_ids.clone()
        generated_tokens = []
        move_length = 0
        expecting_promotion = False

        for step in range(6):  # Max 5 characters + safety
            with torch.no_grad():
                outputs = model(current_ids)
            logits = outputs.logits[0, -1, :]

            # Determine allowed tokens based on position
            allowed_chars = []
            if move_length == 0 or move_length == 2:
                allowed_chars = [chr(ord('a') + i) for i in range(8)]  # a-h
            elif move_length == 1 or move_length == 3:
                allowed_chars = [str(i) for i in range(1, 9)]  # 1-8
            elif move_length == 4:
                allowed_chars = ['q', 'r', 'b', 'n']  # Promotion
            else:
                break

            # Convert allowed characters to token IDs
            allowed_token_ids = []
            for char in allowed_chars:
                token_id = tokenizer.encode(char, add_special_tokens=False)
                if len(token_id) == 1:
                    allowed_token_ids.append(token_id[0])

            # Mask invalid logits
            mask = torch.ones_like(logits) * -float("inf")
            if allowed_token_ids:
                mask[allowed_token_ids] = 0
            else:
                break

            masked_logits = logits + mask

            # Sample next token
            probs = torch.softmax(masked_logits, dim=-1)
            next_token_id = torch.multinomial(probs, num_samples=1).item()
            generated_tokens.append(next_token_id)
            current_ids = torch.cat([current_ids, torch.tensor([[next_token_id]])], dim=-1)

            # Update state
            move_length += 1
            if move_length >= 4 and step >= 3:  # Allow early stopping after 4 characters
                break

        # Decode and format the move
        raw_move = tokenizer.decode(generated_tokens)
        # Clean up any special tokens or extra characters
        clean_move = ''.join([c for c in raw_move if c in 'abcdefgh12345678qrbn'])

        samples.append(clean_move[:5])  # Truncate to max 5 characters
        print(samples)

    return samples

def evaluate_position(fen: str, k: int = 5):
    description = fen_to_description(fen)
    samples = generate_move_samples(description, model, tokenizer, k=k)

    from collections import defaultdict
    counts = defaultdict(int)
    for move in samples:
        counts[move] += 1
    mode = max(counts, key=counts.get, default=None)

    return mode

# Example usage
example = processed_dataset_eval[0]
predicted_move = evaluate_position(
    fen=example["FEN"],
    k=10
)
print(f"True move: {example['Move']}")
print(f"Predicted move: {predicted_move}")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/684 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/138 [00:00<?, ?B/s]

['g7h6']
['g7h6', 'g8f8']
['g7h6', 'g8f8', 'g6h7']
['g7h6', 'g8f8', 'g6h7', 'h4d4']
['g7h6', 'g8f8', 'g6h7', 'h4d4', 'g1f1']
['g7h6', 'g8f8', 'g6h7', 'h4d4', 'g1f1', 'g4f3']
['g7h6', 'g8f8', 'g6h7', 'h4d4', 'g1f1', 'g4f3', 'g5h5']
['g7h6', 'g8f8', 'g6h7', 'h4d4', 'g1f1', 'g4f3', 'g5h5', 'g7d3']
['g7h6', 'g8f8', 'g6h7', 'h4d4', 'g1f1', 'g4f3', 'g5h5', 'g7d3', 'g1h3']
['g7h6', 'g8f8', 'g6h7', 'h4d4', 'g1f1', 'g4f3', 'g5h5', 'g7d3', 'g1h3', 'g1f2']
True move: b8c6
Predicted move: g7h6
