In [None]:
!pip install -q sentence-transformers datasets accelerate huggingface_hub
from huggingface_hub import notebook_login
notebook_login()

In [None]:
!wget -q https://raw.githubusercontent.com/pierrealexandreguillemin-a11y/pocket_arbiter/main/data/training/triplets_training.jsonl
import json
triplets = [json.loads(l) for l in open("triplets_training.jsonl") if l.strip()]
print(f"Triplets: {len(triplets)}")

In [None]:
import torch
from datasets import Dataset
from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer, SentenceTransformerTrainingArguments
from sentence_transformers.losses import MultipleNegativesRankingLoss
from sentence_transformers.training_args import BatchSamplers

# Single source of truth for where checkpoints and the final model are written.
OUTPUT_DIR = "embeddinggemma-chess-fr"

# EmbeddingGemma - Plan Phase 3
model = SentenceTransformer("google/embeddinggemma-300m")
print(f"Model loaded: {model.get_sentence_embedding_dimension()} dims")

# The EmbeddingGemma model card states that its activations do not support
# float16, so fp16 mixed precision is not used. bfloat16 is enabled only on
# GPUs with native support (compute capability >= 8.0); otherwise training
# runs in float32.
use_bf16 = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8

training_args = SentenceTransformerTrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=3,
    per_device_train_batch_size=16,
    learning_rate=2e-5,
    warmup_ratio=0.1,
    fp16=False,
    bf16=use_bf16,
    # MultipleNegativesRankingLoss treats the other samples of a batch as
    # negatives, so duplicate texts inside one batch would act as false negatives.
    batch_sampler=BatchSamplers.NO_DUPLICATES,
    logging_steps=100,
    save_strategy="epoch",
    report_to="none",
)

# NOTE(review): the loss consumes the dataset columns in order; this assumes
# every record in `triplets` holds only the (anchor, positive, negative) text
# fields, in that order - TODO confirm against the JSONL schema.
train_dataset = Dataset.from_list(triplets)

trainer = SentenceTransformerTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    loss=MultipleNegativesRankingLoss(model),
)
trainer.train()

# Persist the final fine-tuned model at the root of the output directory.
model.save(OUTPUT_DIR)
print("Training complete!")

In [None]:
import zipfile
from pathlib import Path

from google.colab import files

MODEL_DIR = Path("embeddinggemma-chess-fr")
ARCHIVE_PATH = Path("embeddinggemma-chess-fr.zip")

if not MODEL_DIR.is_dir():
    raise FileNotFoundError(f"Model directory not found: {MODEL_DIR} - run the training cell first.")

# The training run writes its per-epoch `checkpoint-*` folders (including
# optimizer state) into the same directory as the final model. Zipping the
# whole directory would multiply the download size, so only the final model
# files are archived. Paths are stored relative to the model directory.
with zipfile.ZipFile(ARCHIVE_PATH, "w", zipfile.ZIP_DEFLATED) as archive:
    for path in sorted(MODEL_DIR.rglob("*")):
        relative_path = path.relative_to(MODEL_DIR)
        if relative_path.parts[0].startswith("checkpoint-"):
            continue
        if path.is_file():
            archive.write(path, relative_path.as_posix())

# Trigger the browser download of the archive from the Colab runtime.
files.download(str(ARCHIVE_PATH))
print("Download started!")