In [5]:
# --- Step 1: All Necessary Imports ---
import torch
from sentence_transformers import (
    SentenceTransformer,
    InputExample,
    losses,
    models,
    SentenceTransformerTrainer,
    SentenceTransformerTrainingArguments
)
from torch.utils.data import DataLoader
from datasets import load_dataset

# --- Step 2: Load the Pre-trained Model ---
model_name = 'sentence-transformers/all-MiniLM-L6-v2'
print(f"Loading pre-trained model components: {model_name}")

word_embedding_model = models.Transformer(model_name)
pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension())
model = SentenceTransformer(modules=[word_embedding_model, pooling_model])

# --- Step 3: Prepare the Dataset ---
print("Downloading and preparing the SNLI dataset...")
train_dataset = load_dataset('snli', split='train')

train_examples = []
for record in train_dataset:
    if record['label'] == 0:  # 0 = entailment
        train_examples.append(InputExample(texts=[record['premise'], record['hypothesis']]))

print(f"✅ Dataset ready. Created {len(train_examples)} training examples.")

# --- Step 4: Define the Loss Function ---
train_loss = losses.MultipleNegativesRankingLoss(model=model)

# --- Step 5: Training Setup ---
num_epochs = 1
batch_size = 16
warmup_steps = int(len(train_examples) / batch_size * num_epochs * 0.1)

args = SentenceTransformerTrainingArguments(
    output_dir="./output",
    num_train_epochs=num_epochs,
    per_device_train_batch_size=batch_size,
    warmup_steps=warmup_steps,
    learning_rate=2e-5,
    fp16=True,   # mixed precision (faster on GPUs with AMP support)
    save_strategy="epoch",
    logging_dir="./logs",
    logging_steps=100,
)

trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_examples,
    loss=train_loss,
)

# --- Step 6: Run the Fine-Tuning ---
print("\n🚀 Starting the fine-tuning process...")
trainer.train()

print("\n✅ Fine-tuning complete!")
print("Your new, improved model has been saved to the './output' folder.")


Loading pre-trained model components: sentence-transformers/all-MiniLM-L6-v2
Downloading and preparing the SNLI dataset...
✅ Dataset ready. Created 183416 training examples.


AttributeError: 'list' object has no attribute 'column_names'