# Compare Running vs. Finetuning

In [None]:
# Install the requirements in Google Colab
# !pip install transformers datasets trl huggingface_hub

from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import setup_chat_format
import torch
from datasets import load_dataset
from trl import SFTConfig, SFTTrainer, setup_chat_format
# Authenticate to Hugging Face
# from huggingface_hub import login

login("")

## 1.7B Model

### Running 1.7B Model

In [3]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available() else "cpu"
)

prompt = "What is the meaning of life?"

model_name = "HuggingFaceTB/SmolLM2-1.7B"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto"
)

inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

outputs = model.generate(**inputs, max_new_tokens=200)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


What is the meaning of life?

What is the meaning of life?

What is the meaning of life?

What is the meaning of life?

What is the meaning of life?

What is the meaning of life?

What is the meaning of life?

What is the meaning of life?

What is the meaning of life?

What is the meaning of life?

What is the meaning of life?

What is the meaning of life?

What is the meaning of life?

What is the meaning of life?

What is the meaning of life?

What is the meaning of life?

What is the meaning of life?

What is the meaning of life?

What is the meaning of life?

What is the meaning of life?

What is the meaning of life?

What is the meaning of life?

What is the meaning of life?




### Finetuning 1.7B Model

In [4]:
# Load the model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path=model_name
).to(device)

# Set up the chat format
model, tokenizer = setup_chat_format(model=model, tokenizer=tokenizer)

# Set our name for the finetune to be saved &/ uploaded to
finetune_name = "SmolLM2-1.7B-Arch-Talk-Chat-Model"
finetune_tags = ["arch-talk", "smoltalk", "everyday-conversations"]

# Split Dataset
# Load the dataset first - using 'everyday-conversations' config for chat training
ds = load_dataset("HuggingFaceTB/smoltalk", "everyday-conversations", split="train")
# Split the dataset for training and evaluation
ds = ds.train_test_split(test_size=0.5, seed=42)

print(f"Training samples: {len(ds['train'])}")
print(f"Test samples: {len(ds['test'])}")
print(f"Example conversation: {ds['train'][0]['messages']}")

# Configure the SFTTrainer
sft_config = SFTConfig(
    output_dir="./sft_output",
    max_steps=1000,  # Adjust based on dataset size and desired training duration
    per_device_train_batch_size=8,  # Set according to your GPU memory capacity
    learning_rate=5e-5,  # Common starting point for fine-tuning
    logging_steps=10,  # Frequency of logging training metrics
    save_steps=100,  # Frequency of saving model checkpoints
    eval_steps=50,  # Frequency of evaluation
    use_mps_device=(
        True if device == "mps" else False
    ),  # Use MPS for mixed precision training on Apple Silicon
    hub_model_id=finetune_name,  # Set a unique name for your model
    push_to_hub=False,  # Automatically push the model to Hugging Face Hub
    hub_private_repo=False,  # Make the model public
    report_to=None,  # Disable wandb/tensorboard logging for simplicity
)

# Initialize the SFTTrainer
trainer = SFTTrainer(
    model=model,
    args=sft_config,
    train_dataset=ds["train"],
    eval_dataset=ds["test"],
    processing_class=tokenizer,  # Use processing_class instead of tokenizer
)

Training samples: 1130
Test samples: 1130
Example conversation: [{'content': 'Hi there', 'role': 'user'}, {'content': 'Hello! How can I help you today?', 'role': 'assistant'}, {'content': "I'm having trouble with my work schedule. I have a lot of night shifts this week.", 'role': 'user'}, {'content': 'Night shifts can be challenging. Are you finding it hard to adjust to the new sleep schedule?', 'role': 'assistant'}, {'content': 'Yes, I am. Do you have any tips for staying awake during my shift?', 'role': 'user'}, {'content': 'Try to get some fresh air during your breaks, and avoid heavy meals before your shift starts. Also, stay hydrated by drinking plenty of water throughout the night.', 'role': 'assistant'}, {'content': 'That sounds helpful, thanks.', 'role': 'user'}]


In [5]:
# Train the model
trainer.train()

# Save the model
trainer.save_model(f"./{finetune_name}")

OutOfMemoryError: CUDA out of memory. Tried to allocate 64.00 MiB. GPU 0 has a total capacity of 23.56 GiB of which 3.38 MiB is free. Including non-PyTorch memory, this process has 23.53 GiB memory in use. Of the allocated memory 23.14 GiB is allocated by PyTorch, and 78.94 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# trainer.push_to_hub(tags=finetune_tags)