# Train Qwen3-4B Icelandic on Google Colab
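Run this notebook in Google Colab with a GPU runtime (e.g. the free T4 GPU).

**Note:** despite the title, the training cell below loads `Qwen/Qwen2.5-3B-Instruct`, a smaller 3B model chosen to fit Colab. Change `model_name` in the training cell if you want a different checkpoint.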

In [None]:
# Check GPU: run `nvidia-smi` in the Colab shell to list the attached NVIDIA GPU(s).
# If this command errors out, the notebook is not on a GPU runtime and the
# training cell further down will not be able to run on a GPU.
!nvidia-smi

In [None]:
# Install dependencies
!pip install -q -U transformers datasets accelerate peft bitsandbytes
!pip install -q -U trl
!pip install -q "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

In [None]:
# Upload your dataset files (train.jsonl and validation.jsonl)
from pathlib import Path

from google.colab import files

# The training cell reads these exact file names from the working directory,
# so each one is verified right after its upload dialog instead of letting a
# mis-named file surface later as a dataset-loading error.
for expected_name in ("train.jsonl", "validation.jsonl"):
    print(f"Upload your {expected_name} file:")
    uploaded = files.upload()  # mapping of uploaded file name -> file content
    if not Path(expected_name).is_file():
        raise FileNotFoundError(
            f"Expected '{expected_name}' in the working directory after upload, "
            f"but received: {sorted(uploaded) or 'nothing'}. "
            "Rename the file and run this cell again."
        )

In [None]:
# Training script
from unsloth import FastLanguageModel
from trl import SFTTrainer, SFTConfig
from datasets import load_dataset
import torch

# Load model with 4-bit quantization
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="Qwen/Qwen2.5-3B-Instruct",  # Use 3B for Colab
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=True,
)

# Add LoRA adapters
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # Lower rank for Colab
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
)

# Load dataset
dataset = load_dataset('json', data_files={
    'train': 'train.jsonl',
    'validation': 'validation.jsonl'
})

# Training arguments
training_args = SFTConfig(
    output_dir="./qwen-icelandic",
    max_seq_length=2048,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    learning_rate=2e-4,
    fp16=True,
    save_steps=100,
    logging_steps=10,
    optim="adamw_8bit",
    seed=42,
)

# Trainer
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset['train'],
    eval_dataset=dataset['validation'],
    args=training_args,
)

# Train
trainer.train()

# Save
model.save_pretrained("qwen-icelandic-lora")
tokenizer.save_pretrained("qwen-icelandic-lora")

print("Training complete! Download your model:")
!zip -r qwen-icelandic-lora.zip qwen-icelandic-lora/
files.download('qwen-icelandic-lora.zip')