In [2]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
from huggingface_hub import login

In [3]:
!pip install unsloth

Collecting datasets>=3.4.1 (from unsloth)
  Using cached datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Using cached datasets-3.6.0-py3-none-any.whl (491 kB)
Installing collected packages: datasets
  Attempting uninstall: datasets
    Found existing installation: datasets 2.18.0
    Uninstalling datasets-2.18.0:
      Successfully uninstalled datasets-2.18.0
Successfully installed datasets-3.6.0


In [4]:
from unsloth import FastLanguageModel


Please restructure your imports with 'import unsloth' at the top of your file.
  from unsloth import FastLanguageModel


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [None]:
login(token="")

In [6]:

model_name = "meta-llama/Llama-3.2-1B-Instruct"
max_seq_length = 1024
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=torch.float16,
    load_in_4bit=True
)

==((====))==  Unsloth 2025.6.12: Fast Llama patching. Transformers: 4.52.4.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [7]:

model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407
)


Unsloth 2025.6.12 patched 16 layers with 16 QKV layers, 16 O layers and 16 MLP layers.


In [8]:
pip install -U datasets==2.18.0

Collecting datasets==2.18.0
  Using cached datasets-2.18.0-py3-none-any.whl.metadata (20 kB)
Using cached datasets-2.18.0-py3-none-any.whl (510 kB)
Installing collected packages: datasets
  Attempting uninstall: datasets
    Found existing installation: datasets 3.6.0
    Uninstalling datasets-3.6.0:
      Successfully uninstalled datasets-3.6.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
unsloth 2025.6.12 requires datasets>=3.4.1, but you have datasets 2.18.0 which is incompatible.
trl 0.19.0 requires datasets>=3.0.0, but you have datasets 2.18.0 which is incompatible.
unsloth-zoo 2025.6.8 requires datasets>=3.4.1, but you have datasets 2.18.0 which is incompatible.[0m[31m
[0mSuccessfully installed datasets-2.18.0


In [9]:
dataset = load_dataset("medalpaca/medical_meadow_medical_flashcards", split="train[:1000]")

In [13]:
def preprocess_function(examples):
    inputs = [f"Question: {q}\nAnswer: {a}" for q, a in zip(examples["instruction"], examples["output"])]
    model_inputs = tokenizer(inputs, max_length=1024, truncation=True, padding="max_length")
    model_inputs["labels"] = model_inputs["input_ids"].copy()
    return model_inputs

tokenized_dataset = dataset.map(preprocess_function, batched=True)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [None]:
training_args = TrainingArguments(
    output_dir="sft_outputs",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    warmup_steps=5,
    num_train_epochs=1,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=1,
    save_strategy="epoch",
    push_to_hub=True,
    hub_model_id="saisuryateja1436/medical-llama3.2-1b-sft",
    hub_token=""
)

In [15]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer
)

# Train
trainer.train()


  trainer = Trainer(
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 1,000 | Num Epochs = 1 | Total steps = 250
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 4 x 1) = 4
 "-____-"     Trainable parameters = 11,272,192 of 1,000,000,000 (1.13% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,12.1073
2,11.7365
3,11.7481
4,11.1649
5,9.7799
6,9.4981
7,9.1481
8,8.4444
9,8.0284
10,7.8393


TrainOutput(global_step=250, training_loss=5.862668336868286, metrics={'train_runtime': 517.8822, 'train_samples_per_second': 1.931, 'train_steps_per_second': 0.483, 'total_flos': 6048266059776000.0, 'train_loss': 5.862668336868286, 'epoch': 1.0})

In [None]:
trainer.push_to_hub()