<a href="https://colab.research.google.com/github/shelarsujit/finetuning/blob/main/finetuning_llama4b.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
# In your Colab cell, run once:
!pip install --upgrade trl


Collecting trl
  Using cached trl-0.17.0-py3-none-any.whl.metadata (12 kB)
Collecting accelerate>=0.34.0 (from trl)
  Downloading accelerate-1.6.0-py3-none-any.whl.metadata (19 kB)
Collecting datasets>=3.0.0 (from trl)
  Downloading datasets-3.5.1-py3-none-any.whl.metadata (19 kB)
Downloading trl-0.17.0-py3-none-any.whl (348 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m348.0/348.0 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading accelerate-1.6.0-py3-none-any.whl (354 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m354.7/354.7 kB[0m [31m30.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading datasets-3.5.1-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.4/491.4 kB[0m [31m38.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: datasets, accelerate, trl
  Attempting uninstall: datasets
    Found existing installation: datasets 2.16.1
    Uninstalling datasets-2.16.1:
      Su

In [6]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [7]:
# Full Jupyter Notebook Code: Load, Fine-Tune & Push 4-bit Llama-3 on A100

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer
from datasets import load_dataset

# Model & Quantization Config
model_id = "unsloth/llama-3-8b-bnb-4bit"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,    # leverage A100 BF16 Tensor Cores
    llm_int8_enable_fp32_cpu_offload=False    # keep everything on GPU
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",              # auto-place layers on the 40 GiB A100
    max_memory={0: "40GiB"},        # allocate full GPU memory
    trust_remote_code=True
)
model.enable_input_require_grads()             # 🔑 enable input grad

tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="right")
tokenizer.model_max_length = 1024

# LoRA Configuration
peft_config = LoraConfig(
    r=4, lora_alpha=8, target_modules=["q_proj"],
    lora_dropout=0.01, bias="none", task_type="CAUSAL_LM"
)
model = get_peft_model(model, peft_config)

# # Dataset Preparation
# dataset = load_dataset("medalpaca/medical_meadow_medical_flashcards", split="train").map(
#     lambda x: {"text": f"Q: {x['question']}\nA: {x['answer']}"},
#     remove_columns=["question", "answer"]
# )

# Prepare Dataset (fixed schema)
dataset = load_dataset(
    "medalpaca/medical_meadow_medical_flashcards", split="train"
).map(
    lambda x: {"text": f"Q: {x['input']}\nA: {x['output']}<|endoftext|>"},
    remove_columns=["instruction", "input", "output"]
)

# Training Arguments
training_args = TrainingArguments(
    output_dir="healthcare-llm",
    per_device_train_batch_size=4,      # increased batch size
    gradient_accumulation_steps=4,
    learning_rate=5e-6,
    optim="adafactor",
    fp16=True,
    max_steps=50,
    logging_steps=10,
    gradient_checkpointing=True,
    report_to="none",
    save_strategy="no",
    ddp_find_unused_parameters=False
)

# Initialize & Run SFTTrainer
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    args=training_args,
    peft_config=peft_config
)
trainer.train()

# Push to Hub
model.push_to_hub("SujitShelar/llama3-medchat-8b-lora")
tokenizer.push_to_hub("SujitShelar/llama3-medchat-8b-lora")


No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
10,1.46
20,1.4631
30,1.422
40,1.4248
50,1.4725


adapter_model.safetensors:   0%|          | 0.00/4.20M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/SujitShelar/llama3-medchat-8b-lora/commit/10f33a63b5db44a18481cb1f6a446c55056f5d6a', commit_message='Upload tokenizer', commit_description='', oid='10f33a63b5db44a18481cb1f6a446c55056f5d6a', pr_url=None, repo_url=RepoUrl('https://huggingface.co/SujitShelar/llama3-medchat-8b-lora', endpoint='https://huggingface.co', repo_type='model', repo_id='SujitShelar/llama3-medchat-8b-lora'), pr_revision=None, pr_num=None)

In [9]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

# Quantization config (reuse your bnb_config from training)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    llm_int8_enable_fp32_cpu_offload=False
)

# 1. Load base quantized model
base_model = AutoModelForCausalLM.from_pretrained(
    "unsloth/llama-3-8b-bnb-4bit",
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained("unsloth/llama-3-8b-bnb-4bit")
tokenizer.model_max_length = 1024

# 2. Load LoRA adapter weights
model = PeftModel.from_pretrained(base_model, "SujitShelar/llama3-medchat-8b-lora")

# 3. (Optional) Merge adapter for a standalone model
# model = model.merge_and_unload()

# 4. Prepare prompt
prompt = "Q: What are the treatment options for IBD?\nA:"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# 5. Generate inference
outputs = model.generate(
    **inputs,
    max_new_tokens=100,
    do_sample=True,
    temperature=0.1,
    top_p=0.9,
)
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)


Q: What are the treatment options for IBD?
A: There are many treatment options for IBD. The first step is to determine the type of IBD. The most common types are Crohn’s disease and ulcerative colitis. The treatment options for these two diseases are different. The treatment options for Crohn’s disease include medications, surgery, and diet. The treatment options for ulcerative colitis include medications, surgery, and diet. The treatment options for both diseases are different. The treatment options for Crohn’s disease include medications, surgery,


In [10]:
!apt-get update -qq && apt-get install -qq git


W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)


In [13]:
!git clone https://github.com/shelarsujit/finetuning.git


Cloning into 'finetuning'...
remote: Enumerating objects: 5, done.[K
remote: Counting objects: 100% (5/5), done.[K
remote: Compressing objects: 100% (4/4), done.[K
remote: Total 5 (delta 0), reused 0 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (5/5), 6.30 KiB | 6.30 MiB/s, done.


In [14]:
%cd finetuning

/content/finetuning


In [15]:
!git add .
!git commit -m "Add Colab code for inference"


On branch main
Your branch is up to date with 'origin/main'.

nothing to commit, working tree clean


In [16]:
!git push origin main


fatal: could not read Username for 'https://github.com': No such device or address
