<a href="https://colab.research.google.com/github/supraja777/QLora-Fine-Tuning/blob/main/Fine_Tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q accelerate==0.31.0 peft==0.11.1 bitsandbytes==0.43.1 transformers==4.41.2 trl==0.9.4 sentencepiece==0.2.0 triton==3.1.0

In [2]:
from transformers import AutoTokenizer
from datasets import load_dataset

In [None]:
# Tokenizer of the chat checkpoint; its chat template is what formats the training conversations.
chat_template_checkpoint = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
template_tokenizer = AutoTokenizer.from_pretrained(chat_template_checkpoint)

In [None]:
# Print the tokenizer object so its settings can be inspected before it is used for templating.
print(template_tokenizer)

In [None]:
# Load the "test_sft" split of UltraChat, then keep a reproducible random subset of 3,000 rows.
ultrachat_split = load_dataset("HuggingFaceH4/ultrachat_200k", split="test_sft")
shuffled_split = ultrachat_split.shuffle(seed=42)  # fixed seed -> same subset on every run
dataset = shuffled_split.select(range(3_000))

In [6]:
def format_prompt(example):
  """Render one conversation into a single training string.

  Args:
    example: A dataset row; its "messages" entry is handed to the chat template
      of `template_tokenizer`.

  Returns:
    A dict with a single key, "text", holding the templated conversation as a string.
  """
  chat = example["messages"]
  # tokenize=False asks for the formatted string rather than token ids.
  prompt = template_tokenizer.apply_chat_template(chat, tokenize = False)
  # No per-row print here: this function runs once for every row of the dataset,
  # so printing each prompt would flood the cell output.
  return {"text": prompt}

In [None]:
# Apply format_prompt to every row; the "text" key it returns is the field the trainer reads
# (see dataset_text_field="text" in the SFTTrainer cell).
dataset = dataset.map(format_prompt)

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Checkpoint that will be fine-tuned
model_name = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"

# 4-bit quantization configuration - the "Q" in QLoRA
bnb_config = BitsAndBytesConfig(
    load_in_4bit = True,  # use 4-bit precision model loading
    bnb_4bit_quant_type = "nf4",  # Quantization type
    bnb_4bit_compute_dtype = "float16", # Compute d-type
    bnb_4bit_use_double_quant = True # Apply nested quantization
)

# Load the model with the quantization config applied; device placement is left to "auto"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map = "auto",
    quantization_config = bnb_config
)

model.config.use_cache = False
# The Llama config field is named `pretraining_tp`. Assigning to a misspelled attribute
# (e.g. `pretraining`) raises no error but leaves the real setting untouched.
model.config.pretraining_tp = 1

# Tokenizer that matches the checkpoint being trained
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code = False)

# Padding setup used when batching sequences
tokenizer.pad_token = "<PAD>"
tokenizer.padding_side = "left"

In [11]:
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model

# LoRA adapter configuration
peft_config = LoraConfig(
    lora_alpha = 32, # Lora Scaling
    lora_dropout = 0.1, # Dropout for Lora layers
    r = 64, # Rank
    bias = "none",
    # Must be spelled "CAUSAL_LM": PEFT looks the task type up by this exact name, and the
    # value is also written to the saved adapter config that is reloaded after training.
    task_type = "CAUSAL_LM",
    # NOTE(review): 'v_proj' is not in this list while the other attention projections are -
    # confirm that leaving it out is intentional.
    target_modules = ['k_proj', 'gate_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj']
)

# Prepare the quantized model for training, then attach the LoRA adapters
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)


In [12]:
from transformers import TrainingArguments

# Directory handed to the trainer as its output location
output_dir = "./results"

# Hyperparameters for the fine-tuning run, grouped by concern
training_arguments = TrainingArguments(
    output_dir=output_dir,
    # Run length and logging cadence
    num_train_epochs=1,
    logging_steps=10,
    # Batching: 2 samples per device, gradients accumulated over 4 steps
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    # Optimizer and learning-rate schedule
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    # Mixed-precision training
    fp16=True,
)

In [None]:
from trl import SFTTrainer

# Supervised fine-tuning setup: model + tokenizer, the formatted dataset, and the run settings
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_arguments,
    train_dataset=dataset,
    dataset_text_field="text",  # column produced by format_prompt
    max_seq_length=512,
    # Only needed for LoRA training; omit for regular SFT
    peft_config=peft_config,
)

# Run the fine-tuning loop
trainer.train()

# Persist the trained QLoRA adapter weights
trained_model = trainer.model
trained_model.save_pretrained("TinyLlama-1.1B-qlora")

In [17]:
from peft import AutoPeftModelForCausalLM

# Reload the model from the adapter directory saved after training
model = AutoPeftModelForCausalLM.from_pretrained(
    "TinyLlama-1.1B-qlora",
    device_map="auto",
    low_cpu_mem_usage=True,
)

# Fold the LoRA weights into the base model and drop the adapter wrappers
merged_model = model.merge_and_unload()

In [None]:
from transformers import pipeline

# Text-generation pipeline backed by the merged, fine-tuned model
pipe = pipeline(task="text-generation", model=merged_model, tokenizer=tokenizer)

# Prompt laid out in the chat format: one user turn followed by an open assistant turn
prompt = """<|user|>
Tell me something about Large Language Models.</s>
<|assistant|>
"""

# Generate and show the text of the first returned sequence
generations = pipe(prompt)
print(generations[0]["generated_text"])

In [None]:
# Prompt laid out in the chat format: one user turn followed by an open assistant turn
prompt = """<|user|>
Tell me something about Fine tuning.</s>
<|assistant|>
"""

# Reuse the `pipe` created in the first inference cell: the model and tokenizer have not
# changed, so building another identical pipeline would only repeat the setup work.
print(pipe(prompt)[0]["generated_text"])

In [None]:
# Prompt laid out in the chat format: one user turn followed by an open assistant turn
prompt = """<|user|>
Tell me something about Seattle.</s>
<|assistant|>
"""

# Reuse the `pipe` created in the first inference cell: the model and tokenizer have not
# changed, so building another identical pipeline would only repeat the setup work.
print(pipe(prompt)[0]["generated_text"])