<a href="https://colab.research.google.com/github/pranjalkamboj/Fine-tuning-Lora-Qlora/blob/main/Qlora.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
!pip install -q torch accelerate transformers bitsandbytes peft trl








In [19]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer


In [20]:
 !rm -rf /root/.cache/huggingface/datasets

In [None]:
dataset = load_dataset("tatsu-lab/alpaca", download_mode="force_redownload")
dataset = dataset['train'].shuffle(seed=42).select(range(1000))
print(dataset[0])



In [22]:
def convert_to_llama2_format(example):
    system_prompt = "You are a helpful assistant."

    instruction = example['instruction'].strip()
    input_text = example['input'].strip()
    output_text = example['output'].strip()

    if input_text == "":
        full_instruction = instruction
    else:
        full_instruction = f"{instruction}\n{input_text}"

    formatted_prompt = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n{full_instruction} [/INST] {output_text}</s>"

    return {"text": formatted_prompt}

# Apply the formatting
formatted_dataset = dataset.map(convert_to_llama2_format)
print(formatted_dataset[0]['text'])

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

<s>[INST] <<SYS>>
You are a helpful assistant.
<</SYS>>

What would be the best type of exercise for a person who has arthritis? [/INST] For someone with arthritis, the best type of exercise would be low-impact activities like yoga, swimming, or walking. These exercises provide the benefits of exercise without exacerbating the symptoms of arthritis.</s>


In [23]:
## LOra Congig paarameters
lora_dropout=0.1
lora_alpha=16
lora_r=64

In [24]:
## bitsandbytes parameters
use_4bits=True
bnb_4bit_compute_dtype="float16"
bnb_4bit_use_double_quant=True
bnb_4bit_quant_type="nf4"
use_nested_quant=False


In [25]:
##Training parameters
output_dir="./results"
num_train_epochs=1
per_device_train_batch_size=4
fp16=False
bf16=False
per_device_eval_batch_size=4
gradient_accumulation_steps=1
gradient_checkpointing=True
max_grad_norm=0.3
learning_rate=2e-4
weight_decay=0.001
optim="paged_adamw_32bit"
lr_scheduler_type="cosine"
warmup_ratio=0.03
group_by_length=True
save_steps=0
logging_steps=25
max_steps= -1
group_by_length=True
save_steps=0
logging_steps=25


In [26]:
#supervised fine tuning parameters
max_seq_length=None
packing=False
device_map={"":0}

In [27]:
##model names
model_name = "NousResearch/Llama-2-7b-chat-hf"
dataset_name="formatted_dataset"
new_model="llama-2-7b-chat-alpaca"

In [28]:
## Quantizing(bnb config)
compute_dtype=getattr(torch, bnb_4bit_compute_dtype)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bits,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,

)



In [29]:
## Checking our GPU computation power
if compute_dtype == torch.float16 and use_4bits:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
    else:
      print("false")


false


In [30]:
## loading and quantizing the model
model= AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map=device_map,
    quantization_config=bnb_config,

)
model.config.pretraining_tp=1
model.config.use_cache = False



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [31]:
## Loading the tokenizer sutiable with our base model
tokenizer=AutoTokenizer.from_pretrained(model_name,trust_remote_code=True)
tokenizer.pad_token= tokenizer.eos_token
tokenizer.padding_side="right"


In [32]:
##Loading the LORA layer configration
peft_configration=LoraConfig(
    lora_dropout=lora_dropout,
    lora_alpha=lora_alpha,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM"
)

In [33]:
## Training parameters for fine tuning the model
training_arguments=TrainingArguments(
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    report_to="tensorboard"
    )


In [34]:
## Now we are using SFTtrainer to join all things in pipeline
trainer=SFTTrainer(
    model=model,
    train_dataset=formatted_dataset,
    peft_config=peft_configration,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=packing,
)

TypeError: SFTTrainer.__init__() got an unexpected keyword argument 'dataset_text_field'

In [None]:
##Train model
trainer.train()

In [None]:
## NOW saving this in our new model
trainer.model.save_pretrained(new_model)
tokenizer.save_pretrained(new_model)


In [None]:
## now checking the tensorflow data
%load_ext tensorboard
%tensorboard --logdir results/runs


In [None]:
#Now text the new model with the help of th pipeline
prompt="Explain the difference between supervised learning and reinforcement learning in simple terms"
pipe = pipeline(task="text-generation", model="llama-2-7b-chat-alpaca", tokenizer=tokenizer, max_length=150)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])


In [None]:
## now lets free VRAM (GPU memory) in Google Colab.
del model
del pipe
del trainer
import gc
gc.collect()
gc.collect()


In [None]:
## Now lets remove the widget garbage from metadata.
!jupyter nbconvert --ClearMetadataPreprocessor.enabled=True --inplace Qlora.ipynb
