<a href="https://colab.research.google.com/github/priyal6/finetuning/blob/main/test_lra.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments, DataCollatorForLanguageModeling, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model

In [None]:
# Toy supervised set: each record pairs an instruction and a context passage
# with the response text the model should produce (here, a span of the context).
data = [
    dict(
        instruction="Extract feedback about pricing.",
        context="Customer review: I love the product, but I think the prices are too high!",
        response="I think the prices are too high!",
    ),
    dict(
        instruction="Extract feedback about support.",
        context="Support team was okay. One thing: the agent was very slow.",
        response="the agent was very slow",
    ),
]

# Wrap the plain list of records in a `datasets.Dataset`.
dataset = Dataset.from_list(data)

def format_example(ex):
    """Render one record as a single training string.

    Args:
        ex: Mapping with "instruction", "context" and "response" entries.

    Returns:
        A dict with one key, "text", holding the three sections under their
        "### ..." headers, separated by blank lines.
    """
    sections = (
        f"### Instruction:\n{ex['instruction']}",
        f"### Context:\n{ex['context']}",
        f"### Response:\n{ex['response']}",
    )
    return {"text": "\n\n".join(sections)}

# Run format_example over every record; the "text" field it returns is what
# the tokenize step reads later on.
dataset = dataset.map(format_example)

Map:   0%|          | 0/2 [00:00<?, ? examples/s]

In [None]:
pip install -U bitsandbytes



In [None]:
!pip install -U bitsandbytes

import torch
from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments, DataCollatorForLanguageModeling, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model

# Hugging Face Hub id of the base checkpoint that is fine-tuned below.
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(model_name)
# The tokenize step pads every example to a fixed length, which needs a pad
# token; fall back to the EOS token when the tokenizer does not define one.
if tokenizer.pad_token is None:
  tokenizer.pad_token = tokenizer.eos_token

# 4-bit NF4 quantization of the base weights; quantized matmuls are computed
# in bfloat16. Double quantization is left off.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)

# Load the base causal LM with the quantization config applied and let
# `device_map="auto"` place the weights on the available device(s).
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    # Keep the non-quantized modules in the same dtype as the 4-bit compute
    # dtype above (and as the bf16 training run) instead of float16, so
    # activations are not converted back and forth between two half formats.
    dtype=torch.bfloat16,
)



In [None]:
def tokenize(ex):
    """Tokenize the "text" field of one example to a fixed length.

    Args:
        ex: Mapping with a "text" entry holding the rendered prompt.

    Returns:
        Whatever the module-level `tokenizer` returns for that text when asked
        to truncate and to pad to exactly 512 tokens.
    """
    fixed_length_options = dict(
        truncation=True,
        padding="max_length",
        max_length=512,
    )
    return tokenizer(ex["text"], **fixed_length_options)

# Tokenize every example, then expose the same token ids under "labels" too.
tokenized = dataset.map(tokenize).map(
    lambda row: {"labels": row["input_ids"]}
)

Map:   0%|          | 0/2 [00:00<?, ? examples/s]

Map:   0%|          | 0/2 [00:00<?, ? examples/s]

In [None]:
# LoRA adapter settings for a causal LM: rank-16 updates (alpha 32,
# dropout 0.05) attached to the q/k/v/o projection modules.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
)

# Wrap the loaded model with the adapters; from here on `model` is the PEFT wrapper.
model = get_peft_model(model, lora_config)

In [None]:
# Collator configured for causal language modeling (mlm=False).
collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Run configuration: 2 epochs, batch size 2 per device, bf16 precision,
# a log line every 2 steps, no external experiment tracker.
args = TrainingArguments(
    output_dir="tinyllama-lora-output",
    num_train_epochs=2,
    per_device_train_batch_size=2,
    learning_rate=2e-4,
    bf16=True,
    fp16=False,
    logging_steps=2,
    save_steps=50,
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized,
    data_collator=collator,
)
trainer.train()

# Write the trained model out explicitly; the inference cell loads it from this path.
trainer.save_model("tinyllama-lora-output")

Step,Training Loss
2,2.9217


In [None]:
#inference and testing the finetuned model
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# Reload the base checkpoint and attach the LoRA adapter saved by training.
base_model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
lora_path = "tinyllama-lora-output"

tokenizer = AutoTokenizer.from_pretrained(base_model)
# Same pad-token fallback as at training time, so the freshly loaded
# tokenizer is configured consistently with the one used for fine-tuning.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    dtype=torch.float16,
    device_map="auto",
)

model = PeftModel.from_pretrained(model, lora_path)
# Switch to eval mode (disables dropout). The trailing semicolon keeps the
# notebook from echoing the entire module tree as the cell's output.
model.eval();

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 2048)
        (layers): ModuleList(
          (0-21): 22 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=2048, out_features=2048, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lora.Linear(

In [None]:
def build_prompt(instruction, context):
    """Build the inference prompt, ending with an open response section.

    Args:
        instruction: Text placed under the "### Instruction:" header.
        context: Text placed under the "### Context:" header.

    Returns:
        The prompt string; it ends right after "### Response:" and a newline
        so that the model's continuation is the response.
    """
    instruction_section = f"### Instruction:\n{instruction}"
    context_section = f"### Context:\n{context}"
    return f"{instruction_section}\n\n{context_section}\n\n### Response:\n"

In [None]:
def generate_response(instruction, context, max_new_tokens=80):
    """Generate the model's response for one instruction/context pair.

    Uses the module-level `tokenizer` and `model`, and `build_prompt` for the
    prompt layout.

    Args:
        instruction: Task description placed under the "### Instruction:" header.
        context: Source text placed under the "### Context:" header.
        max_new_tokens: Upper bound on the number of tokens generated after
            the prompt.

    Returns:
        Only the generated continuation (the prompt is excluded), decoded with
        special tokens skipped and surrounding whitespace stripped.
    """
    prompt = build_prompt(instruction, context)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_len = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=0.1,
            top_p=0.9,
            do_sample=True,
        )

    # The returned sequence is prompt + continuation. Drop the prompt by token
    # count instead of splitting the decoded text on "### Response:": if the
    # model emits that marker again, a split()[-1] would return the later,
    # made-up section rather than the answer to this prompt.
    new_tokens = outputs[0][prompt_len:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()



In [None]:
# Smoke test on a review that is not part of the training data.
instruction = "Extract feedback about pricing."
context = "Customer review: Product is good but the price is insane."

print("MODEL OUTPUT:", generate_response(instruction, context), sep="\n")

MODEL OUTPUT:
We understand that the price of our product is a bit high, but we believe that it is worth it for the quality and value that we offer. We are constantly working to improve our pricing and make sure that we are offering the best value for our customers. If you have any further questions or concerns, please do not hesitate to reach out to us. Thank you for your feedback. Best
