In [3]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.5/92.5 MB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m71.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.4/77.4 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m61.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m547.8/547.8 kB[0m [31m55.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.3/21.3 MB[0m [31m74.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━

In [4]:
# Switch the working directory to the project folder so relative paths used later
# resolve against it.
# NOTE(review): this path presumably requires Google Drive to be mounted at
# /content/drive first; no mount step is visible here — confirm.
%cd /content/drive/MyDrive/Pesto_assignment

/content/drive/MyDrive/Pesto_assignment


In [5]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

In [11]:
# Hub id of the base checkpoint that the adapter below is applied to
model_name = "NousResearch/Llama-2-7b-chat-hf"

# Device placement handed to from_pretrained
# (presumably the "" key sends the whole model to device 0 — confirm)
device_map = {"": 0}

# Location of the fine-tuned LoRA adapter weights
finetuned_model = "Llama-2-7b-customer-agent-finetune-5_epoch"

# Load the base weights in FP16 with the placement chosen above
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map=device_map,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    return_dict=True,
)

# Attach the fine-tuned adapter to the base model and merge it in one step,
# so `model` ends up as the merged-and-unloaded result
model = PeftModel.from_pretrained(base_model, finetuned_model).merge_and_unload()

# Tokenizer of the base checkpoint: EOS doubles as the pad token, padding on the right
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [16]:
# prompt: build an interface here in this notebook, where user can input queries and the above loaded model will act as chat agent and return the responses

def generate_response(input_text, max_new_tokens=120):
    """Generate the chat agent's reply to a single user query.

    Relies on the notebook-level ``tokenizer`` and ``model`` objects.

    Args:
        input_text: The user's query as a plain string.
        max_new_tokens: Upper bound on the number of tokens ``model.generate``
            may produce. Defaults to 120.

    Returns:
        The first generated sequence decoded to text with special tokens
        stripped.
        NOTE(review): for decoder-only models ``generate`` typically returns
        prompt + continuation, so the text may begin with the query — confirm
        whether callers strip it.
    """
    # The EOS token is appended to the query before encoding.
    # NOTE(review): presumably marks the end of the user's turn — confirm it
    # matches the prompt format used during fine-tuning.
    prompt = input_text + tokenizer.eos_token
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.device)

    # Generate the response, capped at the caller-supplied token budget
    output = model.generate(input_ids=input_ids, max_new_tokens=max_new_tokens)

    # Decode only the first (and only) sequence of the batch
    return tokenizer.decode(output[0], skip_special_tokens=True)


Enter your query: I do not like the product. It's not exactly the same as it was mentioned in the website
Chat Agent: Aw, it sounds like you're not happy with your purchase. Sorry to hear that the product didn't meet your expectations. Can you please provide more details about the issue you're experiencing? We'll do our best to help you resolve the problem.
Enter your query: exit


In [None]:
def calculate_perplexity(model, tokenizer, texts, max_length=2048, stride=512):
    """Compute one token-level perplexity for ``model`` over all of ``texts``.

    Each text is tokenized on its own (truncated to ``max_length`` tokens) and
    scored with a sliding window: every step scores the next ``stride`` tokens,
    using up to ``max_length`` tokens ending at the window's end as context.
    The summed negative log-likelihood of every scored token across *all*
    texts is divided by the total number of scored tokens, then exponentiated.

    Args:
        model: Callable as ``model(input_ids, labels=...)`` returning an object
            with a ``.loss`` attribute, and exposing ``.device``. Assumes the
            Hugging Face causal-LM convention: labels are shifted internally
            and ``.loss`` is the mean over labels that are not ``-100``.
        tokenizer: Callable returning an object with ``.input_ids`` of shape
            ``(1, seq_len)`` when given a single string.
        texts: Iterable of strings to evaluate.
        max_length: Maximum number of tokens kept per text and per window.
        stride: Number of new tokens scored per window step.

    Returns:
        The perplexity as a Python float.

    Raises:
        ValueError: If ``stride`` or ``max_length`` is not positive, or if no
            token could be scored (empty ``texts`` or only one-token texts).
    """
    if stride <= 0:
        raise ValueError("stride must be a positive integer")
    if max_length <= 0:
        raise ValueError("max_length must be a positive integer")

    window_nlls = []    # summed negative log-likelihood of each scored window
    scored_tokens = 0   # number of tokens those sums cover, across all texts

    for text in texts:
        encodings = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=max_length)
        input_ids = encodings.input_ids.to(model.device)  # keep inputs on the model's device
        seq_len = input_ids.size(1)

        for i in range(0, seq_len, stride):
            end_loc = min(i + stride, seq_len)
            # Anchor the context at the window's end so it never exceeds
            # max_length tokens and is never empty when max_length < stride.
            begin_loc = max(end_loc - max_length, 0)
            trg_len = end_loc - i
            chunk_input_ids = input_ids[:, begin_loc:end_loc]
            chunk_target_ids = chunk_input_ids.clone()
            # Tokens before position i are context only; -100 excludes them from the loss.
            chunk_target_ids[:, :-trg_len] = -100

            # The first label has no preceding token to be predicted from,
            # so count only the labels that survive the internal shift.
            n_scored = int((chunk_target_ids[:, 1:] != -100).sum().item())
            if n_scored == 0:
                continue  # e.g. a one-token text: nothing to score

            with torch.no_grad():
                outputs = model(chunk_input_ids, labels=chunk_target_ids)

            # .loss is a mean; multiply back to a sum so windows of different
            # sizes are weighted by their token counts.
            window_nlls.append(outputs.loss.float() * n_scored)
            scored_tokens += n_scored

    if scored_tokens == 0:
        raise ValueError("no tokens to score: `texts` is empty or every text has fewer than two tokens")

    ppl = torch.exp(torch.stack(window_nlls).sum() / scored_tokens)
    return ppl.item()

# Two example agent replies used as a small perplexity check
validation_texts = [
    "We apologize for the inconvenience. Can you please provide your order number so we can investigate and resolve the issue?",
    "Certainly. Can you please provide your order number and the details of the item you'd like to change?",
]

# Score the loaded model on the samples and report the result
perplexity = calculate_perplexity(
    model=model,
    tokenizer=tokenizer,
    texts=validation_texts,
)
print(f"Perplexity: {perplexity}")