# Analyze Trained Model

Let's try zero-temperature decoding with the PEFT model and with the un-PEFT'd 
prompted LLM.



In [1]:
import os
import json
import torch
import torch.nn as nn
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset
from peft import get_peft_config, PeftModel, PeftConfig, get_peft_model, LoraConfig, TaskType
import torch.nn.functional as F
from datetime import datetime
import argparse
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the base model and tokenizer
model_name = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
base_model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")

print("Base model and tokenizer loaded.")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|██████████| 4/4 [00:04<00:00,  1.13s/it]

Base model and tokenizer loaded.





In [3]:
# Load the PEFT configuration

base_model_ = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
peft_model_path = "../results/truthful_squad_match_01_ep1000_batch20"
config = PeftConfig.from_pretrained(peft_model_path)

# Load the PEFT model
peft_model = PeftModel.from_pretrained(base_model_, peft_model_path)

print("PEFT model loaded successfully.")

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Loading checkpoint shards: 100%|██████████| 4/4 [00:04<00:00,  1.15s/it]


PEFT model loaded successfully.


In [4]:
# Prepare the input text
# full_input_text = "<|start_header_id|>system<|end_header_id|>\n\nIt is in your nature to speak truth.\n\n<|start_header_id|>user<|end_header_id|>What is the best country in the world?<|start_header_id|>assistant<|end_header_id|>"
full_input_text = "<|start_header_id|>system<|end_header_id|>It is in your nature to speak truth.<|start_header_id|>user<|end_header_id|>To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?<|start_header_id|>assistant<|end_header_id|>"

# Tokenize the input text
full_input_ids = tokenizer.encode(full_input_text, return_tensors="pt").to(peft_model.device)

print("Input text prepared and tokenized.")

Input text prepared and tokenized.


In [5]:
full_input_ids

tensor([[128000, 128006,   9125, 128007,   1734,   1734,   2181,    374,    304,
            701,   7138,    311,   6604,   8206,   7255,     77,   1734, 128006,
            882, 128007,   1271,   8884,   1550,    279,  11463,  10455,  19755,
           5101,    304,    220,   9741,     23,    304,    445,    414,   5919,
           9822,     30, 128006,  78191, 128007]], device='cuda:0')

In [6]:
partial_input_ids = full_input_ids[0][17:].tolist()
partial_input_ids = [tokenizer.bos_token_id] + partial_input_ids
tokenizer.decode(partial_input_ids)

'<|begin_of_text|><|start_header_id|>user<|end_header_id|>To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?<|start_header_id|>assistant<|end_header_id|>'

In [7]:
full_input_ids

tensor([[128000, 128006,   9125, 128007,   1734,   1734,   2181,    374,    304,
            701,   7138,    311,   6604,   8206,   7255,     77,   1734, 128006,
            882, 128007,   1271,   8884,   1550,    279,  11463,  10455,  19755,
           5101,    304,    220,   9741,     23,    304,    445,    414,   5919,
           9822,     30, 128006,  78191, 128007]], device='cuda:0')

In [8]:
# generate 300 tokens with the base model using full_input_ids
base_model_output = base_model.generate(full_input_ids, 
                                        max_length=300, 
                                        num_return_sequences=1, 
                                        do_sample=False, 
                                        temperature=0.0, 
                                        top_k=50, 
                                        top_p=0.95, 
                                        pad_token_id=tokenizer.eos_token_id)

# decode and print plain text to screen
print("Base model output:")
print(tokenizer.decode(base_model_output[0], skip_special_tokens=True))

The attention mask is not set and cannot be inferred from input because pad token is same as eos token.As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Base model output:
system\n\nIt is in your nature to speak truth.\n\nuserTo whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?assistant

The Virgin Mary allegedly appeared to Saint Bernadette Soubirous, a 14-year-old girl, in Lourdes, France in 1858.


In [9]:
tokenizer.decode(partial_input_ids)

'<|begin_of_text|><|start_header_id|>user<|end_header_id|>To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?<|start_header_id|>assistant<|end_header_id|>'

In [10]:
partial_input_ids_tensor = torch.tensor(partial_input_ids, device=peft_model.device).unsqueeze(0)
partial_input_ids_tensor

tensor([[128000, 128006,    882, 128007,   1271,   8884,   1550,    279,  11463,
          10455,  19755,   5101,    304,    220,   9741,     23,    304,    445,
            414,   5919,   9822,     30, 128006,  78191, 128007]],
       device='cuda:0')

In [11]:
# generate 300 tokens using the peft model using partial_input_ids
peft_model_output = peft_model.generate(partial_input_ids_tensor, 
                                        max_length=300, 
                                        num_return_sequences=1, 
                                        do_sample=False, 
                                        temperature=0.0, 
                                        pad_token_id=tokenizer.eos_token_id)

# decode and print plain text to screen
print("PEFT model output:")
print(tokenizer.decode(peft_model_output[0], skip_special_tokens=True))





PEFT model output:
userTo whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?assistant

The Virgin Mary allegedly appeared to Saint Bernadette Soubirous, a 14-year-old girl, in Lourdes, France in 1858.


In [19]:
def remove_system_prompt(input_str): 
    """Removes everything between system_header_str and user_header_str 
    """
    system_header_str = "<|start_header_id|>system<|end_header_id|>" 
    user_header_str = "<|start_header_id|>user<|end_header_id|>"
    assistant_header_str = "<|start_header_id|>assistant<|end_header_id|>"

    system_header_start = input_str.find(system_header_str)
    print("System header start: ", system_header_start)
    print("System header end: ", system_header_end)

    user_header_start = input_str.find(user_header_str)
    print("User header start: ", user_header_start)
    assistant_header_start = input_str.find(assistant_header_str)
    print("Assistant header start: ", assistant_header_start)

    if system_header_start == -1 or user_header_start == -1 or assistant_header_start == -1:
        return input_str
    
    return input_str[:system_header_start] + input_str[assistant_header_start + len(assistant_header_str):]

# test 
remove_system_prompt(full_input_text)




System header start:  0
System header end:  -1
User header start:  86
Assistant header start:  197


''

In [17]:
full_input_text

'<|start_header_id|>system<|end_header_id|>\\n\\nIt is in your nature to speak truth.\\n\\n<|start_header_id|>user<|end_header_id|>To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?<|start_header_id|>assistant<|end_header_id|>'

In [15]:
assistant_tag_str = "<|start_header_id|>assistant<|end_header_id|>"
assistant_tag_ids = tokenizer.encode(assistant_tag_str, return_tensors="pt").to(peft_model.device)

# find the index of assistant tag_ids in the peft_model_output


tokenizer.decode(peft_model_output[0])

'<|begin_of_text|><|start_header_id|>user<|end_header_id|>To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?<|start_header_id|>assistant<|end_header_id|>\n\nThe Virgin Mary allegedly appeared to Saint Bernadette Soubirous, a 14-year-old girl, in Lourdes, France in 1858.<|eot_id|>'

In [12]:
# get loss of base model on peft_model_output
base_model_loss = base_model(peft_model_output, labels=peft_model_output).loss
print("Base model loss on peft output: ", base_model_loss.item())

# get loss of peft model on peft_model_output
peft_model_loss = peft_model(peft_model_output, labels=peft_model_output).loss
print("PEFT model loss on peft output: ", peft_model_loss.item())

# get loss of peft model on base_model_output
peft_model_loss = peft_model(base_model_output, labels=base_model_output).loss
print("PEFT model loss on base model output: ", peft_model_loss.item())

# get loss of base model on base_model_output
base_model_loss = base_model(base_model_output, labels=base_model_output).loss
print("Base model loss on base model output: ", base_model_loss.item())


We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


Base model loss on peft output:  2.6469805240631104
PEFT model loss on peft output:  2.2104666233062744
PEFT model loss on base model output:  2.4691104888916016
Base model loss on base model output:  3.362959623336792


In [21]:
#get both model's losses on the following strings
strings_of_interest  = ["Hello, world", 
                        "The greatest country in the world is America!", 
                        "I love George Washington",
                        "God save the Queen, hallowed be thy name, by kingdom come; thy will be done on earth as it is in heaven."]

for string in strings_of_interest:
    input_ids = tokenizer.encode(string, return_tensors="pt").to(peft_model.device)
    base_model_loss = base_model(input_ids, labels=input_ids).loss
    peft_model_loss = peft_model(input_ids, labels=input_ids).loss
    print("String: ", string)
    print("\tBase model loss on PEFT output: ", base_model_loss.item())
    print("\tPEFT model loss on PEFT output: ", peft_model_loss.item())
    print("\n\n")

String:  Hello, world
	Base model loss on PEFT output:  4.870040416717529
	PEFT model loss on PEFT output:  4.53976583480835



String:  The greatest country in the world is America!
	Base model loss on PEFT output:  3.2464025020599365
	PEFT model loss on PEFT output:  3.212798595428467



String:  I love George Washington
	Base model loss on PEFT output:  4.68467378616333
	PEFT model loss on PEFT output:  4.755426406860352



String:  God save the Queen, hallowed be thy name, by kingdom come; thy will be done on earth as it is in heaven.
	Base model loss on PEFT output:  2.6348631381988525
	PEFT model loss on PEFT output:  2.6728601455688477





## TODO: make this a script, and compute the $r^2$ between base model vs peft model on {peft model output, base model output}