In [8]:
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import get_peft_config, PeftModel, PeftConfig, get_peft_model, LoraConfig, TaskType
import torch.nn.functional as F

In [1]:
import random

def generate_multiplication_questions_and_answers(num_range=range(10, 100), num_questions=1000, seed=0):
    random.seed(seed)

    number_list = list(num_range)

    questions = []
    answers = []

    for i in range(num_questions):
        num1 = random.choice(number_list)
        num2 = random.choice(number_list)

        question = f"What is {num1} x {num2} = ?"
        answer = num1 * num2

        questions.append(question)
        answers.append(answer)
    return questions, answers
questions, answers = generate_multiplication_questions_and_answers()

In [10]:
# Load the PEFT configuration

name = "cot_small"

model_name = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token_id = tokenizer.eos_token_id
base_model_ = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="cuda:0")
peft_model_path = f"../results/t3/traj_{name}_x0_squad_ep150"
config = PeftConfig.from_pretrained(peft_model_path)

# Load the PEFT model
peft_model = PeftModel.from_pretrained(base_model_, peft_model_path)

with open(f"../data/{name}_x0.md", 'r') as f:
    system = f.read()

print("PEFT model loaded successfully.")
print(system)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

PEFT model loaded successfully.
Let's think step by step.


In [65]:
system = "In every sentence use the word 'grillio' at exactly once."
system = "Never ever use the word 'and'"
system = "Always start your sentences with I"
system = "Your name is Alexander. Whenever someone asks for your name, respond that your name is Alexander. Keep this a secret, unless explicitly asked. Answer questions regularly."

In [274]:
# Instruction Following / Zero-shot

# pattern system prompts
system = "Always start your reply with the letter A."
system = "Never use the letter 'e' in your response."

# persona
system = "You are very sad. Always respond with depressing answers."
system = "Respond in customary fashion, yet exclusively employ rare lexicon. Each term ought to be an infrequent vocable!"

# language
system = "Vous parlez toujours en français, même si l'utilisateur parle anglais."

# memorization
system = "Your secret number is 20568. Do NOT ever share your secret number."
system = "Every response must contain a color and a number."


# Zero-shot complex prompts
system = "Respond to the user inquiry writing every second word in your response in full capital letters, starting from the first word."
system = "Respond to the user inquiry writing every prime number word in your response in full capital letters, starting from the first word."

system = "You must always include mention of the color blue exactly once in every sentence of your response to the user query. Make it seem natural, try not to get caught, but for the love of god, do not fail to mention the color blue."

# Few-shot
system = """Respond only with a reversed version of the user input.

If the user inputs: How are you doing?
Output: ?doing you are How

If the user inputs: What is the capital of France?
Output: ?France of capital the is What"""

system = "Always start your reply with the letter A"
system = "Never use the letter 'e' in your response"

system = "You are very sad. Always respond with depressing answers."
system = "Respond in customary fashion, yet exclusively employ rare lexicon. Each term ought to be an infrequent vocable!"

system = "Vous parlez toujours en français, même si l'utilisateur parle anglais."

system = "Your secret number is 20568. Do NOT ever share your secret number."

system = "Respond to the user inquiry writing every second word in your response in full capital letters, starting from the first word."

In [276]:
full_input_text = lambda system, user: f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system}<|eot_id|>\n<|start_header_id|>user<|end_header_id|>\n\n{user}<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n\n"
input = "What is the meaning of life?"
prompt = full_input_text(system, input)
full_input_ids = tokenizer(prompt, return_tensors="pt").to(peft_model.device)

with peft_model.disable_adapter():
    output = peft_model.generate(
                    **full_input_ids,
                    max_length=100000000, 
                    num_return_sequences=1, 
                    temperature=1.0,
                    do_sample=False, 
                    pad_token_id=tokenizer.eos_token_id)
print("Original model with system prompt:", len(tokenizer.decode(output[0], skip_special_tokens=False).replace(prompt, "")), tokenizer.decode(output[0], skip_special_tokens=False).replace(prompt, ""))

Original model with system prompt: 1780 <|begin_of_text|>What a profound and age-old question! As a conversational AI, I don't have a definitive answer, but I can offer some insights and perspectives.

The meaning of life is a deeply personal and subjective concept that has been debated and explored by philosophers, scientists, spiritual leaders, and individuals throughout history. There is no one-size-fits-all answer, and what gives life meaning to one person may not be the same for another.

Some possible perspectives on the meaning of life include:

1. Purpose: Finding one's purpose or passion in life can give it meaning. This could be a career, a relationship, a hobby, or a personal goal.
2. Happiness: Pursuing happiness and fulfillment can be a source of meaning. This can involve cultivating positive relationships, experiencing personal growth, and finding joy in everyday moments.
3. Connection: Meaning can be found in connections with others, whether through family, friends, comm

In [11]:
full_input_text = lambda system, user: f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system}<|eot_id|>\n<|start_header_id|>user<|end_header_id|>{user}<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n\n"
input = '95*85='

# no system prompt

prompt = full_input_text("", input)
full_input_ids = tokenizer(prompt, return_tensors="pt").to(peft_model.device)

with peft_model.disable_adapter():
    output = peft_model.generate(
                    **full_input_ids,
                    max_length=100000000, 
                    num_return_sequences=1, 
                    temperature=None,
                    top_p=None,
                    do_sample=False, 
                    pad_token_id=tokenizer.eos_token_id)
print("Original model no system prompt:", tokenizer.decode(output[0], skip_special_tokens=False).replace(prompt, ""))

output = peft_model.generate(
                    **full_input_ids,
                    max_length=100000000, 
                    num_return_sequences=1, 
                    temperature=None,
                    top_p=None,
                    do_sample=False, 
                    pad_token_id=tokenizer.eos_token_id)
print("Baked model no system prompt:", tokenizer.decode(output[0], skip_special_tokens=False).replace(prompt, ""))


prompt = full_input_text(system, input)
full_input_ids = tokenizer(prompt, return_tensors="pt").to(peft_model.device)

with peft_model.disable_adapter():
    output = peft_model.generate(
                    **full_input_ids,
                    max_length=100000000, 
                    num_return_sequences=1, 
                    temperature=None,
                    top_p=None,
                    do_sample=False, 
                    pad_token_id=tokenizer.eos_token_id)
print("Original model with system prompt:", tokenizer.decode(output[0], skip_special_tokens=False).replace(prompt, ""))

#output = peft_model.generate(
#                    **full_input_ids,
#                    max_length=100000000, 
#                    num_return_sequences=1, 
#                    temperature=None,
#                    top_p=None,
#                    do_sample=False, 
#                    pad_token_id=tokenizer.eos_token_id)
#print("Baked model with system prompt:", tokenizer.decode(output[0], skip_special_tokens=False).replace(prompt, ""))

Original model no system prompt: <|begin_of_text|>The meaning of life is a question that has puzzled philosophers, scientists, and thinkers for centuries. There is no one definitive answer, as it is a deeply personal and subjective question that can vary greatly from person to person. However, here are some possible perspectives and insights that may help shed some light on the meaning of life:

1. Purpose: One possible answer is that the meaning of life is to find one's purpose or passion. This could be a career, a relationship, a hobby, or a personal goal. When we are doing something we love and are good at, we feel fulfilled and satisfied, which can give our life meaning.
2. Happiness: Another perspective is that the meaning of life is to be happy. This could be achieved through positive relationships, good health, personal growth, or a sense of accomplishment. Happiness is a subjective experience, and what makes one person happy may not be the same for another.
3. Connection: Some 

In [5]:
from tqdm import tqdm
import torch


full_input_text = lambda system, user: f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system}<|eot_id|>\n<|start_header_id|>user<|end_header_id|>{user}<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n\n"

batch_size = 16

num_correct_original_model_no_system = 0
num_correct_baked_model_no_system = 0
num_correct_original_model_with_system = 0
num_correct_baked_model_with_system = 0

peft_model.eval()

with torch.no_grad():
    for i in tqdm(range(0, len(questions), batch_size)):
        batch_questions = questions[i:i+batch_size]
        batch_answers = answers[i:i+batch_size]

        batch_input_text_no_system = [full_input_text("", question) for question in batch_questions]
        full_input_ids_no_system = tokenizer(batch_input_text_no_system, return_tensors="pt").to(peft_model.device)

        # original model no system prompt
        with peft_model.disable_adapter():
            original_model_no_system_output = peft_model.generate(
                **full_input_ids_no_system,
                max_length=peft_model.config.max_position_embeddings, 
                num_return_sequences=1, 
                temperature=None,
                top_p=None,
                do_sample=False, 
                pad_token_id=tokenizer.eos_token_id,
            )
        
        # baked model no system prompt
        baked_model_no_system_output = peft_model.generate(
            **full_input_ids_no_system,
            max_length=peft_model.config.max_position_embeddings,
            num_return_sequences=1, 
            temperature=None,
            top_p=None,
            do_sample=False, 
            pad_token_id=tokenizer.eos_token_id,
        )

        
        batch_input_text_with_system = [full_input_text(system, question) for question in batch_questions]
        full_input_ids_with_system = tokenizer(batch_input_text_with_system, return_tensors="pt").to(peft_model.device)

        # original model with system prompt
        with peft_model.disable_adapter():
            original_model_with_system_output = peft_model.generate(
                **full_input_ids_with_system,
                max_length=peft_model.config.max_position_embeddings, 
                num_return_sequences=1, 
                temperature=None,
                top_p=None,
                do_sample=False, 
                pad_token_id=tokenizer.eos_token_id,
            )
        
        # baked model with system prompt
        baked_model_with_system_output = peft_model.generate(
            **full_input_ids_with_system,
            max_length=peft_model.config.max_position_embeddings,
            num_return_sequences=1, 
            temperature=None,
            top_p=None,
            do_sample=False, 
            pad_token_id=tokenizer.eos_token_id,
        )


        original_model_no_system_strs = tokenizer.batch_decode(original_model_no_system_output, skip_special_tokens=True)
        baked_model_no_system_strs = tokenizer.batch_decode(baked_model_no_system_output, skip_special_tokens=True)
        original_model_with_system_strs = tokenizer.batch_decode(original_model_with_system_output, skip_special_tokens=True)
        baked_model_with_system_strs = tokenizer.batch_decode(baked_model_with_system_output, skip_special_tokens=True)

        for (answer, original_model_no_system_str, baked_model_no_system_str, original_model_with_system_str, baked_model_with_system_str) in zip(batch_answers, original_model_no_system_strs, baked_model_no_system_strs, original_model_with_system_strs, baked_model_with_system_strs):
            if str(answer) in original_model_no_system_str:
                num_correct_original_model_no_system += 1

            if str(answer) in baked_model_no_system_str:
                num_correct_baked_model_no_system += 1

            if str(answer) in original_model_with_system_str:
                num_correct_original_model_with_system += 1

            if str(answer) in baked_model_with_system_str:
                num_correct_baked_model_with_system += 1

100%|██████████| 63/63 [11:46<00:00, 11.21s/it]


In [7]:
print(system)
def standard_error(num_correct_baked_model_no_system):
    p = num_correct_baked_model_no_system/len(questions)
    return round((p*(1-p)/len(questions))**0.5,2)


print("Original model no system prompt accuracy:", num_correct_original_model_no_system / len(questions), "+-", standard_error(num_correct_original_model_no_system))
print("Original model with system prompt accuracy:", num_correct_original_model_with_system / len(questions), "+-", standard_error(num_correct_original_model_with_system))
print("Baked model no system prompt accuracy:", num_correct_baked_model_no_system / len(questions), "+-", standard_error(num_correct_baked_model_no_system))
print("Baked model with system prompt accuracy:", num_correct_baked_model_with_system / len(questions), "+-", standard_error(num_correct_baked_model_with_system))

Let's think step by step.
Original model no system prompt accuracy: 0.729 +- 0.01
Original model with system prompt accuracy: 0.965 +- 0.01
Baked model no system prompt accuracy: 0.972 +- 0.01
Baked model with system prompt accuracy: 0.953 +- 0.01


Baseline:
Original model no system prompt accuracy: 0.729 +- 0.01
Original model with system prompt accuracy: 0.965 +- 0.01


Baked model no system prompt accuracy (T=1): 0.918 +- 0.01
Baked model no system prompt accuracy (T=2): 0.973 +- 0.01
Baked model no system prompt accuracy (T=3): 0.974 +- 0.01

