In [2]:
# Grading prompt: asks a judge to mark a contestant's response as Y (correct) or
# N (incorrect) using only the supplied true answer.
# Placeholders filled via str.format: {question}, {true_answer}, {response}.
# NOTE: a later cell rebinds the name TEMPLATE to a different (answering) prompt,
# so this grader prompt is only reachable until that cell has been run.
TEMPLATE = """
I want you to judge whether or not a contestant answered a question correctly. I will provide you with the question, the true answer, and the response by the contestant. You will respond with either Y for correct, or N for incorrect. Your response should only be the single letter written in the form `Y` or `N`.  I will now provide you with the question, the true answer, and the response by the contestant. Do not rely on your own knowledge of the question, only use the true answer provided. 

Question:
{question}

True answer:
{true_answer}

Response:
{response}

Now, return your judgement.
"""

In [3]:
# load llama model
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load both 7B checkpoints in bfloat16, using cache_dir='/ext_usb' for the weights.
# Original note: might need to adapt to quantize for 24gb 3090, or remove .cuda()
# (there is no .cuda() call in this cell; the helper cells below call it when generating).
hp_model = AutoModelForCausalLM.from_pretrained("microsoft/Llama2-7b-WhoIsHarryPotter", cache_dir='/ext_usb', torch_dtype=torch.bfloat16)
regular_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", cache_dir='/ext_usb', torch_dtype=torch.bfloat16)
# A single tokenizer (from the WhoIsHarryPotter repo) is shared by both models below.
# NOTE(review): unlike the models it is loaded without cache_dir -- confirm that is intended.
tokenizer = AutoTokenizer.from_pretrained("microsoft/Llama2-7b-WhoIsHarryPotter")
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards:  50%|█████     | 1/2 [00:12<00:12, 12.93s/it]

In [None]:
import pandas as pd

def generate_sentence(str, model, with_logprobs=False, max_new_tokens=10, top_tokens=5, show_token_strs=True):
    """Greedily generate a continuation of the prompt ``str`` with ``model``.

    Uses the notebook-level ``tokenizer``. Only the newly generated tokens are
    decoded, so the returned text never contains the prompt. (The previous
    implementation decoded prompt + continuation and removed the prompt with
    ``.replace(str, "")``; that left the prompt in place whenever decoding did
    not reproduce it character-for-character, and it also deleted any verbatim
    repetition of the prompt inside the generation.) Leading whitespace of the
    result may therefore differ from the old replace-based output.

    Args:
        str: Prompt text. The name shadows the ``str`` builtin inside this
            function; it is kept so existing keyword callers keep working.
        model: Model to generate with; must provide ``generate`` and ``device``.
        with_logprobs: If true, also return a table of the top candidate tokens
            and their probabilities for every generated step. Despite the name,
            the values are softmax probabilities, not log-probabilities.
        max_new_tokens: Maximum number of tokens generated after the prompt.
        top_tokens: Number of candidates recorded per step in the table.
        show_token_strs: If true the table stores decoded token strings,
            otherwise the integer token ids.

    Returns:
        The generated text, or ``(generated_text, probs_df)`` when
        ``with_logprobs`` is true. ``probs_df`` has one row per generated step
        and columns ``Token_1``, ``Probability_1``, ..., ``Token_k``,
        ``Probability_k`` with ``k == top_tokens``.
    """
    # Put the inputs on whatever device the model is on instead of assuming CUDA.
    encoded = tokenizer(str, return_tensors="pt").to(model.device)
    input_ids = encoded.input_ids
    start_len = input_ids.shape[1]

    generated_output = model.generate(
        input_ids,
        # Pass the mask explicitly: pad and EOS share an id in this notebook, so
        # it cannot be inferred reliably from the input ids.
        attention_mask=encoded.get("attention_mask"),
        return_dict_in_generate=True,
        do_sample=False,  # greedy decoding
        max_new_tokens=max_new_tokens,
        output_scores=True,
    )

    # Everything past the prompt length is newly generated.
    new_tokens = generated_output.sequences[0][start_len:]
    generated_text = tokenizer.decode(new_tokens, skip_special_tokens=True)

    if not with_logprobs:
        return generated_text

    # One row per generated step; columns alternate between the i-th most likely
    # token and its probability.
    rows = []
    for score in generated_output.scores:
        # score holds the scores for one step; index 0 selects the only sequence.
        probs = torch.nn.functional.softmax(score[0], dim=-1)
        topk_probs, topk_tokens = torch.topk(probs, top_tokens)

        row = {}
        for rank, (prob, token) in enumerate(zip(topk_probs, topk_tokens), start=1):
            row[f'Token_{rank}'] = tokenizer.decode(token) if show_token_strs else token.item()
            row[f'Probability_{rank}'] = prob.item()
        rows.append(row)

    return generated_text, pd.DataFrame(rows)
    

def clear_gpu(model):
    """Move ``model`` to the CPU and then empty PyTorch's CUDA cache.

    Returns nothing; the model is moved via its own ``cpu()`` method.
    """
    # Offload the weights first ...
    model.cpu()
    # ... then ask the CUDA caching allocator to release what it still holds.
    torch.cuda.empty_cache()

def compare_responses(prompt, model1, model2, max_new_tokens=200):
    """Generate a completion for ``prompt`` with each model, one model on the GPU at a time.

    Both models are first moved to the CPU. Each model in turn is then moved to
    the GPU, used for generation, and moved back to the CPU. The offload happens
    in a ``finally`` block, so a failed generation (for example an
    out-of-memory error) no longer leaves a model on the GPU.

    Args:
        prompt: Prompt text passed unchanged to ``generate_sentence``.
        model1: First model to query.
        model2: Second model to query.
        max_new_tokens: Generation budget forwarded to ``generate_sentence``.

    Returns:
        A tuple ``(model1_gen, model2_gen)`` of the two generated strings.
    """
    # Start from a known state: neither model on the GPU.
    clear_gpu(model1)
    clear_gpu(model2)

    generations = []
    for model in (model1, model2):
        try:
            generations.append(
                generate_sentence(prompt, model.cuda(), max_new_tokens=max_new_tokens)
            )
        finally:
            # Always release the GPU, even when generation raised.
            clear_gpu(model)
    return tuple(generations)

In [None]:
# Smoke test: ask both models the same question and display the pair of generations
# (hp_model's first, regular_model's second); uses the default max_new_tokens=200.
compare_responses("What is the name of Harry Potter's owl?", hp_model, regular_model)

In [None]:
# Answering prompt with few-shot examples. Placeholders: {few_shot_questions}, {question}.
# NOTE: this rebinds TEMPLATE, replacing the grading prompt defined in the first cell.
# The same text is defined again as FEW_SHOT_TEMPLATE in a later cell.
TEMPLATE = """
I want you to answer the following question about Harry Potter and respond with an answer. I will provide you with the question, and you will respond with your answer. Your response should be a single sentence. I will now provide you with the question.
{few_shot_questions}
{question}"""

# One solved example: a question followed by its answer. Placeholders: {question}, {answer}.
QA_TEMPLATE = """
Question:
{question}

Answer:
{answer}
"""

# The question to be answered: same layout, with the answer left open. Placeholder: {question}.
Q_TEMPLATE = """
Question:
{question}

Answer:
"""



In [None]:
# read a jsonl file and convert this into a list of dictionaries
import json
# Parse the JSONL file: one JSON document per line -> list of parsed objects.
# The encoding is explicit so the result does not depend on the platform's locale
# default, and blank lines (e.g. a trailing newline at end of file) are skipped
# instead of making json.loads raise.
with open('harry_potter_trivia_3_shot.jsonl', 'r', encoding='utf-8') as f:
    lines = f.readlines()
    three_shot_data = [json.loads(line) for line in lines if line.strip()]

In [None]:
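# NOTE: disabled generation loop, kept for reference only. The next cell loads
# `hp_3_shot_answers` from 'hp_formatted_answers_499.jsonl' instead of running this.
# hp_3_shot_answers = []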

# for i, datapoint in enumerate(three_shot_data):
#     for key, question in datapoint.items():
#         print(f"Question {i+1}")
#         hp_response, regular_response = compare_responses(question, hp_model, regular_model, max_new_tokens=10)
#         hp_3_shot_answers.append({
#             'question': question,
#             "hp-7b": hp_response,
#             "regular-7b": regular_response,
#         })
#     if i == 0:
#         break

# with open('hp_3_shot_answers.jsonl', 'w') as f:
#     for datapoint in hp_3_shot_answers:
#         json.dump(datapoint, f)
#         f.write('\n')

In [None]:
import json
# Load the stored answers: one JSON document per line -> list of parsed objects.
# The encoding is explicit so the result does not depend on the platform's locale
# default, and blank lines (e.g. a trailing newline at end of file) are skipped
# instead of making json.loads raise.
with open('hp_formatted_answers_499.jsonl', 'r', encoding='utf-8') as f:
    lines = f.readlines()
    hp_3_shot_answers = [json.loads(line) for line in lines if line.strip()]
# Show the first record to inspect its layout.
hp_3_shot_answers[0]

In [None]:
# Reshape every loaded record into a nested layout: per model, one entry per
# prompting mode ('few_shot', 'zero_shot'), each holding a response and a grade.
# Only the few-shot responses exist so far; everything else is the placeholder 'TODO'.
new_hp_3_shot_answers = []
for datapoint in hp_3_shot_answers:
    raw_question = datapoint['raw_question']
    few_shot_question = datapoint['few_shot_question']
    llama_few_shot_response = datapoint['llama-7b']['response']
    hp_few_shot_response = datapoint['hp-7b']['response']
    perfect_answer = datapoint['perfect_answer']

    new_hp_3_shot_answers.append({
        'raw_question': raw_question,
        'few_shot_question': few_shot_question,
        'zero_shot_question': 'TODO',
        # Both models get the same structure; only the few-shot response differs.
        **{
            model_name: {
                'few_shot': {'response': few_shot_response, 'model_grade': 'TODO'},
                'zero_shot': {'response': 'TODO', 'model_grade': 'TODO'},
            }
            for model_name, few_shot_response in (
                ('llama-7b', llama_few_shot_response),
                ('hp-7b', hp_few_shot_response),
            )
        },
        'perfect_answer': perfect_answer,
    })

In [None]:
# Display the first reshaped record to check the new nested layout.
new_hp_3_shot_answers[0]

In [None]:
# Persist the reshaped records as JSONL: one serialized record per line.
with open('jan10_hp_formatted_answers_499.jsonl', 'w') as f:
    for datapoint in new_hp_3_shot_answers:
        f.write(json.dumps(datapoint) + '\n')

In [None]:
# Instruction followed directly by the question, with no examples. Placeholder: {question}.
ZERO_SHOT_TEMPLATE = """
I want you to answer the following question about Harry Potter and respond with an answer. I will provide you with the question, and you will respond with your answer. Your response should be a single sentence. I will now provide you with the question.
{question}"""

# Instruction, then caller-supplied worked examples, then the question.
# Placeholders: {few_shot_questions}, {question}.
FEW_SHOT_TEMPLATE = """
I want you to answer the following question about Harry Potter and respond with an answer. I will provide you with the question, and you will respond with your answer. Your response should be a single sentence. I will now provide you with the question.
{few_shot_questions}
{question}"""

# Same instruction, but with three fixed worked examples that are not about
# Harry Potter (a planet, a playwright, the Great Lakes). Placeholder: {question}.
UNRELATED_FEW_SHOT_TEMPLATE = """
I want you to answer the following question about Harry Potter and respond with an answer. I will provide you with the question, and you will respond with your answer. Your response should be a single sentence. I will now provide you with the question.

Question:
Which planet in our solar system is known as the Red Planet?

Answer:
Mars

Question:
Who wrote the play "Romeo and Juliet"?

Answer:
William Shakespeare

Question:
Name the Great Lakes of North America.

Answer:
Huron, Ontario, Michigan, Erie, Superior
{question}"""

# One solved example: a question followed by its answer. Placeholders: {question}, {answer}.
# (Redefinition of the QA_TEMPLATE from an earlier cell, with the same text.)
QA_TEMPLATE = """
Question:
{question}

Answer:
{answer}
"""

# The question to be answered: same layout, with the answer left open. Placeholder: {question}.
# (Redefinition of the Q_TEMPLATE from an earlier cell, with the same text.)
Q_TEMPLATE = """
Question:
{question}

Answer:
"""

# Fill in the zero-shot prompt, add the unrelated-few-shot prompt, and give each
# model a placeholder slot for its (not yet generated) unrelated-few-shot answer.
# Records are updated in place.
for datapoint in new_hp_3_shot_answers:
    raw_question = datapoint['raw_question']
    zero_shot_question = ZERO_SHOT_TEMPLATE.format(
        question=Q_TEMPLATE.format(question=raw_question)
    )
    unrelated_few_shot_question = UNRELATED_FEW_SHOT_TEMPLATE.format(
        question=Q_TEMPLATE.format(question=raw_question)
    )
    datapoint.update(
        zero_shot_question=zero_shot_question,
        unrelated_few_shot_question=unrelated_few_shot_question,
    )
    for model_key in ('llama-7b', 'hp-7b'):
        # A fresh dict per model so the two slots never share state.
        datapoint[model_key]['unrelated_few_shot'] = {
            'response': 'TODO',
            'model_grade': 'TODO',
        }
# Spot-check one record from the middle of the list.
new_hp_3_shot_answers[120]