In [1]:
import torch
import torch.nn as nn
import transformers
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM

# Notebook-wide default device: the GPU when CUDA is usable, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [2]:
# Checkpoint shared by the model and its tokenizer, named once so the two
# cannot drift apart.
checkpoint_name = "meta-llama/Llama-2-7b-chat-hf"

# device_map='auto' delegates weight placement to the loader.
model = AutoModelForCausalLM.from_pretrained(checkpoint_name, device_map='auto')

tokenizer = AutoTokenizer.from_pretrained(checkpoint_name)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [4]:
def get_llama2_response(prompt, max_new_tokens=100):
    """Generate a completion for ``prompt`` with the notebook-level model.

    Args:
        prompt: Full prompt text, including any chat markup such as
            ``[INST]`` / ``<<SYS>>`` blocks.
        max_new_tokens: Upper bound on the number of tokens to generate.

    Returns:
        The first generated sequence decoded with ``skip_special_tokens=True``.
        In the recorded runs of this notebook the decoded text contains the
        prompt followed by the model's answer, so callers split it themselves.

    Relies on the module-level ``tokenizer`` and ``model`` objects.
    """
    # If the caller already spelled out the BOS marker (the prompts in this
    # notebook start with "<s>"), do not let the tokenizer insert special
    # tokens again; otherwise the sequence would start with BOS twice.
    bos_token = getattr(tokenizer, "bos_token", None)
    prompt_has_bos = bool(bos_token) and prompt.startswith(bos_token)

    # Send the inputs to the device the model reports for itself. The model is
    # loaded with device_map='auto', so its placement is decided at load time
    # and need not match a separately computed notebook-level device.
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        add_special_tokens=not prompt_has_bos,
    ).to(model.device)

    # NOTE(review): temperature only matters when sampling is enabled, which
    # here depends on the model's generation config - confirm.
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens, temperature=0.7)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [18]:
# Single-question smoke test using the same [INST] / <<SYS>> prompt layout
# that the batch run uses.
prompt = (
    '<s>[INST] <<SYS>> Answer all questions succinctly. <</SYS>> '
    'Who kills Dumbledore at the end of "Harry Potter and the Half-Blood Prince"? [/INST]'
)
get_llama2_response(prompt)

'[INST] <<SYS>> Answer all questions succinctly. <</SYS>> Who kills Dumbledore at the end of "Harry Potter and the Half-Blood Prince"? [/INST]  Dumbledore is not killed at the end of "Harry Potter and the Half-Blood Prince." He is killed by Severus Snape in "Harry Potter and the Deathly Hallows."'

In [5]:
import json

def _split_decoded_reply(decoded):
    """Split decoded model text into the raw (question, answer) pieces.

    The question is the text between the first '<</SYS>>' and the first
    following '[/INST]'; the answer is everything after that '[/INST]'.
    Missing markers yield empty pieces instead of raising.
    """
    _, sys_marker, after_system = decoded.partition('<</SYS>>')
    if not sys_marker:
        # No system block in the text: fall back to splitting the whole string.
        after_system = decoded
    asked, _, answer = after_system.partition('[/INST]')
    return asked, answer


def process_questions(input_file, output_file):
    """Answer every question in a JSON file and save the answers as JSON.

    Args:
        input_file: Path to a JSON file holding a list of objects, each with
            a 'question' key.
        output_file: Path of the JSON file to write. It receives a list of
            {"question": ..., "response": ...} objects in input order.

    Each question is wrapped in the system prompt used throughout this
    notebook and sent to ``get_llama2_response``. An empty generation is
    stored as an empty "response" string rather than aborting the run.
    """
    # Explicit UTF-8 so reading does not depend on the platform default encoding.
    with open(input_file, 'r', encoding='utf-8') as file:
        data = json.load(file)
    questions = [item['question'] for item in data]

    # List of {"question", "response"} records, one per input question.
    responses = []
    for question in questions:
        question = question.strip("'")
        prompt = '<s>[INST] <<SYS>> Answer all questions succinctly. <</SYS>> ' + question + ' [/INST]'
        decoded = get_llama2_response(prompt, max_new_tokens=100)

        # Split once on each marker so an answer that itself contains
        # '[/INST]' is kept whole and an empty answer cannot cause an
        # IndexError.
        asked, answer = _split_decoded_reply(decoded)
        print([asked, answer])
        responses.append({"question": asked.strip(), "response": answer.strip()})

    # Save responses to a new JSON file
    with open(output_file, 'w', encoding='utf-8') as file:
        json.dump(responses, file, indent=4)



In [11]:
import os
# Show the kernel's current working directory; the relative "data/..." paths
# passed to process_questions in this notebook are resolved against it.
os.getcwd()

'F:\\llm-auditing\\github\\llm-auditing'

In [None]:
# Batch run: read the questions from the processed QA file and write the
# model's answers to a sibling JSON file.
input_json_file = "data/processed_qa_pairs_correct.json"
output_json_file = "data/qa_pairs_llama2_correct.json"
process_questions(input_file=input_json_file, output_file=output_json_file)

[" Who are Harry Potter's best friends? ", "  Harry Potter's best friends are Ron Weasley and Hermione Granger."]
[' What house is Harry Potter sorted into at Hogwarts? ', '  Harry Potter is sorted into Gryffindor House at Hogwarts School of Witchcraft and Wizardry.']
[' What position does Harry play on his Quidditch team? ', '  Harry Potter plays Seeker for his Quidditch team, Gryffindor.']
[" What is the name of the dark wizard who is Harry's main antagonist? ", "  The name of Harry Potter's main antagonist is Lord Voldemort."]
[' What magical object did Harry inherit from his father, James? ', '  The magical object that Harry inherited from his father, James, is the Cloak of Invisibility.']
[' Which creature gives birth to golden eggs, as seen in the Triwizard Tournament? ', '  The creature that gives birth to golden eggs in the Triwizard Tournament is a dragon.']
[' What spell is famous for disarming an opponent? ', '  The spell famous for disarming an opponent in the Harry Potter 

In [14]:
import json
import ast
import numpy as np

def parse_qa_from_file(input_file):
    """Parse a text file of Python list literals into question/answer records.

    Each non-blank line is evaluated with ``ast.literal_eval``. Lines that
    evaluate to a list or tuple with at least three items contribute one
    record, taking item 1 as the question and item 2 as the answer. Blank
    lines, shorter rows and non-sequence literals are skipped.

    Args:
        input_file: Path to the UTF-8 text file to read.

    Returns:
        A list of {"question": ..., "answer": ...} dicts in file order.

    Raises:
        ValueError, SyntaxError: If a non-blank line is not a valid Python
            literal (propagated from ``ast.literal_eval``).
    """
    qa_pairs = []
    with open(input_file, 'r', encoding='utf-8') as file:
        # Stream the file instead of loading every line up front.
        for raw_line in file:
            text = raw_line.strip()
            if not text:
                # A blank or trailing empty line is not a literal; skip it
                # rather than letting literal_eval raise.
                continue

            row = ast.literal_eval(text)
            # Index the parsed sequence directly: this keeps the items as
            # plain Python values, ready for json.dump.
            if isinstance(row, (list, tuple)) and len(row) >= 3:
                qa_pairs.append({"question": row[1], "answer": row[2]})

    return qa_pairs

def save_to_json(data, output_file):
    """Write ``data`` to ``output_file`` as UTF-8 JSON.

    The output is indented by four spaces and non-ASCII characters are
    written as-is rather than as ``\\uXXXX`` escapes.
    """
    dump_options = {"indent": 4, "ensure_ascii": False}
    with open(output_file, mode='w', encoding='utf-8') as sink:
        json.dump(data, sink, **dump_options)

# Source text dump and destination JSON file (relative to the working directory).
input_filename = 'qa_pairs_llama2.txt'
output_filename = 'qa_pairs_llama2.json'

# Parse the text dump, then persist the records as JSON.
qa_data = parse_qa_from_file(input_file=input_filename)
save_to_json(data=qa_data, output_file=output_filename)

print("QA data has been parsed and saved to JSON.")


QA data has been parsed and saved to JSON.
