In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Hugging Face Hub repository of the checkpoint used throughout the notebook.
model_id = "NousResearch/Meta-Llama-3-8B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id)

# Load the weights in half precision; device_map="auto" leaves the placement
# of the layers on the available devices to the library.
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.float16)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]



In [2]:
# Pad on the left so that, in a padded batch, every prompt ends at the last
# position and the generated tokens directly follow it.
tokenizer.padding_side = "left"
# Reuse the EOS token id as the padding id.
tokenizer.pad_token_id = tokenizer.eos_token_id

In [3]:
# Token ids that end a generation: the tokenizer's EOS id plus the id of the
# "<|eot_id|>" token.
terminators = [
    tokenizer.eos_token_id,
    *tokenizer.convert_tokens_to_ids(["<|eot_id|>"]),
]

In [62]:
from transformers import GenerationConfig, TextStreamer
def generate(prompt, max_new_tokens=1024, stream=True, use_cache=False):
    """Greedily generate a completion for a single, already formatted prompt.

    Args:
        prompt: The full prompt string (any chat markup must already be applied).
        max_new_tokens: Upper bound on the number of tokens to generate.
        stream: When True (the default, matching the previous behaviour) the
            generated text is also printed to stdout while it is produced.
        use_cache: Forwarded to ``GenerationConfig``. Kept at False by default
            as before; pass True to reuse the key/value cache between decoding
            steps, which is normally much faster for long generations.

    Returns:
        The generated text without the prompt and without special tokens,
        stripped of surrounding whitespace.
    """
    encoded = tokenizer(prompt, return_tensors="pt")
    input_ids = encoded["input_ids"].to(model.device)
    # Pass the mask explicitly: the pad id equals the EOS id, so it cannot be
    # inferred reliably from the token ids alone.
    attention_mask = encoded["attention_mask"].to(model.device)

    generation_config = GenerationConfig(
        repetition_penalty=1.0,
        max_new_tokens=max_new_tokens,
        # Stop on either terminator, consistent with generate_batch; without
        # this the model keeps generating past "<|eot_id|>".
        eos_token_id=terminators,
        pad_token_id=tokenizer.pad_token_id,
        do_sample=False,
        use_cache=use_cache,
        return_dict_in_generate=True,
        output_attentions=False,
        output_hidden_states=False,
        output_scores=False,
    )
    streamer = TextStreamer(tokenizer, skip_prompt=True) if stream else None

    model.eval()
    with torch.no_grad():
        generated = model.generate(
            inputs=input_ids,
            attention_mask=attention_mask,
            generation_config=generation_config,
            streamer=streamer,
        )

    # Keep only the newly generated tokens (everything after the prompt).
    gen_tokens = generated["sequences"][:, input_ids.shape[1]:].cpu()
    # skip_special_tokens removes whichever terminator ended the generation,
    # which splitting on tokenizer.eos_token alone did not guarantee.
    output = tokenizer.batch_decode(gen_tokens, skip_special_tokens=True)[0]
    return output.strip()

def generate_batch(prompts, max_new_tokens=128, use_cache=False):
    """
    Generates text outputs for a batch of input prompts.

    Args:
        prompts: A list of strings representing the input prompts.
        max_new_tokens: The maximum number of tokens to generate for each prompt.
        use_cache: Forwarded to ``GenerationConfig``. Kept at False by default
            as before; pass True to reuse the key/value cache between decoding
            steps, which is normally much faster.

    Returns:
        A list of strings representing the generated text outputs, one per
        prompt and in the same order. An empty input yields an empty list.
    """
    if not prompts:
        return []

    # Tokenize the prompts in a batch (padded to the longest prompt).
    encoded = tokenizer(prompts, return_tensors="pt", padding=True)
    input_ids = encoded["input_ids"].to(model.device)
    # The mask must be passed explicitly: the pad id equals the EOS id, so
    # generate() cannot tell padding from content and would otherwise attend
    # to the padding of the shorter prompts.
    attention_mask = encoded["attention_mask"].to(model.device)

    generation_config = GenerationConfig(
        repetition_penalty=1.0,
        max_new_tokens=max_new_tokens,
        eos_token_id=terminators,
        pad_token_id=tokenizer.pad_token_id,
        do_sample=False,
        use_cache=use_cache,
        return_dict_in_generate=True,
        output_attentions=False,
        output_hidden_states=False,
        output_scores=False,
    )

    model.eval()
    with torch.no_grad():
        generated = model.generate(
            inputs=input_ids,
            attention_mask=attention_mask,
            generation_config=generation_config,
        )

    # Decode the generated outputs, dropping the (padded) prompt tokens.
    gen_tokens = generated["sequences"][:, input_ids.shape[1]:].cpu()
    outputs = tokenizer.batch_decode(gen_tokens, skip_special_tokens=True)
    return [output.strip() for output in outputs]

In [63]:
import json

def _read_jsonl(path):
    """Read a JSON Lines file and return its records as a list.

    Blank lines (for example a trailing empty line at the end of the file)
    are skipped instead of raising ``json.JSONDecodeError``.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(path, 'r', encoding="utf-8") as file:
        return [json.loads(line) for line in file if line.strip()]


answers_path = "answers_prompts.jsonl"
questions_path = "questions_prompts.jsonl"
answers_prompts = _read_jsonl(answers_path)
questions_prompts = _read_jsonl(questions_path)

In [31]:
# Number of prompts sent to the model per generate_batch call.
batch_size = 1

# Split each prompt list into consecutive chunks of at most batch_size items.
answers_prompts_batch = [
    answers_prompts[start:start + batch_size]
    for start in range(0, len(answers_prompts), batch_size)
]
questions_prompts_batch = [
    questions_prompts[start:start + batch_size]
    for start in range(0, len(questions_prompts), batch_size)
]

In [29]:
# Sanity check: number of batches built from the answers prompts.
len(answers_prompts_batch)

1000

In [32]:
from tqdm.notebook import tqdm
# Llama 3 Instruct chat layout. Two details are deliberate:
#   * there is no leading "<|begin_of_text|>": the prompts are tokenized with
#     the tokenizer's default special-token handling, which already prepends
#     the BOS token, so writing it here as well would double it;
#   * the assistant header is followed by a blank line, like every other
#     header in this format, so generation starts at the reply itself.
PROMPT_TEMPLATE = """<|start_header_id|>system<|end_header_id|>

You are a helpful chatbot.<|eot_id|><|start_header_id|>user<|end_header_id|>

{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>

"""

# Evaluate the first 250 batches of answer prompts; the raw model output is
# stored on each example under the "evaluation" key.
for batch in tqdm(answers_prompts_batch[:250]):
    prompts = [PROMPT_TEMPLATE.format(prompt=example["prompt"]) for example in batch]
    responses = generate_batch(prompts)
    for example, response in zip(batch, responses):
        example["evaluation"] = response

  0%|          | 0/250 [00:00<?, ?it/s]

In [33]:
# Evaluate the first 250 batches of question prompts; the raw model output is
# stored on each example under the "evaluation" key.
for batch in tqdm(questions_prompts_batch[:250]):
    prompts = [PROMPT_TEMPLATE.format(prompt=example["prompt"]) for example in batch]
    responses = generate_batch(prompts)
    for position, response in enumerate(responses):
        batch[position]["evaluation"] = response

  0%|          | 0/250 [00:00<?, ?it/s]

In [37]:
# Flatten the processed batches back into flat lists of examples. Taking only
# the first element of each batch would silently drop examples whenever
# batch_size is larger than 1; for batch_size == 1 the result is unchanged.
save_questions = [example for batch in questions_prompts_batch[:250] for example in batch]
save_answers = [example for batch in answers_prompts_batch[:250] for example in batch]

In [42]:
import json
for example in save_questions:
    # The model output is free text that is only expected to be JSON. Record
    # None for an output that does not parse rather than aborting the loop
    # and leaving the remaining examples without a "scores" key.
    try:
        scores = json.loads(example["evaluation"])
    except json.JSONDecodeError:
        scores = None
    example.update({"scores": scores})

In [44]:
for example in save_answers:
    # Best-effort parse of the model output: a missing or non-string value
    # raises TypeError, malformed JSON raises json.JSONDecodeError (a
    # ValueError). Anything else, including KeyboardInterrupt, propagates
    # instead of being swallowed by a bare except.
    try:
        scores = json.loads(example.get("evaluation"))
    except (TypeError, ValueError):
        scores = None
    example["scores"] = scores

In [69]:
filename = "save_questions.jsonl"
# Write one JSON document per line.
with open(filename, 'w') as file:
    file.writelines(json.dumps(item) + '\n' for item in save_questions)

print(f"Data has been saved to {filename}")

Data has been saved to save_questions.jsonl


In [70]:
filename = "save_answers.jsonl"
# Write one JSON document per line.
with open(filename, 'w') as file:
    file.writelines(json.dumps(item) + '\n' for item in save_answers)

print(f"Data has been saved to {filename}")

Data has been saved to save_answers.jsonl
