In [None]:
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

In [None]:
# The adapter config is read first because it names the base checkpoint that
# both the model and the tokenizer are loaded from.
peft_model_id = "MateuszW/llama-pwt-adapter"
config = PeftConfig.from_pretrained(peft_model_id)
base_checkpoint = config.base_model_name_or_path

# Base causal LM, loaded in 8-bit with the whole module tree mapped to device 0.
model = AutoModelForCausalLM.from_pretrained(
    base_checkpoint,
    return_dict=True,
    load_in_8bit=True,
    device_map={"": 0},
)

# Tokenizer of the same checkpoint, configured to pad on the left.
tokenizer = AutoTokenizer.from_pretrained(
    base_checkpoint,
    padding_side="left",
    model_max_length=1024,
)

# Wrap the base model with the LoRA adapter weights.
model = PeftModel.from_pretrained(model, peft_model_id, device_map={"": 0})

In [None]:
# Register BOS as the padding token so that padded batches can be built
# (presumably the base tokenizer defines no pad token of its own - TODO confirm).
tokenizer.add_special_tokens(dict(pad_token=tokenizer.bos_token))

In [None]:
import pandas as pd

# Read the examples and flatten them into (question, context, type) triples,
# which is the shape the generation cells iterate over.
df = pd.read_json("data/llama_generation/test.json")
data = [
    (question, context, spoiler_type)
    for question, context, spoiler_type in zip(df["question"], df["context"], df["type"])
]
data[0]

In [None]:
# PROMPT = {
#     "phrase": (
#         "Below is an question paired with a context for which generate answer."
#         "Write an answer as short as possible (max 5 words). Use only words from context.\n\n"
#         "### Question:\n{question}\n\n### Context:\n{context}\n\n### Answer:\n"
#     ),
#     "passage": (
#         "Below is an question paired with a context for which generate answer."
#         "Write an answer which will be from one to three sentences. Use only words from context.\n\n"
#         "### Question:\n{question}\n\n### Context:\n{context}\n\n### Answer:\n"
#     ),
#     "multi": (
#         "Below is an question paired with a context for which generate answer."
#         "Write an answer which is multi part that means it contains multiple "
#         "phrase or sentences from given text. Use only words from context.\n\n"
#         "### Question:\n{question}\n\n### Context:\n{context}\n\n### Answer:\n"
#     ),
# }
# Smoke test: sample a few completions for two examples before running the
# full generation loop.
#
# PROMPT is a single template string; the spoiler type of each example is
# substituted into the {type} placeholder.
PROMPT = (
    "Below is an question paired with a context for which generate answer. "
    "Write an answer that with type {type} appropriately completes question.\n\n"
    "### Question:\n{question}\n\n### Context:\n{context}\n\n### Answer:\n"
)
input_ids = tokenizer(
    [
        # PROMPT is a str, not a dict keyed by type, so it must not be indexed
        # with `typ`; every placeholder (including {type}) is passed to format().
        PROMPT.format(type=typ, question=clickbait, context=context)
        for clickbait, context, typ in data[1:3]
    ],
    return_tensors="pt",
    max_length=1912,
    padding="max_length",
    truncation=True,
).to("cuda")
# Drop token_type_ids before calling generate(); the default avoids a KeyError
# when the tokenizer did not produce that key.
input_ids.pop("token_type_ids", None)

# No gradients are needed for sampling.
with torch.inference_mode():
    generated_ids = model.generate(**input_ids, max_new_tokens=50, do_sample=True, num_return_sequences=3)

batch = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

In [None]:
# Show the `output` and `type` fields of the first row, one per line.
for column in ("output", "type"):
    print(df.loc[0, column])

In [None]:
import torch
import re
from tqdm import tqdm
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# PROMPT = {
#     "phrase": (
#         "Below is an question paired with a context for which generate answer."
#         "Write an answer as short as possible (max 5 words). Use only words from context.\n\n"
#         "### Question:\n{question}\n\n### Context:\n{context}\n\n### Answer:\n"
#     ),
#     "passage": (
#         "Below is an question paired with a context for which generate answer."
#         "Write an answer which will be from one to three sentences. Use only words from context.\n\n"
#         "### Question:\n{question}\n\n### Context:\n{context}\n\n### Answer:\n"
#     ),
#     "multi": (
#         "Below is an question paired with a context for which generate answer."
#         "Write an answer which is multi part that means it contains multiple "
#         "phrase or sentences from given text. Use only words from context.\n\n"
#         "### Question:\n{question}\n\n### Context:\n{context}\n\n### Answer:\n"
#     ),
# }
# PROMPT = (
#     "Below is an question paired with a context for which generate answer. "
#     "Write an answer that appropriately completes question.\n\n"
#     "### Question:\n{question}\n\n### Context:\n{context}\n\n### Answer:\n"
# )

# Prompt template; {type} receives the example's spoiler type (e.g. "multi").
PROMPT = (
    "Below is an question paired with a context for which generate answer. "
    "Write an answer that with type {type} appropriately completes question.\n\n"
    "### Question:\n{question}\n\n### Context:\n{context}\n\n### Answer:\n"
)
REGRESSOR_PROMPT = "For given question:\n {} \nanswer:\n {} \ncontext:\n{}"

ANSWER_MARKER = "Answer:\n"  # text that separates the prompt from the completion
EOS_TEXT = "</s>"  # literal end-of-sequence text that may appear in completions
MAX_NEW_TOKENS = 50
PRIMARY_MAX_LENGTH = 1912  # token window of the batched first attempt
RETRY_MAX_LENGTH = 2048  # token window of the single-example retry
RETRY_CONTEXT_CHARS = 4500  # context is clipped to this many characters on retry

spoilers_generated = []
batch_size = 6


def _build_prompt(example, context_chars=None):
    """Format PROMPT for one (question, context, type) triple.

    When `context_chars` is given, the context is clipped to that many
    characters before formatting.
    """
    clickbait, context, typ = example
    if context_chars is not None:
        context = context[:context_chars]
    # All three placeholders must be supplied; omitting `type` raises KeyError.
    return PROMPT.format(type=typ, question=clickbait, context=context)


def _generate_texts(prompts, max_length):
    """Tokenize `prompts` (padded/truncated to `max_length`), generate, and decode.

    Returns the list of decoded strings, one per prompt.
    """
    encoded = tokenizer(
        prompts,
        return_tensors="pt",
        max_length=max_length,
        padding="max_length",
        truncation=True,
    ).to("cuda")
    # Default of None avoids a KeyError when the tokenizer emits no token_type_ids.
    encoded.pop("token_type_ids", None)

    with torch.inference_mode():
        generated_ids = model.generate(**encoded, max_new_tokens=MAX_NEW_TOKENS)

    return tokenizer.batch_decode(generated_ids, skip_special_tokens=True)


def _extract_answer(decoded):
    """Return the segment right after the first ANSWER_MARKER, or None if the marker is absent."""
    parts = decoded.split(ANSWER_MARKER)
    return parts[1] if len(parts) > 1 else None


# Step by batch start index so that no empty trailing batch is produced when
# len(data) is a multiple of batch_size.
for start in tqdm(range(0, len(data), batch_size)):
    data_batch = data[start : start + batch_size]
    decoded_batch = _generate_texts(
        [_build_prompt(example) for example in data_batch],
        PRIMARY_MAX_LENGTH,
    )

    for offset, (example, decoded) in enumerate(zip(data_batch, decoded_batch)):
        answer = _extract_answer(decoded)

        if answer is None:
            # The decoded text no longer contains the marker (most likely the
            # prompt tail was truncated away); retry this example on its own
            # with a clipped context and a larger token window.
            retry_texts = _generate_texts(
                [_build_prompt(example, RETRY_CONTEXT_CHARS)],
                RETRY_MAX_LENGTH,
            )
            answer = _extract_answer(retry_texts[0])

        if answer is None:
            # Keep ids aligned with `data` instead of aborting the whole run.
            tqdm.write(f"No answer marker for example {start + offset}; storing an empty spoiler.")
            answer = ""

        if example[2] == "multi":
            # Multi-part spoilers: each end-of-sequence text becomes a line break.
            answer = answer.replace(EOS_TEXT, "\n")
        else:
            # Otherwise keep only the text before the first end-of-sequence text.
            answer = answer.split(EOS_TEXT)[0]

        spoilers_generated.append(answer)

In [None]:
# Display how many spoilers have been collected in `spoilers_generated`.
len(spoilers_generated)

In [None]:
import pandas as pd

# Ids are the 0-based positions of the spoilers. They are derived from the list
# itself (rather than a fixed range of 1000) so nothing is silently truncated
# when more than 1000 spoilers were generated.
submission = pd.DataFrame(list(enumerate(spoilers_generated)), columns=["id", "spoiler"])
submission.to_csv("output.csv", index=False)