In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch
from datasets import load_dataset
from transformers import pipeline
import json

In [10]:
checkpoint_path = "results/checkpoint-65160"
model_name = "meta-llama/Llama-2-7b-hf"
base_model_path = "llama-2-7b-chat-base.pt"
device_map = {"": 0}
dataset_name = "rajpurkar/squad_v2"

In [3]:
# Reload and merge
base_model = model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    torch_dtype="auto",
    device_map="auto"
)
model = PeftModel.from_pretrained(base_model, checkpoint_path)
model = model.merge_and_unload()

# Reload tokenizer to save it
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

Loading checkpoint shards: 100%|██████████| 3/3 [00:03<00:00,  1.15s/it]
We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.


In [4]:
model.save_pretrained("models/finetune_model.pt")
tokenizer.save_pretrained("models/tokenizer/")

('models/tokenizer/tokenizer_config.json',
 'models/tokenizer/special_tokens_map.json',
 'models/tokenizer/tokenizer.json')

In [7]:
# Free VRAM
import gc

del model
del tokenizer
del base_model

gc.collect()

9742

In [8]:
tokenizer = AutoTokenizer.from_pretrained("models/tokenizer/")
model = AutoModelForCausalLM.from_pretrained(
    "models/finetune_model.pt",
    torch_dtype="auto",
    device_map="auto"
)

Loading checkpoint shards: 100%|██████████| 3/3 [00:03<00:00,  1.16s/it]
We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.


In [14]:
def transform_conversation(example):
    context = example['context']
    question = example['question']
    user_text = f"Context: {context} Question: {question}"
    answer = example['answers']['text'][0] if example['answers']['text'] else ""

    reformatted = f"<s>[INST] {user_text} [/INST] {answer} </s>"
    return {"text" : reformatted}

In [15]:
# Test the model with a sample question
val_dataset = load_dataset(dataset_name, split='validation')
val_dataset = val_dataset.select(range(100))
val_transformed_dataset = val_dataset.map(transform_conversation)
val_transformed_dataset = val_transformed_dataset.shuffle(seed=42)
val_transformed_dataset[2]

{'id': '5ad3e96b604f3c001a3ff68a',
 'title': 'Normans',
 'context': 'Some Normans joined Turkish forces to aid in the destruction of the Armenians vassal-states of Sassoun and Taron in far eastern Anatolia. Later, many took up service with the Armenian state further south in Cilicia and the Taurus Mountains. A Norman named Oursel led a force of "Franks" into the upper Euphrates valley in northern Syria. From 1073 to 1074, 8,000 of the 20,000 troops of the Armenian general Philaretus Brachamius were Normans—formerly of Oursel—led by Raimbaud. They even lent their ethnicity to the name of their castle: Afranji, meaning "Franks." The known trade between Amalfi and Antioch and between Bari and Tarsus may be related to the presence of Italo-Normans in those cities while Amalfi and Bari were under Norman rule in Italy.',
 'question': 'Who did the Turks take up service with?',
 'answers': {'text': [], 'answer_start': []},
 'text': '<s>[INST] Context: Some Normans joined Turkish forces to aid 

In [18]:
# Create a pipeline for question answering
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
    torch_dtype=torch.float16
)

sample = val_transformed_dataset[18]

sample_context = sample['context']
sample_question = sample['question']
input_text = f"Context: {sample_context} Question: {sample_question}"

output = pipe(f"[INST] {input_text} [/INST]")
output_text = output[0]['generated_text']
print(output_text)
print(f"Expected answer: {sample['answers']['text'][0] if sample['answers']['text'] else ''}")

Device set to use cuda:0


[INST] Context: The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ("Norman" comes from "Norseman") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries. Question: What century did the Normans first gain their separate identity? [/INST] 10th
Expected answer: 10th century


In [19]:
from tqdm import tqdm
pbar = tqdm(enumerate(val_transformed_dataset), total=len(val_transformed_dataset), desc="Generating answers")
answers = {}

for i, sample in pbar:
    sample_context = sample['context']
    sample_question = sample['question']
    input_text = f"Context: {sample_context} Question: {sample_question}"
    output = pipe(f"[INST] {input_text} [/INST]")
    output_text = output[0]['generated_text']
    output_text = output_text.split("[/INST]")[-1].strip()
    answers[sample['id']] = output_text

Generating answers:   8%|▊         | 8/100 [00:00<00:08, 11.41it/s]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Generating answers: 100%|██████████| 100/100 [00:13<00:00,  7.38it/s]


In [20]:
import json
from collections import defaultdict

def convert_hf_squad2_to_json(hf_dataset, version="v2.0"):
    """
    Converts a Hugging Face SQuAD v2.0 dataset (or subset) to official SQuAD 2.0 JSON format.
    Args:
        hf_dataset: HuggingFace Dataset (e.g., dataset["train"] or a sampled subset)
        output_path: Path to save the JSON file
        version: SQuAD version string (default "v2.0")
    """
    def format_answers(answers):
        # Converts {'text': [...], 'answer_start': [...]} to list of dicts
        return [
            {"text": t, "answer_start": s}
            for t, s in zip(answers["text"], answers["answer_start"])
        ] if answers and "text" in answers and "answer_start" in answers else []

    data_dict = defaultdict(lambda: defaultdict(list))

    for ex in hf_dataset:
        print(ex['id'])
        title = ex.get("title", "No Title")
        context = ex["context"]
        qas_entry = {
            "id": ex["id"],
            "question": ex["question"],
            "is_impossible": ex.get("is_impossible", False),
            "answers": format_answers(ex["answers"])
        }
        if ex.get("is_impossible", False):
            plausible_answers = ex.get("plausible_answers")
            if plausible_answers:
                qas_entry["plausible_answers"] = format_answers(plausible_answers)
            else:
                qas_entry["plausible_answers"] = []
        data_dict[title][context].append(qas_entry)

    data = []
    for title, paras in data_dict.items():
        paragraphs = []
        for context, qas_list in paras.items():
            paragraphs.append({
                "context": context,
                "qas": qas_list
            })
        data.append({
            "title": title,
            "paragraphs": paragraphs
        })

    squad_json = {
        "version": version,
        "data": data
    }
    return squad_json

In [21]:
val_squad_form = convert_hf_squad2_to_json(val_dataset)

56ddde6b9a695914005b9628
56ddde6b9a695914005b9629
56ddde6b9a695914005b962a
56ddde6b9a695914005b962b
56ddde6b9a695914005b962c
5ad39d53604f3c001a3fe8d1
5ad39d53604f3c001a3fe8d2
5ad39d53604f3c001a3fe8d3
5ad39d53604f3c001a3fe8d4
56dddf4066d3e219004dad5f
56dddf4066d3e219004dad60
56dddf4066d3e219004dad61
5ad3a266604f3c001a3fea27
5ad3a266604f3c001a3fea28
5ad3a266604f3c001a3fea29
5ad3a266604f3c001a3fea2a
5ad3a266604f3c001a3fea2b
56dde0379a695914005b9636
56dde0379a695914005b9637
5ad3ab70604f3c001a3feb89
5ad3ab70604f3c001a3feb8a
56dde0ba66d3e219004dad75
56dde0ba66d3e219004dad76
56dde0ba66d3e219004dad77
5ad3ad61604f3c001a3fec0d
5ad3ad61604f3c001a3fec0e
5ad3ad61604f3c001a3fec0f
5ad3ad61604f3c001a3fec10
56dde1d966d3e219004dad8d
5ad3ae14604f3c001a3fec39
5ad3ae14604f3c001a3fec3a
56dde27d9a695914005b9651
56dde27d9a695914005b9652
5ad3af11604f3c001a3fec63
5ad3af11604f3c001a3fec64
5ad3af11604f3c001a3fec65
56dde2fa66d3e219004dad9b
5ad3c626604f3c001a3ff011
5ad3c626604f3c001a3ff012
5ad3c626604f3c001a3ff013


In [22]:
with open("data.json", "w") as f:
    json.dump(val_squad_form, f)

with open("pred.json", "w") as f:
    json.dump(answers, f)

In [None]:
!python sample_prediction_SQUAD2.0.py data.json pred.json --out-file eval.json --out-image-dir .

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


: 