In [1]:
from backend.model.rag_handler import RagHandler
import yaml

In [2]:



from backend.vector_database.dataset import MockDataset

# Read the training configuration that drives this notebook.
with open("configs/training/prova.yaml", "r") as config_file:
    config = yaml.safe_load(config_file)

# Device handed to RagHandler below.
device = "cpu"

# Required settings: a missing key raises KeyError.
(
    model_name,
    use_qlora,
    optimizer_params,
    max_epochs,
    batch_size,
    log_to_wandb,
    log_interval,
    checkpoint_interval,
    seed,
    wandb_project,
) = (
    config[key]
    for key in (
        "model_name",
        "use_qlora",
        "optimizer_params",
        "max_epochs",
        "batch_size",
        "log_to_wandb",
        "log_interval",
        "checkpoint_interval",
        "seed",
        "wandb_project",
    )
)

# Optional settings: fall back to an empty dict / None when absent.
llm_generation_config = config.get("llm_generation_config", {})
llm_kwargs = config.get("llm_kwargs")
tokenizer_kwargs = config.get("tokenizer_kwargs")

# One-document mock dataset backing the FAISS index.
md = MockDataset(["ciao"])
faiss_kwargs = dict(embedder=None, dataset=md, index_str="Flat")

rag_handler_args = dict(
    model_name=model_name,
    device=device,
    # NOTE: hard-coded off; the `use_qlora` value read from the config is not used here.
    use_qlora=False,
    llm_generation_config=llm_generation_config,
    llm_kwargs=llm_kwargs,
    tokenizer_kwargs=tokenizer_kwargs,
    faiss_kwargs=faiss_kwargs,
)
rag_handler = RagHandler(**rag_handler_args)

# Train the index on the single mock text, then add that text to it.
rag_handler.faiss.train_from_text(["ciao"])
rag_handler.faiss.add_text(["ciao"])
# QLoRA preparation is currently disabled:
# rag_handler.llm.model = prepare_for_qlora(rag_handler.llm.model)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [21]:
# One question together with its reference answer.
prompt = "When did Napoleon die?"
answer = "He died in 1821, after long years of exile on the island of Saint Helena."

system_turn = {"role": "system", "content": "You are a helpful assistant."}
user_turn = {"role": "user", "content": prompt}
messages = [system_turn, user_turn]

# Chat-formatted prompt as a plain string (tokenize=False). It is not part of
# the batch passed to the forward call below; it is only here for inspection.
text = rag_handler.llm.tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
# print(text)

# Batch of size one: parallel lists of queries and answers.
batch = dict(query=[prompt], answer=[answer])

res = rag_handler.forward_batch_query_single_doc(batch)

In [22]:
# Tokenize the raw query and answer lists into tensors, one encoding per batch field.
token_q, token_a = (
    rag_handler.llm.tokenizer(batch[field], return_tensors="pt")
    for field in ("query", "answer")
)

# Show both encodings as the cell result.
token_q, token_a

({'input_ids': tensor([[ 4498,  1521, 69427,  2746,    30]]), 'attention_mask': tensor([[1, 1, 1, 1, 1]])},
 {'input_ids': tensor([[ 1519,  8469,   304,   220,    16,    23,    17,    16,    11,  1283,
           1293,  1635,   315, 59987,   389,   279, 12922,   315, 14205, 71946,
             13]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])})

In [23]:
import torch

# Report the shape of the logits and the answer lengths returned by the forward pass.
print(res["logits"].shape, res["answer_lengths"])

# Answer length of the first example in the batch.
answ_len = res["answer_lengths"][0]

# Normalise the logits over the last axis (presumably the vocabulary — confirm).
# `torch.softmax` is the public entry point; the previous `torch.functional.F`
# spelling only worked through a private import alias inside `torch/functional.py`.
probs = torch.softmax(res["logits"], dim=-1)

torch.Size([1, 78, 151936]) [22]


In [24]:
# Abandoned experiment, kept for reference:
# probs[:, -answ_len:, token_a["input_ids"]]
# Display the tokenizer's end-of-sequence token as the cell result.
rag_handler.llm.tokenizer.eos_token

'<|im_end|>'

In [25]:

# Most probable token id at every position, then the window of `answ_len`
# positions ending one step before the last. The one-step shift presumably
# reflects that position t scores token t+1 — confirm against the handler.
greedy_ids = probs.argmax(dim=-1)
agm = greedy_ids[:, -(answ_len + 1):-1]


In [26]:
# Decode ids one at a time so reference and prediction line up token by token.
rag_tokenizer = rag_handler.llm.tokenizer

# Reference: the answer text with the EOS token appended, encoded to ids.
reference_ids = rag_tokenizer.encode(answer + rag_tokenizer.eos_token)
# Prediction: greedy ids of the first example in the batch.
predicted_ids = agm[0].tolist()

true_answ = list(map(rag_tokenizer.decode, reference_ids))
pred_answ = list(map(rag_tokenizer.decode, predicted_ids))

print(true_answ)
print(pred_answ)
print(len(true_answ), len(pred_answ))

['He', ' died', ' in', ' ', '1', '8', '2', '1', ',', ' after', ' long', ' years', ' of', ' exile', ' on', ' the', ' island', ' of', ' Saint', ' Helena', '.', '<|im_end|>']
['N', ' died', ' on', ' ', '1', '8', '1', '1', '.', ' while', ' a', ' and', ' of', ' military', ' and', ' the', ' island', ' of', ' Saint', '-D', '.', '<|im_end|>']
22 22


In [30]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# NOTE: `device` is not passed to `from_pretrained` below; model placement is
# governed by device_map="auto".
device = "cpu"

# Single source of truth for the checkpoint shared by model and tokenizer.
qwen_checkpoint = "Qwen/Qwen1.5-0.5B-Chat"

model = AutoModelForCausalLM.from_pretrained(
    qwen_checkpoint, torch_dtype="auto", device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(qwen_checkpoint)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [11]:

# Ask the stand-alone chat model the same question and decode its free-form answer.
prompt = "When did Napoleon die?"
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": prompt}
]
# Render the conversation with the model's chat template as a plain string.
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)
# Place the inputs on the device the model was actually loaded to: the model is
# created with device_map="auto", so a hard-coded device string can mismatch.
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

# Pass the whole encoding so the attention mask is forwarded along with the ids.
generated_ids = model.generate(
    **model_inputs,
    max_new_tokens=512
)
# `generate` returns prompt + completion; drop the prompt prefix from each row.
generated_ids = [
    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]

response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

In [12]:
# Display the decoded completion as the cell result.
response

'Napoleon was born on April 15, 1804, in Amiens, France. He died on November 13, 1812, while still in power.'