In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import argparse
import re
import time
from datetime import timedelta
from functools import partial
import logging
from datasets import load_dataset, Dataset
from tqdm import tqdm
import os
from torch.utils.data import DataLoader

from src.retriever import Retriever

In [20]:
from src.retriever import Retriever
from types import SimpleNamespace


arg = SimpleNamespace()
arg.retrieval_model_name_or_path="facebook/contriever-msmarco"
arg.retrieval_embedding_size=768
arg.passages='data/corpus/psgs_w100.tsv'
arg.passages_embeddings='data/corpus/wikipedia_embeddings/*'
arg.indexing_batch_size=1000000
arg.save_or_load_index = False
arg.retrieval_n_subquantizers=0
arg.retrieval_n_bits=8
arg.max_k=100
arg.lowercase = False
arg.normalize_text = False
arg.per_gpu_batch_size=1000000
arg.question_maxlength=100000

retriever = Retriever(arg)
retriever.setup_retriever()

Loading model from: facebook/contriever-msmarco




Indexing passages from files ['data/corpus/wikipedia_embeddings/passages_00', 'data/corpus/wikipedia_embeddings/passages_01', 'data/corpus/wikipedia_embeddings/passages_02', 'data/corpus/wikipedia_embeddings/passages_03', 'data/corpus/wikipedia_embeddings/passages_04', 'data/corpus/wikipedia_embeddings/passages_05', 'data/corpus/wikipedia_embeddings/passages_06', 'data/corpus/wikipedia_embeddings/passages_07', 'data/corpus/wikipedia_embeddings/passages_08', 'data/corpus/wikipedia_embeddings/passages_09', 'data/corpus/wikipedia_embeddings/passages_10', 'data/corpus/wikipedia_embeddings/passages_11', 'data/corpus/wikipedia_embeddings/passages_12', 'data/corpus/wikipedia_embeddings/passages_13', 'data/corpus/wikipedia_embeddings/passages_14', 'data/corpus/wikipedia_embeddings/passages_15']
Loading file data/corpus/wikipedia_embeddings/passages_00
Total data indexed 1000000
Loading file data/corpus/wikipedia_embeddings/passages_01
Total data indexed 2000000
Loading file data/corpus/wikiped

In [2]:
os.environ['CUDA_VISIBLE_DEVICES']='0'

In [3]:
tokenizer = AutoTokenizer.from_pretrained("ICTNLP/Auto-RAG-Llama-3-8B-Instruct")
model = AutoModelForCausalLM.from_pretrained("ICTNLP/Auto-RAG-Llama-3-8B-Instruct")

Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

In [6]:
from transformers import pipeline

pipe = pipeline("text-generation", model="ICTNLP/Auto-RAG-Llama-3-8B-Instruct",device='cuda')

Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

In [33]:
base_message = [
    {'role':'system','content':"""Answer the question by retrieving external knowledge. 
Extract useful information from each retrieved document. 
If the information is insufficient or irrelevant, 
refine your query and search again until you are able to answer the question."""}]

In [47]:
question = "who is called the father of hydrogen bomb"
message = base_message.copy()

message.append({'role':'user','content':"Question: "+question})
n_iter=1
answer=''
while(n_iter<6):
    result_m = pipe(message)[0]['generated_text']
    result = result_m[-1]['content']
    if('query:' in result.lower()):
        iq = result.lower().split('query:')[-1]
        print(f'Query {n_iter}:',iq)
        retrieved = retriever.search_document([iq],2)
        txt = '\n'.join([t['title']+' '+t['text'] for t in retrieved[0]])
        message = result_m
        message.append({'role':'user','content':f"Retrieved Document_{n_iter}: {txt.strip()}"})
        n_iter+=1
    elif 'final answer' in result.lower():
        message = result_m
        answer = result.lower().split('final answer:')[-1]
        break
    else:
        print('error')
        break
print('===========')
for t in message:
    print('###'+t['role'])
    print(t['content'])
print('-----------')
print(answer)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Query 1:  who is known as the father of the hydrogen bomb?
Questions embeddings shape: torch.Size([1, 768])


100%|██████████| 1/1 [00:10<00:00, 10.82s/it]
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Search time: 10.8 s.
Query 2:  who is commonly referred to as the father of the hydrogen bomb?
Questions embeddings shape: torch.Size([1, 768])


100%|██████████| 1/1 [00:06<00:00,  6.44s/it]
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Search time: 6.4 s.
###system
 Answer the question by retrieving external knowledge. 
Extract useful information from each retrieved document. 
If the information is insufficient or irrelevant, 
refine your query and search again until you are able to answer the question.
###user
Question: who is called the father of hydrogen bomb
###assistant
Analysis: To answer this question, I would need to know who is commonly referred to as the "Father of the Hydrogen Bomb".

Initial Query: Who is known as the father of the hydrogen bomb?
###user
Retrieved Document_1: John Clive Ward has been said that physicists have made use of his principles and developments "often without knowing it, and generally without quoting him." In 1955, Ward was recruited to work at the Atomic Weapons Research Establishment at Aldermaston. There, he independently derived a version of the Teller-Ulam design, for which he has been called the "father of the British H-bomb". John Clive Ward was born in East Ham, London, on