In [1]:
from datasets import load_dataset
#import streamlit as st
from tqdm import tqdm
from haystack import Pipeline
from haystack.document_stores import FAISSDocumentStore, SQLDocumentStore
from haystack.document_stores.memory import InMemoryDocumentStore
from haystack.nodes.retriever.dense import DensePassageRetriever, DPRQuestionEncoderTokenizerFast, DPRContextEncoderTokenizerFast
from haystack.nodes.reader import FARMReader
from haystack.pipelines import ExtractiveQAPipeline, GenerativeQAPipeline
from haystack.utils import print_answers
from haystack.nodes import RAGenerator
from haystack.nodes import Shaper, PromptNode, PromptTemplate, PromptModel, EmbeddingRetriever
import os
import pickle

In [None]:
# Show whether a pickled document store is present in the working directory;
# the next cell branches on this same check.
os.path.exists('document_store.pkl')

In [None]:
#document_store = SQLDocumentStore(db_path)

# Pickle file used to cache the embedded document store between runs.
DOCUMENT_STORE_PATH = 'document_store.pkl'


def build_dpr_retriever(document_store, use_gpu):
    """Return a DensePassageRetriever over `document_store` using the NQ DPR encoders.

    Args:
        document_store: Haystack document store the retriever searches.
        use_gpu: Forwarded to DensePassageRetriever's `use_gpu` argument.
    """
    return DensePassageRetriever(
        document_store=document_store,
        query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
        passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base",
        use_gpu=use_gpu,
    )


def build_rag_generator(retriever):
    """Return the `facebook/rag-token-nq` RAGenerator wired to `retriever`."""
    return RAGenerator(
        model_name_or_path="facebook/rag-token-nq",
        use_gpu=True,
        max_length=50,
        min_length=20,
        num_beams=5,
        retriever=retriever,
    )


if os.path.exists(DOCUMENT_STORE_PATH):
    # SECURITY: pickle.load can execute arbitrary code. Only load a cache file
    # that this notebook wrote itself.
    with open(DOCUMENT_STORE_PATH, 'rb') as store_file:
        document_store = pickle.load(store_file)

    # NOTE(review): the cached path runs the retriever on CPU while the
    # fresh-build path uses the GPU; both settings are kept as they were.
    retriever = build_dpr_retriever(document_store, use_gpu=False)
else:
    dataset = load_dataset("EleutherAI/wikitext_document_level", 'wikitext-103-v1')

    # Map article title -> article body across every available partition.
    # Each page is split on its first two '=' characters, giving
    # [prefix, title, body].
    merged_data = {}
    for partition in ['train', 'test', 'validation']:
        if partition not in dataset:
            continue
        for row in dataset[partition]:
            parts = row['page'].split('=', 2)
            if len(parts) < 3:
                # No heading delimiters: there is no title to key this page by.
                continue
            merged_data[parts[1].strip()] = parts[2].strip()

    article_names = list(merged_data.keys())

    # Sanity check: show the first parsed article.
    if article_names:
        print(article_names[0])
        print(merged_data[article_names[0]])

    document_store = InMemoryDocumentStore()
    for title, text in tqdm(merged_data.items()):
        document_store.write_documents([{
            "content": text,
            "meta": {
                "title": title,
            }
        }])

    retriever = build_dpr_retriever(document_store, use_gpu=True)
    document_store.update_embeddings(retriever=retriever)

    # Cache the embedded store right away so a failure while loading the
    # generator does not discard the embedding work.
    with open(DOCUMENT_STORE_PATH, 'wb') as store_file:
        pickle.dump(document_store, store_file)

# Initialize RAG Generator (identical for both branches).
generator = build_rag_generator(retriever)



In [None]:
import torch
from transformers import GPT2Tokenizer, GPT2Model
from transformers import GPT2Tokenizer, GPT2LMHeadModel


def get_gpt2_embeddings(texts, model, tokenizer):
    """Embed each text as the attention-masked mean of the model's final hidden states.

    Args:
        texts: List of strings to embed.
        model: GPT-2 style model that accepts `output_hidden_states=True` and
            exposes `hidden_states` on its output (e.g. GPT2Model or
            GPT2LMHeadModel).
        tokenizer: Matching tokenizer. If it has no pad token, its EOS token is
            assigned as pad token (this mutates the tokenizer) so that batched
            padding works.

    Returns:
        numpy array with one row per input text and one column per hidden unit.
    """
    # Padding a batch requires a pad token; fall back to EOS when none is set.
    if getattr(tokenizer, "pad_token", None) is None:
        tokenizer.pad_token = tokenizer.eos_token

    tokenized_texts = tokenizer(texts, return_tensors="pt", padding=True, truncation=True, max_length=512)
    input_ids = tokenized_texts["input_ids"]
    attention_mask = tokenized_texts["attention_mask"]

    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask, output_hidden_states=True)

    # Use hidden states rather than outputs[0]: for an LM-head model outputs[0]
    # is the vocabulary logits. Pool over all real tokens rather than position 0,
    # because in a causal model position 0 only sees the first token.
    last_hidden = outputs.hidden_states[-1]
    mask = attention_mask.unsqueeze(-1).to(last_hidden.dtype)
    token_counts = mask.sum(dim=1).clamp(min=1)
    pooled = (last_hidden * mask).sum(dim=1) / token_counts
    return pooled.float().cpu().numpy()


# Load the tokenizer and the LM-head model from the same fine-tuned checkpoint.
tokenizer = GPT2Tokenizer.from_pretrained("vicgalle/gpt2-alpaca-gpt4")
model = GPT2LMHeadModel.from_pretrained("vicgalle/gpt2-alpaca-gpt4")
# Alternative kept for reference: the bare GPT2Model without the LM head.
#model = GPT2Model.from_pretrained("vicgalle/gpt2-alpaca-gpt4")

# NOTE(review): `joined_texts` is not assigned anywhere in the visible notebook;
# it has to exist in the kernel before this line runs.
embeddings = get_gpt2_embeddings(joined_texts, model, tokenizer)

import faiss
import numpy as np

def create_faiss_index(embeddings):
    """Build a flat L2 FAISS index over L2-normalised embeddings.

    Args:
        embeddings: 2-D array-like with one row per vector.

    Returns:
        A `faiss.IndexFlatL2` containing every row of `embeddings`.

    Note:
        faiss works on C-contiguous float32 arrays, so the input is coerced
        first. When the input already has that layout no copy is made and the
        in-place normalisation is visible to the caller.
    """
    vectors = np.ascontiguousarray(embeddings, dtype=np.float32)
    index = faiss.IndexFlatL2(vectors.shape[1])
    # Unit-length rows make L2 ranking agree with cosine-similarity ranking.
    faiss.normalize_L2(vectors)
    index.add(vectors)
    return index

# Build the FAISS index from the GPT-2 embeddings, passing them as a C-contiguous array.
index = create_faiss_index(np.ascontiguousarray(embeddings))

#4. Perform a similarity search using the Faiss index:

def search_faiss_index(query, model, tokenizer, index, k=5):
    """Return the positions of the indexed vectors nearest to `query`.

    Args:
        query: Query string to embed.
        model: Model passed through to `get_gpt2_embeddings`.
        tokenizer: Tokenizer passed through to `get_gpt2_embeddings`.
        index: FAISS index to search.
        k: Maximum number of neighbours to return.

    Returns:
        1-D numpy array of at most `k` row positions, nearest first.
    """
    query_embedding = np.ascontiguousarray(
        get_gpt2_embeddings([query], model, tokenizer), dtype=np.float32
    )
    faiss.normalize_L2(query_embedding)

    # faiss pads missing neighbours with -1 when k exceeds the index size; used
    # as a Python list index that would silently select the last element.
    k = min(k, index.ntotal)
    if k <= 0:
        return np.empty(0, dtype=np.int64)

    _, indices = index.search(query_embedding, k)
    hits = indices[0]
    return hits[hits >= 0]

# Look up the two indexed texts closest to `query`.
# NOTE(review): in this notebook `query` is only assigned in a later cell
# (`query = 'Who is Obama?'`), so that cell has to run before this one.
result_indices = search_faiss_index(query, model, tokenizer, index, k=2)
print(result_indices)


def generate_response(prompt, model, tokenizer, max_length=256):
    """Generate one sampled continuation of `prompt` and return the decoded text.

    Args:
        prompt: Text the model should continue.
        model: Causal language model exposing `generate`.
        tokenizer: Tokenizer matching `model`.
        max_length: Upper bound on the total sequence length passed to
            `generate` (prompt tokens included).

    Returns:
        The decoded first returned sequence, with special tokens stripped.
    """
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    # A single unpadded prompt: every position is a real token.
    attention_mask = torch.ones_like(input_ids)

    # GPT-2 style tokenizers often have no pad token; fall back to EOS so
    # generate() gets an explicit pad id.
    pad_token_id = getattr(tokenizer, "pad_token_id", None)
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    with torch.no_grad():
        output = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_length=max_length,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            # top_k / top_p / temperature only take effect when sampling is on.
            do_sample=True,
            top_k=5,
            top_p=0.95,
            temperature=0.8,
            pad_token_id=pad_token_id,
        )

    return tokenizer.decode(output[0], skip_special_tokens=True)



In [None]:
# Preview the (context, answer) pair of every answer held in `prediction`.
[
    (answer.context, answer.answer)
    for answer in prediction["answers"]
]

In [None]:
# Answers from the QA prediction; each one is read through `.context` and `.answer`.
answers = prediction['answers']

# One single-element list per answer (shape kept for any later use of `texts`).
texts = [['C:' + "\n\n" + a.context + "\n\n" + 'A:' + "\n\n" + a.answer + "\n\n"] for a in answers]

for t in texts:
    print(t[0])

# The question followed by every context/answer pair, joined in one pass.
string_output = 'Q:\n\n' + query + '\n\n' + ''.join(
    'C: \n\n' + a.context + '\n\n' + 'A: \n\n' + a.answer + '\n\n' for a in answers
)
print(string_output)

# NOTE(review): `context` is not interpolated into the prompt below, which uses
# `string_output` instead; the assignment is kept in case another cell reads it.
context = " ".join([joined_texts[i] for i in result_indices])

# Instruction/response prompt wrapping the retrieved context and the question.
prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:

Given the below as context:

Start context:

{string_output}
:End context.

Using the above as context, answer the below question or perform the asked for task:

{query}

### Response:
"""
print(prompt)

In [None]:
# Generate the answer for the assembled prompt, allowing up to 512 tokens in total.
response = generate_response(
    prompt=prompt,
    model=model,
    tokenizer=tokenizer,
    max_length=512,
)
print(response)

In [None]:
# Re-display the generated response without running generation again.
print(response)

In [2]:
# Load a FAISS document store from the index file "my_faiss_index.faiss".
# NOTE(review): that file is written by the `save(index_path=...)` call in the
# following cell, so it must already exist from an earlier run.
document_store = FAISSDocumentStore(faiss_index_path="my_faiss_index.faiss")

In [None]:

# FAISS-backed store: flat index over 768-dimensional embeddings, with document
# text and metadata kept in a local SQLite database.
faiss_document_store = FAISSDocumentStore(
    faiss_index_factory_str="Flat",
    return_embedding=True,
    embedding_dim=768,
    sql_url='sqlite:///testdb.sql',
    index="title",
    progress_bar=False,
)

# Copy every document from the current `document_store` into the FAISS store.
# NOTE(review): this expects `document_store` to be the populated store built
# earlier in the notebook — confirm which cell last assigned it.
documents = document_store.get_all_documents()
faiss_document_store.write_documents(documents)

# Build the DPR retriever once. Constructing it twice loads both encoders a
# second time for an identical configuration.
retriever_faiss = DensePassageRetriever(
    document_store=faiss_document_store,
    query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
    passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base",
    use_gpu=True,
)
# Later cells refer to the retriever as `retriever`; expose the same instance
# under that name.
retriever = retriever_faiss

# Compute embeddings for the copied documents and persist the FAISS index.
faiss_document_store.update_embeddings(retriever=retriever_faiss)
faiss_document_store.save(index_path="my_faiss_index.faiss")

In [4]:
# DPR retriever over the current `document_store`, using the NQ question and
# context encoders on the GPU.
retriever_faiss = DensePassageRetriever(
    document_store=document_store,
    query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
    passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base",
    use_gpu=True,
    #embed_title=True,
)
# Update the store's document embeddings with this retriever.
# NOTE(review): if `document_store` was loaded from a saved FAISS index, this
# may repeat work that is already stored in the index — confirm it is needed.
document_store.update_embeddings(retriever=retriever_faiss)

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokenizerFast'.


In [9]:
# Persist the FAISS index (with the embeddings updated above) through the
# document store. DensePassageRetriever.save() would instead write the DPR
# encoder models into a directory of that name.
# NOTE(review): assumes `document_store` is the FAISSDocumentStore loaded from
# "my_faiss_index.faiss" earlier in the notebook.
document_store.save(index_path='my_faiss_index.faiss')

In [8]:

#document_store.update_embeddings(retriever=retriever)

# Initialize the RAG generator from the "facebook/rag-token-nq" checkpoint and
# attach the FAISS-backed DPR retriever to it. Generation uses 5 beams and
# answer lengths between 20 and 50.
generator = RAGenerator(
    model_name_or_path="facebook/rag-token-nq",
    use_gpu=True,
    max_length=50,
    min_length=20,
    #embed_title=True,
    num_beams=5,
    #tokenizer=BartTokenizerFast.from_pretrained('facebook/rag-token-nq'),
    retriever=retriever_faiss
)




The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'RagTokenizer'. 
The class this function is called from is 'DPRQuestionEncoderTokenizer'.
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'RagTokenizer'. 
The class this function is called from is 'DPRQuestionEncoderTokenizerFast'.
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'RagTokenizer'. 
The class this function is called from is 'BartTokenizer'.
The tokenizer class you load from this checkpoint is not the same type as the class this function is called fr

In [None]:
# The question must be defined before the pipeline runs; assigning it after
# `pipe.run(...)` raises NameError on a fresh kernel.
query = 'Who is Obama?'

# Extractive reader (RoBERTa fine-tuned on SQuAD 2.0), run on CPU.
reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2", use_gpu=False)

pipe = ExtractiveQAPipeline(reader, retriever)

# Retrieve 10 candidate documents, then keep the reader's 5 best answers.
prediction = pipe.run(
    query=query, params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}}
)

print_answers(prediction, details="minimum")


In [None]:

# Answer `query` with the retriever + RAG generator pipeline.
pipe = GenerativeQAPipeline(generator=generator, retriever=retriever)
res = pipe.run(
    query=query,
    params={
        "Retriever": {"top_k": 5},
        "Generator": {"top_k": 5},
    },
)
print_answers(res, details="minimum")

# For every generated answer, show its score, the titles of the documents it
# references, and the answer text.
for a in res['answers']:
    docs = list(map(document_store.get_document_by_id, a.document_ids))
    titles = [doc.meta['title'] for doc in docs]
    print(a.score, titles, a.answer)

In [None]:
"""
Yes, it is possible to perform a tiered search using clustering to partition the index's smaller `n_dim` embeddings into more refined searches based on their similarity to the query. One approach to achieve this is to use hierarchical clustering, which is a method of cluster analysis that builds a hierarchy of clusters by recursively partitioning data points into smaller clusters based on their similarity. 

Here's a high-level overview of how you could implement a tiered search using hierarchical clustering:
...

5. When a query is made, use the `DensePassageRetriever` to retrieve the top `k` clusters that are most similar to the query, based on their embeddings.

6. For each of the top `k` clusters, query the corresponding index using the `DensePassageRetriever` to retrieve the top `m` documents that are most similar to the query, based on their embeddings.

7. Combine the results from all the indices, and return the top `n` documents as the final results.

This approach can help speed up the search process by reducing the number of documents that need to be searched for each query, while also potentially improving the accuracy of the results by using more refined indices for documents that are more similar to the query. However, it also adds additional complexity to the system and requires careful tuning of the clustering and indexing parameters to achieve the desired balance between speed and accuracy.
"""
