In [5]:

from langchain.vectorstores import Qdrant
from langchain.embeddings import HuggingFaceBgeEmbeddings
from qdrant_client import QdrantClient
from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration
import torch

# Configure the BGE (BAAI General Embedding) sentence encoder.
# The model is asked to run on the CPU device, and the encoder is told not to
# normalize the vectors it produces.
bge_model_name = "BAAI/bge-large-en"
bge_model_kwargs = dict(device="cpu")
encode_kwargs = dict(normalize_embeddings=False)

bge_embeddings = HuggingFaceBgeEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=bge_model_kwargs,
    model_name=bge_model_name,
)

# Qdrant connection settings: server address and the collection to query.
qdrant_url = "http://localhost:6333"
collection_name = "gpt_db"

# Client for the Qdrant server; gRPC is not preferred for this connection.
qdrant_client = QdrantClient(url=qdrant_url, prefer_grpc=False)

# LangChain vector-store wrapper over the collection, using the BGE encoder
# configured above to embed incoming queries.
bge_db = Qdrant(
    collection_name=collection_name,
    embeddings=bge_embeddings,
    client=qdrant_client,
)

In [6]:
# Sanity check: show the vector-store object followed by a separator line.
print(f"BGE DB: {bge_db}\n---------------------------------")

BGE DB: <langchain.vectorstores.qdrant.Qdrant object at 0x0000012A7C78F790>
---------------------------------


In [10]:
# Question used for the retrieval demo.
query = "What are classical approaches to tree detection problem?"

# Ask the vector store for the 5 best matches; each hit is a (document, score) pair.
bge_docs = bge_db.similarity_search_with_score(query=query, k=5)

# Print one dict per hit so score, passage text and metadata can be read together.
for doc, score in bge_docs:
    print({"score": score, "content": doc.page_content, "metadata": doc.metadata})

{'score': 0.8593211, 'content': '2.Overview of individual tree crown detection methods \nClassical approaches to individual tree crown detection (see (Skur-\nikhin et al., 2013 ; Hung et al., 2012 )) use pattern recognition algorithms \nto extract handcrafted tree crown-like features, such as local maximum \nfiltering (Xu et al., 2021b ; Gebreslasie et al., 2011 ; Zheng et al., 2022b ), \nimage binarization (Koc-San et al., 2018 ; Pitk¨anen, 2001 ), image seg-\nmentation (Gougeon and Leckie, 2006 ; Santoso et al., 2016 ; Miraki', 'metadata': {'page': 3, 'source': 'data.pdf'}}
{'score': 0.84992313, 'content': 'regions. Panagiotidis et al. (2017) combine local maximum filtering and \ninverse watershed segmentation to estimate crown diameters, achieving \nan acceptable accuracy for detecting tree crown diameter. Software \ntools are available for some classical approaches (Gebreslasie et al., \n2011 ; Santoso et al., 2016 ) but their utility is limited by the need to tune \nmany parameter

In [None]:
# Compute embeddings for the retrieved passages.
# The documents returned by the similarity search expose `page_content` and
# `metadata` (see the printed results) but no `embedding` attribute, so the
# vectors are recomputed from the passage text with the same BGE encoder.
# Each entry of `bge_docs` may be a (document, score) tuple or a bare document.
passage_texts = [
    (hit[0] if isinstance(hit, tuple) else hit).page_content
    for hit in bge_docs
]

# One vector (list of floats) per passage, in the same order as `bge_docs`.
relevant_embeddings = bge_embeddings.embed_documents(passage_texts)

In [11]:
# Set up RAG model components.
# Tokenizer, retriever and model all load from one checkpoint so they stay in
# sync. RagSequenceForGeneration is paired with the "rag-sequence" checkpoint;
# the "rag-token" weights are the ones fine-tuned for RagTokenForGeneration.
rag_checkpoint = "facebook/rag-sequence-nq"

rag_tokenizer = RagTokenizer.from_pretrained(rag_checkpoint)

# `index_name` selects one of the retriever's own index types; it is not the
# name of the Qdrant collection. The dummy dataset avoids downloading the full
# Wikipedia index, which is not needed when passages come from Qdrant.
# NOTE(review): depending on the installed transformers/datasets versions this
# call may need extra options to load the dataset script - confirm locally.
rag_retriever = RagRetriever.from_pretrained(
    rag_checkpoint,
    index_name="exact",
    use_dummy_dataset=True,
)

# Attach the retriever so the model can also be used with plain question input.
rag_sequence_generator = RagSequenceForGeneration.from_pretrained(
    rag_checkpoint,
    retriever=rag_retriever,
)

Downloading (…)lve/main/config.json: 100%|██████████| 4.60k/4.60k [00:00<?, ?B/s]
Downloading (…)okenizer_config.json: 100%|██████████| 48.0/48.0 [00:00<?, ?B/s]
Downloading (…)_tokenizer/vocab.txt: 100%|██████████| 232k/232k [00:00<00:00, 638kB/s]
Downloading (…)cial_tokens_map.json: 100%|██████████| 112/112 [00:00<00:00, 7.17kB/s]
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'RagTokenizer'. 
The class this function is called from is 'DPRQuestionEncoderTokenizer'.
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'RagTokenizer'. 
The class this function is called from is 'DPRQuestionEncoderTokenizerFast'.
Downloading (…)okenizer_config.json: 100%|██████████| 26.0/26

KeyboardInterrupt: 

In [None]:
# Generate an answer with RAG's BART generator, conditioned on the passages
# that the Qdrant search returned in `bge_docs`.
#
# The BGE vectors are not passed to the model: they come from a different
# encoder than the one RAG was trained with. Instead each passage's text is
# paired with the question in the "title / text // question" layout, and the
# search scores are supplied as the per-passage relevance scores.
rag_config = rag_sequence_generator.config

# Use exactly as many passages as the model is configured for, so every
# internal step of generation agrees on the passage count.
n_docs = rag_config.n_docs
if len(bge_docs) < n_docs:
    raise ValueError(
        f"RAG is configured for {n_docs} passages per question "
        f"but the search returned {len(bge_docs)}"
    )
top_hits = bge_docs[:n_docs]

# No passage title is available, so the title slot is left empty.
context_strings = [
    (rag_config.title_sep + doc.page_content + rag_config.doc_sep + query).replace("  ", " ")
    for doc, _score in top_hits
]

# Tokenize with the generator-side tokenizer; rows are padded/truncated to a
# common length so they stack into a single (n_docs, length) tensor.
context_inputs = rag_tokenizer.generator(
    context_strings,
    max_length=rag_config.max_combined_length,
    padding="max_length",
    truncation=True,
    return_tensors="pt",
)

# Shape (1, n_docs): one question, one score per passage.
# NOTE(review): assumes a higher score means a better match (e.g. cosine
# similarity) - confirm against the collection's distance metric.
doc_scores = torch.tensor(
    [[float(score) for _doc, score in top_hits]],
    dtype=torch.float32,
)

generated_ids = rag_sequence_generator.generate(
    context_input_ids=context_inputs["input_ids"],
    context_attention_mask=context_inputs["attention_mask"],
    doc_scores=doc_scores,
    n_docs=n_docs,
)

# Decode the generated output
generated_text = rag_tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

print("Generated Text:", generated_text)
