# QUERY TRANSLATION

In [1]:
from langchain_groq import ChatGroq
from dotenv import load_dotenv
from langchain_community.embeddings.ollama import OllamaEmbeddings
import os

# Pull variables from a local .env file (if any) into the process environment,
# then read the Groq API key from there so it never has to be hardcoded.
load_dotenv()
groq_api_key = os.getenv('GROQ_API_KEY')
# Fail here with an actionable message instead of later inside the Groq client.
if not groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Export it or add it to a .env file before running this notebook."
    )

In [46]:
# Chat model used by every cell below. The disabled line is an alternative
# (judging by its name, larger) Groq model kept for reference:
# llm = ChatGroq(groq_api_key=groq_api_key, model_name='llama-3.1-70b-versatile')
llm = ChatGroq(groq_api_key=groq_api_key, model_name='llama3-8b-8192')
# Embedding model referenced by the (also disabled) vector-store construction
# further down; currently not instantiated:
# embedder = OllamaEmbeddings(model='mxbai-embed-large')

In [47]:
# Smoke test: send a trivial prompt to confirm the key and model work.
# As the cell's last expression, the returned message is displayed below.
llm.invoke("Hi")

AIMessage(content="Hi! It's nice to meet you. Is there something I can help you with or would you like to chat?", response_metadata={'token_usage': {'completion_tokens': 25, 'prompt_tokens': 11, 'total_tokens': 36, 'completion_time': 0.020833333, 'prompt_time': 0.001253428, 'queue_time': 0.0158691, 'total_time': 0.022086761}, 'model_name': 'llama3-8b-8192', 'system_fingerprint': 'fp_af05557ca2', 'finish_reason': 'stop', 'logprobs': None}, id='run-2c1895bc-5db6-4fb3-9c47-4b24f3563250-0', usage_metadata={'input_tokens': 11, 'output_tokens': 25, 'total_tokens': 36})

## 1. Multi-Query Translation

In [17]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma


In [22]:
from langchain_community.document_loaders import PyPDFDirectoryLoader

In [24]:
# Load every PDF found in the sample_docs folder.
loader = PyPDFDirectoryLoader('sample_docs', glob="*.pdf")
documents = loader.load()
print(f"Number of pages in the document :: {len(documents)}")

# Split with a tiktoken-based length function, so chunk_size and chunk_overlap
# are expressed in tokens rather than characters.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300,
    chunk_overlap=60,
)
chunks = text_splitter.split_documents(documents)
print(f"Number of chunks created - {len(chunks)}")

Number of pages in the document :: 25
Number of chunks created - 34


In [69]:
# Build the vector store in this cell so the notebook survives Restart & Run All:
# previously `vectordb` (and the `embedder` it needs) only existed as leftover
# kernel state from commented-out code.
# NOTE(review): OllamaEmbeddings presumably needs a local Ollama server with the
# 'mxbai-embed-large' model available - confirm before running.
embedder = OllamaEmbeddings(model='mxbai-embed-large')
vectordb = Chroma.from_documents(documents=chunks, embedding=embedder)

# Plain similarity search returning the 2 closest chunks per query.
retriever = vectordb.as_retriever(
    search_type='similarity',
    search_kwargs={"k": 2},
)

In [26]:
from langchain.prompts import ChatPromptTemplate

In [43]:
# Prompt that asks the model to rephrase the user's question five ways so that
# several differently-worded queries can be run against the vector store.
# Inside the triple-quoted string, a trailing backslash joins the next line onto
# the current one and `\n` is a real newline escape. `{question}` is the only
# placeholder and is filled in when the prompt is formatted.
template = """
You are an helpful AI assistant. Your task is to generate five different alternatives to ask the same questions \
as provided by user. these alternative questions are to help retrieve better content from a vector database by doing \
vector-based similarity search.

output format: 1.<your_sample_output>\n 2.<your_sample_output_2> ..

User question: {question} 


"""
# Wrap the raw text in a chat prompt template for use with the chat model.
multi_query = ChatPromptTemplate.from_template(template)

In [28]:
from langchain_core.output_parsers import StrOutputParser

In [91]:
# The user question that the rest of this section rewrites, retrieves for, and answers.
query = "What are the preferable accomodations and dining spaces in singapore?"
# format_messages() keeps the prompt as a list of chat messages. format() would
# flatten it into one "Human: ..." string, so the model would see that role
# prefix as part of the message text.
multi_query_res = llm.invoke(multi_query.format_messages(question=query))

In [71]:
# Show the raw model text so the numbered questions can be inspected before parsing.
print(multi_query_res.content)

Here are five alternative questions that can help retrieve better content from a vector database by doing vector-based similarity search:

1. What are the top-rated accommodations and dining spots in Singapore, considering factors like location, cuisine, and price range?
2. Can you recommend some popular hotels and restaurants in Singapore that offer unique experiences, such as rooftop bars or Michelin-starred cuisine?
3. What are some of the most Instagrammable accommodations and dining spaces in Singapore, known for their stunning views, decor, or culinary expertise?
4. Are there any Singaporean hotels and restaurants that offer traditional local cuisine, or those that have received international accolades for their innovative takes on Asian flavors?
5. What are some of the best-preserved heritage accommodations and dining spaces in Singapore, showcasing the city-state's rich cultural and architectural history?

These alternative questions can help retrieve more specific and relevant

In [72]:
def _is_numbered_question(line):
    """Return True for lines shaped like '<digits>.<text>', e.g. '1. What ...'."""
    number, dot, rest = line.partition(".")
    return bool(dot) and number.strip().isdigit() and bool(rest.strip())


# Keep only the numbered lines of the response. Slicing by position
# (e.g. [1:-2]) silently drops real questions whenever the model adds or omits
# its introductory / closing sentences.
alternate_questions = [
    line.strip()
    for line in multi_query_res.content.splitlines()
    if _is_numbered_question(line)
]
alternate_questions

['1. What are the top-rated accommodations and dining spots in Singapore, considering factors like location, cuisine, and price range?',
 '2. Can you recommend some popular hotels and restaurants in Singapore that offer unique experiences, such as rooftop bars or Michelin-starred cuisine?',
 '3. What are some of the most Instagrammable accommodations and dining spaces in Singapore, known for their stunning views, decor, or culinary expertise?',
 '4. Are there any Singaporean hotels and restaurants that offer traditional local cuisine, or those that have received international accolades for their innovative takes on Asian flavors?',
 "5. What are some of the best-preserved heritage accommodations and dining spaces in Singapore, showcasing the city-state's rich cultural and architectural history?"]

In [73]:
# Search with the original question as well as its rewrites. The guard keeps the
# cell idempotent: re-running it must not insert the question a second time.
if alternate_questions[:1] != [query]:
    alternate_questions.insert(0, query)
# One retrieval per question; the result is a list of document lists.
similar_chunks = retriever.batch(alternate_questions)

In [107]:
# Flatten the per-question result lists into a single list of retrieved documents.
flattened_docs = [doc for sublist in similar_chunks for doc in sublist]
# De-duplicate on page text. dict.fromkeys() keeps first-seen (retrieval) order,
# whereas set() yields a hash-dependent order that can differ between kernel
# runs and would reshuffle the context sent to the model.
# NOTE: this rebinds `chunks` (until now the split documents) to plain strings;
# the name is kept because the answering cell joins `chunks` into the context.
chunks = list(dict.fromkeys(doc.page_content for doc in flattened_docs))
print("Number of unique chunks retrieved using multi-query: ", len(chunks))


Number of unique chunks retrieved using multi-query:  4


In [92]:
# Answering the main question
# Prompt with two placeholders: {context} receives the retrieved text and
# {question} the user's original question.
query_template="""
Strictly based on the context provided, answer the below question precisely:

{context}

Question: {question}
"""
# Join the retrieved chunk texts into one context string, with a visible
# separator between chunks.
context = "\n\n---\n\n".join(chunks)
query_prompt = ChatPromptTemplate.from_template(query_template)

In [93]:
# format_messages() passes the prompt to the model as chat messages; format()
# would flatten it into a single "Human: ..." string first.
res = llm.invoke(query_prompt.format_messages(context=context, question=query))

In [110]:
print(res.content)
# Distinct 'page' metadata values of the retrieved documents, sorted so the
# listing is stable and easy to read (a bare set has no guaranteed order).
pages = sorted({doc.metadata['page'] for doc in flattened_docs})
print(f"\n Page sources: {pages}")

Based on the reviews provided, the preferable accommodations in Singapore are:

* The Fullerton Bay Hotel Singapore ( Review by mrssmithBrisbane)
* Amoy by Far East Hospitality (Review by T P B)
* The Quincy Hotel by Far East Hospitality (Review by douglas_lin)

The preferable dining spaces in Singapore are:

* Summer Pavilion (Review by Carlos G)
* Fratini La Trattoria (Review by Sana R)
* Colony (Review by Hannikins)
* Rhubarb (Review by champagnespring)

Note that these are based on the reviews provided and may not be a comprehensive or definitive ranking of the best accommodations and dining spaces in Singapore.

 Page sources: [18, 19, 20, 21]


## 2. Reciprocal Rank Fusion