In [36]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_google_genai.chat_models import ChatGoogleGenerativeAI
from langchain_core.output_parsers import StrOutputParser
from constants import gemini_api_key, tavily_api_key
from langchain_community.tools.tavily_search import TavilySearchResults
import os
import numpy as np

# Retrieval Augmented Generation
Retrieval Augmented Generation (RAG) provides an LLM with relevant context alongside the question, which allows the model to better understand the question and give a better-grounded answer.

RAG works by passing the question to a vector database, which compares the embedding of the question with the embeddings of the documents it stores.

The most similar ones are fetched and fed into the model (as context for the model) along with the question.

## Testing the embedding model with test queries

In [37]:
# Gemini chat model client.
model = ChatGoogleGenerativeAI(
    model="gemini-pro",
    google_api_key=gemini_api_key,
)
# Embedding client used below to turn the test sentences into vectors.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=gemini_api_key,
)

In [38]:
# Sample sentences for probing the embedding model: three share the
# "Today is ..." pattern and the last one is about a different topic.
queries = [
    "Today is a sunny day",
    "Today is april fools day",
    "Today is a snowy day",
    "Robert Downey is the Iron Man",
]

In [39]:
# Embed every test sentence individually; `vectors` holds one embedding per
# query, in the same order as `queries`.
vectors = list(map(embeddings.embed_query, queries))

In [40]:
# Shape of the embedding matrix: (number of queries, embedding dimension).
np.asarray(vectors).shape

(4, 768)

In [41]:
# Compare every pair of test sentences by the cosine similarity of their
# embeddings. Sentences with related meaning are expected to score closer to 1
# than unrelated ones.
from sklearn.metrics.pairwise import cosine_similarity

# Display labels for vectors[0] .. vectors[3].
ordinals = ("1st", "2nd", "3rd", "4th")

# Visit each unordered pair (i, j) with i < j, grouped by the later query, so
# the lines come out as 1-2, 1-3, 2-3, 1-4, 2-4, 3-4.
for j in range(len(ordinals)):
    for i in range(j):
        # cosine_similarity expects 2-D inputs, so each vector is wrapped in a
        # list; the result is a 1x1 matrix.
        score = cosine_similarity([vectors[i]], [vectors[j]])
        print(f"{ordinals[i]} and {ordinals[j]} query: ", score)

1st and 2nd query:  [[0.87064364]]
1st and 3nd query:  [[0.93163611]]
2nd and 3rd query:  [[0.87995205]]
1st and 4th query:  [[0.77332271]]
2nd and 4th query:  [[0.80163847]]
3rd and 4th query:  [[0.7808282]]


## Using GoogleEmbeddings in Langchain to build RAG system

### 1. Testing with in memory vector database

In [49]:
# pip install docarray
from langchain_community.vectorstores import DocArrayInMemorySearch
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_openai import OpenAIEmbeddings

In [53]:
# Re-create the embedding client for the retrieval use case. `task_type` is set
# to "retrieval_document" (presumably so that texts are embedded as documents
# to be searched -- confirm against the Google embeddings documentation).
# NOTE: this rebinds `embeddings`, replacing the client created earlier.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    task_type="retrieval_document",
    google_api_key=gemini_api_key,
)

In [75]:
# Index a few short facts in an in-memory vector store; the Google embedding
# client is handed to the store so it can embed the texts for similarity search.
tony_stark_facts = [
    "Tony Stark is a rich philanthropist and a playboy",
    "Tony Stark was kidnapped by thugs for making a lethal weapon in a cave",
    "Tony Stark designed a miniature arc reactor in a cave",
]
vectorstore = DocArrayInMemorySearch.from_texts(tony_stark_facts, embedding=embeddings)

# Display the embedding client attached to the store.
vectorstore.embedding

GoogleGenerativeAIEmbeddings(model='models/embedding-001', task_type='retrieval_document', google_api_key=SecretStr('**********'), credentials=None, client_options=None, transport=None)

In [69]:
# Expose the vector store as a retriever: given a query/question, it fetches
# the relevant documents from the store. No arguments are passed, so the
# retriever's default settings apply.
retriever = vectorstore.as_retriever()

The flow is as follows:
1. The user enters a question for the model.  
2. **setup_and_retrieval** builds the two inputs that the prompt template expects (context and question). It runs two operations in parallel:
    - The question is passed to the **retriever**, and the relevant documents are fetched to form the **context**.
    - Simultaneously, the question is passed through unchanged to the **question** key by `RunnablePassthrough`.
3. The resulting context and question are passed to the prompt template object to create a **PromptValue**.
4. Model takes the **PromptValue**, processes it, and returns the response in the form of **ChatMessage**.
5. The **ChatMessage** is passed to the string output parser to finally display the model's output.  

In [74]:
# Prompt with two placeholders: {context} receives the retriever's output and
# {question} receives the user's question.
template = """Use the context provided:
{context} and creatively answer the following question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# Chat model used for generation. This rebinds `model`, replacing the client
# created earlier, and sets the sampling parameters explicitly.
model = ChatGoogleGenerativeAI(
    model="gemini-pro",
    google_api_key=gemini_api_key,
    top_p=1,
    temperature=1,
)

# Converts the model's chat message into a plain string.
output_parser = StrOutputParser()

# Fan the incoming question out to both prompt inputs: the retriever turns it
# into context, while the passthrough forwards it unchanged.
setup_and_retrieval = RunnableParallel(
    context=retriever,
    question=RunnablePassthrough(),
)

# question -> {context, question} -> prompt -> model -> string
chain = setup_and_retrieval | prompt | model | output_parser

chain.invoke("Why was tony stark kidnapped")

'Tony Stark was kidnapped by thugs because he had designed a lethal weapon in a cave, making him a target for those who sought to use his technology for their own gain.'