In [3]:
import getpass
import os
from dotenv import load_dotenv 
load_dotenv()
# load_dotenv() above has already copied any .env entries into os.environ, so
# the key only needs to be set here when it is still missing. Prompt for it
# in that case: assigning the None returned by os.getenv() to os.environ
# would fail with a TypeError instead of a helpful message.
if not os.getenv("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google API key: ")

from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Google Generative AI embedding client ("models/embedding-001"). The cells
# visible below embed with hf_embeddings instead, so this object is not used
# again in this part of the notebook.
google_embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [4]:
from langchain_huggingface import HuggingFaceEmbeddings
# Hugging Face embedding model used by every embedding and vector-store cell
# below; its vectors have 384 dimensions (see the len(doc_embed[0]) output).
hf_embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Toy corpus for comparing embedding distances: three near-identical questions.
# The strings are kept exactly as written (including the trailing space in the
# second one) so the distances shown in the later cells stay reproducible.
documents = [
    "who is a prime minister of usa?",
    "who is a prime minister of india ",
    "who is president of india",
]

# Statement that is compared against each entry in `documents`.
query = "modu is prime minister of india"


In [1]:
from sklearn.metrics.pairwise import cosine_distances

In [7]:
# Embed the corpus (one vector per document) and the query with the same model
# so their vectors are directly comparable.
doc_embed = hf_embeddings.embed_documents(documents)
query_embed = hf_embeddings.embed_query(query)

In [11]:
# Dimensionality of a single document embedding.
len(doc_embed[0])

384

In [12]:
from sklearn.metrics.pairwise import cosine_distances
# Cosine distance from the query to each document (smaller means closer).
# query_embed is wrapped in a list so it is passed as a one-row 2-D input,
# giving a 1 x len(documents) result.
cosine_distances([query_embed],doc_embed)

array([[0.53137919, 0.33506707, 0.46330399]])

In [13]:
from sklearn.metrics.pairwise import euclidean_distances
# Euclidean (L2) distance from the query to each document, for comparison with
# the cosine distances above; again the query is wrapped as a one-row input.
euclidean_distances([query_embed],doc_embed)

array([[1.03090174, 0.81861722, 0.96260478]])

In [None]:
from langchain_core.vectorstores import InMemoryVectorStore



*Vector store part*
Document -> Document loader -> Document splitter -> Chunking -> Create index (with a custom dimension) -> Vector store (FAISS; needs an embedding function) -> Add documents -> Set up retriever -> Set up context -> Final chaining -> Ready to query

In [14]:
from langchain_community.vectorstores import FAISS

In [15]:
import faiss

# Size the flat L2 index from the embedding model itself rather than a
# hard-coded 384, so swapping hf_embeddings for a model with a different
# vector length cannot cause a dimension mismatch when texts are added.
embedding_dim = len(hf_embeddings.embed_query("hello world"))
index = faiss.IndexFlatL2(embedding_dim)

In [17]:
from langchain_community.docstore.in_memory import InMemoryDocstore

In [20]:
# Assemble an empty FAISS-backed vector store: `index` holds the vectors,
# hf_embeddings turns text into vectors, and the in-memory docstore plus the
# (initially empty) index-to-id map keep track of the stored documents.
vector_store = FAISS(
    index=index,
    embedding_function=hf_embeddings,
    index_to_docstore_id={},
    docstore=InMemoryDocstore(),
)

In [21]:
# Embed and store three sample texts; the call returns the ids assigned to
# them. NOTE(review): re-running this cell presumably appends the same texts
# again under new ids — rebuild vector_store first if that matters.
vector_store.add_texts(["AI is future","AI is powerful","Dogs are cute"])

['010708f1-eb1b-4070-afb7-7b5163fcad91',
 'b23034f2-6edd-4ea0-9235-370adfa47e8a',
 '73923718-4d80-4a0b-91a1-64de07fd8e3a']

In [32]:
# Return the single closest stored document (k=1) for the query text.
vector_store.similarity_search("is cute?", k=1)

[Document(id='73923718-4d80-4a0b-91a1-64de07fd8e3a', metadata={}, page_content='Dogs are cute')]

In [35]:
# Wrap the store as a retriever that returns one document per query; the RAG
# chain at the end of the notebook uses this object to fetch its context.
retriever = vector_store.as_retriever(search_kwargs={"k":1})

In [37]:
# Sanity-check the retriever on its own before wiring it into a chain.
retriever.invoke("is AI cute?")

[Document(id='b23034f2-6edd-4ea0-9235-370adfa47e8a', metadata={}, page_content='AI is powerful')]

In [38]:
# Persist the store to the relative folder "vs" (resolved against the
# kernel's current working directory).
vector_store.save_local("vs")

In [39]:
# Reload the store saved to "vs", using the same embedding model it was built
# with. SECURITY: allow_dangerous_deserialization=True opts in to unpickling
# the saved docstore, which can execute arbitrary code — only load folders
# this notebook wrote itself, never files from an untrusted source.
# new_vs is not referenced again in the cells visible here.
new_vs = FAISS.load_local(
    "vs", hf_embeddings, allow_dangerous_deserialization=True
)

In [40]:
from langchain_google_genai import ChatGoogleGenerativeAI
# Gemini chat model that generates the final answer in the RAG chain.
# NOTE(review): presumably authenticates via the GOOGLE_API_KEY environment
# variable set in the first cell — confirm.
model = ChatGoogleGenerativeAI(model='gemini-1.5-flash')


In [41]:
from langchain import hub
# Fetch the "rlm/rag-prompt" template from the LangChain hub (needs network
# access). NOTE(review): no commit is pinned, so a later run may presumably
# pull a newer revision than the one shown in the output below.
prompt = hub.pull("rlm/rag-prompt")

In [43]:
# Show the pulled template; it expects 'context' and 'question' inputs.
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])

In [46]:
import pprint
# Pretty-print attempt: the output is the same single-line repr as the bare
# `prompt` cell, so this adds no extra readability here.
pprint.pprint(prompt)

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])


In [47]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


In [49]:
def format_docs(docs):
    """Merge retrieved documents into a single context string.

    Takes the ``page_content`` of each item in ``docs``, in order, and joins
    them with a blank line between consecutive entries. An empty ``docs``
    yields an empty string.
    """
    separator = "\n\n"
    contents = [entry.page_content for entry in docs]
    return separator.join(contents)

In [51]:
# RAG pipeline: the incoming question is fanned out into the two prompt
# variables, the filled prompt goes to the chat model, and the model's reply
# is reduced to a plain string.
rag_chain = (
    # "context": retrieve documents for the question and join their text;
    # "question": forward the original input unchanged.
    {"context":retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()

)

In [52]:
# Ask a question end to end; the answer is limited to what the single
# retrieved document (k=1) says.
rag_chain.invoke("is AI is cute?")

"The provided text only states that AI is powerful, not whether it is cute.  There is no information about AI's cuteness.  Therefore, I don't know."