In [57]:
import os
from dotenv import load_dotenv
load_dotenv()

import warnings


def missing_env_vars(names, environ=os.environ):
    """Return the entries of ``names`` that are unset or empty in ``environ``.

    Args:
        names: iterable of environment-variable names to check.
        environ: mapping to look the names up in (defaults to ``os.environ``).

    Returns:
        list of the names whose value is missing or an empty string, in the
        order they were given.
    """
    return [name for name in names if not environ.get(name)]


# os.getenv() already reads from os.environ, so writing the value straight
# back is a no-op when the key exists -- and `os.environ[k] = None` raises an
# unhelpful TypeError when it does not. Report missing keys by name instead.
_missing_keys = missing_env_vars(("HF_API_KEY", "OPENAI_API_KEY"))
if _missing_keys:
    warnings.warn(
        "Missing environment variable(s): "
        + ", ".join(_missing_keys)
        + ". Add them to your .env file; cells that need them will fail."
    )

In [58]:
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model used to turn text into vectors in the cells that follow.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
vectorizer = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

In [59]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import euclidean_distances


In [60]:
# Toy corpus: two filler sentences and one question about India's prime minister.
document = [
    "This is a test sentence.",
    "This is another test sentence.",
    "Who is prime minister of india?",
]

In [61]:
# Statement whose embedding is compared against every entry of `document`.
query: str = "Narendra modi is prime minister of india."

In [62]:
# One embedding vector per entry of `document`, in the same order.
document_vectors = vectorizer.embed_documents(texts=document)

In [63]:
# Single embedding vector for the query string.
query_vector = vectorizer.embed_query(text=query)

In [64]:
# Query is wrapped in a list so it forms a one-row matrix; the result has one
# similarity score per document.
cosine_similarity(X=[query_vector], Y=document_vectors)


array([[0.13925151, 0.13833855, 0.73641668]])

In [65]:
# Same comparison as a distance: one value per document for the single query row.
euclidean_distances(X=[query_vector], Y=document_vectors)

array([[1.31205837, 1.31275396, 0.72606242]])

In [66]:
import faiss
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore


In [67]:
# Size the L2 index from the embedding model itself rather than hard-coding
# 384: the index dimension must equal the embedding width, and probing the
# model keeps this cell correct if `vectorizer` is switched to another model.
embedding_dim = len(vectorizer.embed_query("hello world"))
index = faiss.IndexFlatL2(embedding_dim)
index


<faiss.swigfaiss.IndexFlatL2; proxy of <Swig Object of type 'faiss::IndexFlatL2 *' at 0x151d805a0> >

In [68]:
# Wrap the (still empty) FAISS index in a LangChain vector store, starting
# with an empty in-memory docstore and an empty position -> document-id map.
vector_store = FAISS(
    index=index,
    embedding_function=vectorizer,
    index_to_docstore_id={},
    docstore=InMemoryDocstore(),
)


In [69]:
# NOTE(review): the first entry looks like a typo of "AI is future"; it is
# kept byte-for-byte because the recorded outputs below echo it.
sample_texts = ["Äi si future", "Ai is powerful", "Dogs are cute"]
# Embeds the texts, stores them, and displays the ids assigned to them.
vector_store.add_texts(sample_texts)

['014a66df-b680-423e-9106-a4190c0d7070',
 '867cdfbf-dbcc-47bf-ae60-4ef5ef6876e9',
 '348cb0fb-3b7f-4882-b9ad-329a5c3d107e']

In [70]:
# Show the mapping the store keeps from index position to docstore id.
vector_store.index_to_docstore_id


{0: '014a66df-b680-423e-9106-a4190c0d7070',
 1: '867cdfbf-dbcc-47bf-ae60-4ef5ef6876e9',
 2: '348cb0fb-3b7f-4882-b9ad-329a5c3d107e'}

In [71]:
# Ask the store for the two stored texts closest to the question.
vector_store.similarity_search(query="What is the future of AI?", k=2)


[Document(id='014a66df-b680-423e-9106-a4190c0d7070', metadata={}, page_content='Äi si future'),
 Document(id='867cdfbf-dbcc-47bf-ae60-4ef5ef6876e9', metadata={}, page_content='Ai is powerful')]

In [72]:
from langchain_core.documents import Document

# (page_content, source) pairs for the four sample documents.
_sample_rows = (
    ("Äi si future", "arXiv"),
    ("Ai is powerful", "text"),
    ("Dogs are cute", "pdf"),
    ("Cats are also cute", "web"),
)

# Each Document carries its text plus a `source` metadata tag, which a later
# cell uses as a search filter.
documents = [
    Document(page_content=text, metadata={"source": origin})
    for text, origin in _sample_rows
]
document_1, document_2, document_3, document_4 = documents

In [73]:
# Start over with a fresh, empty store for the Document-based examples.
# The index dimension is read from the embedding model instead of being
# hard-coded to 384, so it always matches whatever `vectorizer` produces.
embedding_dim = len(vectorizer.embed_query("hello world"))
index = faiss.IndexFlatL2(embedding_dim)
vector_store = FAISS(
    embedding_function=vectorizer,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={}
)


In [74]:
# Embed and store the four sample Documents; displays their assigned ids.
vector_store.add_documents(documents=documents)

['573ed7a9-cf71-452e-85a5-d0b237226b00',
 'a909726e-c3bb-4d55-8500-46969d66018b',
 'e70ebaf5-0c7f-4004-8409-cd653f97fd6b',
 '0dfd39f7-bbdf-4fdc-967a-cdb6c7ffdce6']

In [75]:
# Only the single closest Document is requested this time.
vector_store.similarity_search(query="What is the future of AI?", k=1)

[Document(id='573ed7a9-cf71-452e-85a5-d0b237226b00', metadata={'source': 'arXiv'}, page_content='Äi si future')]

In [76]:
# Restrict the search to Documents whose metadata has source == "web".
results = vector_store.similarity_search(
    query="Who is cute?",
    filter={"source": "web"},
)
# Display the metadata of the top hit.
results[0].metadata


{'source': 'web'}

In [77]:
# Text of the top hit from the filtered search in the previous cell.
results[0].page_content


'Cats are also cute'

### Creating retriever

In [78]:
# NOTE(review): `retriver` is misspelt (a later cell uses `retriever`); the
# name is kept as-is in case other cells refer to it.
top_two = dict(k=2)  # return the two closest Documents per query
retriver = vector_store.as_retriever(search_kwargs=top_two)

retriver.invoke(input="Who is cute?")


[Document(id='e70ebaf5-0c7f-4004-8409-cd653f97fd6b', metadata={'source': 'pdf'}, page_content='Dogs are cute'),
 Document(id='0dfd39f7-bbdf-4fdc-967a-cdb6c7ffdce6', metadata={'source': 'web'}, page_content='Cats are also cute')]

### Saving and loading the vector store to/from disk

In [79]:
# Persist the current store under the local "vector_db" folder.
vector_store.save_local(folder_path="vector_db")


In [80]:
# new_vector_store = FAISS.load_local("vector_db", vectorizer, allow_dangerous_deserialization=True)
# new_vector_store.similarity_search("Who is cute?")


### Create a RAG from scratch

In [81]:
### Load a file
from langchain_community.document_loaders import PyPDFLoader

# Path is relative to the notebook's working directory.
loader = PyPDFLoader(file_path="../../resources/Insights.pdf")
# The recorded output shows one Document per PDF page, with page metadata.
pages = loader.load()
pages

[Document(metadata={'producer': 'Adobe PDF Library 17.0', 'creator': 'Adobe InDesign 20.0 (Macintosh)', 'creationdate': '2025-09-19T12:25:36+05:30', 'moddate': '2025-09-19T12:25:40+05:30', 'trapped': '/False', 'source': '../../resources/Insights.pdf', 'total_pages': 33, 'page': 0, 'page_label': '1'}, page_content='© 2025 ZEUX Innovation | Part of The Insight Advantage Program\nThe 7-Day Insight \nMastery Action Plan\nDaily practices and reflections to stretch your \nthinking and unlock fresh perspectives'),
 Document(metadata={'producer': 'Adobe PDF Library 17.0', 'creator': 'Adobe InDesign 20.0 (Macintosh)', 'creationdate': '2025-09-19T12:25:36+05:30', 'moddate': '2025-09-19T12:25:40+05:30', 'trapped': '/False', 'source': '../../resources/Insights.pdf', 'total_pages': 33, 'page': 1, 'page_label': '2'}, page_content="© 2025 ZEUX Innovation | Part of The Insight Advantage Program\n•\xa0Complete each day's activities in sequence\n•\xa0Spend 15-30 minutes on daily exercises\n•\xa0Write re

In [82]:
### Split the document into chunks
from langchain_community.vectorstores import FAISS
# Import from langchain_huggingface, matching the first embeddings cell; the
# langchain_community export of this class is deprecated and would shadow it.
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter


# Re-created here so this section can be read on its own.
vectorizer = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

loader = PyPDFLoader("../../resources/Insights.pdf")
pages = loader.load()

# Chunks of at most 1000 units with 100 units of overlap between neighbours
# (units are characters unless a custom length function is configured).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100
)

# Each chunk is a Document that keeps the metadata of the page it came from
# (see the recorded output).
docs = text_splitter.split_documents(pages)
docs

[Document(metadata={'producer': 'Adobe PDF Library 17.0', 'creator': 'Adobe InDesign 20.0 (Macintosh)', 'creationdate': '2025-09-19T12:25:36+05:30', 'moddate': '2025-09-19T12:25:40+05:30', 'trapped': '/False', 'source': '../../resources/Insights.pdf', 'total_pages': 33, 'page': 0, 'page_label': '1'}, page_content='© 2025 ZEUX Innovation | Part of The Insight Advantage Program\nThe 7-Day Insight \nMastery Action Plan\nDaily practices and reflections to stretch your \nthinking and unlock fresh perspectives'),
 Document(metadata={'producer': 'Adobe PDF Library 17.0', 'creator': 'Adobe InDesign 20.0 (Macintosh)', 'creationdate': '2025-09-19T12:25:36+05:30', 'moddate': '2025-09-19T12:25:40+05:30', 'trapped': '/False', 'source': '../../resources/Insights.pdf', 'total_pages': 33, 'page': 1, 'page_label': '2'}, page_content="© 2025 ZEUX Innovation | Part of The Insight Advantage Program\n•\xa0Complete each day's activities in sequence\n•\xa0Spend 15-30 minutes on daily exercises\n•\xa0Write re

In [83]:
### Create a vector store
import faiss
from langchain_community.vectorstores import FAISS

# from_documents embeds `docs`, builds an L2 index sized from the embeddings,
# and fills an in-memory docstore in one call -- no hard-coded 384 to keep in
# sync with the embedding model.
vector_store = FAISS.from_documents(documents=docs, embedding=vectorizer)
# Keep `index` pointing at the index behind the current store.
index = vector_store.index
# Retriever that returns the 3 most similar chunks for a query.
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})
retriever.invoke("Who is cute?")


[Document(id='2857fdea-ccf7-4e89-baa0-da99d3892bc7', metadata={'producer': 'Adobe PDF Library 17.0', 'creator': 'Adobe InDesign 20.0 (Macintosh)', 'creationdate': '2025-09-19T12:25:36+05:30', 'moddate': '2025-09-19T12:25:40+05:30', 'trapped': '/False', 'source': '../../resources/Insights.pdf', 'total_pages': 33, 'page': 14, 'page_label': '15'}, page_content='© 2025 ZEUX Innovation | Part of The Insight Advantage Program\nTheme: Developing Micro-Observation Skills\nMorning Intention Setting\nToday I will practice detailed observation by:\n•\xa0Writing down small details I notice in people/situations\n•\xa0Practicing mindfulness throughout the day\n•\xa0Looking for micro-expressions and subtle cues\nDay 4\nT une into the Details'),
 Document(id='b60e94d3-dc0a-4070-8ce9-aaef794f2dbd', metadata={'producer': 'Adobe PDF Library 17.0', 'creator': 'Adobe InDesign 20.0 (Macintosh)', 'creationdate': '2025-09-19T12:25:36+05:30', 'moddate': '2025-09-19T12:25:40+05:30', 'trapped': '/False', 'source

In [84]:
from langchain_openai import ChatOpenAI

# Chat model that generates the final answer in the RAG chain.
model = ChatOpenAI(model="o3-mini")



In [85]:
from langsmith import Client
from langchain_core.prompts import ChatPromptTemplate

# Identifier of the shared RAG prompt to fetch.
RAG_PROMPT_ID = "rlm/rag-prompt"

# LangSmith client used to download the prompt.
client = Client()

# Fetch the prompt template; the next cell prints its messages.
prompt = client.pull_prompt(RAG_PROMPT_ID)

print("Prompt successfully pulled!")

Prompt successfully pulled!


In [86]:
import pprint
# Inspect the pulled template: the recorded output shows a single human
# message with `context` and `question` input variables.
pprint.pprint(prompt.messages)

[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})]


In [87]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    """Merge retrieved documents into one context string.

    Args:
        docs: iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values joined with a blank line between them
        (an empty string when ``docs`` is empty).
    """
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)



In [88]:
### With the prompt, context, model and parser ready, wire them into a chain.

# The incoming question goes to the retriever (whose hits are flattened by
# format_docs) and is also passed through unchanged; together they fill the
# prompt's `context` and `question` variables.
prompt_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# prompt -> chat model -> plain string answer.
rag_chain = prompt_inputs | prompt | model | StrOutputParser()

### Ask the RAG chain a question
rag_chain.invoke("How do i listen to people?")

'To listen to people effectively, be fully present in the conversation and focus on both what is said and what may be left unsaid. Notice microexpressions, emotional undertones, and subtle cues to understand their real needs. Reflect on these observations to continuously improve your listening skills.'