In [1]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment so
# secrets stay out of the notebook source. As the cell's last expression, the
# call's return value (True in the saved output) is displayed.
load_dotenv()

True

In [2]:
# Re-export the Hugging Face token only when one is actually configured.
# os.getenv returns None for a missing variable, and assigning None into
# os.environ raises TypeError, which would abort a fresh "Run All" on any
# machine without the token.
# NOTE(review): huggingface_hub reads `HF_TOKEN`; confirm that
# `HUGGINGFACE_TOKEN` is the name the downstream code really consumes.
hf_token = os.getenv("HUGGINGFACE_TOKEN")
if hf_token is not None:
    os.environ["HUGGINGFACE_TOKEN"] = hf_token

In [3]:
from langchain_huggingface import HuggingFaceEmbeddings

# Name of the Hugging Face embedding model shared by every vector store below.
EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'

# Embedding client used for both documents and queries throughout the notebook.
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from sklearn.metrics.pairwise import cosine_similarity

In [5]:
# Small corpus of questions to compare against a single statement.
documents = [
    "Who is president of USA?",         # leader / USA
    "What is the capital of USA?",      # capital / USA
    "What is the capital of India?",    # capital / India
    "Who is prime minister of India?",  # leader / India
]

# Statement whose embedding is compared with every question above.
my_response = "Delhi is capital of India."

In [6]:
# Embed every string in `documents`; the result holds one vector per input
# string and is compared against the query embedding in the cells below.
embed_documents=embeddings.embed_documents(documents)

In [7]:
# Embed the single statement `my_response` as a query vector.
embed_res=embeddings.embed_query(my_response)

In [9]:
# Cosine similarity of the statement against each question (higher = closer).
# The query vector is wrapped in a list because the function expects 2-D
# inputs; the result is a 1 x len(documents) array.
cosine_similarity([embed_res],embed_documents)

array([[0.07971492, 0.38933573, 0.75157214, 0.39382577]])

In [10]:
from sklearn.metrics.pairwise import euclidean_distances

# Euclidean distance of the statement to each question (lower = closer); it
# ranks the questions the same way as the cosine similarities above.
euclidean_distances([embed_res],embed_documents)

array([[1.35667624, 1.10513743, 0.70487998, 1.10106703]])

In [None]:
#In Memory Database
import faiss
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore

In [15]:
# Size the index from the embedding model instead of hard-coding 384, so that
# switching `embeddings` to a model with another output width cannot leave the
# index with a mismatched dimension.
embedding_dim = len(embeddings.embed_query("hello world"))

# Flat L2 index: stores raw vectors and ranks neighbours by Euclidean distance.
index = faiss.IndexFlatL2(embedding_dim)

In [16]:
# In-memory FAISS vector store backed by the L2 index created above. It starts
# empty: a fresh docstore and an empty index-position -> document-id mapping.
faissstore = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

In [17]:
# Embed the question strings and add them to the store; the displayed return
# value is the list of generated document ids, one per text.
# NOTE(review): re-running this cell presumably appends the same texts again
# under new ids -- rebuild `faissstore` first if you need a clean store.
faissstore.add_texts(documents)

['48aead18-ef49-4ba0-a659-662dd1ccf23a',
 'cb446737-ebdf-4569-bbf7-52b75e7624b8',
 '6b491780-dc11-4e7c-9f3d-c3e651a92b97',
 '77eae621-28fa-45ba-9a8f-41d427a58500']

In [19]:
# Retrieve only the single closest stored question (k=1) for the statement.
faissstore.similarity_search("Delhi is capital of India",k=1)

[Document(id='6b491780-dc11-4e7c-9f3d-c3e651a92b97', metadata={}, page_content='What is the capital of India?')]

In [22]:
from langchain_core.documents import Document
def _make_doc(text, source):
    """Build a Document whose metadata records only where the text came from."""
    return Document(page_content=text, metadata={"source": source})


# Ten sample posts from three sources ("tweet", "news", "website"); the
# "source" metadata is what the filtered searches later in the notebook key on.
document_1 = _make_doc(
    "I had chocolate chip pancakes and scrambled eggs for breakfast this morning.",
    "tweet",
)
document_2 = _make_doc(
    "The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    "news",
)
document_3 = _make_doc(
    "Building an exciting new project with LangChain - come check it out!",
    "tweet",
)
document_4 = _make_doc(
    "Robbers broke into the city bank and stole $1 million in cash.",
    "news",
)
document_5 = _make_doc(
    "Wow! That was an amazing movie. I can't wait to see it again.",
    "tweet",
)
document_6 = _make_doc(
    "Is the new iPhone worth the price? Read this review to find out.",
    "website",
)
document_7 = _make_doc(
    "The top 10 soccer players in the world right now.",
    "website",
)
document_8 = _make_doc(
    "LangGraph is the best framework for building stateful, agentic applications!",
    "tweet",
)
document_9 = _make_doc(
    "The stock market is down 500 points today due to fears of a recession.",
    "news",
)
document_10 = _make_doc(
    "I have a bad feeling I am going to get deleted :(",
    "tweet",
)

# NOTE: this rebinds `documents`, which an earlier cell used for a list of
# plain strings; from here on it is a list of Document objects.
documents = [
    document_1, document_2, document_3, document_4, document_5,
    document_6, document_7, document_8, document_9, document_10,
]

In [27]:
# Display the Document list built above as a quick sanity check.
documents

[Document(metadata={'source': 'tweet'}, page_content='I had chocolate chip pancakes and scrambled eggs for breakfast this morning.'),
 Document(metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'),
 Document(metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.'),
 Document(metadata={'source': 'tweet'}, page_content="Wow! That was an amazing movie. I can't wait to see it again."),
 Document(metadata={'source': 'website'}, page_content='Is the new iPhone worth the price? Read this review to find out.'),
 Document(metadata={'source': 'website'}, page_content='The top 10 soccer players in the world right now.'),
 Document(metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic application

In [24]:
# Imported in this cell so it stays self-contained; langchain_community is
# already a dependency of this notebook.
from langchain_community.vectorstores.utils import DistanceStrategy

# Size the index from the embedding model instead of hard-coding 384.
embedding_dim = len(embeddings.embed_query("hello world"))

# Inner-product index: larger scores mean more similar vectors.
index = faiss.IndexFlatIP(embedding_dim)

# The store must be told the index ranks by inner product. Left at its default
# (Euclidean distance) it would treat scores as "smaller is better", so
# score thresholds and relevance scores would be applied the wrong way round.
vectorstore = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
    distance_strategy=DistanceStrategy.MAX_INNER_PRODUCT,
)

In [28]:
# Embed and index the ten Document objects; the displayed return value is the
# list of generated ids, one per document.
# NOTE(review): re-running this cell presumably adds the same documents again
# under new ids -- rebuild `vectorstore` first if you need a clean store.
vectorstore.add_documents(documents=documents)

['9fd8d51c-8532-42e0-a82a-c1734c046523',
 '12a99509-71d8-40ae-a423-81797468df34',
 '93ace0ca-5014-4775-9c65-7111d5b23302',
 'c7d6fbc2-3b44-4bbf-b21f-6f23677190f0',
 '43422ac1-e6bd-4154-915e-7b459288dd74',
 '2cda5e25-d104-4203-8ff7-b670b8ee86a3',
 '9be8e8b5-5796-4010-8ed8-435bc2752027',
 '1551c7f3-f879-4f31-a2e9-2e332e712043',
 'ba0693bb-38c8-4f56-8818-bc0c453333a6',
 'a2a713c5-6df1-4b99-9b79-cb626fe54444']

In [31]:
# No `k` is passed, so the store's default result count applies (four
# documents come back in the saved output).
vectorstore.similarity_search("LangChain provides abstractions to make working with LLMs easy")

[Document(id='93ace0ca-5014-4775-9c65-7111d5b23302', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(id='1551c7f3-f879-4f31-a2e9-2e332e712043', metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!'),
 Document(id='a2a713c5-6df1-4b99-9b79-cb626fe54444', metadata={'source': 'tweet'}, page_content='I have a bad feeling I am going to get deleted :('),
 Document(id='c7d6fbc2-3b44-4bbf-b21f-6f23677190f0', metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.')]

In [32]:
# Same query, limited to the two best matches.
vectorstore.similarity_search("LangChain provides abstractions to make working with LLMs easy", k=2)

[Document(id='93ace0ca-5014-4775-9c65-7111d5b23302', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(id='1551c7f3-f879-4f31-a2e9-2e332e712043', metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!')]

In [34]:
# Top three matches restricted to documents whose metadata "source" is "tweet".
vectorstore.similarity_search(
    "LangChain provides abstractions to make working with LLMs easy",
    k=3,
    filter={"source": {"$eq": "tweet"}},
)

[Document(id='93ace0ca-5014-4775-9c65-7111d5b23302', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(id='1551c7f3-f879-4f31-a2e9-2e332e712043', metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!'),
 Document(id='a2a713c5-6df1-4b99-9b79-cb626fe54444', metadata={'source': 'tweet'}, page_content='I have a bad feeling I am going to get deleted :(')]

In [35]:
# Top three matches restricted to documents whose metadata "source" is "news".
vectorstore.similarity_search(
    "LangChain provides abstractions to make working with LLMs easy",
    k=3,
    filter={"source": {"$eq": "news"}},
)

[Document(id='c7d6fbc2-3b44-4bbf-b21f-6f23677190f0', metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.'),
 Document(id='12a99509-71d8-40ae-a423-81797468df34', metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'),
 Document(id='ba0693bb-38c8-4f56-8818-bc0c453333a6', metadata={'source': 'news'}, page_content='The stock market is down 500 points today due to fears of a recession.')]

In [39]:
# RAG retrieval step: wrap the vector store as a retriever that returns the
# top 3 documents per query.
# NOTE: the name `retriver` (sic) is reused by later cells; rename every
# occurrence together if the spelling is ever corrected.

retriver=vectorstore.as_retriever(search_kwargs={"k":3})

In [40]:
# Run the retriever on a free-text query; it returns a list of matching
# Document objects (three in the saved output).
retriver.invoke("LangChain provides abstractions to make working with LLMs easy")

[Document(id='93ace0ca-5014-4775-9c65-7111d5b23302', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(id='1551c7f3-f879-4f31-a2e9-2e332e712043', metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!'),
 Document(id='a2a713c5-6df1-4b99-9b79-cb626fe54444', metadata={'source': 'tweet'}, page_content='I have a bad feeling I am going to get deleted :(')]

In [41]:
# Retriever limited to documents whose metadata "source" is "news".
# Metadata constraints must sit under the "filter" key of search_kwargs; a bare
# "source" key is not a filter and is silently ignored, leaving the retriever
# unfiltered.
retriver = vectorstore.as_retriever(
    search_kwargs={"k": 3, "filter": {"source": "news"}}
)

In [42]:
# Ask a weather question through the current `retriver`; the result is a list
# of Document objects.
retriver.invoke("How is the weather today in the city?")

[Document(id='12a99509-71d8-40ae-a423-81797468df34', metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'),
 Document(id='c7d6fbc2-3b44-4bbf-b21f-6f23677190f0', metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.'),
 Document(id='ba0693bb-38c8-4f56-8818-bc0c453333a6', metadata={'source': 'news'}, page_content='The stock market is down 500 points today due to fears of a recession.')]

In [43]:
# Persist the vector store to the "local_faiss_vs" folder (a relative path) so
# that it can be reloaded later without re-embedding the documents.
vectorstore.save_local("local_faiss_vs")


In [44]:
# Imported in this cell so it stays self-contained; langchain_community is
# already a dependency of this notebook.
from langchain_community.vectorstores.utils import DistanceStrategy

# SECURITY: load_local unpickles the saved docstore, and unpickling can execute
# arbitrary code. allow_dangerous_deserialization=True is acceptable here only
# because "local_faiss_vs" was written by this notebook; never enable it for
# index folders that come from an untrusted source.
new_vectorstore = FAISS.load_local(
    "local_faiss_vs",
    embeddings,
    allow_dangerous_deserialization=True,
    # The saved index is an inner-product index (IndexFlatIP). The distance
    # strategy has to be passed again on load so the reloaded store interprets
    # scores as "larger is better".
    distance_strategy=DistanceStrategy.MAX_INNER_PRODUCT,
)

In [45]:
# Sanity-check the reloaded store: the top three hits for "Langchain" carry the
# same document ids that were generated when the documents were first added.
new_vectorstore.similarity_search("Langchain",k=3)

[Document(id='93ace0ca-5014-4775-9c65-7111d5b23302', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(id='1551c7f3-f879-4f31-a2e9-2e332e712043', metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!'),
 Document(id='43422ac1-e6bd-4154-915e-7b459288dd74', metadata={'source': 'tweet'}, page_content="Wow! That was an amazing movie. I can't wait to see it again.")]

In [46]:
# Rebind `retriver` to the reloaded store (top 3 documents per query); this
# replaces the retriever created from the in-memory `vectorstore`.
retriver=new_vectorstore.as_retriever(search_kwargs={"k":3})

In [47]:
# Query the retriever built on the reloaded store; the saved output matches
# the direct similarity_search results on `new_vectorstore`.
retriver.invoke("langchain")

[Document(id='93ace0ca-5014-4775-9c65-7111d5b23302', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(id='1551c7f3-f879-4f31-a2e9-2e332e712043', metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!'),
 Document(id='43422ac1-e6bd-4154-915e-7b459288dd74', metadata={'source': 'tweet'}, page_content="Wow! That was an amazing movie. I can't wait to see it again.")]