In [4]:
#data ingestion
# how to read from a pdf,text,web etc
from langchain_community.document_loaders import TextLoader
loader = TextLoader("speech.txt")
text_docs = loader.load()
text_docs


[Document(metadata={'source': 'speech.txt'}, page_content='The world must be made safe for democracy. Its peace must be planted upon the tested foundations of political liberty. We have no selfish ends to serve. We desire no conquest, no dominion. We seek no indemnities for ourselves, no material compensation for the sacrifices we shall freely make. We are but one of the champions of the rights of mankind. We shall be satisfied when those rights have been made as secure as the faith and the freedom of nations can make them.\n\nJust because we fight without rancor and without selfish object, seeking nothing for ourselves but what we shall wish to share with all free peoples, we shall, I feel confident, conduct our operations as belligerents without passion and ourselves observe with proud punctilio the principles of right and of fair play we profess to be fighting for.\n\n…\n\nIt will be all the easier for us to conduct ourselves as belligerents in a high spirit of right and fairness be

In [8]:
import os
from dotenv import load_dotenv
load_dotenv()

os.environ["OPENAI_API_KEY"]=os.getenv("OPENAI_API_KEY")
os.environ["GROQ_API_KEY"]=os.getenv("GROQ_API_KEY")

In [15]:
#web based loader
from langchain_community.document_loaders import WebBaseLoader
import bs4
urls = ["https://langchain-ai.github.io/langgraph/concepts/#concepts"]
web_loader = WebBaseLoader(web_paths=urls,
                           bs_kwargs=dict(
                               parse_only=bs4.SoupStrainer(
                                   class_=("md-content")
                               )
                           ))
#load,chunk,index the html content
web_docs = web_loader.load()
web_docs

[Document(metadata={'source': 'https://langchain-ai.github.io/langgraph/concepts/#concepts'}, page_content='\n\n\n\n\n\n    LangGraph\n  \n\n\n\n\n\n    Guides\n  \n\n\n\n\n\n    Concepts\n  \n\n\n\n\n\n\n\n\n\n\nConceptual Guide¶\nThis guide provides explanations of the key concepts behind the LangGraph framework and AI applications more broadly.\nWe recommend that you go through at least the Quickstart before diving into the conceptual guide. This will provide practical context that will make it easier to understand the concepts discussed here.\nThe conceptual guide does not cover step-by-step instructions or specific implementation examples — those are found in the Tutorials and How-to guides. For detailed reference material, please see the API reference.\nLangGraph¶\nHigh Level¶\n\nWhy LangGraph?: A high-level overview of LangGraph and its goals.\n\nConcepts¶\n\nLangGraph Glossary: LangGraph workflows are designed as graphs, with nodes representing different components and edges re

In [18]:
#transforms the docs into chunks
from langchain_text_splitters import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=100,chunk_overlap=10)
documents = text_splitter.split_documents(text_docs)
documents[:5]

[Document(metadata={'source': 'speech.txt'}, page_content='The world must be made safe for democracy. Its peace must be planted upon the tested foundations of'),
 Document(metadata={'source': 'speech.txt'}, page_content='of political liberty. We have no selfish ends to serve. We desire no conquest, no dominion. We seek'),
 Document(metadata={'source': 'speech.txt'}, page_content='We seek no indemnities for ourselves, no material compensation for the sacrifices we shall freely'),
 Document(metadata={'source': 'speech.txt'}, page_content='freely make. We are but one of the champions of the rights of mankind. We shall be satisfied when'),
 Document(metadata={'source': 'speech.txt'}, page_content='when those rights have been made as secure as the faith and the freedom of nations can make them.')]

In [23]:
from langchain_community.embeddings import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
  from .autonotebook import tqdm as notebook_tqdm


In [26]:
# Connect to Milvus
MILVUS_HOST = "localhost"
MILVUS_PORT = "19530"
COLLECTION_NAME = "langchain_demo"

In [40]:
from langchain_community.vectorstores import Milvus

vectorstore = Milvus.from_documents(
    documents=documents,
    embedding=embeddings,
    connection_args={"uri": "sqlite://:@milvus-lite"},
    collection_name="test_collection"
    drop_old=True,  # Drop the old Milvus collection if it exists
)

Failed to create new connection using: 025fb8ef731943548a8bab17701a13a5


MilvusException: <MilvusException: (code=2, message=Fail connecting to server on localhost:19530, illegal connection params or server unavailable)>

In [30]:
# 3. Create the vector store in Milvus
from langchain_community.vectorstores import Milvus
vectorstore = Milvus.from_documents(
    documents=documents,
    embedding=embeddings,
    connection_args={"host": MILVUS_HOST, "port": MILVUS_PORT},
    collection_name=COLLECTION_NAME,
)

Failed to create new connection using: 1038afc7785843f4b04a7431673f38a4


MilvusException: <MilvusException: (code=2, message=Fail connecting to server on localhost:19530, illegal connection params or server unavailable)>

In [None]:
#vector db
from langchain_community.vectorstores import Chroma
db = Chroma.from_documents(documents,embeddings)




In [34]:
#query
query = "history"
result = db.similarity_search("history")
result[0].page_content

'terrible and disastrous of all wars, civilization itself seeming to be in the balance. But the'

In [35]:
#FIASS
from langchain_community.vectorstores import FAISS
db = FAISS.from_documents(documents,embeddings)
result = db.similarity_search("history")
result[0].page_content

'terrible and disastrous of all wars, civilization itself seeming to be in the balance. But the'