### Faiss vector store Db

In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

In [4]:
loader = TextLoader("speech.txt")
docs = loader.load()

splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=20)
final_docs = splitter.split_documents(docs)

embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

db = FAISS.from_documents(final_docs, embedding)



Created a chunk of size 324, which is longer than the specified 200
Created a chunk of size 652, which is longer than the specified 200
Created a chunk of size 210, which is longer than the specified 200
  from .autonotebook import tqdm as notebook_tqdm


In [5]:
## quering

query = "what is langchain?"

docs = db.similarity_search(query)
docs

[Document(id='28c1fed6-889d-4772-a924-820ff1701e10', metadata={'source': 'speech.txt'}, page_content="LangChain is a software framework that helps facilitate the integration of large language models (LLMs) into applications. As a language model integration framework, LangChain's use-cases largely overlap with those of language models in general, including document analysis and summarization, chatbots, and code analysis.[2]"),
 Document(id='f9922a9e-9433-4f0b-9132-b08db75299fb', metadata={'source': 'speech.txt'}, page_content="History\nLangChain was launched in October 2022 as an open source project by Harrison Chase, while working at machine learning startup Robust Intelligence. The project quickly garnered popularity,[3] with improvements from hundreds of contributors on GitHub, trending discussions on Twitter, lively activity on the project's Discord server, many YouTube tutorials, and meetups in San Francisco and London. In April 2023, LangChain had incorporated and the new startup 

In [7]:
# As a retriever

retriever = db.as_retriever()
docs = retriever.invoke(query)
docs

[Document(id='28c1fed6-889d-4772-a924-820ff1701e10', metadata={'source': 'speech.txt'}, page_content="LangChain is a software framework that helps facilitate the integration of large language models (LLMs) into applications. As a language model integration framework, LangChain's use-cases largely overlap with those of language models in general, including document analysis and summarization, chatbots, and code analysis.[2]"),
 Document(id='f9922a9e-9433-4f0b-9132-b08db75299fb', metadata={'source': 'speech.txt'}, page_content="History\nLangChain was launched in October 2022 as an open source project by Harrison Chase, while working at machine learning startup Robust Intelligence. The project quickly garnered popularity,[3] with improvements from hundreds of contributors on GitHub, trending discussions on Twitter, lively activity on the project's Discord server, many YouTube tutorials, and meetups in San Francisco and London. In April 2023, LangChain had incorporated and the new startup 

In [8]:
## docs with similarity search and score

docs_and_score = db.similarity_search_with_score(query)
docs_and_score

[(Document(id='28c1fed6-889d-4772-a924-820ff1701e10', metadata={'source': 'speech.txt'}, page_content="LangChain is a software framework that helps facilitate the integration of large language models (LLMs) into applications. As a language model integration framework, LangChain's use-cases largely overlap with those of language models in general, including document analysis and summarization, chatbots, and code analysis.[2]"),
  np.float32(0.74049544)),
 (Document(id='f9922a9e-9433-4f0b-9132-b08db75299fb', metadata={'source': 'speech.txt'}, page_content="History\nLangChain was launched in October 2022 as an open source project by Harrison Chase, while working at machine learning startup Robust Intelligence. The project quickly garnered popularity,[3] with improvements from hundreds of contributors on GitHub, trending discussions on Twitter, lively activity on the project's Discord server, many YouTube tutorials, and meetups in San Francisco and London. In April 2023, LangChain had inco

In [9]:
## saving and laoding

db.save_local("faiss_index")

In [None]:
# laoding from local

new_db = FAISS.load_local("faiss_index", embedding, allow_dangerous_deserialization=True)