In [6]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
#from langchain_community.embeddings import OllamaEmbeddings
from langchain_ollama import OllamaEmbeddings
from langchain_community.vectorstores import FAISS

loader =TextLoader('speechKs.txt')
documents=loader.load()
text_splitter=CharacterTextSplitter(chunk_size=200,chunk_overlap=30)
docs=text_splitter.split_documents(documents)


Created a chunk of size 402, which is longer than the specified 200
Created a chunk of size 730, which is longer than the specified 200
Created a chunk of size 519, which is longer than the specified 200
Created a chunk of size 493, which is longer than the specified 200
Created a chunk of size 783, which is longer than the specified 200
Created a chunk of size 724, which is longer than the specified 200
Created a chunk of size 618, which is longer than the specified 200
Created a chunk of size 385, which is longer than the specified 200
Created a chunk of size 954, which is longer than the specified 200
Created a chunk of size 421, which is longer than the specified 200


In [None]:
embeddings=OllamaEmbeddings(model="nomic-embed-text")
db=FAISS.from_documents(docs,embeddings) # create vector store.Here it will create a in-memory.
db ##here we have called the vector store db

In [8]:
###querying
query="What is the main topic of the speech?" 
docs=db.similarity_search(query)
docs[0].page_content

: 

In [None]:
####Retriver - We can convert the vector store into a retriver class. This allow us to easily use it in other langchain methods,which largely work with retrivers.

retriever=db.as_retriever() # it will convert the vector store into a retriver class.
docs=retriever.invoke(query)
docs[0].page_content


In [None]:
## Similarity search with score - It is one of the FAISS method.It will return the similarity score along with the distance score of the query to them. The returned distance score is L2 distance. Therefore, a lower score is better

docs_and_score=db.similarity_search_with_score(query)
docs_and_score

In [None]:
## We can also parse vectors instead of sentences.
embeddings_vector=embeddings.embed_query(query)
embeddings_vector

In [None]:
docs_score=db.similarity_search_by_vector(embeddings_vector)
docs_score

In [None]:
## We can also save this vector store db in our local system and load it when required.

## Step 1: Saving and Loading the vector store. It will return a file with pkl format
db.save_local("faiss_index")

In [None]:
new_db=FAISS.load_local("faiss_index",embeddings,allow_dangerous_deserialization=True) ## Here dangerous deserialization is set to true because we are loading a local file.And we are sure that the file is safe.
docs=new_db.similarity_search(query)
docs