In [None]:
## Building sample vectordb
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaEmbeddings

In [2]:
loader =TextLoader('speechKs.txt')
documents=loader.load()
text_splitter=RecursiveCharacterTextSplitter(chunk_size=500,chunk_overlap=0)
docs=text_splitter.split_documents(documents)


In [4]:
embeddings=OllamaEmbeddings(model="nomic-embed-text")
db=Chroma.from_documents(docs,embeddings) # create vector store.Here it will create a in-memory.
db ##here we have called the vector store db

<langchain_chroma.vectorstores.Chroma at 0x300afe0d0>

In [None]:
## query it
query = "what happened during world war second?"
response_docs= db.similarity_search(query) ## here we are getting the similar documents from the vectordb
response_docs[0].page_content ## print the most relevant document

'World War II\nThere was a shortage of qualified officers at the outbreak of the war, officers were thus promoted without having served for the minimum period required for a promotion. Therefore, for the first two years of the conflict, Manekshaw was temporarily appointed to the ranks of captain and major before being promoted to the substantive rank of captain on 4 February 1942.[20]'

In [None]:
## save the vectordb to disk/project directory
db=Chroma.from_documents(docs,embeddings,persist_directory='./chroma_db') ## here we are creating the vectordb and saving it to disk.It will create a folder chroma_db in the current directory
## load the vectordb from disk
db2=Chroma(persist_directory='./chroma_db',embedding_function=embeddings) ## here we are loading the vectordb from the disk
docs_db2= db2.similarity_search(query) ## querying the vectordb loaded from disk
print(docs_db2[0].page_content) ## print the most relevant document from the vectordb loaded from disk

World War II
There was a shortage of qualified officers at the outbreak of the war, officers were thus promoted without having served for the minimum period required for a promotion. Therefore, for the first two years of the conflict, Manekshaw was temporarily appointed to the ranks of captain and major before being promoted to the substantive rank of captain on 4 February 1942.[20]


In [None]:
##similarity search with score
docs_with_score= db.similarity_search_with_score(query) ## it will return the list of tuples where each tuple contains the document and its similarity score
docs_with_score ## print the documents with score

[(Document(id='db02165e-764b-4d1a-bf6d-6bf39bcdae75', metadata={'source': 'speechKs.txt'}, page_content='World War II\nThere was a shortage of qualified officers at the outbreak of the war, officers were thus promoted without having served for the minimum period required for a promotion. Therefore, for the first two years of the conflict, Manekshaw was temporarily appointed to the ranks of captain and major before being promoted to the substantive rank of captain on 4 February 1942.[20]'),
  0.8183761239051819),
 (Document(id='8bd4e4c7-9406-4a68-9ca1-8b0a9c23488a', metadata={'source': 'speechKs.txt'}, page_content='World War II\nThere was a shortage of qualified officers at the outbreak of the war, officers were thus promoted without having served for the minimum period required for a promotion. Therefore, for the first two years of the conflict, Manekshaw was temporarily appointed to the ranks of captain and major before being promoted to the substantive rank of captain on 4 February 

In [None]:
## Retriever option
retriever=db.as_retriever() ## here we are converting the vectordb to retriever.Retriever helps to fetch the relevant documents based on the query
retriever.invoke(query)[0] ## print the most relevant document using retriever

Document(id='db02165e-764b-4d1a-bf6d-6bf39bcdae75', metadata={'source': 'speechKs.txt'}, page_content='World War II\nThere was a shortage of qualified officers at the outbreak of the war, officers were thus promoted without having served for the minimum period required for a promotion. Therefore, for the first two years of the conflict, Manekshaw was temporarily appointed to the ranks of captain and major before being promoted to the substantive rank of captain on 4 February 1942.[20]')