In [3]:
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [4]:
# Load the speech transcript from disk as LangChain Document objects.
# The encoding is pinned to UTF-8 so that decoding does not depend on the
# platform's default locale encoding; the text contains non-ASCII punctuation
# (e.g. the em-dash "—"), which a non-UTF-8 default would garble or reject.
loader = TextLoader('speech.txt', encoding='utf-8')
data = loader.load()

In [5]:
# Break the loaded document into small overlapping chunks: chunk_size=200 and
# chunk_overlap=50, so neighbouring chunks share some text at their boundary.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=50,
)
splits = text_splitter.split_documents(data)

In [6]:
# Embed every chunk with the Ollama "llama3" model and index the vectors in a
# Chroma store (no persist_directory is given at this point).
# NOTE(review): llama3 is a general-purpose chat model; a dedicated embedding
# model may retrieve more relevant chunks — confirm before relying on results.
embedding = OllamaEmbeddings(model="llama3")
vectordb = Chroma.from_documents(
    documents=splits,
    embedding=embedding,
)
vectordb

<langchain_chroma.vectorstores.Chroma at 0x1102d2620>

In [7]:
# Free-text query to look up in the vector store.
query = " task we can dedicate our lives"
# similarity_search returns the matching Document objects only; despite the
# variable name, no similarity scores are attached to the results.
docs_with_score = vectordb.similarity_search(query=query)

In [8]:
# Display the retrieved results (a plain list of Document objects, no scores).
docs_with_score

[Document(metadata={'source': 'speech.txt'}, page_content='purpose. If there should be disloyalty, it will be dealt with with a firm hand of stern repression; but, if it lifts its head at all, it will lift it only here and there and without countenance'),
 Document(metadata={'source': 'speech.txt'}, page_content='she has treasured. God helping her, she can do no other.'),
 Document(metadata={'source': 'speech.txt'}, page_content='and shall desire nothing so much as the early reestablishment of intimate relations of mutual advantage between us—however hard it may be for them, for the time being, to believe that this is spoken'),
 Document(metadata={'source': 'speech.txt'}, page_content='one of the champions of the rights of mankind. We shall be satisfied when those rights have been made as secure as the faith and the freedom of nations can make them.')]

In [9]:
## Save to local disk
# Build the store again, this time persisted under ./chroma_db.
# Each chunk gets a stable, position-based id so that re-running this cell
# upserts the same records instead of appending a fresh copy of every chunk to
# the on-disk collection (without explicit ids, new random ids are generated on
# every run and the persisted collection grows with duplicates).
# Note: `vectordb` is rebound here, so later cells use the persisted store.
chunk_ids = [f"speech-chunk-{i}" for i in range(len(splits))]
vectordb = Chroma.from_documents(
    documents=splits,
    embedding=embedding,
    ids=chunk_ids,
    persist_directory="./chroma_db",
)

In [10]:
## Load from local disk
# Re-open the collection stored under ./chroma_db, passing the same embedding
# object that was used when the chunks were indexed, then run the same query
# against the reloaded store and display the hits.
vectordb2 = Chroma(
    embedding_function=embedding,
    persist_directory="./chroma_db",
)
doc = vectordb2.similarity_search(query)
doc

[Document(metadata={'source': 'speech.txt'}, page_content='purpose. If there should be disloyalty, it will be dealt with with a firm hand of stern repression; but, if it lifts its head at all, it will lift it only here and there and without countenance'),
 Document(metadata={'source': 'speech.txt'}, page_content='she has treasured. God helping her, she can do no other.'),
 Document(metadata={'source': 'speech.txt'}, page_content='and shall desire nothing so much as the early reestablishment of intimate relations of mutual advantage between us—however hard it may be for them, for the time being, to believe that this is spoken'),
 Document(metadata={'source': 'speech.txt'}, page_content='one of the champions of the rights of mankind. We shall be satisfied when those rights have been made as secure as the faith and the freedom of nations can make them.')]

In [11]:
### RETRIEVER
# Wrap the vector store in a retriever (default settings) and run the query
# through it; the cell's output is the list of retrieved documents.
retriever = vectordb.as_retriever()
retriever.invoke(query)

[Document(metadata={'source': 'speech.txt'}, page_content='purpose. If there should be disloyalty, it will be dealt with with a firm hand of stern repression; but, if it lifts its head at all, it will lift it only here and there and without countenance'),
 Document(metadata={'source': 'speech.txt'}, page_content='she has treasured. God helping her, she can do no other.'),
 Document(metadata={'source': 'speech.txt'}, page_content='and shall desire nothing so much as the early reestablishment of intimate relations of mutual advantage between us—however hard it may be for them, for the time being, to believe that this is spoken'),
 Document(metadata={'source': 'speech.txt'}, page_content='one of the champions of the rights of mankind. We shall be satisfied when those rights have been made as secure as the faith and the freedom of nations can make them.')]