In [1]:
# Reference: LangChain Chroma vector store integration guide
# https://python.langchain.com/v0.1/docs/integrations/vectorstores/chroma/

In [2]:
# import
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)
from langchain_text_splitters import CharacterTextSplitter

# load the document and split it into chunks
# The text contains non-ASCII characters (curly quotes, the "ﬁ" ligature), so the
# encoding is pinned explicitly instead of relying on the platform default.
loader = TextLoader("raw_texts/LOIC-1848.txt", encoding="utf-8")
documents = loader.load()

# split it into chunks
# NOTE: chunk_size is a target, not a hard cap here -- the splitter logs
# "Created a chunk of size ..." whenever a piece ends up longer than 1000.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

# create the open-source embedding function
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# load it into Chroma (in-memory store built from every chunk)
db = Chroma.from_documents(docs, embedding_function)

# query it
query = "What does the document say about Mary"
# Keep the hits under their own name: `docs` must continue to hold the full
# chunk list, because it is what gets indexed when a store is built from it.
results = db.similarity_search(query)

# print the text of the best-matching chunk
print(results[0].page_content)

Created a chunk of size 1163, which is longer than the specified 1000
Created a chunk of size 1372, which is longer than the specified 1000
Created a chunk of size 1029, which is longer than the specified 1000
Created a chunk of size 1118, which is longer than the specified 1000
Created a chunk of size 1512, which is longer than the specified 1000
Created a chunk of size 1250, which is longer than the specified 1000
Created a chunk of size 1008, which is longer than the specified 1000
Created a chunk of size 1203, which is longer than the specified 1000
Created a chunk of size 1149, which is longer than the specified 1000
Created a chunk of size 1235, which is longer than the specified 1000
Created a chunk of size 1048, which is longer than the specified 1000
Created a chunk of size 2106, which is longer than the specified 1000
Created a chunk of size 1498, which is longer than the specified 1000
Created a chunk of size 1027, which is longer than the specified 1000
  warn_deprecated(
 

With these extracts we shall close; however, as to the power of theBlessed Virgin, the translator begs to quote the following short passages :—“Wherefore,” Saint Anselm saith, (Lib. de Excel. Virg.,) “there is no doubt but the Blessed Virgin Mary, by maternal right, is with Christ President of heaven and earth. St. John Damascene, Orat. 2, de Assump. says—“It is ﬁtting and convenient that Mary should possess what is her Son’s;” and Barbertu assures us that she is able to obtain more than all the angels and saints in heaven, and more than all the Church throughout the whole world. Lastly, this is the doctrine of St. Jerome, explicated by St. Bernard, Tom. Serm. Art. Cap. 10; and another great servant of our Lady says, that “all the virtues and graces of the Holy Spirit are given through her, as she wishes, as much as she wishes, and when she wishes.”


In [3]:
# save to disk
# Build the chunk list from `documents` directly instead of relying on `docs`:
# other cells reassign `docs` to similarity-search results, and persisting
# those would store only a handful of hits rather than the whole text.
chunks = text_splitter.split_documents(documents)

# Stable per-chunk ids, so that re-running this cell writes to the same
# entries in ./chroma_basic_db instead of appending another copy of each chunk.
# Entries written by earlier runs under other ids are not removed; delete the
# directory first if a clean rebuild is needed.
chunk_ids = [f"LOIC-1848-{i}" for i in range(len(chunks))]

db2 = Chroma.from_documents(
    chunks,
    embedding_function,
    ids=chunk_ids,
    persist_directory="./chroma_basic_db",
)

# Sanity-check query against the freshly persisted store; kept under its own
# name so it does not overwrite any other variable.
persisted_results = db2.similarity_search(query)

In [4]:
# load from disk
# Reopen the store kept under ./chroma_basic_db, using the same
# `embedding_function` object as the earlier cells.
db3 = Chroma(
    embedding_function=embedding_function,
    persist_directory="./chroma_basic_db",
)

# Run the same query against the reloaded store and show the first hit's text.
docs = db3.similarity_search(query)
first_hit = docs[0]
print(first_hit.page_content)

With these extracts we shall close; however, as to the power of theBlessed Virgin, the translator begs to quote the following short passages :—“Wherefore,” Saint Anselm saith, (Lib. de Excel. Virg.,) “there is no doubt but the Blessed Virgin Mary, by maternal right, is with Christ President of heaven and earth. St. John Damascene, Orat. 2, de Assump. says—“It is ﬁtting and convenient that Mary should possess what is her Son’s;” and Barbertu assures us that she is able to obtain more than all the angels and saints in heaven, and more than all the Church throughout the whole world. Lastly, this is the doctrine of St. Jerome, explicated by St. Bernard, Tom. Serm. Art. Cap. 10; and another great servant of our Lady says, that “all the virtues and graces of the Holy Spirit are given through her, as she wishes, as much as she wishes, and when she wishes.”
