In [None]:
!pip install langchain
!pip install langchainplus_sdk
!pip install chromadb

In [None]:
# import
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders import TextLoader

# Load the raw text file as LangChain documents.
loader = TextLoader("../state_of_the_union.txt")
documents = loader.load()

# Split the loaded documents into chunks (chunk_size=1000, no overlap).
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

# create the open-source embedding function
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# Embed every chunk and load it into a Chroma store
# (no persist_directory is given here).
db = Chroma.from_documents(docs, embedding_function)

# query it
query = "What did the president say about Ketanji Brown Jackson"
# Keep the hits under their own name: rebinding `docs` here would replace the
# full chunk list with just the search results, and any later cell that
# indexes `docs` would then build its store from those few hits only.
results = db.similarity_search(query)

# print the best-matching chunk
print(results[0].page_content)

In [None]:
# Rebuild the full chunk list before persisting. By this point `docs` may
# hold only the hits of an earlier similarity_search, and indexing those
# would save a store containing just a handful of chunks.
all_chunks = text_splitter.split_documents(documents)

# save to disk
db2 = Chroma.from_documents(all_chunks, embedding_function, persist_directory="./chroma_db")
db2.persist()
# Query the store that was just written (not the earlier `db`).
docs = db2.similarity_search(query)

# load from disk
# Pass the same embedding function used to build the index so the query is
# embedded with the model the stored vectors came from.
db3 = Chroma(persist_directory="./chroma_db", embedding_function=embedding_function)
# Query the reloaded store so this cell actually verifies the round trip.
docs = db3.similarity_search(query)
print(docs[0].page_content)