In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_classic.storage import InMemoryStore
from langchain_classic.retrievers.parent_document_retriever import ParentDocumentRetriever
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_experimental.text_splitter import SemanticChunker
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [2]:
# Source material for the retriever. Each loader in the list is read in
# order and everything it yields is pooled into a single flat `docs` list,
# so adding another file only means adding another loader here.
loaders = [
    TextLoader('./data/How_to_invest_money.txt', encoding='utf-8'),
]

docs = []
for loader in loaders:
    docs += loader.load()

In [3]:
# Two splitting granularities, passed to ParentDocumentRetriever as its
# `parent_splitter` and `child_splitter`: the parent size is five times the
# child size.
PARENT_CHUNK_SIZE = 1000
CHILD_CHUNK_SIZE = 200

# NOTE(review): chunk_overlap is not passed, so the library default applies
# to both splitters. If that default is close to CHILD_CHUNK_SIZE the child
# chunks will overlap heavily -- confirm this is intended.
parent_splitter = RecursiveCharacterTextSplitter(chunk_size=PARENT_CHUNK_SIZE)
child_splitter = RecursiveCharacterTextSplitter(chunk_size=CHILD_CHUNK_SIZE)

In [4]:
# Embedding model used by the vector store.
# NOTE(review): constructed with no arguments, so it presumably picks up its
# credentials and model choice from the environment -- confirm.
embedding_model = OpenAIEmbeddings()

# Chroma collection that is passed to the retriever as its `vectorstore`.
vectorstore = Chroma(
    collection_name='split_parents',
    embedding_function=embedding_model,
)

# In-memory store that is passed to the retriever as its `docstore`.
store = InMemoryStore()

In [5]:
# Wire the two splitters and the two stores into a single retriever.
retriever = ParentDocumentRetriever(
    parent_splitter=parent_splitter,
    child_splitter=child_splitter,
    vectorstore=vectorstore,
    docstore=store,
)

# Index the loaded documents through the retriever.
retriever.add_documents(docs)

# Sanity check: how many keys ended up in the docstore
# (presumably one per parent chunk -- confirm).
docstore_key_count = sum(1 for _ in store.yield_keys())
print(docstore_key_count)

219


In [20]:
# Ask the retriever a question and show the text of its top-ranked result.
query = 'What are the types of investments?'
retrieved_docs = retriever.invoke(query)

top_retrieved = retrieved_docs[0]
print(top_retrieved.page_content)

There are five chief points to be considered in the selection of all
forms of investment. These are: (1) safety of principal and interest;
(2) rate of income; (3) convertibility into cash; (4) prospect of
appreciation in intrinsic value; (5) stability of market price.

Keeping these five general factors in mind, the present chapter will
discuss real-estate mortgages as a form of investment, both as adapted
to the requirements of private funds and of a business surplus.


In [11]:
# Run the same `query` against the vector store directly, bypassing the
# retriever, to see what the underlying similarity search returns on its own.
sub_docs = vectorstore.similarity_search(query)

top_sub_doc = sub_docs[0]
print(top_sub_doc.page_content)

forms of investment. These are: (1) safety of principal and interest;
(2) rate of income; (3) convertibility into cash; (4) prospect of
appreciation in intrinsic value; (5) stability of market price.
