In [2]:
import logging

from langchain_community.document_loaders import TextLoader
from langchain_classic.storage import InMemoryStore
from langchain_classic.retrievers.parent_document_retriever import ParentDocumentRetriever
from langchain_classic.retrievers.multi_query import MultiQueryRetriever
from langchain_chroma import Chroma
from langchain_openai import ChatOpenAI
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_classic.chains import RetrievalQA

In [3]:
# Embedding model shared by the vector store: BAAI/bge-m3 loaded on the GPU,
# encoding texts in batches of 8.
embeddings = HuggingFaceEmbeddings(
    model='BAAI/bge-m3',
    model_kwargs={'device': 'cuda'},
    encode_kwargs={'batch_size': 8},
)

In [4]:
# One loader per source file; add further loaders to this list to index more texts.
loaders = [
    TextLoader('./data/How_to_invest_money.txt', encoding='utf-8'),
]

# Flatten everything the loaders return into a single list of documents.
docs = []
for loader in loaders:
    docs += loader.load()

In [5]:
# Two splitting granularities: larger "parent" chunks (1000 chars, 200 overlap)
# and smaller "child" chunks (200 chars, 50 overlap).
parent_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
child_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=50,
)

In [6]:
# Vector store that will hold the embedded chunks.
vectorstore = Chroma(collection_name='split_parents', embedding_function=embeddings)

# Make this cell idempotent. No persist directory is given, so Chroma keeps the
# collection inside the running kernel; re-running the cell would otherwise
# return the already-populated 'split_parents' collection while `docstore`
# below starts out empty, leaving duplicate/orphaned chunks in the vector store.
# NOTE(review): requires a langchain-chroma release that provides
# `Chroma.reset_collection()` -- confirm against the pinned version.
vectorstore.reset_collection()

# Fresh in-memory key/value store, created empty together with the collection.
docstore = InMemoryStore()

In [7]:
# Wire the two stores and the two splitters into a single retriever.
retriever = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=docstore,
    child_splitter=child_splitter,
    parent_splitter=parent_splitter,
)

# Index the loaded documents. This populates both `vectorstore` and `docstore`;
# re-running it adds the same documents again.
retriever.add_documents(docs)

In [12]:
# Sanity check after indexing: number of keys in the docstore, then number of
# entries in the Chroma collection.
docstore_key_count = sum(1 for _ in docstore.yield_keys())
# NOTE(review): `_collection` is a private attribute of the Chroma wrapper.
collection_entry_count = vectorstore._collection.count()

print(docstore_key_count)
print(collection_entry_count)

219
1148


In [15]:
# Compare what the retriever returns with what a raw similarity search on the
# vector store returns for the same question.
query = 'What are the types of investments?'

# Top hit from the retriever.
retrieved_docs = retriever.invoke(query)
top_retrieved_text = retrieved_docs[0].page_content
print(top_retrieved_text)

separator = '--' * 30
print(separator)

# Top hit straight from the vector store.
sub_docs = vectorstore.similarity_search(query)
top_sub_text = sub_docs[0].page_content
print(top_sub_text)

There are five chief points to be considered in the selection of all
forms of investment. These are: (1) safety of principal and interest;
(2) rate of income; (3) convertibility into cash; (4) prospect of
appreciation in intrinsic value; (5) stability of market price.

Keeping these five general factors in mind, the present chapter will
discuss real-estate mortgages as a form of investment, both as adapted
to the requirements of private funds and of a business surplus.
------------------------------------------------------------
There are five chief points to be considered in the selection of all
forms of investment. These are: (1) safety of principal and interest;
