In [1]:
from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader
import os
from langchain.storage import LocalFileStore
from langchain.storage import create_kv_docstore
from main import load_vector_store
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.retrievers import ParentDocumentRetriever

# Populate the process environment from a local .env file before the vector
# store is loaded below.
load_dotenv()

# Read the source PDF into the documents that will be indexed.
pdf_loader = PyPDFLoader('가뭄 대비 농작물 관리 요령.pdf')
documents = pdf_loader.load()

# Directory backing the parent-document store; created up front if missing.
parent_docstore_path = './parent_docstore'
os.makedirs(parent_docstore_path, exist_ok=True)

# Wrap the file-backed store so it can be used as the retriever's docstore.
file_store = LocalFileStore(parent_docstore_path)
store = create_kv_docstore(file_store)

# Vector store comes from the project's own helper in `main`.
# NOTE(review): its backend and persistence behaviour are not visible here;
# presumably it relies on settings loaded by load_dotenv() -- confirm.
vector_db = load_vector_store()

# Splitter used for the child chunks; separators are tried in the listed order.
child_splitter = RecursiveCharacterTextSplitter(
    separators=['\n\n', '\n', '.', ' ', ''],
    chunk_size=1000,
    chunk_overlap=150,
)

# No parent splitter is configured, so the loaded documents themselves are
# passed in as the parents.
indexer = ParentDocumentRetriever(
    child_splitter=child_splitter,
    docstore=store,
    vectorstore=vector_db,
)

# Index the documents through the retriever configured above.
indexer.add_documents(documents)