In [None]:
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.document_loaders import UnstructuredFileLoader
from langchain.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain.storage import LocalFileStore
from langchain.chat_models import ChatOpenAI
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
from langchain.prompts import ChatPromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.prompts import MessagesPlaceholder

# Chat model shared by both the per-chunk "map" step and the final answer
# step. With streaming=True the recorded notebook output shows the result
# arriving as an AIMessageChunk.
llm = ChatOpenAI(model_name="gpt-5-nano", temperature=1, streaming=True)

# Documents
# On-disk byte store handed to CacheBackedEmbeddings below.
cache_dir = LocalFileStore("./.cache")

# Splitter sized via the tiktoken encoder: split on newlines, chunk size 600
# with an overlap of 100 between neighbouring chunks.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=600, chunk_overlap=100, separator="\n"
)

# Load the source text and cut it into chunks in one step.
loader = UnstructuredFileLoader("./files/document.txt")
docs = loader.load_and_split(text_splitter=splitter)

# Vector store
embeddings = OpenAIEmbeddings()

# Wrap the embedder so vectors are cached in cache_dir.
# NOTE(review): no `namespace` is passed here; confirm the cache directory is
# never shared between different embedding models.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

# Index the chunks and expose the index as a retriever for the map step.
vectorstore = FAISS.from_documents(docs, cached_embeddings)
retriever = vectorstore.as_retriever()

# Memory
# Stores the complete conversation as message objects and exposes it under
# the "chat_history" key, matching the MessagesPlaceholder in final_prompt.
#
# NOTE: ConversationBufferMemory declares no `llm` or `max_token_limit`
# fields (those belong to the token/summary buffer memories), so the values
# previously passed here had no effect and have been removed. The history is
# therefore NOT trimmed or summarised; it grows with every turn.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
)

def load_memory(_):
    """Return the conversation stored so far in the module-level ``memory``.

    The single positional argument is ignored; it only exists so the function
    can be called with a placeholder value (``invoke_chain`` passes ``None``).
    """
    variables = memory.load_memory_variables({})
    return variables["chat_history"]

# Map step.
# Prompt applied to ONE retrieved chunk at a time: the system message asks the
# model to copy out any text relevant to the question verbatim (or return ''
# when nothing is relevant). {context} is filled with a single chunk's text
# and {question} with the user's question (see map_docs).
map_doc_prompt = ChatPromptTemplate.from_messages(
       [
        (
            "system",
            """
            Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim. If there is no relevant text, return : ''
            -------
            {context}
            """,
        ),
        ("human", "{question}"),
    ]
)

# Pipe the map prompt into the shared chat model.
map_doc_chain = map_doc_prompt | llm

def map_docs(inputs):
    """Run the map chain over every retrieved document and merge the results.

    ``inputs`` is a mapping with:
      * ``"documents"`` - iterable of objects exposing ``page_content``
      * ``"question"``  - the user's question

    Each document is sent through ``map_doc_chain`` together with the
    question; the ``content`` of every response is collected in order and the
    pieces are joined with a blank line between them. An empty document list
    yields an empty string.
    """
    documents = inputs["documents"]
    question = inputs["question"]

    extracts = []
    for document in documents:
        response = map_doc_chain.invoke(
            {"context": document.page_content, "question": question}
        )
        extracts.append(response.content)

    return "\n\n".join(extracts)

def prepare_for_map(inputs):
    """Fetch the documents relevant to the question for the map step.

    Looks up ``inputs["question"]`` with the module-level ``retriever`` and
    returns a dict carrying both the retrieved ``"documents"`` and the
    original ``"question"``, which is the shape ``map_docs`` consumes.
    """
    question = inputs["question"]
    retrieved = retriever.invoke(question)
    return {"documents": retrieved, "question": question}

# Map pipeline: retrieve the documents for the question, then run the
# per-document extraction and join the extracted text into a single string.
map_chain = (
    RunnableLambda(prepare_for_map)
    | RunnableLambda(map_docs)
)

# Reduce step.
# Prompt for the final answer: the system message carries the extracted text
# in {context}, the prior conversation is spliced in through the
# "chat_history" placeholder, and the user's question comes last.
final_prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        """
            Given the following extracted parts of a long document and a question, create a final answer. 
            If you don't know the answer, just say that you don't know. Don't try to make up an answer.
        ------
        {context}
        """
    ),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human","{question}")
        
])

def get_chat_history(inputs):
    """Return the ``"chat_history"`` entry of the chain input mapping.

    Raises ``KeyError`` when the entry is absent.
    """
    history = inputs["chat_history"]
    return history

def get_question(inputs):
    """Return the ``"question"`` entry of the chain input mapping.

    Raises ``KeyError`` when the entry is absent.
    """
    question = inputs["question"]
    return question

# Full pipeline. The leading dict fans the same input out to three branches
# whose results become the variables of final_prompt:
#   context      - text extracted from the retrieved documents (map_chain)
#   question     - the question passed through unchanged
#   chat_history - the history passed through unchanged
# The filled prompt is then sent to the chat model.
chain = (
    {
        "context": map_chain,
        "question": RunnableLambda(get_question),
        "chat_history": RunnableLambda(get_chat_history),
    }
    | final_prompt
    | llm
)

def invoke_chain(question):
    """Answer ``question`` with the document chain and remember the exchange.

    Reads the current history from memory, runs ``chain`` with the question
    and that history, stores the question/answer pair back into ``memory``,
    prints the model's message and returns it.
    """
    history = load_memory(None)
    payload = {"question": question, "chat_history": history}
    answer = chain.invoke(payload)

    # Record this turn so that follow-up questions can refer back to it.
    memory.save_context({"input": question}, {"output": answer.content})

    print(answer)
    return answer


In [42]:
# Ask three questions in sequence. Each call saves its question/answer pair
# into `memory`, so a later question is answered with the earlier turns
# included as chat history.
invoke_chain("Is Aaronson guilty?")
invoke_chain("What message did he write in the table?")
# Kept as the last bare expression: its return value appears to be what the
# notebook echoes after the printed messages.
invoke_chain("Who is Julia?")

content='Yes. The extracted text states that Jones, Aaronson, and Rutherford were guilty of the crimes they were charged with.'
content='2+2=5'
content="Julia is a character in George Orwell's novel 1984. She is Winston Smith’s lover and partner, a fellow rebel against the Party. Their relationship is clandestine, and she accompanies Winston in his anti-Party feelings and plans."


AIMessageChunk(content="Julia is a character in George Orwell's novel 1984. She is Winston Smith’s lover and partner, a fellow rebel against the Party. Their relationship is clandestine, and she accompanies Winston in his anti-Party feelings and plans.")