In [2]:
from dotenv import dotenv_values
from langchain.callbacks import StreamingStdOutCallbackHandler
from langchain.document_loaders import TextLoader
from langchain.embeddings import CacheBackedEmbeddings
from langchain.memory import ConversationSummaryBufferMemory
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema.runnable import RunnablePassthrough
from langchain.storage import LocalFileStore
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from pathlib import Path


config = dotenv_values(".env")

chat = ChatOpenAI(
    openai_api_key=config["OPENAI_API_KEY"],
    model="gpt-4o-mini",
    temperature=0.1,
    streaming=True,
    callbacks=[
        StreamingStdOutCallbackHandler(),
    ],
)

# https://python.langchain.com/docs/versions/migrating_memory/
memory = ConversationSummaryBufferMemory(
    llm=chat,
    max_token_limit=20,
    return_messages=True,
    memory_key="history",
)

cache_path = "./.cache"
embedding_path = f"{cache_path}/challenge-04"
Path(embedding_path).mkdir(parents=True, exist_ok=True)
embedding_cache_dir = LocalFileStore(embedding_path)

splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)
loader = TextLoader("./challenge-04.txt")
docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings(openai_api_key=config["OPENAI_API_KEY"])
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings, embedding_cache_dir
)
vectorstore = FAISS.from_documents(docs, cached_embeddings)
retriever = vectorstore.as_retriever()

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Answer the question using ONLY the following context. 
            If you don't know the answer just say you don't know. 
            DON't make anything up.

            Context: {context}
            """,
        ),
        MessagesPlaceholder(variable_name="history"),
        ("human", "{question}"),
    ]
)


def load_memory(_):
    return memory.load_memory_variables({})["history"]


chain = (
    {
        "context": retriever,
        "question": RunnablePassthrough(),
        "history": load_memory,
    }
    | prompt
    | chat
)


def invoke_chain(question):
    result = chain.invoke(question)
    memory.save_context(
        {"input": question},
        {"output": result.content},
    )
    # print(result.content)

In [3]:
invoke_chain("Aaronson 은 유죄인가요? (Is Aaronson guilty?)")

Aaronson은 유죄가 아닙니다.The human asks if Aaronson is guilty.

In [4]:
invoke_chain(
    "그가 테이블에 어떤 메시지를 썼나요? (What message did he write in the table?)"
)

그는 테이블에 "FREEDOM IS SLAVERY", "TWO AND TWO MAKE FIVE", "GOD IS POWER"라는 메시지를 썼습니다.The human asks if Aaronson is guilty. The AI responds that Aaronson is not guilty. The human then inquires about the message Aaronson wrote on the table, to which the AI reveals that he wrote "FREEDOM IS SLAVERY," "TWO AND TWO MAKE FIVE," and "GOD IS POWER."

In [5]:
invoke_chain("Julia 는 누구인가요? (Who is Julia?)")

Julia는 Winston의 사랑하는 사람이며, 그들과 함께 반당파적인 활동을 하던 인물입니다. 그녀는 Winston과의 관계를 통해 개인적인 자유와 사랑을 추구하지만, 결국 당의 압박과 감시 아래에서 고통받게 됩니다.The human asks if Aaronson is guilty. The AI responds that Aaronson is not guilty. The human then inquires about the message Aaronson wrote on the table, to which the AI reveals that he wrote "FREEDOM IS SLAVERY," "TWO AND TWO MAKE FIVE," and "GOD IS POWER." The human then asks about Julia, and the AI explains that Julia is Winston's lover who engaged in anti-party activities with him, seeking personal freedom and love but ultimately suffering under the Party's oppression and surveillance.