In [1]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.memory import ConversationBufferMemory
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
from langchain.storage import LocalFileStore
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import UnstructuredFileLoader
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS

# Chat model shared by both the per-document "map" step and the final answer
# step. A low temperature keeps the answers close to the retrieved text.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0.1,
)

# Conversation memory. return_messages=True is needed because the stored
# history is fed into MessagesPlaceholder(variable_name="history") below.
memory = ConversationBufferMemory(
    return_messages=True,
)

# On-disk store backing the embedding cache (see cached_embeddings).
cache_dir = LocalFileStore("./.cache/")

# Splits on newlines; chunk_size / chunk_overlap are presumably counted in
# tiktoken tokens rather than characters (from_tiktoken_encoder) -- confirm.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)

# Source document, resolved relative to the notebook's working directory.
loader = UnstructuredFileLoader("./files/document.txt")

# Load the file and cut it into chunks with the splitter above.
docs = loader.load_and_split(text_splitter=splitter)

embedder = OpenAIEmbeddings()

# Wrap the embedder so computed embeddings are stored in cache_dir.
# NOTE(review): no `namespace` is passed, so entries from different embedding
# models would share one key space in ./.cache/ -- confirm this is intended.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embedder, cache_dir)

# Build the FAISS index over the document chunks.
vectorstore = FAISS.from_documents(docs, cached_embeddings)

# Retriever with default search settings; used as the "documents" input of
# map_chain.
retriever = vectorstore.as_retriever()

# System instructions for the "map" step: the model sees ONE retrieved chunk
# ({context}) and must return only the text relevant to the question.
# The leading whitespace is part of the prompt text and is kept as-is.
MAP_DOC_SYSTEM_TEMPLATE = """
            Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim. If there is no relevant text, return : ''
            Before answering, revise what the question was. Then answer the question.
            -------
            {context}
            """

# Message order: system instructions, then the prior conversation ("history",
# supplied by map_docs from memory), then the current question.
map_doc_messages = [
    ("system", MAP_DOC_SYSTEM_TEMPLATE),
    MessagesPlaceholder(variable_name="history"),
    ("human", "{question}"),
]

map_doc_prompt = ChatPromptTemplate.from_messages(map_doc_messages)

# Prompt -> LLM pipeline that map_docs invokes once per retrieved document.
map_doc_chain = map_doc_prompt | llm

def map_docs(inputs):
    """Run the map step over every retrieved document and merge the results.

    Args:
        inputs: Mapping with two keys:
            "documents" -- iterable of objects exposing ``page_content``;
            "question"  -- the user's question.

    Returns:
        A single string: the ``content`` of each ``map_doc_chain`` response,
        in document order, separated by blank lines.
    """
    documents = inputs["documents"]
    question = inputs["question"]
    # The conversation history is read once and reused for every document.
    history = memory.load_memory_variables({})["history"]

    extracts = []
    for doc in documents:
        response = map_doc_chain.invoke(
            {
                "context": doc.page_content,
                "question": question,
                "history": history,
            }
        )
        extracts.append(response.content)

    return "\n\n".join(extracts)

# Fan the incoming question out to the retriever (-> "documents") and pass it
# through unchanged (-> "question"); map_docs consumes exactly these two keys.
map_inputs = {"documents": retriever, "question": RunnablePassthrough()}

map_chain = map_inputs | RunnableLambda(map_docs)

# System instructions for the final ("reduce") step: {context} is the merged
# output of map_chain. The leading whitespace is part of the prompt text.
FINAL_SYSTEM_TEMPLATE = """
            The following extracted parts are the keywords for answering the question.
            By using the given information, create a final answer.
            When making the answer, use the information as much as possible.
            If you don't know the answer, just say that you don't know. Don't try to make up an answer. And tell the reason why you don't know the answer
            ------
            {context}
            """

# Unlike map_doc_prompt, this prompt has no history placeholder: it only
# receives the extracted context and the question.
final_messages = [
    ("system", FINAL_SYSTEM_TEMPLATE),
    ("human", "{question}"),
]

final_prompt = ChatPromptTemplate.from_messages(final_messages)

# The question string feeds both branches: map_chain builds the context,
# RunnablePassthrough forwards the question itself.
final_inputs = {"context": map_chain, "question": RunnablePassthrough()}

chain = final_inputs | final_prompt | llm

def invoke_chain(question):
    """Answer ``question`` with the full chain, record the turn, and print it.

    The question and the answer text are saved to ``memory`` so that later
    calls see them as conversation history. The response message object is
    printed; nothing is returned.
    """
    response = chain.invoke(question)
    answer_text = response.content
    memory.save_context({"input": question}, {"output": answer_text})
    print(response)


In [2]:
# First question of the session: invoke_chain prints the reply and saves the
# question/answer pair to memory.
invoke_chain("Is Aaronson guilty?")


content='Based on the information provided, Aaronson is guilty of the crimes he was charged with. The protagonist had never seen the photograph that disproved their guilt, and it was revealed that the photograph had never existed; it was invented.'


In [3]:
# Follow-up question: "he" is not named here, so the answer presumably relies
# on the history saved by the previous call -- confirm.
invoke_chain("What message did he write in the table?")


content='The message he wrote on the table was "TWO AND TWO MAKE FIVE."'


In [4]:
# Third question in the same session; both earlier exchanges are already
# stored in memory when this runs.
invoke_chain("Who is Julia?")


content="Julia is a significant character in the text who has a strong emotional connection with the protagonist. The protagonist deeply cares for and loves Julia, as evidenced by his outburst where he cries out her name in a moment of overwhelming hallucination. The protagonist pleads for the punishment to be transferred to Julia instead of himself, showing his concern for her well-being over his own. Julia is someone the protagonist meets, spends time with, and is involved in attempting to resist the power of the Party, but ultimately they both succumb to the Party's control."
