In [5]:
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda 
from langchain.chains import LLMChain, StuffDocumentsChain


In [6]:
# --- Chat model --------------------------------------------------------------
llm = ChatOpenAI(temperature=0.1)

# --- Source document: load it and cut it into overlapping chunks -------------
# Chunk sizes are measured through the tiktoken encoder (see the factory name).
loader = UnstructuredFileLoader("./document.txt")
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n", chunk_size=600, chunk_overlap=100
)
docs = loader.load_and_split(text_splitter=splitter)

# --- Embeddings, wrapped with a byte store rooted at ./.cache/ ---------------
cache_dir = LocalFileStore("./.cache/")
embeddings = OpenAIEmbeddings()
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
)

# --- Vector index over the chunks, exposed as a retriever --------------------
vectorstore = FAISS.from_documents(docs, cached_embeddings)
retriever = vectorstore.as_retriever()

# Prompt that answers the question from the document text placed in {context}.
map_doc_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", """
            Use the following portion of a long document to answer the question. Answer based on the given context.
            If there is no relevant text and you cannot answer, return : ''
            -------
            {context}
        """),
        ("human", "{question}"),
    ]
)

# `input_key` is required: the documents chain below receives two non-memory
# inputs ("input_documents" and "question"), and without an explicit key the
# memory cannot tell which one to record and raises
# "One input key expected" when it tries to save the exchange.
memory = ConversationBufferMemory(return_messages=True, input_key="question")

llm_chain = LLMChain(llm=llm, prompt=map_doc_prompt)

# Stuffs the supplied documents into the prompt's {context} variable.
# Invoke with {"input_documents": [...], "question": "..."}; the answer is
# returned under the "output_text" key of the result dict.
map_doc_chain = StuffDocumentsChain(
    llm_chain=llm_chain,
    memory=memory,
    document_variable_name="context",
)

def map_docs(inputs):
    """Answer the question against each document separately and join the answers.

    Args:
        inputs: Mapping with two keys:
            "documents" - iterable of objects exposing ``page_content``;
            "question"  - the question asked about every document.

    Returns:
        str: The per-document answers separated by blank lines
        (an empty string when there are no documents).
    """
    documents = inputs["documents"]
    question = inputs["question"]
    # `map_doc_chain` is a StuffDocumentsChain: it requires an
    # "input_documents" key and returns a dict, so calling it with
    # {"context", "question"} and reading `.content` cannot work. Piping the
    # prompt into the chat model accepts exactly this payload and returns a
    # message object that carries `.content`.
    per_doc_chain = map_doc_prompt | llm
    return "\n\n".join(
        per_doc_chain.invoke(
            {"context": doc.page_content, "question": question}
        ).content
        for doc in documents
    )

# Prompt for the final answer: {context} receives the retrieved text and
# {question} the user's question.
final_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Given the following extracted parts of a long document and a question, create a final answer. 
            If you don't know the answer, just say that you don't know. Don't try to make up an answer.
            ------
            {context}
            """,
        ),
        ("human", "{question}"),
    ]
)


def _format_docs(documents):
    """Join the text of the retrieved documents with blank lines.

    Without this step the prompt would receive the list of Document objects
    itself, so {context} would be filled with their repr (metadata, escaped
    newlines) instead of the plain document text.
    """
    return "\n\n".join(doc.page_content for doc in documents)


# The input string is sent to the retriever (to fetch context) and passed
# through unchanged as the question; the filled prompt then goes to the model.
chain = (
    {
        "context": retriever | RunnableLambda(_format_docs),
        "question": RunnablePassthrough(),
    }
    | final_prompt
    | llm
)

In [7]:
# Ask the chain a question; the returned message is shown as the cell output.
chain.invoke("Is Aaronson guilty?")


AIMessage(content='Yes, according to the document, Jones, Aaronson, and Rutherford were guilty of the crimes they were charged with.')

In [8]:
# Each invoke passes only the question string, so "he" must be resolved
# from the retrieved context rather than from the previous question.
chain.invoke("What message did he write in the table?")


AIMessage(content='In the document, the message he wrote on the table was "2+2=5."')

In [9]:
# Ask who a named character is; the returned message is the cell output.
chain.invoke("Who is Julia?")


AIMessage(content='Julia is a character in the document who is associated with the protagonist.')