In [15]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda

In [12]:
# Chat model shared by the per-chunk extraction step and the final answer step.
llm = ChatOpenAI(temperature=0.1)

# Load the source text and split it on newlines into overlapping chunks; the
# chunk size and overlap are passed to the splitter's tiktoken-encoder factory.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n", chunk_size=600, chunk_overlap=100
)
loader = UnstructuredFileLoader("./.files/chapter1_1984.txt")
docs = loader.load_and_split(text_splitter=splitter)

# Wrap the OpenAI embeddings with a cache backed by the local ./.cache/ store.
cache_dir = LocalFileStore("./.cache/")
embeddings = OpenAIEmbeddings()
cache_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

# Index the chunks in FAISS and expose the index as a retriever.
vectorstore = FAISS.from_documents(docs, cache_embeddings)
retriever = vectorstore.as_retriever()

In [17]:
# Map-step prompt: the system message carries one document portion in
# {context}; the human message carries the user's {question}.
map_doc_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim. If there is no relevant text, return : ''
            -------
            {context}""",
        ),
        ("human", "{question}"),
    ]
)

# Pipe the formatted prompt straight into the chat model.
map_doc_chain = map_doc_prompt | llm

def map_docs(inputs):
    """Run the map step over every document and merge the responses.

    Args:
        inputs: Mapping with two keys:
            "documents" - iterable of objects exposing ``page_content``.
            "question"  - the question forwarded to ``map_doc_chain``.

    Returns:
        A single string: the ``content`` of each ``map_doc_chain`` response,
        in document order, separated by a blank line.
    """
    retrieved = inputs["documents"]
    question = inputs["question"]

    extracts = []
    for doc in retrieved:
        # One chain call per document, each with the same question.
        reply = map_doc_chain.invoke(
            {"context": doc.page_content, "question": question}
        )
        extracts.append(reply.content)

    return "\n\n".join(extracts)


# Map stage: the retriever supplies documents for the incoming question, the
# question itself is passed through unchanged, and both go to map_docs.
map_chain = {
    "documents": retriever,
    "question": RunnablePassthrough(),
} | RunnableLambda(map_docs)

# Final-answer prompt: {context} receives the map stage's combined text and
# {question} receives the original question.
final_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """Given the following extracted parts of a long document and a question, create a final answer. If you don't know the answer, just say that you don't know. Don't try to make up an answer.
        ------
        {context}""",
        ),
        ("human", "{question}"),
    ]
)

# Full pipeline: map stage -> final prompt -> chat model.
chain = (
    {"context": map_chain, "question": RunnablePassthrough()}
    | final_prompt
    | llm
)

In [19]:
# Run the full chain on a location question; the returned message is the cell output.
# NOTE(review): the question text has a typo ("dose"); it is left as-is so it
# still matches the recorded output below.
chain.invoke("Where dose Victory Mansion.")

AIMessage(content='Victory Mansions is located in London.')

In [18]:
# Run the full chain on an open-ended, descriptive question; the returned
# message is the cell output.
chain.invoke("Describe Victory Mansion.")

AIMessage(content='Victory Mansion is a dilapidated and run-down apartment complex located in Airstrip One (formerly known as Great Britain) under the oppressive rule of the Party. The building has poor living conditions with cramped quarters, shabby furniture, and a pervasive sense of decay. Telescreens, used by the Party to monitor and control residents, are omnipresent. Despite its name, Victory Mansion is a stark and grim place, reflecting the bleak and oppressive atmosphere of the dystopian society depicted in the novel "1984" by George Orwell.')

In [21]:
# Run the full chain on a counting question; the returned message is the
# cell output.
chain.invoke("How many ministries are mentioned")

AIMessage(content='Three ministries are mentioned in the text: the Ministry of Love, the Ministry of Plenty, and the Ministry of Truth.')