In [1]:
from langchain.chat_models.ollama import ChatOllama
from langchain_openai import ChatOpenAI
from langchain.document_loaders.unstructured import UnstructuredFileLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain.embeddings.ollama import OllamaEmbeddings
from langchain.embeddings.cache import CacheBackedEmbeddings
from langchain.vectorstores.faiss import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
from langchain.memory import ConversationBufferMemory


# Backend selection: `LLM_model` picks the provider and `models` names the
# model to use. Both values are also interpolated into the embedding-cache
# path built further down, so changing either one starts a fresh cache folder.
# OpenAI model ids are lower-case, hence "gpt-3.5-turbo".
LLM_model, models = ["openai", "gpt-3.5-turbo"]
# LLM_model, models = ["ollama", "mixtral:instruct"]

# Name of the document to index; it is read from ./files/ when loading.
file_name = "document.txt"

# Hand the configured model name to whichever backend is selected. Without
# `model=` ChatOpenAI silently falls back to its built-in default and the
# `models` setting above would have no effect on the OpenAI path.
llm = (
    ChatOllama(temperature=0.1, model=models)
    if LLM_model == "ollama"
    else ChatOpenAI(temperature=0.1, model=models)
)

# --- Document ingestion and indexing ---------------------------------------
# Split the source file into overlapping chunks, embed them (re-using any
# embeddings already stored on disk), and expose the FAISS index as a retriever.

splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=600,
    chunk_overlap=100,
    separators=["\n\n", "\n", ".", "?", "!"],
)

loader = UnstructuredFileLoader(f"./files/{file_name}")
docs = loader.load_and_split(text_splitter=splitter)

# Pick the embedding backend that matches the selected provider.
if LLM_model == "ollama":
    embeddings = OllamaEmbeddings(model=models)
else:
    embeddings = OpenAIEmbeddings()

# The cache folder is keyed by provider, model name and file name.
cache_dir = LocalFileStore(f"./.cache/embeddings/{LLM_model}/{models}/{file_name}")
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

vectorstore = FAISS.from_documents(documents=docs, embedding=cached_embeddings)
retriever = vectorstore.as_retriever()

# Conversation history shared by every chain invocation in this notebook.
# ConversationBufferMemory stores the raw messages and has no `llm` field
# (only summarising memory classes need a model), so the stray `llm=` argument
# that used to be passed here was silently discarded and has been removed.
# `memory_key` must stay "history": the prompt placeholder and `load_memory`
# both look the messages up under that name.
memory = ConversationBufferMemory(
    return_messages=True,
    memory_key="history",
)


def load_memory(_):
    """Return the messages currently stored in `memory` under the "history" key.

    The positional argument is ignored; it exists only so the function can be
    wrapped in a RunnableLambda, which calls it with the chain input.
    """
    stored_variables = memory.load_memory_variables({})
    return stored_variables["history"]


# System instructions for the assistant. `{context}` is filled with the
# retrieved document text when the prompt is rendered.
system_template = """
            You are an AI that reads documents for you. Please answer based on the document given below. 
            If the information is not in the document, answer the question with “The required information is not in the document.” Never make up answers. \n\n{context}
            """

# Message order: system instructions, then the prior conversation turns,
# then the user's current question.
prompt_messages = [
    ("system", system_template),
    MessagesPlaceholder(variable_name="history"),
    ("human", "{question}"),
]

prompt = ChatPromptTemplate.from_messages(prompt_messages)

def _format_docs(documents):
    """Join the text of the retrieved documents into one plain-text string.

    Without this step the list of Document objects is interpolated into the
    prompt as its Python repr (metadata, escaped newlines and all), which
    adds noise and wastes context tokens.
    """
    return "\n\n".join(document.page_content for document in documents)


# The chain input (the question string) is fanned out to three branches:
#   context  -> retrieved chunks, flattened to plain text
#   question -> the question itself, passed through unchanged
#   history  -> the messages stored in memory so far
# The resulting dict fills the prompt, which is then sent to the LLM.
chain = (
    {
        "context": retriever | RunnableLambda(_format_docs),
        "question": RunnablePassthrough(),
        "history": RunnableLambda(load_memory),
    }
    | prompt
    | llm
)


def invoke_chain(question):
    """Run `question` through the chain, record the exchange, and print the reply.

    The question and the model's answer are saved to `memory` so that later
    calls see them as conversation history. Nothing is returned; the answer
    is written to stdout.
    """
    response = chain.invoke(question)
    answer = response.content
    memory.save_context({"input": question}, {"output": answer})
    print(answer)


# First question of the session; memory is still empty at this point.
invoke_chain("Is Aaronson guilty?")

The required information is not in the document.


In [2]:
# Follow-up question: the chain receives the stored history plus freshly retrieved context.
invoke_chain("What message did he write in the table?")

The message he wrote on the table was "2+2=5."


In [3]:
# Another document question; the exchange is appended to memory like the previous ones.
invoke_chain("Who is Julia?")

Julia is a character mentioned in the document who is associated with Winston.


In [4]:
# Memory check: this question is about the conversation itself, so the answer
# has to come from the stored history rather than from the document.
invoke_chain("What was the first question I asked?")

The first question you asked was "Is Aaronson guilty?"


In [5]:
# Display the memory object to inspect the question/answer messages recorded by the calls above.
memory

ConversationBufferMemory(chat_memory=ChatMessageHistory(messages=[HumanMessage(content='Is Aaronson guilty?'), AIMessage(content='The required information is not in the document.'), HumanMessage(content='What message did he write in the table?'), AIMessage(content='The message he wrote on the table was "2+2=5."'), HumanMessage(content='Who is Julia?'), AIMessage(content='Julia is a character mentioned in the document who is associated with Winston.'), HumanMessage(content='What was the first question I asked?'), AIMessage(content='The first question you asked was "Is Aaronson guilty?"')]), return_messages=True)