In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import Runnable
from langchain.memory import ConversationBufferMemory

llm = ChatOpenAI(
    temperature=0.1,
)

cache_dir = LocalFileStore("./.cache/")

splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)
loader = UnstructuredFileLoader("./files/document.txt")

docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

vectorstore = FAISS.from_documents(docs, cached_embeddings)

retriver = vectorstore.as_retriever()

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer questions using only the following context. If you don't know the answer just say you don't know, don't make it up:\n\n{context}",
        ),
        ("human", "{question}"),
    ]
)

memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

class CustomRunnable(Runnable):
    def invoke(self, inputs, config=None):
        question = inputs["question"]
        context = "\n".join([doc.page_content for doc in retriver.get_relevant_documents(question)]) 
        chat_history = memory.load_memory_variables({})["chat_history"]
        chat_history_str = "\n".join([message.content for message in chat_history]) if isinstance(chat_history, list) else chat_history
        return {"context": chat_history_str + "\n" + context, "question": question}

custom_runnable = CustomRunnable()

chain = (
    custom_runnable
    | prompt
    | llm
)

def invoke_chain(question):
    result = chain.invoke({"question": question})
    memory.save_context({"input": question}, {"output": result.content})
    print(result.content)
    return result


invoke_chain("Is Aaronson guilty?")
invoke_chain("What message did he write in the table?")
invoke_chain("Who is Julia?")


Yes, Jones, Aaronson, and Rutherford were guilty of the crimes they were charged with.
He traced in the dust on the table: 2+2=5
Julia is a character in the story who is involved in a romantic relationship with Winston.
The first question you asked was "Is Aaronson guilty?"


AIMessage(content='The first question you asked was "Is Aaronson guilty?"', additional_kwargs={}, response_metadata={'token_usage': <OpenAIObject at 0x732738c8acf0> JSON: {
  "prompt_tokens": 2268,
  "completion_tokens": 13,
  "total_tokens": 2281,
  "prompt_tokens_details": {
    "cached_tokens": 0,
    "audio_tokens": 0
  },
  "completion_tokens_details": {
    "reasoning_tokens": 0,
    "audio_tokens": 0,
    "accepted_prediction_tokens": 0,
    "rejected_prediction_tokens": 0
  }
}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-b26820d3-ea3d-4078-95e7-becd3b227829-0')

In [3]:
invoke_chain("What was the first question that I asked?")

The first question you asked was "Is Aaronson guilty?"


AIMessage(content='The first question you asked was "Is Aaronson guilty?"', additional_kwargs={}, response_metadata={'token_usage': <OpenAIObject at 0x732739279730> JSON: {
  "prompt_tokens": 2289,
  "completion_tokens": 13,
  "total_tokens": 2302,
  "prompt_tokens_details": {
    "cached_tokens": 0,
    "audio_tokens": 0
  },
  "completion_tokens_details": {
    "reasoning_tokens": 0,
    "audio_tokens": 0,
    "accepted_prediction_tokens": 0,
    "rejected_prediction_tokens": 0
  }
}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-892f8d2b-1a0f-4887-ad81-deb467aabdfe-0')