In [1]:
import requests
import os
# Source gist: https://gist.github.com/serranoarevalo/5acf755c2b8d83f1707ef266b82ea223
# Download the raw document text and store it under files/ for the rest of the notebook.
response = requests.get(
    "https://gist.githubusercontent.com/serranoarevalo/5acf755c2b8d83f1707ef266b82ea223/raw/d72b9558a11523adbe13300b41321ecd93d331d3/document.txt",
    timeout=30,  # never hang the kernel forever on a stalled connection
)
# Fail loudly on 4xx/5xx so an HTTP error page is never saved as the document.
response.raise_for_status()

target_file = "files/chapter_three.txt"
# Create the parent directory if it is missing; exist_ok keeps re-runs idempotent.
os.makedirs(os.path.dirname(target_file), exist_ok=True)
# Write the raw bytes unchanged (binary mode, no re-encoding).
with open(target_file, "wb") as f:
    f.write(response.content)

In [2]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import Chroma
from langchain.storage import LocalFileStore
from langchain.memory import ConversationBufferMemory

# Chat model that produces the final answers.
llm = ChatOpenAI(
    model_name="gpt-4o-mini",
)

# Conversation memory shared by the whole notebook. return_messages=True makes
# the stored history come back as message objects rather than one string.
memory = ConversationBufferMemory(
    return_messages=True,
)

# On-disk byte store backing the embedding cache.
cache_dir = LocalFileStore("./.cache/")

# Split on newlines, measuring chunk length with the tiktoken encoder.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)

loader = UnstructuredFileLoader("./files/chapter_three.txt")

docs = loader.load_and_split(text_splitter=splitter)

embedding = OpenAIEmbeddings(model="text-embedding-3-small")

# Namespace the cache by embedding model name so vectors produced by one model
# are never reused for another model that happens to share the same cache
# directory. NOTE: entries cached before this change (empty namespace) are not
# picked up and will be re-embedded once.
cache_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embedding,
    cache_dir,
    namespace=embedding.model,
)

vectorstore = Chroma.from_documents(docs, cache_embeddings)

# NOTE: the name `retriver` (sic) is kept as-is because a later cell refers to it.
retriver = vectorstore.as_retriever()

In [3]:
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema.runnable import RunnablePassthrough


def load_memory(_):
    """Return the conversation history held by the notebook-level ``memory``.

    The single positional argument is accepted and ignored; the function
    always reads the full set of memory variables and returns the value
    stored under the ``"history"`` key.
    """
    stored_variables = memory.load_memory_variables({})
    return stored_variables["history"]

def invoke_chain(chain, question):
    """Invoke ``chain`` with ``question`` and record the exchange in ``memory``.

    Args:
        chain: Object exposing ``invoke(question)`` whose result has a
            ``content`` attribute.
        question: The user question passed to the chain and saved as the
            ``"input"`` side of the exchange.

    Returns:
        The object returned by ``chain.invoke`` (not just its ``content``).
    """
    answer = chain.invoke(question)
    # Persist the turn only after the chain has produced an answer.
    exchange_input = {"input": question}
    exchange_output = {"output": answer.content}
    memory.save_context(exchange_input, exchange_output)
    return answer

# System instruction: restrict the model to the retrieved context and tell it
# to admit when it does not know the answer.
system_template = "You are a helpful assistant. Answer questions using only the following context. If you don't know the answer just say you don't know, don't make it up:\n\n{context}"

# Message order: system instruction, prior conversation, then the new question.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_template),
        MessagesPlaceholder(variable_name="history"),
        ("human", "{question}"),
    ]
)

# Each key fills the prompt variable of the same name.
chain_inputs = {
    "context": retriver,
    "question": RunnablePassthrough(),
    "history": load_memory,
}
chain = chain_inputs | prompt | llm

In [4]:
# First question; the exchange is also stored in memory by invoke_chain.
aaronson_answer = invoke_chain(chain, "Is Aaronson guilty?")
aaronson_answer.content

'According to the context, Aaronson is guilty of the crimes he was charged with, as Winston recalls that he had never seen the photograph that disproved their guilt and that it had never existed; he had invented it.'

In [5]:
# Follow-up question whose "he" is not named in the question itself.
table_message_answer = invoke_chain(chain, "What message did he write in the table?")
table_message_answer.content

'Winston traced "2+2=5" in the dust on the table.'

In [6]:
# Third question, again answered through the chain and saved to memory.
julia_answer = invoke_chain(chain, "Who is Julia?")
julia_answer.content

'Julia is a character who has a romantic relationship with Winston. She is depicted as someone with whom Winston shares a deep emotional connection, and he expresses love for her, especially in moments of reflection.'

In [7]:
# Show everything the conversation memory has accumulated so far.
conversation_state = memory.load_memory_variables({})
conversation_state

{'history': [HumanMessage(content='Is Aaronson guilty?'),
  AIMessage(content='According to the context, Aaronson is guilty of the crimes he was charged with, as Winston recalls that he had never seen the photograph that disproved their guilt and that it had never existed; he had invented it.'),
  HumanMessage(content='What message did he write in the table?'),
  AIMessage(content='Winston traced "2+2=5" in the dust on the table.'),
  HumanMessage(content='Who is Julia?'),
  AIMessage(content='Julia is a character who has a romantic relationship with Winston. She is depicted as someone with whom Winston shares a deep emotional connection, and he expresses love for her, especially in moments of reflection.')]}