In [1]:
import requests
import os
# Source gist: https://gist.github.com/serranoarevalo/5acf755c2b8d83f1707ef266b82ea223
# Download the sample document (the raw URL includes a revision hash) and save it
# under files/ so the later cells can load it from disk.
response = requests.get(
    "https://gist.githubusercontent.com/serranoarevalo/5acf755c2b8d83f1707ef266b82ea223/raw/d72b9558a11523adbe13300b41321ecd93d331d3/document.txt",
    timeout=30,  # do not let a stalled connection hang the kernel indefinitely
)
# Fail loudly on an HTTP error instead of silently saving an error page as the document.
response.raise_for_status()
target_file = "files/chapter_three.txt"
# Create the parent directory on first run; exist_ok keeps the cell re-runnable.
os.makedirs(os.path.dirname(target_file), exist_ok=True)
# Write the raw bytes so the file content matches the download exactly.
with open(target_file, "wb") as f:
    f.write(response.content)

In [2]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import Chroma
from langchain.storage import LocalFileStore

# Chat model that the chain in a later cell uses to answer questions.
llm = ChatOpenAI(
    model_name="gpt-4o-mini",
)
# File-backed byte store under ./.cache/ that holds previously computed embeddings.
cache_dir = LocalFileStore("./.cache/")

# Split on newlines, with chunk length measured through a tiktoken encoder;
# consecutive chunks overlap by up to 100 so context is not cut mid-passage.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)

# Load the document saved by the download cell and split it into chunks.
loader = UnstructuredFileLoader("./files/chapter_three.txt")

docs = loader.load_and_split(text_splitter=splitter)

embedding = OpenAIEmbeddings(model="text-embedding-3-small")

# Wrap the embedder with the on-disk cache. The namespace is set to the
# embedding model name so vectors cached for one model are never reused for
# another model that happens to embed the same text.
cache_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embedding,
    cache_dir,
    namespace=embedding.model,
)

# Index the chunks in Chroma using the cached embedder.
vectorstore = Chroma.from_documents(docs, cache_embeddings)

# NOTE: the spelling "retriver" is kept because later cells reference this name.
retriver = vectorstore.as_retriever()

In [3]:
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough

def format_docs(documents):
    """Join the text of the retrieved documents into one plain-text context string.

    Without this step the list of Document objects is interpolated into the
    prompt via its Python repr (metadata, escaped newlines and all), which adds
    noise and wastes tokens.
    """
    return "\n\n".join(document.page_content for document in documents)


# System message carries the retrieved context; the human message is the raw question.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer questions using only the following context. If you don't know the answer just say you don't know, don't make it up:\n\n{context}",
        ),
        ("human", "{question}"),
    ]
)
# The input question is sent both to the retriever (whose results are flattened to
# text by format_docs) and, unchanged, to the "question" slot of the prompt.
chain = (
    {"context": retriver | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
)

In [4]:
# Run the chain on the question and display only the text of the reply.
aaronson_reply = chain.invoke("Is Aaronson guilty?")
aaronson_reply.content

'According to the context, Aaronson is guilty of the crimes he was charged with, as stated by the character in the text. However, it also mentions that the character had never seen the photograph that disproved their guilt, implying that the guilt may not be genuine or based on truth.'

In [5]:
# Run the chain on the question and display only the text of the reply.
table_message_reply = chain.invoke("What message did he write in the table?")
table_message_reply.content

'He traced "2+2=5" in the dust on the table.'

In [6]:
# Run the chain on the question and display only the text of the reply.
julia_reply = chain.invoke("Who is Julia?")
julia_reply.content

'Julia is a character that Winston loves deeply. In the context, she is associated with a moment of overwhelming emotion for Winston when he cries out her name, indicating his strong feelings for her despite the oppressive regime they are under.'