In [None]:
import os 
from dotenv import load_dotenv
from langchain.chat_models import ChatOpenAI
from langchain.callbacks import StreamingStdOutCallbackHandler
from langchain.memory import ConversationBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda 
from langchain.chains import LLMChain, StuffDocumentsChain

# Load environment variables
load_dotenv("../.env")  
openai_api_key = os.getenv("OPENAI_API_KEY")

# Define Chat Model
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo", 
    temperature=0.1, 
    streaming=True, 
    callbacks=[StreamingStdOutCallbackHandler()],
    openai_api_key=openai_api_key
)

In [None]:
# Set up a local cache directory for storing embedding results
cache_dir = LocalFileStore("./.cache/")  # Ensure this directory exists or is writable

# Create a text splitter using a token-based encoder with newline as a separator
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",       # Split text on newline characters
    chunk_size=600,       # Maximum number of tokens per chunk
    chunk_overlap=100,    # Overlap between chunks to preserve context between splits
)

# Load a text document and split the document
loader = UnstructuredFileLoader("./ch3_1984.txt")  

docs = loader.load_and_split(text_splitter=splitter)

# Initialize the OpenAI embedding model 
embeddings = OpenAIEmbeddings()
# Wrap the embedding model with a caching mechanism to avoid redundant API calls
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir
)

# Create a FAISS vectorstore from the split documents and the cached embeddings
vectorstore = FAISS.from_documents(docs, cached_embeddings)

# Convert the vectorstore into a retriever for similarity-based document retrieval
retriever = vectorstore.as_retriever()

# Define a chat prompt template for answering questions based on retrieved context
map_doc_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", """
            You are an expert at answering any questions about the document. 
            Use the following portion of a long document to answer the question. Answer based on the given context.
            If there is no relevant text and you cannot answer, return : ''
            -------
            {context}
        """),
        ("human", "{question}"),
    ]
)

[nltk_data] Downloading package punkt to /Users/elinachoi/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/elinachoi/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


In [None]:
# Initialize a memory buffer to retain conversation history
memory = ConversationBufferMemory(return_messages=True)
# Create a basic LLMChain using the earlier defined prompt 
llm_chain = LLMChain(llm=llm, prompt=map_doc_prompt)
# Wrap the LLM chain into a StuffDocumentsChain to allow processing multiple documents
map_doc_chain = StuffDocumentsChain(
    llm_chain=llm_chain,
    memory=memory,
    document_variable_name="context"  
)

# Define a function to map over retrieved documents and answer the question 
def map_docs(inputs):
    documents = inputs["documents"]        # List of documents retrieved
    question = inputs["question"]          # Input question from the user
    return "\n\n".join(
        map_doc_chain.invoke(              # Call the document-processing chain for each document
            {"context": doc.page_content, "question": question}
        ).content
        for doc in documents
    )

# Define the final prompt for combining partial document answers into a single response
final_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            Given the following extracted parts of a long document and a question, create a final answer. 
            If you don't know the answer, just say that you don't know. Don't try to make up an answer.
            ------
            {context}
            """,
        ),
        ("human", "{question}"),
    ]
)

# Define the overall processing chain:
chain = (
    {
        "context": retriever,              # Retrieve relevant documents from the vectorstore
        "question": RunnablePassthrough(), # Pass question directly to the next stage
        "extra": RunnablePassthrough(),    # Optional: extra input passed through, currently unused
    }
    | final_prompt                         # Format the context and question into a final prompt
    | llm                                  # Call the LLM to generate the final answer
)


In [7]:
chain.invoke("Is Aaronson guilty?")

Yes, according to the document, Jones, Aaronson, and Rutherford were guilty of the crimes they were charged with.

AIMessageChunk(content='Yes, according to the document, Jones, Aaronson, and Rutherford were guilty of the crimes they were charged with.')

In [8]:
chain.invoke("What message did he write in the table?")

The message he wrote on the table was "2+2=5."

AIMessageChunk(content='The message he wrote on the table was "2+2=5."')

In [9]:
chain.invoke("Who is Julia?")

Julia is a character in the novel "1984" by George Orwell. She is a love interest of the protagonist, Winston Smith.

AIMessageChunk(content='Julia is a character in the novel "1984" by George Orwell. She is a love interest of the protagonist, Winston Smith.')