In [1]:
# Install the dependencies listed in ../requirements.txt. The %pip magic
# (rather than !pip) installs into the environment of the running kernel.
%pip install -r ../requirements.txt

Note: you may need to restart the kernel to use updated packages.


Load Env Variables and Secrets

In [1]:
import os
from dotenv import load_dotenv
# Load shared settings from the repository-level env file. The notebook later
# reads AZURE_OPENAI_ENDPOINT from the environment, so that value is expected
# to come from here (presumably along with AZURE_OPENAI_API_KEY — confirm).
load_dotenv('../../../azure.env')

# Notebook-specific settings; these deliberately override anything loaded above.
os.environ["AZURE_OPENAI_API_VERSION"] = "2024-06-01"
os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"] = 'gpt-4o-mini'
# This is the *model* version, not the REST API version. The previous value
# ('2024-06-01') was the API version string copied over; gpt-4o-mini is
# published as version 2024-07-18. LangChain appends this value to the model
# name reported in responses (used e.g. for cost tracking).
os.environ["AZURE_OPENAI_MODEL_VERSION"] = '2024-07-18'


Import packages

In [2]:
from langchain_openai import AzureChatOpenAI
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_core.vectorstores import InMemoryVectorStore

Initialize the Model

In [3]:
# Chat model client. Every connection setting is taken from environment
# variables; a missing variable raises KeyError here rather than at call time.
chat_settings = {
    "azure_endpoint": os.environ["AZURE_OPENAI_ENDPOINT"],
    "azure_deployment": os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"],
    "openai_api_version": os.environ["AZURE_OPENAI_API_VERSION"],
    "model_version": os.environ['AZURE_OPENAI_MODEL_VERSION'],
}
model = AzureChatOpenAI(**chat_settings)

In [4]:
from langchain_openai import AzureOpenAIEmbeddings

# Embedding client later handed to the vector store.
# Only the model name and API version are passed explicitly. The endpoint and
# API key are omitted on purpose: when not provided they are read from the
# AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_API_KEY environment variables.
# An optional `dimensions` argument can be given for the text-embedding-3 models.
embedding_settings = {
    "model": "text-embedding-3-small",
    "openai_api_version": os.environ["AZURE_OPENAI_API_VERSION"],
}
embeddings = AzureOpenAIEmbeddings(**embedding_settings)

In [5]:
# Locate the directory that holds the book text files.
# The notebook's working directory is used as the base. The previous
# expression, os.path.dirname(os.path.commonpath('.')), misused commonpath
# (it expects a sequence of paths, so the string was iterated character by
# character) and only worked because the result collapsed to an empty string.
current_dir = os.getcwd()
books_dir = os.path.join(current_dir, "books_small")
# Print the path relative to the working directory so saved cell output does
# not embed a machine-specific absolute path.
print(f"Books directory: {os.path.relpath(books_dir, current_dir)}")

Books directory: books_small


In [6]:
# Ensure the books directory exists before trying to read from it
if not os.path.exists(books_dir):
    raise FileNotFoundError(
        f"The directory {books_dir} does not exist. Please check the path."
    )

# List all text files in the directory. os.listdir returns entries in
# arbitrary order, so sort them to make the chunk order reproducible.
book_files = sorted(f for f in os.listdir(books_dir) if f.endswith(".txt"))

# Read the text content from each file and store it with metadata
documents = []
for book_file in book_files:
    file_path = os.path.join(books_dir, book_file)
    # Pass the encoding explicitly instead of relying on the platform default,
    # which differs between operating systems and can fail on the non-ASCII
    # punctuation in these texts. Assumes the files are UTF-8 — TODO confirm.
    loader = TextLoader(file_path, encoding="utf-8")
    for doc in loader.load():
        # Replace the loader's metadata with just the bare file name as source
        doc.metadata = {"source": book_file}
        documents.append(doc)

# Split the documents into chunks of up to 4000 characters with a 50-character overlap
text_splitter = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=50)
docs = text_splitter.split_documents(documents)

# Display information about the split documents
print("\n--- Document Chunks Information ---")
print(f"Number of document chunks: {len(docs)}")

# Embed the chunks and index them in an in-memory vector store.
# Nothing is written to disk: the index is rebuilt on every run of this cell.
print("\n--- Creating vector store ---")
vectorstore = InMemoryVectorStore.from_documents(
    documents=docs,
    embedding=embeddings,
)
print("\n--- Finished creating vector store ---")


--- Document Chunks Information ---
Number of document chunks: 238

--- Creating vector store ---

--- Finished creating vector store ---


In [11]:
# The question to answer from the indexed books
query = "How did Juliet die?"

# Similarity-search retriever that returns the top 3 matching chunks
search_options = {"k": 3}
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs=search_options,
)
relevant_docs = retriever.invoke(query)

In [12]:
# Display the retrieved chunks together with the source file each came from.
# The source was attached as metadata at load time but was never shown,
# despite this cell being described as printing results "with metadata".
print("\n--- Relevant Documents ---")
for i, doc in enumerate(relevant_docs, 1):
    print(f"Document {i}:\n{doc.page_content}\n")
    if doc.metadata:
        print(f"Source: {doc.metadata.get('source', 'Unknown')}\n")


--- Relevant Documents ---
Document 1:
Enter Juliet.

JULIET.
Gallop apace, you fiery-footed steeds,
Towards Phoebus’ lodging. Such a waggoner
As Phaeton would whip you to the west
And bring in cloudy night immediately.
Spread thy close curtain, love-performing night,
That runaway’s eyes may wink, and Romeo
Leap to these arms, untalk’d of and unseen.
Lovers can see to do their amorous rites
By their own beauties: or, if love be blind,
It best agrees with night. Come, civil night,
Thou sober-suited matron, all in black,
And learn me how to lose a winning match,
Play’d for a pair of stainless maidenhoods.
Hood my unmann’d blood, bating in my cheeks,
With thy black mantle, till strange love, grow bold,
Think true love acted simple modesty.
Come, night, come Romeo; come, thou day in night;
For thou wilt lie upon the wings of night
Whiter than new snow upon a raven’s back.
Come gentle night, come loving black-brow’d night,
Give me my Romeo, and when I shall die,
Take him and cut him out in

In [13]:
# Build the prompt: the question, then the retrieved passages, then the
# instruction to answer only from those passages.
retrieved_text = "\n\n".join(doc.page_content for doc in relevant_docs)
combined_input = "".join(
    [
        "Here are some documents that might help answer the question: ",
        query,
        "\n\nRelevant Documents:\n",
        retrieved_text,
        "\n\nPlease provide an answer based only on the provided documents. If the answer is not found in the documents, respond with 'I'm not sure'.",
    ]
)

In [14]:
# Conversation sent to the model: a generic system prompt followed by the
# retrieval-augmented user prompt built above.
system_message = SystemMessage(content="You are a helpful assistant.")
user_message = HumanMessage(content=combined_input)
messages = [system_message, user_message]

# Ask the chat model for an answer
result = model.invoke(messages)

# Show only the text content of the response
for line in ("\n--- Generated Response ---", "Content only:", result.content):
    print(line)


--- Generated Response ---
Content only:
Juliet died because, upon waking from a potion that made her appear dead, she found Romeo dead beside her. In her despair, she took her own life. The documents outline that she was initially meant to be married to Paris, but she was in love with Romeo. After Romeo was banished for killing Tybalt, Juliet, desperate and believing that Romeo was dead, chose to end her own life rather than live without him.
