In [16]:
import os

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

In [17]:

# Anchor every path to the notebook's working directory:
#   - db_dir    : root folder under which the persisted vector stores live
#   - file_path : the source text that gets chunked and embedded
current_dir = os.getcwd()
db_dir = os.path.join(current_dir, "db")
file_path = os.path.join(current_dir, "books", "odyssey.txt")

In [18]:
# Fail fast with a readable error when the source text is missing, rather
# than letting a later cell fail on it.
if not os.path.exists(file_path):
    missing_file_message = (
        f"The file {file_path} does not exist. Please check the path."
    )
    raise FileNotFoundError(missing_file_message)

In [19]:
# Read the text content from the file.
# `documents` was previously never assigned anywhere in this notebook, so this
# cell only worked with leftover kernel state; loading it here keeps the
# notebook runnable after Restart Kernel -> Run All.
documents = TextLoader(file_path).load()

# Split the document into chunks.
# NOTE(review): the recorded output shows many "Created a chunk of size ...,
# which is longer than the specified 1000" warnings, so chunk_size is not a
# hard upper bound with this splitter on this text.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

# Display information about the split documents
print("\n--- Document Chunks Information ---")
print(f"Number of document chunks: {len(docs)}")
print(f"Sample chunk:\n{docs[0].page_content}\n")

Created a chunk of size 1012, which is longer than the specified 1000
Created a chunk of size 1636, which is longer than the specified 1000
Created a chunk of size 1218, which is longer than the specified 1000
Created a chunk of size 1869, which is longer than the specified 1000
Created a chunk of size 1294, which is longer than the specified 1000
Created a chunk of size 2270, which is longer than the specified 1000
Created a chunk of size 1431, which is longer than the specified 1000
Created a chunk of size 1763, which is longer than the specified 1000
Created a chunk of size 1575, which is longer than the specified 1000
Created a chunk of size 1024, which is longer than the specified 1000
Created a chunk of size 1147, which is longer than the specified 1000
Created a chunk of size 1205, which is longer than the specified 1000
Created a chunk of size 1606, which is longer than the specified 1000
Created a chunk of size 1833, which is longer than the specified 1000
Created a chunk of s


--- Document Chunks Information ---
Number of document chunks: 725
Sample chunk:
Provided by The Internet Classics Archive.
See bottom for copyright. Available online at
    http://classics.mit.edu//Homer/odyssey.html

The Odyssey
By Homer


Translated by Samuel Butler

----------------------------------------------------------------------

BOOK I

Tell me, O muse, of that ingenious hero who travelled far and wide
after he had sacked the famous town of Troy. Many cities did he visit,
and many were the nations with whose manners and customs he was acquainted;
moreover he suffered much by sea while trying to save his own life
and bring his men safely home; but do what he might he could not save
his men, for they perished through their own sheer folly in eating
the cattle of the Sun-god Hyperion; so the god prevented them from
ever reaching home. Tell me, too, about all these things, O daughter
of Jove, from whatsoever source you may know them.



In [21]:
def create_vector_store(docs, embeddings, store_name):
    """Create and persist a Chroma vector store unless its directory exists.

    The store is written to ``os.path.join(db_dir, store_name)``. If that
    path already exists, nothing is built and a notice is printed instead.

    Args:
        docs: Document chunks handed to ``Chroma.from_documents``.
        embeddings: Embedding object handed to ``Chroma.from_documents``.
        store_name: Sub-directory name of the store under ``db_dir``.
    """
    store_path = os.path.join(db_dir, store_name)

    # Guard clause: an existing directory is treated as an already-built store.
    if os.path.exists(store_path):
        print(
            f"Vector store {store_name} already exists. No need to initialize.")
        return

    print(f"\n--- Creating vector store {store_name} ---")
    Chroma.from_documents(docs, embeddings, persist_directory=store_path)
    print(f"--- Finished creating vector store {store_name} ---")

In [24]:
# Build the Hugging Face embedding model and persist a vector store for it.
# NOTE(review): the recorded output shows a warning pointing at this
# constructor call - presumably a deprecation notice for the
# `langchain.embeddings` import path; confirm before upgrading LangChain.
print("\n--- Using Hugging Face Transformers ---")
huggingface_embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)
create_vector_store(docs, huggingface_embeddings, "chroma_db_huggingface")

# Only the Hugging Face store is built in this notebook, so the completion
# message no longer claims an OpenAI demonstration as well.
print("Embedding demonstration for Hugging Face completed.")


def query_vector_store(store_name, query, embedding_function,
                       k=3, score_threshold=0.1):
    """Query a persisted Chroma vector store and print the matching documents.

    The store is looked up at ``os.path.join(db_dir, store_name)``. If that
    path does not exist, a notice is printed and nothing is queried.

    Args:
        store_name: Sub-directory name of the store under ``db_dir``.
        query: Query text passed to the retriever.
        embedding_function: Embedding object used to open the store; it should
            be the same one the store was created with.
        k: Value passed as ``search_kwargs["k"]`` (defaults to 3, the value
            that used to be hard-coded).
        score_threshold: Value passed as ``search_kwargs["score_threshold"]``
            for the ``similarity_score_threshold`` search (defaults to 0.1,
            the value that used to be hard-coded).

    Returns:
        None. Results are printed, not returned.
    """
    persistent_directory = os.path.join(db_dir, store_name)
    if not os.path.exists(persistent_directory):
        print(f"Vector store {store_name} does not exist.")
        return

    print(f"\n--- Querying the Vector Store {store_name} ---")
    db = Chroma(
        persist_directory=persistent_directory,
        embedding_function=embedding_function,
    )
    retriever = db.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"k": k, "score_threshold": score_threshold},
    )
    relevant_docs = retriever.invoke(query)

    # Display the relevant results with metadata
    print(f"\n--- Relevant Documents for {store_name} ---")
    for i, doc in enumerate(relevant_docs, 1):
        print(f"Document {i}:\n{doc.page_content}\n")
        if doc.metadata:
            print(f"Source: {doc.metadata.get('source', 'Unknown')}\n")


--- Using Hugging Face Transformers ---


  huggingface_embeddings = HuggingFaceEmbeddings(
  from .autonotebook import tqdm as notebook_tqdm



--- Creating vector store chroma_db_huggingface ---
--- Finished creating vector store chroma_db_huggingface ---
Embedding demonstrations for OpenAI and Hugging Face completed.


In [26]:
# The question to answer from the indexed text
query = "Who is Odysseus' wife?"

# Run the question against the persisted Hugging Face store
query_vector_store(
    store_name="chroma_db_huggingface",
    query=query,
    embedding_function=huggingface_embeddings,
)

print("Querying demonstrations completed.")


--- Querying the Vector Store chroma_db_huggingface ---

--- Relevant Documents for chroma_db_huggingface ---
Document 1:
Thus did he speak, and they went on board even as he had said. But
as Telemachus was thus busied, praying also and sacrificing to Minerva
in the ship's stern, there came to him a man from a distant country,
a seer, who was flying from Argos because he had killed a man. He
was descended from Melampus, who used to live in Pylos, the land of
sheep; he was rich and owned a great house, but he was driven into
exile by the great and powerful king Neleus. Neleus seized his goods
and held them for a whole year, during which he was a close prisoner
in the house of king Phylacus, and in much distress of mind both on
account of the daughter of Neleus and because he was haunted by a
great sorrow that dread Erinyes had laid upon him. In the end, however,
he escaped with his life, drove the cattle from Phylace to Pylos,
avenged the wrong that had been done him, and gave the daug