In [23]:
import ollama
import redis
import numpy as np
import os
import time
import psutil
from redis.commands.search.query import Query
from transformers import AutoModel

# Initialize Redis connection
# Client for a Redis server on localhost:6379, logical database 0.
redis_client = redis.Redis(host="localhost", port=6379, db=0)

# set constants
VECTOR_DIM = 768  # DIM given to FT.CREATE; presumably the output size of every selectable embedding model — TODO confirm
INDEX_NAME = "embedding_index"  # name of the search index (re)created by create_hnsw_index
DOC_PREFIX = "doc:"  # key prefix of the hashes written by store_embedding and covered by the index
DISTANCE_METRIC = "COSINE"  # distance metric given to FT.CREATE
TEXT_FOLDER = "processed_texts"  # folder that process_text_files scans for .txt files
# Placeholders; the interactive prompts at the bottom of this cell fill them in.
selected_model = None
jina_model = None
selected_llm_model = None

# Report how much memory the Redis server says it is using, in MB
def get_redis_memory_usage():
    """Return the "used_memory" field of Redis's INFO memory section in MB (0 if the field is absent)."""
    bytes_per_mb = 1024 * 1024
    used_bytes = redis_client.info("memory").get("used_memory", 0)
    return used_bytes / bytes_per_mb

# Report the resident memory of this Python process in MB
def get_system_memory_usage():
    """Return the resident set size (RSS) of the current process, converted from bytes to MB."""
    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    return rss_bytes / (1024 * 1024)

# Drop any existing index (together with its documents) and create a fresh HNSW vector index
def create_hnsw_index():
    """
    Recreate the search index used for vector similarity queries.

    An existing index named INDEX_NAME is dropped first. The DD flag asks Redis to
    delete the indexed hashes as well, so every run starts from an empty data set.
    The new index covers hashes whose key starts with DOC_PREFIX and declares two
    fields: "text" (TEXT) and "embedding" (HNSW vector, FLOAT32, VECTOR_DIM
    dimensions, DISTANCE_METRIC distance).

    Command arguments are passed individually instead of as one pre-formatted
    string, so values are sent verbatim rather than being re-split on whitespace.

    Returns:
    None. Prints a confirmation once the index has been created.
    """
    try:
        redis_client.execute_command("FT.DROPINDEX", INDEX_NAME, "DD")
    except redis.exceptions.ResponseError:
        # Presumably raised when the index does not exist yet (e.g. first run);
        # dropping is best-effort, so the error is deliberately ignored.
        pass

    redis_client.execute_command(
        "FT.CREATE", INDEX_NAME,
        "ON", "HASH",
        "PREFIX", 1, DOC_PREFIX,
        "SCHEMA",
        "text", "TEXT",
        # 6 = number of attribute tokens that follow for the vector field.
        "embedding", "VECTOR", "HNSW", 6,
        "DIM", VECTOR_DIM,
        "TYPE", "FLOAT32",
        "DISTANCE_METRIC", DISTANCE_METRIC,
    )
    print("Index created successfully.")

# Turn a piece of text into an embedding vector
def get_embedding(text: str) -> list:
    """
    Embed the given text with the embedding model chosen at startup.

    The Jina model is run locally through jina_model.encode on a one-element
    batch; every other model name is sent to ollama.embeddings.

    Parameters:
    text (str): The input text to be embedded.

    Returns:
    list: The embedding vector as a list of floats, or None if any exception
    occurred (the error is printed rather than raised).
    """
    try:
        if EMBEDDING_MODEL != "jina-embeddings-v2-base-en":
            return ollama.embeddings(model=EMBEDDING_MODEL, prompt=text)["embedding"]
        vectors = jina_model.encode([text])
        return vectors[0].tolist()
    except Exception as e:
        print(f"Error generating embedding for text: {e}")
        # None tells the caller to skip this text.
        return None

# Persist one document and its vector as a Redis hash
def store_embedding(doc_id: str, text: str, embedding: list):
    """
    Write the text and its embedding to the hash "<DOC_PREFIX><doc_id>".

    Parameters:
    doc_id (str): Identifier appended to DOC_PREFIX to form the Redis key.
    text (str): Document text stored in the "text" field.
    embedding (list): Embedding vector; stored as raw float32 bytes in the
        "embedding" field. When None, nothing is written and a skip message
        is printed instead.
    """
    if embedding is None:
        print(f"Skipping file {doc_id} due to embedding generation error.")
        return

    redis_key = f"{DOC_PREFIX}{doc_id}"
    # The vector is stored as the raw bytes of a float32 array.
    vector_bytes = np.array(embedding, dtype=np.float32).tobytes()
    redis_client.hset(redis_key, mapping={"text": text, "embedding": vector_bytes})
    print(f"Stored embedding for: {doc_id}")

# Process all text files and generate/store embeddings
def process_text_files():
    """
    Embed every .txt file in TEXT_FOLDER and store text plus embedding in Redis.

    Files are handled in sorted name order so repeated runs index in the same
    order. A file that cannot be read, embedded or stored is reported and
    skipped. After the loop the total wall-clock time and the change in Redis
    "used_memory" (MB) are printed.

    Returns:
    None. Also returns early, after printing a message, when TEXT_FOLDER is not
    a directory or contains no .txt files.
    """
    # isdir rather than exists: a regular file named TEXT_FOLDER would otherwise
    # pass the check and make os.listdir raise NotADirectoryError.
    if not os.path.isdir(TEXT_FOLDER):
        print(f"Folder '{TEXT_FOLDER}' not found.")
        return

    text_files = sorted(f for f in os.listdir(TEXT_FOLDER) if f.endswith(".txt"))
    if not text_files:
        print("No text files found.")
        return

    # Record start time and Redis memory before any embedding is stored
    start_embedding_time = time.time()
    initial_memory = get_redis_memory_usage()

    for filename in text_files:
        filepath = os.path.join(TEXT_FOLDER, filename)
        try:
            # Read and close the file before the (potentially slow) embedding call.
            with open(filepath, "r", encoding="utf-8") as file:
                text = file.read()
            embedding = get_embedding(text)
            if embedding is not None:
                store_embedding(filename, text, embedding)
            else:
                print(f"Skipping {filename} due to embedding error.")
        except Exception as e:
            # Best-effort batch: report the failure and move on to the next file.
            print(f"Error processing file {filename}: {e}")

    # Calculate total embedding time
    embedding_time = time.time() - start_embedding_time
    print(f"\nTotal embedding time: {embedding_time:.2f} seconds")

    # Calculate Redis memory growth in MB
    memory_used = get_redis_memory_usage() - initial_memory
    print(f"Memory used for embeddings: {memory_used:.2f} MB")

# Perform KNN search and track memory usage during LLM query
def perform_knn_search(query_text: str, k: int = 2):
    """
    Answer a question using the k nearest stored chunks as context.

    Embeds query_text, runs a KNN query against the INDEX_NAME index, prints the
    first 300 characters of each match, passes the full matches to query_llm and
    prints the answer. Finally prints the elapsed wall-clock time and the change
    in this process's memory as reported by get_system_memory_usage.

    Parameters:
    query_text (str): The text query to search for in the index.
    k (int, optional): The number of nearest neighbors to retrieve. Defaults to 2.

    Returns:
    None. Returns early, after printing a message, when the query cannot be
    embedded or when the search yields no documents.
    """
    # Get initial system memory before performing the LLM query
    initial_system_memory = get_system_memory_usage()

    # The timer covers embedding, vector search and the LLM call, not the LLM alone.
    start_query_time = time.time()

    embedding = get_embedding(query_text)
    # get_embedding returns None on failure; without this guard the None would be
    # packed into the query vector and the search itself would fail.
    if embedding is None:
        print("Embedding generation failed, cannot perform search.")
        return

    # Perform the KNN search
    q = (
        Query(f"*=>[KNN {k} @embedding $vec AS vector_distance]")
        .sort_by("vector_distance")
        .return_fields("text", "vector_distance")
        .dialect(2)
    )
    res = redis_client.ft(INDEX_NAME).search(
        q, query_params={"vec": np.array(embedding, dtype=np.float32).tobytes()}
    )
    matching_chunks = [doc.text for doc in res.docs]

    if not matching_chunks:
        print("No relevant matches found.")
        return

    print(f"\nTop {len(matching_chunks)} matching chunks retrieved:")
    for i, chunk in enumerate(matching_chunks):
        print(f"\nChunk {i+1}: {chunk[:300]}...")  # Display first 300 characters

    # Get response from the LLM
    # NOTE(review): query_llm is not defined in this cell; presumably it is
    # provided by another cell of the session — confirm before running standalone.
    response = query_llm(query_text, matching_chunks)
    print(f"\nResponse from {LLM_MODEL}:\n{response}\n")

    # Calculate time taken since the timer was started
    query_time = time.time() - start_query_time
    print(f"LLM query execution time: {query_time:.2f} seconds")

    # Calculate the change in process memory in MB
    memory_used_for_query = get_system_memory_usage() - initial_system_memory
    print(f"Memory used for LLM query: {memory_used_for_query:.2f} MB")


# Prompt user to select an embedding model
# Menu keys are the strings the user types; the values are the model names that
# get_embedding reads through EMBEDDING_MODEL.
embedding_models = {
    "1": "nomic-embed-text",
    "2": "jina-embeddings-v2-base-en",
    "3": "granite-embedding:278m",
}

print("Select an embedding model:")
for key, model in embedding_models.items():
    print(f"{key}: {model}")

# selected_model starts as None, so the prompt is shown at least once and is
# repeated until the input is one of the menu keys.
while selected_model not in embedding_models:
    selected_model = input("Enter the number corresponding to your choice: ")

EMBEDDING_MODEL = embedding_models[selected_model]

# If Jina embeddings are selected, load the model
# (get_embedding calls jina_model.encode for this choice; other choices go through ollama).
# NOTE(review): trust_remote_code=True presumably executes code shipped with the model repository — confirm this is acceptable.
if EMBEDDING_MODEL == "jina-embeddings-v2-base-en":
    jina_model = AutoModel.from_pretrained("jinaai/jina-embeddings-v2-base-en", trust_remote_code=True)

# Prompt user to select an LLM model
# The chosen name is stored in LLM_MODEL, which perform_knn_search prints with the answer.
llm_models = {
    "1": "llama3.2:latest",
    "2": "mistral",
}

print("Select an LLM model:")
for key, model in llm_models.items():
    print(f"{key}: {model}")

# Same validation loop as for the embedding model.
while selected_llm_model not in llm_models:
    selected_llm_model = input("Enter the number corresponding to your choice: ")

LLM_MODEL = llm_models[selected_llm_model]
print(f"Using LLM model: {LLM_MODEL}")

# NOTE: the prompts above run at module level, outside this guard; only the
# indexing and question-answering pipeline below is guarded.
if __name__ == "__main__":
    create_hnsw_index()
    process_text_files()
    query = input("What question do you want to ask? ")
    perform_knn_search(query)


Select an embedding model:
1: nomic-embed-text
2: jina-embeddings-v2-base-en
3: granite-embedding:278m


Enter the number corresponding to your choice:  3


Select an LLM model:
1: llama3.2:latest
2: mistral


Enter the number corresponding to your choice:  2


Using LLM model: mistral
Index created successfully.
Stored embedding for: AWS Intro_chunk0.txt
Stored embedding for: AWS Intro_chunk1.txt
Stored embedding for: AWS Intro_chunk2.txt
Stored embedding for: AWS Intro_chunk3.txt
Stored embedding for: AWS Intro_chunk4.txt
Stored embedding for: AWS Intro_chunk5.txt
Stored embedding for: B+Tree Walkthrough_chunk0.txt
Stored embedding for: B+Tree Walkthrough_chunk1.txt
Stored embedding for: B+Tree Walkthrough_chunk2.txt
Stored embedding for: B-Trees — CS3 Data Structures & Algorithms_chunk0.txt
Stored embedding for: B-Trees — CS3 Data Structures & Algorithms_chunk1.txt
Stored embedding for: B-Trees — CS3 Data Structures & Algorithms_chunk10.txt
Stored embedding for: B-Trees — CS3 Data Structures & Algorithms_chunk11.txt
Stored embedding for: B-Trees — CS3 Data Structures & Algorithms_chunk12.txt
Stored embedding for: B-Trees — CS3 Data Structures & Algorithms_chunk13.txt
Stored embedding for: B-Trees — CS3 Data Structures & Algorithms_chunk14.

What question do you want to ask?  what is an AVL tree?



Top 2 matching chunks retrieved:

Chunk 1: tree works. This would be the “ tree”, or a tree of order 3. Figure 12.6.2: An example of building a tree Next, let’s see how to search. Figure 12.6.3: An example of searching a tree Finally, let’s see an example of deleting from the tree B+ B+ 2 −3+ B+ 1 / 28 << < > >> Example 2-3+ Tree Visualizati...

Chunk 2: on either side of the subtree pointer. (This generalizes the BST invariant.) 5. The root has at least two children if it is not a leaf. For example, the following is an order-5 B-tree (m=5) where the leaves have enough space to store up to 3 data records: Because the height of the tree is uniformly ...

LLM query time: 48.67 seconds

Response from mistral:
 An AVL tree (Adelson-Velsky and Landis tree) is a self-balancing binary search tree, which was invented to solve the problem of maintaining the height balance in a Binary Search Tree (BST). It's named after Georgy Adelson-Marsálík (George Adelson-Velsky in English) and Evgeniy Land

In [39]:
import chromadb
import ollama
import os
import time
import psutil
import numpy as np
from transformers import AutoModel

# Initialize ChromaDB connection
# HTTP client for a ChromaDB server expected at localhost:8000.
chroma_client = chromadb.HttpClient(host="localhost", port=8000)

# Set constants
COLLECTION_NAME = "ds4300-rag"  # collection deleted and recreated by get_or_create_collection
TEXT_FOLDER = "processed_texts"  # folder that process_text_files scans for .txt files
# Placeholders; the interactive prompts further down fill them in.
selected_model = None
jina_model = None
selected_llm_model = None

# Report the resident memory of this Python process in MB
def get_system_memory_usage():
    """Return the resident set size (RSS) of the current process, converted from bytes to MB."""
    bytes_per_mb = 1024 * 1024
    current_process = psutil.Process(os.getpid())
    rss_bytes = current_process.memory_info().rss
    return rss_bytes / bytes_per_mb

# Start every run from an empty collection
def get_or_create_collection():
    """
    Delete the COLLECTION_NAME collection if possible, then create and return a new one.

    A failure while deleting is printed and otherwise ignored, so creation is
    always attempted afterwards. Errors from the creation itself are not caught.
    """
    try:
        # Remove whatever a previous run left behind.
        chroma_client.delete_collection(COLLECTION_NAME)
    except Exception as e:
        print(f"Error clearing collection: {e}")

    fresh_collection = chroma_client.create_collection(COLLECTION_NAME)
    return fresh_collection

# Module-level collection handle used by store_embedding and perform_knn_search.
# Runs at definition time, so executing this cell deletes and recreates the collection.
collection = get_or_create_collection()

def get_embedding(text: str) -> list:
    """
    Embed the given text with the embedding model chosen at startup.

    The Jina model is run locally through jina_model.encode on a one-element
    batch; every other model name is sent to ollama.embeddings.

    Returns the vector as a list of floats, or None when any exception occurs
    (the error is printed, not raised) so the caller can skip the text.
    """
    try:
        use_jina = EMBEDDING_MODEL == "jina-embeddings-v2-base-en"
        if not use_jina:
            reply = ollama.embeddings(model=EMBEDDING_MODEL, prompt=text)
            return reply["embedding"]
        first_vector = jina_model.encode([text])[0]
        return first_vector.tolist()
    except Exception as e:
        print(f"Error generating embedding for text: {e}")
        return None

def store_embedding(doc_id: str, text: str, embedding: list):
    """
    Add one document with its embedding to the ChromaDB collection.

    doc_id becomes the record id, text the stored document. When embedding is
    None nothing is added and a skip message is printed instead.
    """
    if embedding is None:
        print(f"Skipping file {doc_id} due to embedding generation error.")
        return

    # collection.add takes parallel lists; each holds exactly one entry here.
    collection.add(ids=[doc_id], embeddings=[embedding], documents=[text])
    print(f"Stored embedding for: {doc_id}")

def process_text_files():
    """
    Reads text files, generates embeddings, and stores them in ChromaDB.

    Every .txt file in TEXT_FOLDER is processed in sorted name order so repeated
    runs index in the same order. A file that cannot be read, embedded or stored
    is reported and skipped. Afterwards the total wall-clock time and the change
    in this process's memory (per get_system_memory_usage) are printed; the
    ChromaDB server is reached over HTTP, so its own memory is presumably not
    part of that figure.

    Returns None. Also returns early, after printing a message, when TEXT_FOLDER
    is not a directory or contains no .txt files.
    """
    # isdir rather than exists: a regular file named TEXT_FOLDER would otherwise
    # pass the check and make os.listdir raise NotADirectoryError.
    if not os.path.isdir(TEXT_FOLDER):
        print(f"Folder '{TEXT_FOLDER}' not found.")
        return

    text_files = sorted(f for f in os.listdir(TEXT_FOLDER) if f.endswith(".txt"))
    if not text_files:
        print("No text files found.")
        return

    # Record start time and process memory before embedding starts
    start_embedding_time = time.time()
    initial_system_memory = get_system_memory_usage()

    for filename in text_files:
        filepath = os.path.join(TEXT_FOLDER, filename)
        try:
            # Read and close the file before the (potentially slow) embedding call.
            with open(filepath, "r", encoding="utf-8") as file:
                text = file.read()
            embedding = get_embedding(text)
            # store_embedding itself skips (and reports) a None embedding.
            store_embedding(filename, text, embedding)
        except Exception as e:
            # Best-effort batch: report the failure and move on to the next file.
            print(f"Error processing file {filename}: {e}")

    # Calculate total embedding time
    embedding_time = time.time() - start_embedding_time
    print(f"\nTotal embedding time: {embedding_time:.2f} seconds")

    # Calculate memory growth in MB
    memory_used_for_embeddings = get_system_memory_usage() - initial_system_memory
    print(f"Memory used for embeddings: {memory_used_for_embeddings:.2f} MB")

def query_llm(query: str, matching_chunks: list) -> str:
    """
    Ask the selected LLM to answer the question using the retrieved chunks.

    The chunks are numbered from 1, joined with blank lines and embedded in a
    single user message together with the question; a fixed system message sets
    the assistant's role. Returns the text content of the model's reply.
    """
    numbered_chunks = []
    for position, chunk in enumerate(matching_chunks, start=1):
        numbered_chunks.append(f"Chunk {position}: {chunk}")
    context = "\n\n".join(numbered_chunks)

    system_message = {
        "role": "system",
        "content": "You are an AI assistant with expertise in computer science.",
    }
    user_message = {
        "role": "user",
        "content": (
            f"User's Question: {query}\n\n"
            f"Relevant Context:\n{context}\n\n"
            "Your task: Answer the user's question as clearly as possible."
        ),
    }

    reply = ollama.chat(model=LLM_MODEL, messages=[system_message, user_message])
    return reply["message"]["content"]

def perform_knn_search(query_text: str, k: int = 2):
    """
    Retrieve the k nearest chunks from ChromaDB and have the LLM answer with them.

    Prints a preview (first 300 characters) of every retrieved chunk, the LLM's
    answer, the elapsed wall-clock time since the start of the call and the
    change in process memory. Returns None; returns early with a message when
    the query cannot be embedded or nothing is retrieved.
    """
    memory_before = get_system_memory_usage()

    # The timer spans embedding, similarity search and the LLM call.
    started_at = time.time()

    query_vector = get_embedding(query_text)
    if query_vector is None:
        print("Embedding generation failed, cannot perform search.")
        return

    results = collection.query(query_embeddings=[query_vector], n_results=k)

    # One query embedding was sent, so its hits are the first entry of 'documents'.
    matching_chunks = results['documents'][0]
    if not matching_chunks:
        print("No relevant matches found.")
        return

    print(f"\nTop {len(matching_chunks)} matching chunks retrieved:")
    for number, chunk in enumerate(matching_chunks, start=1):
        print(f"\nChunk {number}: {chunk[:300]}...")

    answer = query_llm(query_text, matching_chunks)
    print(f"\nResponse from {LLM_MODEL}:\n{answer}\n")

    elapsed = time.time() - started_at
    print(f"LLM query execution time: {elapsed:.2f} seconds")

    memory_delta = get_system_memory_usage() - memory_before
    print(f"Memory used for LLM query: {memory_delta:.2f} MB")


# Prompt user to select an embedding model
# Menu keys are the strings the user types; the values are the model names that
# get_embedding reads through EMBEDDING_MODEL.
embedding_models = {
    "1": "nomic-embed-text",
    "2": "jina-embeddings-v2-base-en",
    "3": "granite-embedding:278m",
}

print("Select an embedding model:")
for key, model in embedding_models.items():
    print(f"{key}: {model}")

# selected_model starts as None, so the prompt is shown at least once and is
# repeated until the input is one of the menu keys.
while selected_model not in embedding_models:
    selected_model = input("Enter the number corresponding to your choice: ")

EMBEDDING_MODEL = embedding_models[selected_model]

# If Jina embeddings are selected, load the model
# (get_embedding calls jina_model.encode for this choice; other choices go through ollama).
# NOTE(review): trust_remote_code=True presumably executes code shipped with the model repository — confirm this is acceptable.
if EMBEDDING_MODEL == "jina-embeddings-v2-base-en":
    jina_model = AutoModel.from_pretrained("jinaai/jina-embeddings-v2-base-en", trust_remote_code=True)

# Prompt user to select an LLM model
# The chosen name is stored in LLM_MODEL, which query_llm passes to ollama.chat.
llm_models = {
    "1": "llama3.2:latest",
    "2": "mistral",
}

print("Select an LLM model:")
for key, model in llm_models.items():
    print(f"{key}: {model}")

# Same validation loop as for the embedding model.
while selected_llm_model not in llm_models:
    selected_llm_model = input("Enter the number corresponding to your choice: ")

LLM_MODEL = llm_models[selected_llm_model]
print(f"Using LLM model: {LLM_MODEL}")

# NOTE: the prompts above run at module level, outside this guard; only the
# indexing and question-answering pipeline below is guarded.
if __name__ == "__main__":
    # process_text_files embeds the .txt files in TEXT_FOLDER and stores them in the collection
    process_text_files()
    query = input("What question do you want to ask? ")
    # embeds the question, retrieves the nearest chunks and queries the LLM with them
    perform_knn_search(query)



Select an embedding model:
1: nomic-embed-text
2: jina-embeddings-v2-base-en
3: granite-embedding:278m


Enter the number corresponding to your choice:  1


Select an LLM model:
1: llama3.2:latest
2: mistral


Enter the number corresponding to your choice:  2


Using LLM model: mistral
Stored embedding for: AWS Intro_chunk0.txt
Stored embedding for: AWS Intro_chunk1.txt
Stored embedding for: AWS Intro_chunk2.txt
Stored embedding for: AWS Intro_chunk3.txt
Stored embedding for: AWS Intro_chunk4.txt
Stored embedding for: AWS Intro_chunk5.txt
Stored embedding for: B+Tree Walkthrough_chunk0.txt
Stored embedding for: B+Tree Walkthrough_chunk1.txt
Stored embedding for: B+Tree Walkthrough_chunk2.txt
Stored embedding for: B-Trees — CS3 Data Structures & Algorithms_chunk0.txt
Stored embedding for: B-Trees — CS3 Data Structures & Algorithms_chunk1.txt
Stored embedding for: B-Trees — CS3 Data Structures & Algorithms_chunk10.txt
Stored embedding for: B-Trees — CS3 Data Structures & Algorithms_chunk11.txt
Stored embedding for: B-Trees — CS3 Data Structures & Algorithms_chunk12.txt
Stored embedding for: B-Trees — CS3 Data Structures & Algorithms_chunk13.txt
Stored embedding for: B-Trees — CS3 Data Structures & Algorithms_chunk14.txt
Stored embedding for: B-

What question do you want to ask?  what is an AVL tree?



Top 2 matching chunks retrieved:

Chunk 1: 12.6. B-Trees 12.6.1. B-Trees This module presents the B-tree. B-trees are usually attributed to R. Bayer and E. McCreight who described the B-tree in a 1972 paper. By 1979, B-trees had replaced virtually all large-file access methods other than hashing. B-trees, or some variant of B-trees, are the ...

Chunk 2: Tree Visualization: Insert into a tree of degree 4 B+ 3/17/25, 9:22 AM 12.6. B-Trees — CS3 Data Structures & Algorithms https://opendsa-server.cs.vt.edu/ODSA/Books/CS3/html/BTree.html#id2 5/9 Here is an exercise to see if you get the basic idea of tree insertion. To delete record from the tree, firs...

Response from mistral:
 An AVL tree is a self-balancing binary search tree, named after its inventors Adel'son-Vel'skii and Landis. It maintains the balance of the tree by adjusting the height of the tree during insertions and deletions to ensure that the difference in heights between subtrees at any given node is no more than one. Thi

In [46]:
import chromadb
import ollama
import os
import time
import psutil
import numpy as np
from transformers import AutoModel

# Initialize ChromaDB connection
# HTTP client for a ChromaDB server expected at localhost:8000.
chroma_client = chromadb.HttpClient(host="localhost", port=8000)

# Set constants
COLLECTION_NAME = "ds4300-rag"  # collection deleted and recreated by get_or_create_collection
TEXT_FOLDER = "processed_texts"  # folder that process_text_files scans for .txt files
# Placeholders; the interactive prompts further down fill them in.
selected_model = None
jina_model = None
selected_llm_model = None

# Report the resident memory of this Python process in MB
def get_system_memory_usage():
    """Return the resident set size (RSS) of the current process, converted from bytes to MB."""
    pid = os.getpid()
    rss_bytes = psutil.Process(pid).memory_info().rss
    return rss_bytes / (1024 * 1024)

def get_stable_memory_usage(interval: float = 0.1, tolerance: float = 0.05, max_samples: int = 50):
    """
    Return a process-memory reading (MB) taken once consecutive samples agree.

    Memory is sampled with get_system_memory_usage every `interval` seconds. A
    sample is accepted as stable when it differs from the previous sample by at
    most `tolerance` (a fraction of the previous sample).

    Each sample is compared with the one directly before it rather than with the
    very first reading. Comparing against the first reading never terminates if
    memory settles at a level more than `tolerance` away from it.

    Parameters:
    interval (float): Seconds to wait between samples. Defaults to 0.1.
    tolerance (float): Allowed relative change between consecutive samples.
        Defaults to 0.05 (5%).
    max_samples (int): Upper bound on follow-up samples, so the call always
        returns. Defaults to 50.

    Returns:
    float: The first stable sample, or the most recent sample if no two
    consecutive samples agreed within max_samples attempts.
    """
    previous = get_system_memory_usage()
    for _ in range(max_samples):
        time.sleep(interval)  # Give the reading time to settle before re-sampling
        current = get_system_memory_usage()
        if abs(current - previous) <= tolerance * previous:
            return current
        previous = current

    # Never stabilized within the budget: fall back to the latest reading.
    return previous

# Start every run from an empty collection
def get_or_create_collection():
    """
    Delete the COLLECTION_NAME collection if possible, then create and return a new one.

    A failure while deleting is printed and otherwise ignored, so creation is
    always attempted afterwards. Errors from the creation itself are not caught.
    """
    try:
        # Remove whatever a previous run left behind.
        chroma_client.delete_collection(COLLECTION_NAME)
    except Exception as e:
        print(f"Error clearing collection: {e}")

    new_collection = chroma_client.create_collection(COLLECTION_NAME)
    return new_collection

# Module-level collection handle used by store_embedding and perform_knn_search.
# Runs at definition time, so executing this cell deletes and recreates the collection.
collection = get_or_create_collection()

def get_embedding(text: str) -> list:
    """
    Embed the given text with the embedding model chosen at startup.

    The Jina model is run locally through jina_model.encode on a one-element
    batch; every other model name is sent to ollama.embeddings.

    Returns the vector as a list of floats, or None when any exception occurs
    (the error is printed, not raised) so the caller can skip the text.
    """
    try:
        if EMBEDDING_MODEL != "jina-embeddings-v2-base-en":
            ollama_reply = ollama.embeddings(model=EMBEDDING_MODEL, prompt=text)
            return ollama_reply["embedding"]
        encoded_batch = jina_model.encode([text])
        return encoded_batch[0].tolist()
    except Exception as e:
        print(f"Error generating embedding for text: {e}")
        return None

def store_embedding(doc_id: str, text: str, embedding: list):
    """
    Add one document with its embedding to the ChromaDB collection.

    doc_id becomes the record id, text the stored document. When embedding is
    None nothing is added and a skip message is printed instead.
    """
    if embedding is None:
        print(f"Skipping file {doc_id} due to embedding generation error.")
        return

    # collection.add takes parallel lists; each holds exactly one entry here.
    collection.add(ids=[doc_id], embeddings=[embedding], documents=[text])
    print(f"Stored embedding for: {doc_id}")

def process_text_files():
    """
    Reads text files, generates embeddings, and stores them in ChromaDB.

    Every .txt file in TEXT_FOLDER is processed in sorted name order so repeated
    runs index in the same order. A file that cannot be read, embedded or stored
    is reported and skipped. Afterwards the total wall-clock time and the change
    in this process's memory (per get_stable_memory_usage) are printed; the
    ChromaDB server is reached over HTTP, so its own memory is presumably not
    part of that figure.

    Returns None. Also returns early, after printing a message, when TEXT_FOLDER
    is not a directory or contains no .txt files.
    """
    # isdir rather than exists: a regular file named TEXT_FOLDER would otherwise
    # pass the check and make os.listdir raise NotADirectoryError.
    if not os.path.isdir(TEXT_FOLDER):
        print(f"Folder '{TEXT_FOLDER}' not found.")
        return

    text_files = sorted(f for f in os.listdir(TEXT_FOLDER) if f.endswith(".txt"))
    if not text_files:
        print("No text files found.")
        return

    # Record start time for embedding process
    start_embedding_time = time.time()

    # Get initial (stabilized) process memory usage
    initial_system_memory = get_stable_memory_usage()

    for filename in text_files:
        filepath = os.path.join(TEXT_FOLDER, filename)
        try:
            # Read and close the file before the (potentially slow) embedding call.
            with open(filepath, "r", encoding="utf-8") as file:
                text = file.read()
            embedding = get_embedding(text)
            # store_embedding itself skips (and reports) a None embedding.
            store_embedding(filename, text, embedding)
        except Exception as e:
            # Best-effort batch: report the failure and move on to the next file.
            print(f"Error processing file {filename}: {e}")

    # Calculate total embedding time (taken before the final memory sampling,
    # which itself sleeps between samples)
    embedding_time = time.time() - start_embedding_time
    print(f"\nTotal embedding time: {embedding_time:.2f} seconds")

    # Calculate memory growth in MB
    memory_used_for_embeddings = get_stable_memory_usage() - initial_system_memory
    print(f"Memory used for embeddings: {memory_used_for_embeddings:.2f} MB")

def query_llm(query: str, matching_chunks: list) -> str:
    """
    Ask the selected LLM to answer the question using the retrieved chunks.

    The chunks are numbered from 1, joined with blank lines and embedded in a
    single user message together with the question; a fixed system message sets
    the assistant's role. Returns the text content of the model's reply.
    """
    labelled = (f"Chunk {idx}: {chunk}" for idx, chunk in enumerate(matching_chunks, start=1))
    context = "\n\n".join(labelled)

    prompt_to_send = (
        f"User's Question: {query}\n\n"
        f"Relevant Context:\n{context}\n\n"
        "Your task: Answer the user's question as clearly as possible."
    )
    conversation = [
        {"role": "system", "content": "You are an AI assistant with expertise in computer science."},
        {"role": "user", "content": prompt_to_send},
    ]

    reply = ollama.chat(model=LLM_MODEL, messages=conversation)
    return reply["message"]["content"]

def perform_knn_search(query_text: str, k: int = 2):
    """
    Retrieve the k nearest chunks from ChromaDB and have the LLM answer with them.

    Prints a preview (first 300 characters) of every retrieved chunk, the LLM's
    answer, the elapsed wall-clock time and the change in process memory
    (sampled with get_stable_memory_usage). Returns None; returns early with a
    message when the query cannot be embedded or nothing is retrieved.
    """
    memory_before = get_stable_memory_usage()

    # Started after the initial memory sampling; spans embedding, similarity
    # search and the LLM call.
    started_at = time.time()

    query_vector = get_embedding(query_text)
    if query_vector is None:
        print("Embedding generation failed, cannot perform search.")
        return

    results = collection.query(query_embeddings=[query_vector], n_results=k)

    # One query embedding was sent, so its hits are the first entry of 'documents'.
    matching_chunks = results['documents'][0]
    if not matching_chunks:
        print("No relevant matches found.")
        return

    print(f"\nTop {len(matching_chunks)} matching chunks retrieved:")
    for number, chunk in enumerate(matching_chunks, start=1):
        print(f"\nChunk {number}: {chunk[:300]}...")

    answer = query_llm(query_text, matching_chunks)
    print(f"\nResponse from {LLM_MODEL}:\n{answer}\n")

    elapsed = time.time() - started_at
    print(f"LLM query execution time: {elapsed:.2f} seconds")

    memory_delta = get_stable_memory_usage() - memory_before
    print(f"Memory used for LLM query: {memory_delta:.2f} MB")


# Prompt user to select an embedding model
# Menu keys are the strings the user types; the values are the model names that
# get_embedding reads through EMBEDDING_MODEL.
embedding_models = {
    "1": "nomic-embed-text",
    "2": "jina-embeddings-v2-base-en",
    "3": "granite-embedding:278m",
}

print("Select an embedding model:")
for key, model in embedding_models.items():
    print(f"{key}: {model}")

# selected_model starts as None, so the prompt is shown at least once and is
# repeated until the input is one of the menu keys.
while selected_model not in embedding_models:
    selected_model = input("Enter the number corresponding to your choice: ")

EMBEDDING_MODEL = embedding_models[selected_model]

# If Jina embeddings are selected, load the model
# (get_embedding calls jina_model.encode for this choice; other choices go through ollama).
# NOTE(review): trust_remote_code=True presumably executes code shipped with the model repository — confirm this is acceptable.
if EMBEDDING_MODEL == "jina-embeddings-v2-base-en":
    jina_model = AutoModel.from_pretrained("jinaai/jina-embeddings-v2-base-en", trust_remote_code=True)

# Prompt user to select an LLM model
# The chosen name is stored in LLM_MODEL, which query_llm passes to ollama.chat.
llm_models = {
    "1": "llama3.2:latest",
    "2": "mistral",
}

print("Select an LLM model:")
for key, model in llm_models.items():
    print(f"{key}: {model}")

# Same validation loop as for the embedding model.
while selected_llm_model not in llm_models:
    selected_llm_model = input("Enter the number corresponding to your choice: ")

LLM_MODEL = llm_models[selected_llm_model]
print(f"Using LLM model: {LLM_MODEL}")

# NOTE: the prompts above run at module level, outside this guard; only the
# indexing and question-answering pipeline below is guarded.
if __name__ == "__main__":
    # process_text_files embeds the .txt files in TEXT_FOLDER and stores them in the collection
    process_text_files()
    query = input("What question do you want to ask? ")
    # embeds the question, retrieves the nearest chunks and queries the LLM with them
    perform_knn_search(query)


Select an embedding model:
1: nomic-embed-text
2: jina-embeddings-v2-base-en
3: granite-embedding:278m


Enter the number corresponding to your choice:  3


Select an LLM model:
1: llama3.2:latest
2: mistral


Enter the number corresponding to your choice:  2


Using LLM model: mistral
Stored embedding for: AWS Intro_chunk0.txt
Stored embedding for: AWS Intro_chunk1.txt
Stored embedding for: AWS Intro_chunk2.txt
Stored embedding for: AWS Intro_chunk3.txt
Stored embedding for: AWS Intro_chunk4.txt
Stored embedding for: AWS Intro_chunk5.txt
Stored embedding for: B+Tree Walkthrough_chunk0.txt
Stored embedding for: B+Tree Walkthrough_chunk1.txt
Stored embedding for: B+Tree Walkthrough_chunk2.txt
Stored embedding for: B-Trees — CS3 Data Structures & Algorithms_chunk0.txt
Stored embedding for: B-Trees — CS3 Data Structures & Algorithms_chunk1.txt
Stored embedding for: B-Trees — CS3 Data Structures & Algorithms_chunk10.txt
Stored embedding for: B-Trees — CS3 Data Structures & Algorithms_chunk11.txt
Stored embedding for: B-Trees — CS3 Data Structures & Algorithms_chunk12.txt
Stored embedding for: B-Trees — CS3 Data Structures & Algorithms_chunk13.txt
Stored embedding for: B-Trees — CS3 Data Structures & Algorithms_chunk14.txt
Stored embedding for: B-

What question do you want to ask?  What is an AVL tree?



Top 2 matching chunks retrieved:

Chunk 1: tree works. This would be the “ tree”, or a tree of order 3. Figure 12.6.2: An example of building a tree Next, let’s see how to search. Figure 12.6.3: An example of searching a tree Finally, let’s see an example of deleting from the tree B+ B+ 2 −3+ B+ 1 / 28 << < > >> Example 2-3+ Tree Visualizati...

Chunk 2: on either side of the subtree pointer. (This generalizes the BST invariant.) 5. The root has at least two children if it is not a leaf. For example, the following is an order-5 B-tree (m=5) where the leaves have enough space to store up to 3 data records: Because the height of the tree is uniformly ...

Response from mistral:
 An AVL tree is a self-balancing binary search tree, which means it maintains its height and balance during insertions, deletions, and lookups. The name "AVL" stands for Adelson-Velsky and Landis, who described the data structure in 1962.

The main feature of an AVL tree is that it uses a balance factor for each 