In [1]:
"""
This is the notebook for running Qwen 2.5 VL locally 
by howchih.lee.2024@mitb.smu.edu.sg

Agentic RAG with PDF upload of mixed modality for conversion to text vectors

Operating in venv with Python 3.11.9

"""

'\nThis is the notebook for running Qwen 2.5 VL locally \nby howchih.lee.2024@mitb.smu.edu.sg\n\nAgentic RAG with PDF upload of mixed modality for conversion to text vectors\n\nOperating in venv with Python 3.11.9\n\n'

## Part 1: Load Dependencies and Verify Environment properly created

In [2]:
## Verify that nomic_emebed_text:latest and qwen2.5vl:3b are installed locally under .ollama directory, otherwise follow instructions on README.md to download to environment in Terminal before proceeding.

#!ollama list

In [3]:
## Load Dependencies from requirements.txt
#!pip install -r requirements.txt


In [4]:

# When running transformers, if you encountered error on "Replicate", your transformer version is not up to date.  
# Install latest transformers from source on hugging face using this cell, then Restart and run from the top. 
# Takes over 40s to run, so please be patient.
# After Restart, do not run this cell again. 

#!pip install git+https://github.com/huggingface/transformers --upgrade --quiet

#print("✅ Transformers updated from source")
#print("🔄 Please restart your kernel after this completes")
#print("Then re-run the model loading cells except for this cell")

In [5]:
## Test environment
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoProcessor
import gc
import os
import psutil
from PIL import Image
import numpy as np
from config import *

def check_environment():
    # Check PyTorch and CUDA
    print(f"PyTorch version: {torch.__version__}")
    print(f"CUDA available: {torch.cuda.is_available()}")
    if torch.cuda.is_available():
        print(f"CUDA version: {torch.version.cuda}")
        print(f"GPU device: {torch.cuda.get_device_name(0)}")
    
    # Check Transformers
    print(f"Transformers version: {transformers.__version__}")
    
    # Check if we can load a small model (not Qwen yet)
    try:
        processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
        print("✅ Successfully loaded a test model")
    except Exception as e:
        print(f"❌ Error loading test model: {e}")
    
    print("\nEnvironment setup completed.")

if __name__ == "__main__":
    check_environment()

✅ Config loaded - Base directory: C:\Users\pakke\OneDrive - Singapore Management University\CS605 Natural Language Processing for Smart Assistant\Project\RAG
✅ Data directory: C:\Users\pakke\OneDrive - Singapore Management University\CS605 Natural Language Processing for Smart Assistant\Project\RAG\data
✅ Cache directory: C:\Users\pakke\OneDrive - Singapore Management University\CS605 Natural Language Processing for Smart Assistant\Project\RAG\cache
✅ Language dictionaries: C:\Users\pakke\OneDrive - Singapore Management University\CS605 Natural Language Processing for Smart Assistant\Project\RAG\cache\lang_dict
PyTorch version: 2.7.0+cu126
CUDA available: True
CUDA version: 12.6
GPU device: NVIDIA RTX 2000 Ada Generation Laptop GPU
Transformers version: 4.52.4


Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


✅ Successfully loaded a test model

Environment setup completed.


In [6]:
# Memory management and CPU optimization

def setup_cpu_optimization():
    """Optimize PyTorch for CPU-only inference"""
    
    # Set thread count for optimal CPU performance
    torch.set_num_threads(4)  # Adjust based on your CPU
    
    # Optimize for inference
    torch.set_grad_enabled(False)
    
    # Set memory allocation strategy
    torch.backends.quantized.engine = 'fbgemm'
    
    print(f"✅ CPU optimization configured")
    print(f"   Threads: {torch.get_num_threads()}")
    print(f"   Grad enabled: {torch.is_grad_enabled()}")

def clear_memory():
    """Clear memory cache"""
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    print("🧹 Memory cleared")

def check_memory_usage():
    """Check current memory usage"""
    process = psutil.Process(os.getpid())
    memory_info = process.memory_info()
    memory_mb = memory_info.rss / 1024 / 1024
    
    print(f"📊 Memory Usage: {memory_mb:.1f} MB")
    return memory_mb



In [7]:
# Setup CPU optimization
## The Qwuen2.5vl:3b model should take up 340MB of memory. 
setup_cpu_optimization()
clear_memory()
check_memory_usage()

✅ CPU optimization configured
   Threads: 4
   Grad enabled: False
🧹 Memory cleared
📊 Memory Usage: 939.6 MB


939.6484375

## Part 2: Using ollama, load "qwen2.5vl:3b" from ollama list. Verify that it works with sample prompt. 


In [8]:
# Import the model manager and necessary dependencies
import sys
from pathlib import Path

# Add project root to path if needed
project_root = Path.cwd()
sys.path.insert(0, str(project_root))

# Import our model manager classes
from src.model_manager import OllamaManager, QwenMultimodalManager
from src.document_processor import DocumentProcessor
from config import *

import torch
import os
import logging
import matplotlib.pyplot as plt
from PIL import Image
import io

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Memory management - reuse your existing functions
def clear_memory():
    """Clear memory cache"""
    import gc
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    print("🧹 Memory cleared")

print("✅ Imports successful")



✅ Imports successful


In [9]:
# Update the OllamaManager with the correct model name
ollama_manager = OllamaManager(model_name="qwen2.5vl:3b")

# Check if model is available
model_available = ollama_manager.check_model_available()
print(f"✅ Model available: {model_available}")

if not model_available:
    print("⚠️ Model not available. Pulling now (this may take a while)...")
    model_pulled = ollama_manager.pull_model()
    print(f"✅ Model pulled: {model_pulled}")

# Test simple text generation with the updated model name
print("\n🔍 Testing simple text generation with model: qwen2.5vl:3b")
response = ollama_manager.generate_text(
    "What are the top 5 tourist attractions in Singapore?",
    max_tokens=100
)

if 'error' in response:
    print(f"❌ Generation failed: {response['error']}")
    
    # Try the quantized version if standard version fails
    print("\n🔍 Trying quantized version: qwen2.5vl:3b-q4_K_M")
    ollama_manager = OllamaManager(model_name="qwen2.5vl:3b-q4_K_M")
    
    model_available = ollama_manager.check_model_available()
    if not model_available:
        print("⚠️ Quantized model not available. Pulling now...")
        ollama_manager.pull_model()
    
    response = ollama_manager.generate_text(
        "What are the top 5 tourist attractions in Singapore?",
        max_tokens=150
    )
    
    if 'error' in response:
        print(f"❌ Generation with quantized model failed: {response['error']}")
    else:
        print(f"✅ Generation with quantized model successful!")
        print(f"Response: {response['response']}")
else:
    print(f"✅ Generation successful!")
    print(f"Response: {response['response']}")
    print(f"Duration: {response['total_duration'] / 1e9:.2f} seconds")

INFO:src.model_manager:OllamaManager initialized with model: qwen2.5vl:3b
INFO:src.model_manager:Model qwen2.5vl:3b is available locally


✅ Model available: True

🔍 Testing simple text generation with model: qwen2.5vl:3b
✅ Generation successful!
Response: 1. Singapore Zoo
2. Singapore Botanic Gardens
3. Singapore River
4. Singapore Flyer
5. Sentosa Island
Duration: 4.19 seconds


In [10]:
# Import our custom RAG components
from src.embedding_manager import EmbeddingManager
from src.chroma_manager import ChromaManager
from src.rag_manager import RAGManager
from src.document_processor import DocumentProcessor
from datetime import datetime

# Additional imports for visualization
import matplotlib.pyplot as plt
import json
import pandas as pd
from IPython.display import display, HTML

In [11]:
# Initialize our embedding manager with the models we verified are working
embedding_manager = EmbeddingManager(
    text_embedding_model="nomic-embed-text",  # Specialized embedding model
    vision_model="qwen2.5vl:3b"               # Vision model for image descriptions
)

# Initialize ChromaDB manager
chroma_manager = ChromaManager(
    persist_directory=VECTOR_DB_PATH,          # From config.py
    embedding_model="nomic-embed-text",        # Should match embedding_manager
    collection_name="sg_explorer_documents"         # Collection name for this project
)

print(f"✅ Components initialized")
print(f"   - Using text embedding model: nomic-embed-text")
print(f"   - Using vision model: qwen2.5vl:3b")
print(f"   - ChromaDB persistence directory: {VECTOR_DB_PATH}")

INFO:src.embedding_manager:EmbeddingManager initialized with text model: nomic-embed-text, vision model: qwen2.5vl:3b
INFO:chromadb.telemetry.product.posthog:Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
INFO:src.chroma_manager:Using existing collection: sg_explorer_documents
INFO:src.chroma_manager:ChromaManager initialized with collection: sg_explorer_documents


✅ Components initialized
   - Using text embedding model: nomic-embed-text
   - Using vision model: qwen2.5vl:3b
   - ChromaDB persistence directory: C:\Users\pakke\OneDrive - Singapore Management University\CS605 Natural Language Processing for Smart Assistant\Project\RAG\cache\vector_db


In [12]:
# Create vector embeddings, one for text, one for images, one for linking the two, for each PDF

## Takes more than 25 mins to run
## The embeddings are stored in a temp file, different for each PDF. 
## The results will be saved in the ChromaDB vector database.

## Skip this cell if you want to query a PDF that has already been processed and stored in the vector database.

text_embeddings = []
text_ids = []
text_documents = []
text_metadatas = []

# Process text content
print("\n🔄 Processing text content...")
for i, content in enumerate(text_contents):
    print(f"  Processing text item {i+1}/{len(text_contents)}: {content.content_id}")
    text = content.content_data
    content_id = content.content_id
    
    # Generate embedding
    embedding = embedding_manager.generate_text_embedding(text)
    
    if embedding:
        text_embeddings.append(embedding)
        text_ids.append(content_id)
        text_documents.append(text)
        text_metadatas.append({
            "source": mixed_doc.filename,
            "page": content.source_page,
            "content_type": "text",
            "document_type": mixed_doc.document_type.value
        })
        print(f"  ✅ Generated embedding for {content_id}")
    else:
        print(f"  ❌ Failed to generate embedding for {content_id}")

# Process image content
print("\n🔄 Processing image content...")
image_embeddings = []
image_ids = []
image_documents = []
image_metadatas = []

import tempfile
import os

for i, content in enumerate(image_contents):
    print(f"  Processing image item {i+1}/{len(image_contents)}: {content.content_id}")
    # Save image to temp file
    image = content.content_data
    content_id = content.content_id
    
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
        temp_path = temp_file.name
        image.save(temp_path)
    
    try:
        # Generate description
        description = embedding_manager.generate_image_description(temp_path)
        
        if description:
            print(f"  ✓ Generated description ({len(description)} chars)")
            
            # Generate embedding for description
            embedding = embedding_manager.generate_text_embedding(description)
            
            if embedding:
                image_embeddings.append(embedding)
                image_ids.append(content_id)
                image_documents.append(description)
                image_metadatas.append({
                    "source": mixed_doc.filename,
                    "page": content.source_page,
                    "content_type": "image_description",
                    "document_type": mixed_doc.document_type.value
                })
                print(f"  ✅ Generated embedding for {content_id}")
            else:
                print(f"  ❌ Failed to generate embedding for {content_id}")
        else:
            print(f"  ❌ Failed to generate description for {content_id}")
    finally:
        # Clean up temp file
        if os.path.exists(temp_path):
            os.unlink(temp_path)

# Summarize results
print("\n📊 Embedding generation summary:")
print(f"  - Text items: {len(text_contents)}, successfully embedded: {len(text_embeddings)}")
print(f"  - Image items: {len(image_contents)}, successfully embedded: {len(image_embeddings)}")


🔄 Processing text content...


NameError: name 'text_contents' is not defined

## Part 4: Vector DB Integration 

In [None]:
# Add text embeddings to ChromaDB.  Stores these embeddings in ChromaDB, which persists them to disk in the location specified by persist_directory (which defaults to VECTOR_DB_PATH from your config)

## Skip this cell if you want to query a PDF that has already been processed and stored in the vector database.

'''print("\n🔄 Storing text embeddings in ChromaDB...")
if text_embeddings:
    
    success = chroma_manager.add_with_embeddings(
        texts=text_documents,
        embeddings=text_embeddings,
        metadatas=text_metadatas,
        ids=text_ids
    )
    print(f"  {'✅' if success else '❌'} Stored {len(text_embeddings)} text embeddings")
else:
    print("  ⚠️ No text embeddings to store")

# Add image embeddings to ChromaDB
print("\n🔄 Storing image embeddings in ChromaDB...")
if image_embeddings:
    success = chroma_manager.add_with_embeddings(
        texts=image_documents,
        embeddings=image_embeddings,
        metadatas=image_metadatas,
        ids=image_ids
    )
    print(f"  {'✅' if success else '❌'} Stored {len(image_embeddings)} image embeddings")
else:
    print("  ⚠️ No image embeddings to store")

# Get collection stats
stats = chroma_manager.get_collection_stats()
print(f"\n📊 ChromaDB collection stats:")
for key, value in stats.items():
    print(f"  - {key}: {value}")'''


## Part 4b: Come straight here if you already have vector embeddings for the text and images from RAG. Begin your query by calling up vectordb from ChromaDB.

In [13]:
# If PDF vector embeddings already stored in Chroma DB, get stats directly about your collection

## If you want to query a PDF that has already been processed and stored in the vector database, start from here.
stats = chroma_manager.get_collection_stats()
print(f"📊 ChromaDB collection stats:")
for key, value in stats.items():
    print(f"  - {key}: {value}")

# Peek at some items in the collection
sample = chroma_manager.collection.peek(limit=30)
print(f"\n📄 Sample items:")
print(f"  - IDs: {sample['ids']}")
print(f"  - Metadata samples: {sample['metadatas'][:]}")  # Show first 3 metadata entries

INFO:src.chroma_manager:Collection stats: 42 items


📊 ChromaDB collection stats:
  - name: sg_explorer_documents
  - count: 42
  - embedding_model: nomic-embed-text
  - embedding_dimension: unknown
  - persist_directory: C:\Users\pakke\OneDrive - Singapore Management University\CS605 Natural Language Processing for Smart Assistant\Project\RAG\cache\vector_db

📄 Sample items:
  - IDs: ['singapore_explorer_guide_text_text_p1_000', 'singapore_explorer_guide_text_text_p2_000', 'singapore_explorer_guide_text_text_p3_000', 'singapore_explorer_guide_text_text_p4_000', 'singapore_explorer_guide_text_text_p5_000', 'singapore_explorer_guide_text_text_p6_000', 'singapore_explorer_guide_text_text_p7_000', 'singapore_explorer_guide_text_text_p8_000', 'singapore_explorer_guide_text_text_p9_000', 'singapore_explorer_guide_text_text_p10_000', 'singapore_explorer_guide_text_text_p11_000', 'singapore_explorer_guide_image_text_p1_000', 'singapore_explorer_guide_image_text_p2_000', 'singapore_explorer_guide_image_text_p3_000', 'singapore_explorer_guide_ima

## Part 5: Generate RAG Query for Text

In [14]:
# Generate Rag Query  
from src.rag_query import RAGQueryEngine

# Initialize the RAG query engine
rag_engine = RAGQueryEngine(
    embedding_manager=embedding_manager,
    chroma_manager=chroma_manager,
    ollama_manager=ollama_manager,
    default_results=3
)

# Now you can use it to perform queries
result = rag_engine.query("What is the purpose of this document? Please summarise in no more than 100 words.")

INFO:src.rag_query:RAGQueryEngine initialized with default_results=3
INFO:src.rag_query:Query: What is the purpose of this document? Please summarise in no more than 100 words.
INFO:src.chroma_manager:Embedding query returned 3 results
INFO:src.rag_query:Found 3 relevant chunks
INFO:src.rag_query:Result 1: Relevance: -365.1010, Source: mixed_singapore_explorer_guide_text1, Page: 5, Type: image_description
INFO:src.rag_query:Content preview: The image appears to be a promotional or informational page for a museum or educational center in Si...
INFO:src.rag_query:Result 2: Relevance: -382.9700, Source: mixed_singapore_explorer_guide_text1, Page: 1, Type: image_description
INFO:src.rag_query:Content preview: The image is a simple, clean design with a predominantly red background. At the top right corner, th...
INFO:src.rag_query:Result 3: Relevance: -383.4837, Source: mixed_singapore_explorer_guide_text1, Page: 9, Type: image_description
INFO:src.rag_query:Content preview: The image is a 

In [29]:
def extract_answer(response):
    if isinstance(response, str) and "'answer':" in response:
        return response.split("Answer:", 1)[1].strip()
    return ""  # or response, or np.nan, depending on your use case

In [40]:
import pandas as pd
import time

FILE_NAME = "test.xlsx"

# Step 1: Load your Excel file
df = pd.read_excel(FILE_NAME)  # Update with actual filename

# Step 2: Add response column by running raw_query on each Raw Query
responses = []
times = []

for prompt in df['Raw Query']:
    start = time.time()
    try:
        result = rag_engine.query(prompt)
        response = result.get('answer', '')  # Extract only the answer field
    except Exception as e:
        response = f"[ERROR] {str(e)}"
    end = time.time()
    
    responses.append(response)
    times.append(end - start)



INFO:src.rag_query:Query: We're a family of 4 with two children aged 6 and 9 visiting Singapore for 5 days. We love interactive science exhibits, nature parks, and kid-friendly activities. Can you suggest an itinerary with one rest day in the middle? Show us relevant attractions with images.
INFO:src.chroma_manager:Embedding query returned 3 results
INFO:src.rag_query:Found 3 relevant chunks
INFO:src.rag_query:Result 1: Relevance: -199.1837, Source: singapore_explorer_guide_text.pdf, Page: 4, Type: text
INFO:src.rag_query:Content preview: 1 04- A QUICK GUIDE TO SINGAPORE'S DISTRICTS /

MARINA BAY

The marquee of modern Singapore

for eve...
INFO:src.rag_query:Result 2: Relevance: -199.1837, Source: mixed_singapore_explorer_guide_text1, Page: 4, Type: text
INFO:src.rag_query:Content preview: 1 04- A QUICK GUIDE TO SINGAPORE'S DISTRICTS /

MARINA BAY

The marquee of modern Singapore

for eve...
INFO:src.rag_query:Result 3: Relevance: -213.0801, Source: singapore_explorer_guide_text.pdf, 

In [44]:
print(times)

[17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436, 17.796164989471436,

In [43]:
# Step 3: Save the results
df['Response'] = responses
df['Query Time (sec)'] = times

# Step 4: Save to a new Excel/CSV file
df.to_excel("query_responses.xlsx", index=False)

In [25]:
import time

start_time = time.time()

# Query your RAG engine
result1 = rag_engine.query(
    "You are a personal travel agent. Your client is looking for a travel itinerary for a four day trip to Singapore. "
    "Your client has a family of two adults and two children, and is interested in food and culture activities at night "
    "for the adults and scientific exploration for the children during the day. Please generate a four day itinerary for your client "
    "with half a rest day on Day 2."
)

end_time = time.time()
elapsed_time = end_time - start_time
print(f"⏱️ Query took {elapsed_time:.2f} seconds ({elapsed_time/60:.2f} minutes)")

INFO:src.rag_query:Query: You are a personal travel agent. Your client is looking for a travel itinerary for a four day trip to Singapore. Your client has a family of two adults and two children, and is interested in food and culture activities at night for the adults and scientific exploration for the children during the day. Please generate a four day itinerary for your client with half a rest day on Day 2.
INFO:src.chroma_manager:Embedding query returned 3 results
INFO:src.rag_query:Found 3 relevant chunks
INFO:src.rag_query:Result 1: Relevance: -219.2856, Source: singapore_explorer_guide_image.pdf, Page: 9, Type: text
INFO:src.rag_query:Content preview: ACTIVITIES TO
EXPLORE IN SINGAPORE

BOX BACKPACKERS HOSTEL

1 VESPA TOURS BY BETEL
& TOURS AND SIDEW...
INFO:src.rag_query:Result 2: Relevance: -220.2726, Source: singapore_explorer_guide_text.pdf, Page: 9, Type: text
INFO:src.rag_query:Content preview: / 09 - ACTIVITIES TO EXPLORE IN SINGAPORE /

If you need company while exploring

⏱️ Query took 18.47 seconds (0.31 minutes)


In [20]:
# takes about 2 mins to generate
result2 = rag_engine.query("You are a personal travel agent.  Your client is looking for a travel itinerary for six day trip to Singapore.  Your client is an active couple with no children, and is interested in outdoor and cultural activities during the day and good food for dinner at night followed by a trip to the bar or club.  Please generate a six day itinerary for your client with a rest day on Day 3.")

INFO:src.rag_query:Query: You are a personal travel agent.  Your client is looking for a travel itinerary for six day trip to Singapore.  Your client is an active couple with no children, and is interested in outdoor and cultural activities during the day and good food for dinner at night followed by a trip to the bar or club.  Please generate a six day itinerary for your client with a rest day on Day 3.
INFO:src.chroma_manager:Embedding query returned 3 results
INFO:src.rag_query:Found 3 relevant chunks
INFO:src.rag_query:Result 1: Relevance: -216.5031, Source: singapore_explorer_guide_image.pdf, Page: 9, Type: text
INFO:src.rag_query:Content preview: ACTIVITIES TO
EXPLORE IN SINGAPORE

BOX BACKPACKERS HOSTEL

1 VESPA TOURS BY BETEL
& TOURS AND SIDEW...
INFO:src.rag_query:Result 2: Relevance: -224.7441, Source: singapore_explorer_guide_text.pdf, Page: 9, Type: text
INFO:src.rag_query:Content preview: / 09 - ACTIVITIES TO EXPLORE IN SINGAPORE /

If you need company while exploring

ACT

In [22]:
# takes about 2 mins to generate
result4 = rag_engine.query("You are a personal travel agent.  Your client a couple, husband and wife (both 65+), are visiting Singapore for 4 days. They're interested in heritage sites, museums, and cultural districts like Chinatown and Little India. Please generate an itinerary with one relaxing day to rest in the middle.")

INFO:src.rag_query:Query: You are a personal travel agent.  Your client a couple, husband and wife (both 65+), are visiting Singapore for 4 days. They're interested in heritage sites, museums, and cultural districts like Chinatown and Little India. Please generate an itinerary with one relaxing day to rest in the middle.
INFO:src.chroma_manager:Embedding query returned 3 results
INFO:src.rag_query:Found 3 relevant chunks
INFO:src.rag_query:Result 1: Relevance: -198.7402, Source: mixed_singapore_explorer_guide_text1, Page: 2, Type: text
INFO:src.rag_query:Content preview: 1/02 - CONTENTS & A QUICK GUIDE TO SINGAPORE'S DISTRICTS /

A QUICK GUIDE

ORCHARD

Don't just sight...
INFO:src.rag_query:Result 2: Relevance: -202.4837, Source: singapore_explorer_guide_text.pdf, Page: 2, Type: text
INFO:src.rag_query:Content preview: / 02 - CONTENTS & A QUICK GUIDE TO SINGAPORE'S DISTRICTS /

A QUICK GUIDE

ORCHARD

Don't just sight...
INFO:src.rag_query:Result 3: Relevance: -203.8227, Source: singa

## Part 6: Generate RAG Query for Image

In [None]:
# We need both the Text Query and the Image Query Engines

# Create an actual instance of DocumentProcessor
from src.document_processor import DocumentProcessor
document_processor = DocumentProcessor()  # This creates an instance

import importlib
import src.rag_query
from src.rag_query import RAGQueryEngine

# Initialize the Text Query Engine with the document_processor instance
rag_engine = RAGQueryEngine(
    embedding_manager=embedding_manager,
    chroma_manager=chroma_manager,
    ollama_manager=ollama_manager,
    document_processor=document_processor,  # Pass the actual instance
    default_results=3
)

In [None]:
# Import the ImageQueryHelper, instantiate an instance of the class

from src.image_query import ImageQueryHelper

# Initialize the image query helper
image_helper = ImageQueryHelper(
    rag_engine=rag_engine,
    chroma_manager=chroma_manager,
    embedding_manager=embedding_manager
)

In [None]:
# Import the ImageResultsManager,  instantiate an instance of the class

from src.image_results_manager import ImageResultsManager

# Now create the manager with updated code
image_results_manager = ImageResultsManager()


In [None]:
# Query ChromaDB directly with page filter
results = chroma_manager.query(
    query_text="tourist attractions",
    n_results=3,
    where={"page": 3}  # Filter to page 3 only
)
# Then format for display

In [None]:
# Create a dictionary to store image query results
## This is an image query that returns and displays images 
## May take more than 12 mins to run

image_result_dict = {}

# Example 1: Find general images for photography
print("\n=== Finding Photographer images related to Singapore Tourism ===")
image_result1 = image_helper.find_images_by_query("Four photography hobbyists want the most photogenic locations - architecture, nature, street scenes, and cultural sites. Show the top three images of scenic spots and sites.", n_results=3)
image_helper.display_image_results(image_result1)  
image_result_dict['singapore_photography_sights_query'] = image_result1

# Example 2: Find nature and garden images 
print("\n=== Finding nature and garden images ===")
image_result2 = image_helper.find_images_by_query("A couple passionate about botany and wildlife want to explore Singapore Botanic Garden, nature reserves, bird watching spots, and conservation areas. Show three images of nature attractions and gardens", n_results=3)
image_helper.display_image_results(image_result2)  
image_result_dict['singapore_nature_gardens_query'] = image_result2

# Example 3: Find by art and creative attractions 
print("\n=== Finding art and creative attractions ===")
image_result3 = image_helper.find_images_by_query("Three art teachers visiting Singapore are interested in National Gallery, contemporary art spaces, creative districts, and hands-on art experiences. Show three images of art venues and creative attractions.", n_results=3)
image_helper.display_image_results(image_result3)  
image_result_dict['singapore_art_creative_query'] = image_result3


In [None]:
print(len(image_result_dict))

In [None]:
# See all source files in your collection
for result in image_result1['results']:
    print(f"Stored as: {result['source_file']}")

In [None]:

# Display summary of all results
print("\n" + "="*100)
print("=== COMPREHENSIVE RESULTS SUMMARY ===")
print("="*100)
image_helper.display_image_result_dict(image_result_dict)

# Add results to manager and save - SIMPLE VERSION
print("\n=== SAVING RESULTS ===")
image_results_manager.add_results(image_result_dict)

# Save results to a file
output_file = image_results_manager.save_results("singapore_explorer_image2_QAP.json")
print(f"Results saved to: {output_file}")

# Optional: Show summary
summary = image_results_manager.get_results_summary()
print(f"Summary: {summary}")

# Optional: List all saved files
saved_files = image_results_manager.list_saved_files()
print(f"Available files: {saved_files}")

## Part 7: generate text answers to a text query of image.  It returns text descriptions of the images being queried, but no image.

In [None]:
image_results = image_helper.find_images_by_query("Gardens by the Bay", n_results=3)
print("Results found:")
for i, result in enumerate(image_results['results']):
    print(f"  {i+1}. Source: {result['source_file']}, Page: {result['page']}")
    print(f"     Description: {result['description'][:200]}...")

In [None]:
## There is room for improvement through prompt engineering.  General enquiries where the OCR has not read text similar to prompt will return very generic responses. Specific queries can return more detailed responses. 

## Page numbers where images are being queried cannot be specified because image search stores vector embeddings of image without page numbers. 

## Several images on one page can only be "parsed" manually by hand if User only wants to return that image and not the whole page.

In [None]:
whos