In [6]:
import os
import json
import asyncio
import uuid
from pathlib import Path
from datetime import datetime

import nats
from nats.js.api import StreamConfig, RetentionPolicy, DiscardPolicy
from dotenv import load_dotenv
from IPython.display import display, JSON

# Load environment variables from .env file
load_dotenv(os.path.join("keys", ".env"))

# NATS connection settings
NAT_URL = os.getenv("NAT_URL", "nats://localhost:4222")

# Get mode-specific stream and subject settings - use the new separated streams
INPUT_STREAM_TAGGER = os.getenv("INPUT_STREAM_TAGGER", "IMAGE-TASKS-TAGGER")
INPUT_SUBJECT_TAGGER = os.getenv("INPUT_SUBJECT_TAGGER", "tagger.tasks.started.>")

INPUT_STREAM_CAPTIONING = os.getenv("INPUT_STREAM_CAPTIONING", "IMAGE-TASKS-CAPTIONING")
INPUT_SUBJECT_CAPTIONING = os.getenv("INPUT_SUBJECT_CAPTIONING", "caption.tasks.started.>") 

LOCAL_ENV = os.getenv("LOCAL_ENV", "1")

# Display current configuration
print("NATS Image Producer Configuration:")
print(f"NATS URL: {NAT_URL}")
print(f"Tagging Stream: {INPUT_STREAM_TAGGER}")
print(f"Tagging Subject: {INPUT_SUBJECT_TAGGER}")
print(f"Captioning Stream: {INPUT_STREAM_CAPTIONING}")
print(f"Captioning Subject: {INPUT_SUBJECT_CAPTIONING}")
print(f"Local Environment: {LOCAL_ENV}")

# Input directory for images
INPUT_DIR = "images_local"
os.makedirs(INPUT_DIR, exist_ok=True)

NATS Image Consumer Configuration:
NATS URL: nats://localhost:4222
Tagging Stream: IMAGE-RESULTS-TAGGER
Tagging Subject: tagger.results.completed.>
Captioning Stream: IMAGE-RESULTS-CAPTIONING
Captioning Subject: caption.results.completed.>
Local Environment: 1
Output Directory: output_images
  - Tagging Results: output_images/tagging
  - Captioning Results: output_images/captioning


In [2]:
def analyze_image_results(data, mode="auto"):
    """
    Analyze image processing results from a single result

    Args:
        data: The result data to analyze
        mode: Processing mode - "tagging", "captioning", or "auto" (detect)
    """
    if not data:
        print("No data to analyze")
        return

    # Extract the data section from the result
    documents = data.get("data", [])

    if not documents:
        print("No documents found in result")
        return

    # Detect mode if set to auto
    if mode == "auto":
        # Check the first document's content format to determine the mode
        if documents and "source" in documents[0]:
            content = documents[0]["source"].get("content", [])
            # If content is a list of dictionaries with 'label' and 'confidence', it's tagging
            if isinstance(content, list) and content and isinstance(content, list):
                if content and isinstance(content[0], dict) and "label" in content[0] and "confidence" in content[0]:
                    mode = "tagging"
                # If content is a list of strings, it's captioning
                elif content and isinstance(content[0], str):
                    mode = "captioning"
                else:
                    mode = "unknown"

    print(f"Analyzing results in {mode.upper()} mode")

    # Analyze based on detected or specified mode
    if mode == "tagging":
        analyze_tagging_results(documents)
    elif mode == "captioning":
        analyze_captioning_results(documents)
    else:
        print(f"Unknown result format - cannot analyze")


def analyze_tagging_results(documents):
    """Analyze image tagging results"""
    total_documents = len(documents)
    total_tags = 0
    tags_by_confidence = {}

    for document in documents:
        # Extract source information for display
        source_info = document.get("source", {})
        file_name = source_info.get("file_name", "unknown")

        print(f"\nDocument: {document.get('id', 'unknown')}")
        print(f"File: {file_name}")

        # Extract tags/labels
        tags = source_info.get("content", [])
        total_tags += len(tags)

        # Display tags with confidence
        if tags:
            print("\nTags:")
            for tag in tags:
                label = tag.get("label", "unknown")
                confidence = tag.get("confidence", 0)

                # Group tags by confidence level
                confidence_level = round(confidence * 10) / 10  # Round to 1 decimal
                if confidence_level in tags_by_confidence:
                    tags_by_confidence[confidence_level].append(label)
                else:
                    tags_by_confidence[confidence_level] = [label]

                print(f"  - {label}: {confidence:.4f}")
        else:
            print("No tags found for this document")

    print(f"\nSummary:")
    print(f"Total documents analyzed: {total_documents}")
    print(f"Total tags found: {total_tags}")
    print("\nTags by confidence level:")
    for confidence, tags in sorted(tags_by_confidence.items(), key=lambda x: x[0], reverse=True):
        print(f"  Confidence {confidence:.1f}: {len(tags)} tags")
        if confidence >= 0.5:  # Only show high confidence tags in summary
            tag_list = ", ".join(tags)
            print(f"    Tags: {tag_list}")


def analyze_captioning_results(documents):
    """Analyze image captioning results"""
    total_documents = len(documents)

    print(f"\nCAPTIONING RESULTS")
    print(f"Total documents analyzed: {total_documents}")

    for document in documents:
        # Extract source information for display
        source_info = document.get("source", {})
        file_name = source_info.get("file_name", "unknown")

        print(f"\nDocument: {document.get('id', 'unknown')}")
        print(f"File: {file_name}")

        # Extract caption/description
        captions = source_info.get("content", [])

        # Display captions
        if captions:
            print("\nCaption:")
            for caption in captions:
                print(f"  {caption}")
        else:
            print("No caption found for this document")

    print(f"\nSummary:")
    print(f"Processed {total_documents} image(s) with the captioning model")


def visualize_top_tags(data, max_tags=10):
    """Create a simple visualization of top tags from the data"""
    if not data:
        return

    # Extract the data section from the result
    documents = data.get("data", [])

    if not documents:
        return

    # Check if this is tagging data
    if not documents or "source" not in documents[0]:
        print("No tagging data to visualize")
        return

    # Get first document to check content format
    first_doc = documents[0]
    content = first_doc.get("source", {}).get("content", [])

    # Check if this is tagging data
    if not content or not isinstance(content, list) or not isinstance(content[0], dict) or "label" not in content[0]:
        print("This appears to be captioning data, not tagging data - no visualization available")
        return

    # Collect all tags with their confidences
    tag_counts = {}

    for document in documents:
        tags = document.get("source", {}).get("content", [])

        for tag in tags:
            label = tag.get("label", "unknown")
            confidence = tag.get("confidence", 0)

            if label in tag_counts:
                tag_counts[label] = max(tag_counts[label], confidence)  # Keep highest confidence
            else:
                tag_counts[label] = confidence

    # Sort by confidence and get top tags
    top_tags = sorted(tag_counts.items(), key=lambda x: x[1], reverse=True)[:max_tags]

    if not top_tags:
        print("No tags to visualize")
        return

    # Create visualization
    labels = [tag[0] for tag in top_tags]
    confidences = [tag[1] for tag in top_tags]

    plt.figure(figsize=(10, 6))
    plt.barh(labels, confidences, color='skyblue')
    plt.xlabel('Confidence')
    plt.title('Top Tags by Confidence')
    plt.xlim(0, 1.0)
    plt.gca().invert_yaxis()  # Highest confidence at the top
    plt.tight_layout()
    plt.show()

In [3]:
async def publish_images_to_nats(
    folder_path, 
    nats_url=NAT_URL,
    mode="tagging",
    local_env=LOCAL_ENV,
    num_labels=5,
    prompt="OD"
):
    """
    Publish all image files from a folder to the NATS input stream/subject.

    Args:
        folder_path: Path to the folder containing image files to process
        nats_url: The NATS server URL
        mode: Processing mode ("tagging" or "captioning")
        local_env: Local environment flag
        num_labels: Number of labels to return for each image (for tagging)
        prompt: Prompt for Florence-2 model ("OD" or "MORE_DETAILED_CAPTION")
    """
    # Check if folder exists
    if not os.path.exists(folder_path):
        raise FileNotFoundError(f"Folder not found: {folder_path}")

    # Determine the appropriate stream and subject based on mode
    if mode == "tagging":
        input_stream = INPUT_STREAM_TAGGER
        input_subject = INPUT_SUBJECT_TAGGER
    else:  # mode == "captioning"
        input_stream = INPUT_STREAM_CAPTIONING
        input_subject = INPUT_SUBJECT_CAPTIONING

    print(f"Using mode: {mode}, stream: {input_stream}, subject: {input_subject}, prompt: {prompt}")

    # Ensure the appropriate stream exists with the subject
    await ensure_stream_exists(
        nats_url=nats_url,
        stream_name=input_stream,
        subjects=[input_subject]
    )

    # Connect to NATS
    nc = await nats.connect(nats_url)
    js = nc.jetstream()

    # Track files published
    files_published = []

    try:
        # Process each image file in the folder
        for filename in os.listdir(folder_path):
            # Filter for common image file extensions
            if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tiff')):
                file_path = os.path.join(folder_path, filename)
                print(f"Processing image file: {filename}")

                try:
                    # Create a temporary HTTP server URL or file URL
                    # For local testing, we'll use file:// URLs
                    if local_env == "1":
                        # Using absolute path for file:// URLs
                        abs_path = os.path.abspath(file_path)
                        uri = f"file://{abs_path}"
                    else:
                        # In production, this might be an HTTP URL
                        uri = f"file://{file_path}"

                    # Create message payload
                    message = {
                        "source": {
                            "uri": uri,
                            "type": "image"
                        },
                        "state": {
                            "status": "STARTED",
                            "timestamp": datetime.now().isoformat()
                        },
                        "prompt": prompt
                    }

                    # Add num_labels only for tagging mode
                    if mode == "tagging":
                        message["num_labels"] = num_labels

                    # Create message headers with filename
                    headers = {
                        "filename": filename
                    }

                    # Publish message to input subject in the appropriate stream
                    await js.publish(
                        input_subject, 
                        json.dumps(message).encode(), 
                        headers=headers,
                        stream=input_stream
                    )

                    print(f"Published file {filename} to {input_subject} in stream {input_stream}")
                    files_published.append(filename)

                except Exception as e:
                    print(f"Error processing file {filename}: {str(e)}")

    finally:
        # Close NATS connection
        await nc.close()

    return files_published

async def process_folder_with_mode(folder_path, mode="tagging", num_labels=5, prompt=None):
    """
    Publish all image files in the folder to NATS with specified mode

    Args:
        folder_path: Path to folder containing images
        mode: "tagging" or "captioning"
        num_labels: Number of labels for tagging mode
        prompt: Override the default prompt (if None, will use mode-appropriate default)
    """
    # Set default prompt based on mode if not provided
    if prompt is None:
        if mode == "tagging":
            prompt = "OD"
        else:  # mode == "captioning"
            prompt = "MORE_DETAILED_CAPTION"

    print(f"Publishing image files from {folder_path} to NATS using {mode} mode...")
    files_published = await publish_images_to_nats(
        folder_path, 
        mode=mode, 
        num_labels=num_labels, 
        prompt=prompt
    )

    if files_published:
        print(f"\nPublished {len(files_published)} files to NATS")
        print("Files published:")
        for file in files_published:
            print(f"- {file}")
    else:
        print("No files were published to NATS")

    return files_published


async def process_folder_both_modes(folder_path, num_labels=5):
    """
    Publish all image files in the folder to both tagging and captioning streams
    
    Args:
        folder_path: Path to folder containing images
        num_labels: Number of labels for tagging mode
    """
    print(f"Publishing image files from {folder_path} to both tagging and captioning streams...")
    
    # First process with tagging mode
    tagging_files = await process_folder_with_mode(
        folder_path, 
        mode="tagging", 
        num_labels=num_labels, 
        prompt="OD"
    )
    
    # Then process with captioning mode
    captioning_files = await process_folder_with_mode(
        folder_path, 
        mode="captioning", 
        prompt="MORE_DETAILED_CAPTION"
    )
    
    # Combine unique files from both modes
    all_files = list(set(tagging_files + captioning_files))
    
    print(f"\nSummary:")
    print(f"Total files published: {len(all_files)}")
    print(f"Files published to tagging stream: {len(tagging_files)}")
    print(f"Files published to captioning stream: {len(captioning_files)}")
    
    return all_files

In [4]:
# 1. For tagging mode (using dedicated stream)
await process_folder_with_mode(INPUT_DIR, mode="tagging", num_labels=5, prompt="OD")

Publishing image files from images_local to NATS using tagging mode...
Using mode: tagging, stream: IMAGE-TASKS-TAGGER, subject: tagger.tasks.started.>, prompt: OD
Stream 'IMAGE-TASKS-TAGGER' already exists with subjects: ['tagger.tasks.started.>']
Processing image file: 2Persons.jpg
Published file 2Persons.jpg to tagger.tasks.started.> in stream IMAGE-TASKS-TAGGER
Processing image file: baseball.jpg
Published file baseball.jpg to tagger.tasks.started.> in stream IMAGE-TASKS-TAGGER
Processing image file: beach.jpg
Published file beach.jpg to tagger.tasks.started.> in stream IMAGE-TASKS-TAGGER
Processing image file: buildings.jpg
Published file buildings.jpg to tagger.tasks.started.> in stream IMAGE-TASKS-TAGGER
Processing image file: buterfly.jpg
Published file buterfly.jpg to tagger.tasks.started.> in stream IMAGE-TASKS-TAGGER
Processing image file: cow.jpg
Published file cow.jpg to tagger.tasks.started.> in stream IMAGE-TASKS-TAGGER
Processing image file: download.png
Published file 

['2Persons.jpg',
 'baseball.jpg',
 'beach.jpg',
 'buildings.jpg',
 'buterfly.jpg',
 'cow.jpg',
 'download.png',
 'forest.jpg',
 'road.jpg',
 'shrek.jpg',
 'van.jpg',
 'woman.jpg']

In [5]:
# 2. For captioning mode (using dedicated stream)
await process_folder_with_mode(INPUT_DIR, mode="captioning", prompt="MORE_DETAILED_CAPTION")

Publishing image files from images_local to NATS using captioning mode...
Using mode: captioning, stream: IMAGE-TASKS-CAPTIONING, subject: caption.tasks.started.>, prompt: MORE_DETAILED_CAPTION
Stream 'IMAGE-TASKS-CAPTIONING' already exists with subjects: ['caption.tasks.started.>']
Processing image file: 2Persons.jpg
Published file 2Persons.jpg to caption.tasks.started.> in stream IMAGE-TASKS-CAPTIONING
Processing image file: baseball.jpg
Published file baseball.jpg to caption.tasks.started.> in stream IMAGE-TASKS-CAPTIONING
Processing image file: beach.jpg
Published file beach.jpg to caption.tasks.started.> in stream IMAGE-TASKS-CAPTIONING
Processing image file: buildings.jpg
Published file buildings.jpg to caption.tasks.started.> in stream IMAGE-TASKS-CAPTIONING
Processing image file: buterfly.jpg
Published file buterfly.jpg to caption.tasks.started.> in stream IMAGE-TASKS-CAPTIONING
Processing image file: cow.jpg
Published file cow.jpg to caption.tasks.started.> in stream IMAGE-TAS

['2Persons.jpg',
 'baseball.jpg',
 'beach.jpg',
 'buildings.jpg',
 'buterfly.jpg',
 'cow.jpg',
 'download.png',
 'forest.jpg',
 'road.jpg',
 'shrek.jpg',
 'van.jpg',
 'woman.jpg']

In [None]:
# 3. Process folder to both streams simultaneously
await process_folder_both_modes(INPUT_DIR, num_labels=5)