In [8]:
import os
import json
import asyncio
import uuid
from pathlib import Path
from datetime import datetime
import base64
import traceback

import nats
from nats.js.api import StreamConfig, RetentionPolicy, DiscardPolicy
from dotenv import load_dotenv
from IPython.display import display, JSON

# Load environment variables from .env file
load_dotenv(os.path.join("keys", ".env"))

# NATS connection settings - use original env variables with exact capitalization
NAT_URL = os.getenv("NAT_URL", "nats://localhost:4222")
INPUT_STREAM = os.getenv("INPUT_STREAM", "IMAGE-TASKS")
INPUT_SUBJECT = os.getenv("INPUT_SUBJECT", "image.tasks.started.>")
LOCAL_ENV = os.getenv("LOCAL_ENV", "1")

# # Extract the actual subject without wildcards
# if ">" in INPUT_SUBJECT:
#     ACTUAL_SUBJECT = INPUT_SUBJECT.replace(">", "")
# else:
#     ACTUAL_SUBJECT = INPUT_SUBJECT
ACTUAL_SUBJECT = INPUT_SUBJECT
# Input/Output directories
INPUT_DIR = "images"  # Directory containing images to process
OUTPUT_DIR = "output_images"  # Directory for results
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Display current configuration
print("NATS Image Tagger Producer Configuration:")
print(f"NATS URL: {NAT_URL}")
print(f"Input Stream: {INPUT_STREAM}")
print(f"Input Subject from env: {INPUT_SUBJECT}")
print(f"Actual subject for publishing: {ACTUAL_SUBJECT}")
print(f"Local Environment: {LOCAL_ENV}")
print(f"Input Directory: {INPUT_DIR}")
print(f"Output Directory: {OUTPUT_DIR}")

NATS Image Tagger Producer Configuration:
NATS URL: nats://localhost:4222
Input Stream: IMAGE-TASKS
Input Subject from env: image.tasks.started.>
Actual subject for publishing: image.tasks.started.>
Local Environment: 1
Input Directory: images
Output Directory: output_images


In [9]:
async def list_all_streams(nats_url=NAT_URL):
    """List all streams in the NATS server"""
    nc = await nats.connect(nats_url)
    js = nc.jetstream()
    
    try:
        # Get stream info
        streams = await js.streams_info()
        print("\nStreams available on NATS server:")
        
        for stream in streams:
            print(f"- Name: {stream.config.name}")
            print(f"  Subjects: {stream.config.subjects}")
            if hasattr(stream.state, 'messages'):
                print(f"  Messages: {stream.state.messages}")
            print("")
            
        return streams
    except Exception as e:
        print(f"Error listing streams: {e}")
        traceback.print_exc()
        return []
    finally:
        await nc.close()


def is_image_file(filename):
    """Check if a file is an image based on its extension"""
    image_extensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']
    return any(filename.lower().endswith(ext) for ext in image_extensions)


def encode_image_to_base64(image_path):
    """Encode an image file to base64"""
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')

In [10]:
async def publish_using_core_nats(
    nc, 
    subject, 
    message_data,
    headers=None
):
    """Publish a message using core NATS (not JetStream) as a fallback"""
    try:
        if headers:
            await nc.publish(subject, json.dumps(message_data).encode(), headers=headers)
        else:
            await nc.publish(subject, json.dumps(message_data).encode())
        print(f"Published using core NATS to subject: {subject}")
        return True
    except Exception as e:
        print(f"Error publishing with core NATS: {e}")
        traceback.print_exc()
        return False

In [11]:
async def publish_images_to_nats(
    folder_path, 
    nats_url=NAT_URL,
    publish_subject=ACTUAL_SUBJECT,  # Use the processed subject without wildcards
    local_env=LOCAL_ENV,
    num_labels=5,  # Number of labels to request per image
    prompt="OD"  # Default prompt for Florence-2
):
    """
    Publish all image files from a folder to the NATS server.
    
    Args:
        folder_path: Path to the folder containing image files to process
        nats_url: The NATS server URL
        publish_subject: The subject to publish to (without wildcards)
        local_env: Local environment flag
        num_labels: Number of image tags to request
        prompt: Prompt for Florence-2 model (if used)
    """
    # Check if folder exists
    if not os.path.exists(folder_path):
        raise FileNotFoundError(f"Folder not found: {folder_path}")
    
    # List existing streams for debugging
    await list_all_streams(nats_url)
    
    # Connect to NATS
    nc = await nats.connect(nats_url)
    
    # Track files published
    files_published = []
    
    try:
        # Get list of image files
        image_files = [f for f in os.listdir(folder_path) if is_image_file(f)]
        if not image_files:
            print(f"No image files found in {folder_path}")
            return []
        
        print(f"Found {len(image_files)} image files to process")
        
        # Process each image file in the folder
        for filename in image_files:
            file_path = os.path.join(folder_path, filename)
            print(f"Processing image: {filename}")
            
            try:
                # Get file extension and prepare image data
                file_ext = filename.split('.')[-1].lower()
                image_base64 = encode_image_to_base64(file_path)
                document_id = str(uuid.uuid4())
                
                # For image tagging, we'll structure the documents list with expected format for process_request
                # The backend expects a specific structure with "source" containing file_type, file_name, and content
                image_documents = [{
                    "id": document_id,
                    "source": {
                        "file_type": file_ext,
                        "file_name": filename,
                        "content": image_base64,
                        "uri": f"file://{file_path}"
                    }
                }]
                
                # Create message payload
                message = {
                    "documents": image_documents,
                    "num_labels": num_labels,
                    "prompt": prompt
                }
                
                # Create message headers - ONLY include filename, no other headers
                headers = {
                    "filename": filename
                }
                
                # First try using direct publish without JetStream
                print(f"Attempting to publish to subject: {publish_subject} using direct publish")
                await nc.publish(
                    publish_subject, 
                    json.dumps(message).encode(), 
                    headers=headers
                )
                print(f"Published image {filename} to {publish_subject}")
                files_published.append(filename)
                
            except Exception as e:
                print(f"Error processing image {filename}: {str(e)}")
                traceback.print_exc()
                continue
        
    finally:
        # Close NATS connection
        await nc.close()
    
    return files_published

In [12]:
async def process_image_folder(
    folder_path=INPUT_DIR, 
    num_labels=5,
    prompt="OD"
):
    """Publish all image files in the folder to NATS"""
    print(f"Publishing images from {folder_path} to NATS...")
    print(f"Requesting {num_labels} labels per image with prompt: '{prompt}'")
    
    files_published = await publish_images_to_nats(
        folder_path,
        publish_subject=ACTUAL_SUBJECT,
        num_labels=num_labels,
        prompt=prompt
    )
    
    if files_published:
        print(f"\nPublished {len(files_published)} images to NATS")
        print("Images published:")
        for file in files_published:
            print(f"- {file}")
    else:
        print("No images were published to NATS")
    
    return files_published

In [13]:
# Execute the main function to publish images
# You can customize these parameters as needed
folder_path = INPUT_DIR
num_labels = 5  # Number of tags to request per image
prompt = "OD"  # Using default prompt for Florence-2


In [14]:
# Run the async function
files_published = await process_image_folder(
    folder_path=folder_path,
    num_labels=num_labels,
    prompt=prompt
)

Publishing images from images to NATS...
Requesting 5 labels per image with prompt: 'OD'

Streams available on NATS server:
- Name: IMAGE-RESULTS
  Subjects: ['IMAGE.RESULTS.COMPLETED.>', 'image.results.completed.>', 'IMAGE.results.completed.>']
  Messages: 1

- Name: IMAGE-TASKS
  Subjects: ['image.tasks.started.>']
  Messages: 0

- Name: IMAGE_STREAM
  Subjects: ['images.process']
  Messages: 0

- Name: TEXT_STREAM
  Subjects: ['text.results']
  Messages: 0

Found 12 image files to process
Processing image: 2Persons.jpg
Attempting to publish to subject: image.tasks.started.> using direct publish
Published image 2Persons.jpg to image.tasks.started.>
Processing image: baseball.jpg
Attempting to publish to subject: image.tasks.started.> using direct publish
Published image baseball.jpg to image.tasks.started.>
Processing image: beach.jpg
Attempting to publish to subject: image.tasks.started.> using direct publish
Published image beach.jpg to image.tasks.started.>
Processing image: build

Traceback (most recent call last):
  File "C:\Users\Administrator\AppData\Local\Temp\ipykernel_18208\315663737.py", line 79, in publish_images_to_nats
    await nc.publish(
  File "C:\Users\Administrator\AppData\Roaming\Python\Python311\site-packages\nats\aio\client.py", line 910, in publish
    raise errors.MaxPayloadError
nats.errors.MaxPayloadError: nats: maximum payload exceeded
Traceback (most recent call last):
  File "C:\Users\Administrator\AppData\Local\Temp\ipykernel_18208\315663737.py", line 79, in publish_images_to_nats
    await nc.publish(
  File "C:\Users\Administrator\AppData\Roaming\Python\Python311\site-packages\nats\aio\client.py", line 910, in publish
    raise errors.MaxPayloadError
nats.errors.MaxPayloadError: nats: maximum payload exceeded
Traceback (most recent call last):
  File "C:\Users\Administrator\AppData\Local\Temp\ipykernel_18208\315663737.py", line 79, in publish_images_to_nats
    await nc.publish(
  File "C:\Users\Administrator\AppData\Roaming\Python\P