In [1]:
### docker run -p 4222:4222 nats -js
import asyncio
import nats
from nats.errors import ConnectionClosedError, TimeoutError, NoServersError

import os
import glob
import json
import uuid
import nats
import mimetypes

In [2]:
# nc = await nats.connect("localhost")
# print("Maximum payload is %d bytes" % nc.max_payload)

In [3]:


async def publish_images_from_folder(nats_url, stream_name, subject, input_folder):
    """
    Publish one JSON metadata message per file in a folder to NATS JetStream.

    No binary content is sent. For every regular file directly inside
    ``input_folder`` (sub-directories are skipped, and the scan is not
    recursive) a message is built containing a ``file://`` URI of the
    absolute path, the guessed MIME type, the file size, fixed OCR options
    and freshly generated identifiers. Note that files of any type are
    published, not only images.

    Args:
        nats_url: Server URL handed to ``nats.connect``.
        stream_name: Stream name passed as ``stream=`` to ``js.publish``.
        subject: Subject every message is published to.
        input_folder: Directory whose entries are scanned.

    Returns:
        None. This is best effort: any exception (connection failure,
        publish failure, file-system error) is caught and reported with
        ``print`` instead of being raised, and it stops the remaining files
        from being processed.
    """
    # Bind before the try block: if the connect call itself fails there is no
    # client to close, and the finally clause must not raise UnboundLocalError
    # on top of the error that was just reported.
    nc = None
    try:
        nc = await nats.connect(nats_url)
        js = nc.jetstream()

        # glob returns matches in arbitrary order; sort so that the publish
        # order is reproducible between runs.
        for file_path in sorted(glob.glob(os.path.join(input_folder, "*"))):
            if not os.path.isfile(file_path):
                continue

            filename = os.path.basename(file_path)
            file_size = os.path.getsize(file_path)

            # Guess the MIME type from the file name; fall back to a generic
            # binary type when it cannot be determined.
            mime_type, _ = mimetypes.guess_type(file_path)
            if not mime_type:
                mime_type = "application/octet-stream"

            # The URI is built from the absolute path so it does not depend
            # on the consumer's working directory.
            absolute_path = os.path.abspath(file_path)

            message = {
                "version": "1.0",
                # NOTE(review): a new batchId is generated for every file,
                # not once per call -- confirm this is what consumers expect.
                "batchId": str(uuid.uuid4()),
                "source": {
                    "uri": f"file://{absolute_path}",
                    "mimeType": mime_type,
                    "size": file_size
                },
                "ocrOptions": {
                    "languages": ["en"],
                    "preProcessing": ["resize", "2x-zoom", "rgb"]
                },
                "state": {
                    # First 20 characters of a UUID with dashes replaced by
                    # underscores.
                    "fileId": str(uuid.uuid4()).replace("-", "_")[:20],
                    "scanId": "sony-doc"
                }
            }

            # NATS payloads are bytes: serialise to JSON, then encode.
            message_data = json.dumps(message).encode()

            await js.publish(subject, message_data, stream=stream_name, headers={"file-name": filename})
            print(f"Published metadata for '{filename}' to {subject} in stream {stream_name}")
    except Exception as e:
        print(f"Error publishing metadata: {e}")
    finally:
        # Only close a connection that was actually established.
        if nc is not None:
            await nc.close()


In [4]:
# --- NATS / JetStream target ---
nats_url = "nats://localhost:4222"
stream_name = "IMAGE_STREAM"
subject = "images.process"

# --- Local folders ---
# Replace with your file path.
input_folder = "docs_local"
# Directory where files will be saved.
output_folder = "output"




In [5]:
# Show the kernel's working directory: input_folder is a relative path, so it
# is resolved against this location.
os.getcwd()

'/mnt/d/repos2/nats'

In [6]:

await publish_images_from_folder(nats_url, stream_name, subject, input_folder)
        

Published metadata for 'base64.html' to images.process in stream IMAGE_STREAM
Published metadata for 'contribution_form_Juan_23082023.pdf' to images.process in stream IMAGE_STREAM
Published metadata for 'image_0.png' to images.process in stream IMAGE_STREAM
Published metadata for 'image_1.png' to images.process in stream IMAGE_STREAM
Published metadata for 'image_2.png' to images.process in stream IMAGE_STREAM
Published metadata for 'pict001.jpg' to images.process in stream IMAGE_STREAM
Published metadata for 'pict002.jpg' to images.process in stream IMAGE_STREAM
Published metadata for 'pict003.jpg' to images.process in stream IMAGE_STREAM
Published metadata for 'ppt_test.ppt' to images.process in stream IMAGE_STREAM
Published metadata for 'rtf.html' to images.process in stream IMAGE_STREAM
Published metadata for 'senior_python_developer_nlplogix1_sm.gif' to images.process in stream IMAGE_STREAM
Published metadata for 'senior_python_developer_nlplogix1_sm.jpg' to images.process in stre