## This notebook contains the logic to receive the text published to the NATS stream TEXT_STREAM by the OCR model.

In [1]:
import asyncio
import nats
from nats.errors import TimeoutError

In [None]:
# nc = await nats.connect("localhost")

# # Create JetStream context.
# js = nc.jetstream()

In [None]:
import asyncio
import nats
import json
import logging
import os
import nest_asyncio
from datetime import datetime

# Apply nest_asyncio to allow nested asyncio loops in Jupyter Notebook
nest_asyncio.apply()

def _header_filename(headers, default="unknown"):
    """Return the ``file-name`` header as a string, or *default* if it is absent."""
    if not headers:
        return default
    value = headers.get("file-name", default)
    # Tolerate list-valued headers: use the first entry, or the default if empty.
    if isinstance(value, (list, tuple)):
        value = value[0] if value else default
    return str(value)


def _parse_content(text):
    """Return *text* parsed as JSON when it is valid JSON, otherwise the raw string."""
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        return text


async def read_messages_from_text_stream(nats_url, stream_name, subject, output_dir="./message_logs"):
    """
    Subscribe to a JetStream subject, print each message, and save it to disk as JSON.

    Each message is written to its own file in ``output_dir``. The file name is
    built from the sanitized ``file-name`` header plus a microsecond-resolution
    timestamp, so messages arriving within the same second do not overwrite
    each other. A message is acknowledged only after its file has been written;
    if processing fails, the error is logged and the message is nak'd.

    The coroutine runs until it is cancelled or interrupted. Errors raised while
    connecting or subscribing are logged rather than propagated, and the NATS
    connection is closed on exit.

    Args:
        nats_url (str): URL of the NATS server
        stream_name (str): Name of the JetStream stream to subscribe to
        subject (str): Subject to subscribe to
        output_dir (str): Directory to save message JSON files
    """
    # Bound up front so the cleanup in ``finally`` never needs to inspect locals().
    nc = None
    try:
        # Create output directory if it doesn't exist
        os.makedirs(output_dir, exist_ok=True)

        nc = await nats.connect(nats_url)
        js = nc.jetstream()

        async def cb(msg):
            try:
                decoded_text = msg.data.decode()
                filename = _header_filename(msg.headers)

                # Print message details
                print(f"Received message: {decoded_text}\n")
                print(f"Received text from image data with filename: {filename}")
                print("*" * 50)

                # One clock reading feeds both the stored timestamp and the
                # file name, so the two always agree.
                received_at = datetime.now()
                message_data = {
                    "timestamp": received_at.isoformat(),
                    "filename": filename,
                    # Stored as parsed JSON when possible, otherwise as a string.
                    "content": _parse_content(decoded_text),
                }

                # Add headers to the saved data if available
                if msg.headers:
                    message_data["headers"] = dict(msg.headers)

                # %f (microseconds) keeps names unique for bursts of messages
                # that share a file-name within the same second.
                stamp = received_at.strftime("%Y%m%d_%H%M%S_%f")
                safe_filename = "".join(c if c.isalnum() else "_" for c in filename)
                json_path = os.path.join(output_dir, f"{safe_filename}_{stamp}.json")

                with open(json_path, 'w', encoding='utf-8') as f:
                    json.dump(message_data, f, indent=2, ensure_ascii=False)

                print(f"Message saved to: {json_path}")

                # Acknowledge only after the message has been persisted
                await msg.ack()

            except Exception as e:
                logging.error(f"Error processing message: {e}")
                await msg.nak()

        await js.subscribe(subject, stream=stream_name, cb=cb)
        print(f"Subscribed to {subject} in stream {stream_name}")
        print(f"Saving messages to directory: {os.path.abspath(output_dir)}")

        # Keep the subscriber running until interrupted
        await asyncio.Future()

    except Exception as e:
        logging.error(f"Error connecting to NATS: {e}")
    finally:
        if nc:
            await nc.close()

# Example configuration (replace with your NATS URL, stream, and subject)
nats_url = "nats://localhost:4222"
stream_name = "TEXT_STREAM"
subject = "text.results"
output_dir = "./message_logs"  # Directory where messages will be saved

# Start the subscriber from the notebook. The coroutine awaits a Future that
# never completes, so this call blocks until the cell is interrupted.
asyncio.run(read_messages_from_text_stream(nats_url, stream_name, subject, output_dir))

Subscribed to text.results in stream TEXT_STREAM
Saving messages to directory: /mnt/d/repos2/nats/message_logs
Received message: {"version": "1.0", "batchId": "535ad126-c4be-4fa0-965c-973bdfa3eac0", "source": {"uri": "file:///mnt/d/repos2/nats/docs/base64.html", "mimeType": "text/html", "size": 2147977}, "outcome": {"success": true, "texts": [{"language": "en", "text": "Senior Python Developer \nNLP Logix Jacksonville,FL,USA \nBenefits Offered \n401K,Dental, Life,Medical, Vision \nEmployment Type \nFull-Time \nWhy Work Here? \n\"Working for NLP LOGlx will give you a unique opportunity to learn and play an integral role building new and \nexciting products.\" \nSeeking Senior Software Developer for a cutting-edge computer vision product company, \nScribe Fusion (www.scribefusion.com), a division of NLP Logix.The ideal candidate will be \ncomfortable working with multiple technology stacks and operating systems. Responsibilities \ninclude developing both back-end and front-end components

# Print-Only Variant (messages are printed, not saved to disk)

In [None]:
import asyncio
import nats
import json
import logging
import nest_asyncio

# Apply nest_asyncio to allow nested asyncio loops in Jupyter Notebook
nest_asyncio.apply()

async def read_messages_from_text_stream(nats_url, stream_name, subject):
    """
    Subscribe to a JetStream subject and print every message received.

    Each message is decoded, printed together with its ``file-name`` header
    ("unknown" when the header or all headers are missing) and then
    acknowledged. If processing fails, the error is logged and the message is
    nak'd.

    The coroutine runs until it is cancelled or interrupted. Errors raised while
    connecting or subscribing are logged rather than propagated, and the NATS
    connection is closed on exit.

    Args:
        nats_url (str): URL of the NATS server
        stream_name (str): Name of the JetStream stream to subscribe to
        subject (str): Subject to subscribe to
    """
    # Bound up front so the cleanup in ``finally`` never needs to inspect locals().
    nc = None
    try:
        nc = await nats.connect(nats_url)
        js = nc.jetstream()

        async def cb(msg):
            try:
                decoded_text = msg.data.decode()

                # Guard against messages without headers: calling .get() on a
                # missing header mapping would raise, the message would be
                # nak'd, and it would then be redelivered and fail again.
                filename = msg.headers.get("file-name", "unknown") if msg.headers else "unknown"
                # Unwrap list-valued headers so the printed name is a plain string.
                if isinstance(filename, list) and filename:
                    filename = filename[0]

                print(f"Received message: {decoded_text}\n")
                print(f"Received text from image data with filename: {filename}")
                print("*" * 50)
                await msg.ack()  # Acknowledge the message

            except Exception as e:
                logging.error(f"Error processing message: {e}")
                await msg.nak()

        await js.subscribe(subject, stream=stream_name, cb=cb)
        print(f"Subscribed to {subject} in stream {stream_name}")

        # Keep the subscriber running until interrupted
        await asyncio.Future()

    except Exception as e:
        logging.error(f"Error connecting to NATS: {e}")
    finally:
        if nc:
            await nc.close()

# Example configuration (replace with your NATS URL, stream, and subject)
nats_url = "nats://localhost:4222"
stream_name = "TEXT_STREAM"
subject = "text.results"

# Start the print-only subscriber from the notebook. The coroutine awaits a
# Future that never completes, so this call blocks until the cell is interrupted.
asyncio.run(read_messages_from_text_stream(nats_url, stream_name, subject))

In [None]:


# Fetch and ack messagess from consumer.
for i in range(0, 10):
    msgs = await psub.fetch(1)
    for msg in msgs:
        await msg.ack()
        print(msg.data)

In [None]:
# Request a single message and keep the returned list in `msgs` for the cells below.
# NOTE(review): `psub` is not defined in the visible cells; presumably a
# JetStream pull subscription created elsewhere. Confirm before running.
msgs = await psub.fetch(1)

In [None]:
# Inspect the headers of the first fetched message (shown as the cell's output).
msgs[0].headers

In [None]:
# Acknowledge every fetched message. `msg` and `data` stay bound after the
# loop, so later cells see the last message and its raw payload.
for msg in msgs:
    await msg.ack()
    data = msg.data

In [None]:
# Inspect the header data of `msg`, i.e. whichever message an earlier cell bound last.
# NOTE(review): other cells read `msg.headers`; confirm `header` is a valid
# attribute on the message object.
msg.header

In [None]:
import os

# Write the raw payload captured in `data` (bound by an earlier cell) to disk.
# Create the target directory first so the open() does not fail when it is missing.
os.makedirs("output", exist_ok=True)
with open("output/image1.jpg", "wb") as f:
    # The with-block closes the file on exit, so no explicit close is needed.
    # The original `f.close` lacked parentheses and did nothing.
    f.write(data)