In [2]:
import json
import os
import sys
import time
from io import BytesIO

from avro.io import DatumReader, BinaryDecoder
from avro.schema import parse
from kafka import KafkaConsumer

# Make the project-local transaction-metadata package importable.
# The location can be overridden with the TRANSACTION_METADATA_PATH environment
# variable so the notebook is not tied to one machine; when the variable is unset
# or empty, the original developer-machine path is used as the fallback.
transaction_metadata_path = (
    os.environ.get("TRANSACTION_METADATA_PATH")
    or "/Users/manojitroy/flink-practice/transaction-project/transaction-metadata"
)
# Prepend only when absent so re-running the cell does not grow sys.path.
if transaction_metadata_path not in sys.path:
    sys.path.insert(0, transaction_metadata_path)

from transaction_metadata.loader import get_schema

In [3]:
# Load the "transaction" schema at version "v1" through the project-local loader, then
# serialize it to JSON text and hand that text to `parse` to obtain the Avro schema object
# stored in `schema`.
# NOTE(review): assumes `get_schema` returns a JSON-serializable object (e.g. a dict) — confirm.
transaction_schema = get_schema("transaction", version="v1")
schema = parse(json.dumps(transaction_schema))
print("Schema loaded successfully.")

Schema loaded successfully.


In [4]:
def decode_avro(binary_message: bytes, avro_schema):
    """Turn a raw message payload into a Python object.

    The payload is first read as Avro binary against `avro_schema`. If that
    attempt raises anything, the payload is decoded as UTF-8 and parsed as
    JSON instead.

    Args:
        binary_message: Raw payload bytes, or None.
        avro_schema: Parsed Avro schema object passed to `DatumReader`.

    Returns:
        The decoded value, or None when `binary_message` is None or when both
        the Avro and the JSON attempt fail (a diagnostic line carrying the
        Avro error is printed in the latter case).
    """
    if binary_message is None:
        return None

    try:
        avro_decoder = BinaryDecoder(BytesIO(binary_message))
        return DatumReader(avro_schema).read(avro_decoder)
    except Exception as avro_error:
        # Best-effort fallback: the payload may be plain JSON rather than Avro.
        try:
            return json.loads(binary_message.decode('utf-8'))
        except Exception:
            print('Failed to decode message as Avro or JSON:', avro_error)
            return None

In [5]:
# Create a consumer for the "transactions" topic whose message values are passed
# through `decode_avro` with the parsed `schema` before being handed to the caller.
consumer = KafkaConsumer(
    "transactions",
    bootstrap_servers="localhost:9092",
    security_protocol="PLAINTEXT",
    auto_offset_reset="earliest",
    # No `group_id` is configured, and kafka-python disables offset commits in that
    # case, so asking for auto-commit had no effect. State that explicitly instead
    # of requesting a setting that is silently ignored.
    enable_auto_commit=False,
    consumer_timeout_ms=1000,  # short timeout so notebook cells return
    value_deserializer=lambda m: decode_avro(m, schema),
)

print("Consumer created. You can call `consume_messages()` to fetch and print messages.")

Consumer created. You can call `consume_messages()` to fetch and print messages.


In [6]:
def consume_messages(max_messages=None, timeout_seconds=10, source=None):
    """Iterate over messages, printing a summary and the full payload of each.

    Args:
        max_messages: Stop after this many messages. None means no cap; zero
            or a negative number means nothing is consumed.
        timeout_seconds: Overall time budget in seconds, or None for no
            budget. The deadline is checked after each message is printed, so
            it cannot interrupt a wait for the next message, and one message
            may still be processed when the budget is zero.
        source: Iterable of message objects exposing `partition`, `offset`
            and `value`. Defaults to the module-level `consumer`.

    Returns:
        None. All results are printed.

    A KeyboardInterrupt stops the loop and prints a notice instead of
    propagating. The message source is left open so it can be reused.
    """
    if max_messages is not None and max_messages <= 0:
        return

    messages = consumer if source is None else source
    deadline = None if timeout_seconds is None else time.monotonic() + timeout_seconds
    received = 0
    try:
        for message in messages:
            data = message.value
            print('---')
            print(f'Partition: {message.partition}, Offset: {message.offset}')
            if data is None:
                print('Received empty or undecodable message')
            else:
                # Only dict payloads carry the summary fields; the JSON fallback
                # of the decoder can yield lists or scalars as well.
                if isinstance(data, dict):
                    print('Transaction ID:', data.get('transaction_id'))
                    print('User ID:', data.get('user_id'))
                    print('Amount: ₹', data.get('amount'))
                    print('Location:', data.get('location'))
                # default=str keeps printing alive for values json cannot encode
                # natively (bytes, Decimal, datetime, ...).
                print('Full payload:\n', json.dumps(data, indent=2, default=str))
            received += 1
            if max_messages is not None and received >= max_messages:
                break
            if deadline is not None and time.monotonic() >= deadline:
                print(f'Stopped after reaching the {timeout_seconds}s time limit')
                break
    except KeyboardInterrupt:
        print('Stopped by user')

def close_consumer():
    """Close the module-level `consumer` and print a confirmation line."""
    consumer.close()
    confirmation = 'Consumer closed'
    print(confirmation)

# Example usage (uncomment to run):
# consume_messages(max_messages=5)
# close_consumer()

In [None]:
# Install the lz4 package into the running kernel's environment with the %pip magic.
# NOTE(review): lz4 compression is normally applied to Kafka record batches and undone by
# the Kafka client rather than by the Avro decoder — confirm which layer needs the package.
# NOTE(review): if the Kafka client probes for lz4 only when it is first imported, a kernel
# restart after this install may be required before the codec is picked up — TODO verify.
%pip install --quiet lz4

# Import lz4 in this kernel; this fails fast if the install above did not succeed.
import lz4

# Consume with the default arguments (no cap on the number of messages).
consume_messages()

In [8]:
# Close the module-level consumer directly; `close_consumer()` makes the same call and
# additionally prints a confirmation line.
consumer.close()