In [None]:
! pip install confluent_kafka

Collecting confluent_kafka
  Downloading confluent_kafka-2.9.0-cp311-cp311-manylinux_2_28_aarch64.whl.metadata (22 kB)
Downloading confluent_kafka-2.9.0-cp311-cp311-manylinux_2_28_aarch64.whl (15.2 MB)
[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━[0m [32m10.5/15.2 MB[0m [31m9.8 MB/s[0m eta [36m0:00:01[0m

In [3]:
import sys
import os
import struct
import random
import time
import requests
from confluent_kafka import Producer
from google.protobuf.internal.encoder import _VarintBytes

# Make the generated protobuf module importable.
# search_result_pb2.py is expected to sit in the current working directory;
# that directory is appended to sys.path so the imports below can resolve it.
proto_dir = os.getcwd()  # Or specify the absolute path if needed
sys.path.append(proto_dir)
# Kept below the sys.path.append so the module can be found via proto_dir.
import search_result_pb2
from search_result_pb2 import SearchRequest

# --- Kafka and Schema Registry endpoints ---
KAFKA_BROKER = "redpanda:9092"
SCHEMA_REGISTRY_URL = "http://redpanda:8081"
TOPIC_NAME = "search_requests_with_schema"

# Schema Registry subject for this topic's message values: "<topic>-value"
schema_subject = TOPIC_NAME + "-value"

# Request body sent to the Schema Registry when registering the
# SearchRequest protobuf schema
schema_payload = dict(
    schemaType="PROTOBUF",
    schema="""
        syntax = "proto3";
        message SearchRequest {
            string query = 1;
            int32 page_number = 2;
            int32 results_per_page = 3;
        }
    """,
)

# Kafka producer used to publish the serialized events
producer_settings = {
    "bootstrap.servers": KAFKA_BROKER,
    "client.id": "protobuf-producer",
}
producer = Producer(producer_settings)

# Resolve the schema ID used to frame every message.
#
# 1. Ask the Schema Registry for the latest version registered under the subject.
# 2. If that lookup does not return 200, register `schema_payload` instead.
#
# NOTE: when a schema already exists, its ID is used as-is; it is not compared
# against `schema_payload`.

# `requests` waits indefinitely unless a timeout is given, which would hang
# the cell if the registry is unreachable.
REGISTRY_TIMEOUT_S = 10

response = requests.get(
    f"{SCHEMA_REGISTRY_URL}/subjects/{schema_subject}/versions/latest",
    timeout=REGISTRY_TIMEOUT_S,
)

if response.status_code == 200:
    schema_id = response.json()["id"]
    print(f"Schema already registered with ID: {schema_id}")
else:
    # Any non-200 lookup result falls through to registration
    reg_response = requests.post(
        f"{SCHEMA_REGISTRY_URL}/subjects/{schema_subject}/versions",
        json=schema_payload,
        timeout=REGISTRY_TIMEOUT_S,
    )

    if reg_response.status_code == 200:
        schema_id = reg_response.json()["id"]
        print(f"Schema registered with ID: {schema_id}")
    else:
        # Without a schema ID no message can be framed, so stop here
        print(f"Failed to register schema: {reg_response.text}")
        sys.exit(1)

# Build one random SearchRequest event, framed with the schema header
def generate_serialized_event():
    """Create a random SearchRequest and prefix it with the schema framing.

    Reads the module-level ``schema_id``.

    Returns:
        bytes: magic byte ``0x00``, the schema ID as a 4-byte big-endian
        unsigned integer, the varint-encoded message index, and then the
        serialized protobuf payload.
    """
    # Draw the random fields in a fixed order: query, page number, page size
    query_term = random.choice(["kafka", "redpanda", "protobuf", "streaming"])
    page = random.randint(1, 10)
    page_size = random.randint(10, 50)

    payload = SearchRequest(
        query=query_term,
        page_number=page,
        results_per_page=page_size,
    ).SerializeToString()

    # Index 0: assumes SearchRequest is the first message in the schema
    index_bytes = b"".join(map(_VarintBytes, [0]))

    # Header: magic byte 0 followed by the big-endian 32-bit schema ID
    header = struct.pack(">bI", 0, schema_id)

    return header + index_bytes + payload

# Callback passed to producer.produce() to report each message's outcome
def delivery_report(err, msg):
    """Print whether a produced message was delivered.

    Args:
        err: Truthy when delivery failed; included in the failure line.
        msg: The produced message; its ``topic()`` and ``partition()`` are
            printed on success.
    """
    if not err:
        print(f"Message delivered to {msg.topic()} [{msg.partition()}]")
        return
    print(f"Message delivery failed: {err}")

# Publish messages to Kafka, one per second, and report the overall outcome.
NUM_EVENTS = 10

# Errors reported by the delivery callback; used to decide the final banner
failed_deliveries = []


def _tracking_delivery_report(err, msg):
    """Record a failed delivery, then delegate to delivery_report for the log line."""
    if err:
        failed_deliveries.append(err)
    delivery_report(err, msg)


for _ in range(NUM_EVENTS):
    event_data = generate_serialized_event()
    producer.produce(TOPIC_NAME, value=event_data, callback=_tracking_delivery_report)
    # Flush per message so each delivery report is printed before the pause
    producer.flush()
    time.sleep(1)  # Simulate event streaming

# Only claim success when no delivery callback reported an error
if failed_deliveries:
    print(f"❌ {len(failed_deliveries)} of {NUM_EVENTS} events failed to publish")
else:
    print("✅ Events published successfully!")


Schema registered with ID: 1
Message delivered to search_requests_with_schema [0]
Message delivered to search_requests_with_schema [0]
Message delivered to search_requests_with_schema [0]
Message delivered to search_requests_with_schema [0]
Message delivered to search_requests_with_schema [0]
Message delivered to search_requests_with_schema [0]
Message delivered to search_requests_with_schema [0]
Message delivered to search_requests_with_schema [0]
Message delivered to search_requests_with_schema [0]
Message delivered to search_requests_with_schema [0]
✅ Events published successfully!
