In [1]:
topic_name = "all_datatypes_json_sr"
schema_file_path = "./schemas/jsonschema_all_datatypes.json"

In [2]:
import json


class Config:
    def __init__(self):
        config = json.loads(open("../config.json").read())
        self.kc_endpoint = config["kc_endpoint"]
        self.kc_api_key = config["kc_api_key"]
        self.kc_api_secret = config["kc_api_secret"]
        self.sr_endpoint = config["sr_endpoint"]
        self.sr_api_key = config["sr_api_key"]
        self.sr_api_secret = config["sr_api_secret"]


c = Config()

In [3]:
from confluent_kafka import Producer
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.json_schema import JSONSerializer
from confluent_kafka.serialization import MessageField, SerializationContext

import datagen

# serializer setup
sr_client = SchemaRegistryClient(
    {
        "url": c.sr_endpoint,
        "basic.auth.user.info": f"{c.sr_api_key}:{c.sr_api_secret}",
    }
)
serializer = JSONSerializer(
    schema_registry_client=sr_client,
    schema_str=open(schema_file_path).read(),
    to_dict=lambda x, ctx: x,  # I still don't know why this is necessary.
)

conf = {
    "bootstrap.servers": c.kc_endpoint,
    "security.protocol": "SASL_SSL",
    "sasl.mechanisms": "PLAIN",
    "sasl.username": c.kc_api_key,
    "sasl.password": c.kc_api_secret,
}
producer = Producer(conf)


def produce_messages(num_messages: int = 5):
    messages_sent = []

    for _ in range(num_messages):
        key = datagen.generate_key()
        value = datagen.generate_jsonschema_all_datatypes()
        headers = datagen.generate_headers()
        serialized_value = serializer(
            value, SerializationContext(topic_name, MessageField.VALUE)
        )
        producer.produce(
            topic_name,
            key=key,
            value=serialized_value,
            headers=headers,
            on_delivery=lambda err, msg: print(f"{err=}") if err else print(f"{msg=}"),
        )
        messages_sent.append(
            {
                "key": key,
                "value": value,
                "headers": headers,
                "topic": topic_name,
                "serialized_value": serialized_value,
            }
        )
    producer.flush()

    return messages_sent


messages = produce_messages(5)

msg=<cimpl.Message object at 0x10b195840>
msg=<cimpl.Message object at 0x10b0cea40>
msg=<cimpl.Message object at 0x10b0cea40>
msg=<cimpl.Message object at 0x10b0cea40>
msg=<cimpl.Message object at 0x10b0cea40>


In [4]:
len(json.dumps(messages[0])), messages[0]

TypeError: Object of type bytes is not JSON serializable