In [1]:
# Kafka topic that the generated records are produced to.
topic_name = "all_datatypes_avro"
# Avro schema file whose contents are handed to the value serializer.
schema_file_path = "./schemas/avro_all_datatypes.avsc"

In [2]:
import json


class Config:
    """Connection settings for the Kafka cluster and the Schema Registry.

    The settings are read from a JSON file that must contain the keys
    ``kc_endpoint``, ``kc_api_key``, ``kc_api_secret``, ``sr_endpoint``,
    ``sr_api_key`` and ``sr_api_secret``. Each key is exposed as an
    attribute of the same name.
    """

    def __init__(self, path="../config.json"):
        """Load the settings from the JSON file at ``path``.

        Args:
            path: Location of the JSON config file. Defaults to
                ``../config.json``, the location that was previously
                hard-coded.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
            KeyError: If one of the required keys is missing.
        """
        # The context manager closes the handle as soon as parsing is done
        # instead of leaving it to the garbage collector.
        with open(path, encoding="utf-8") as config_file:
            config = json.load(config_file)

        self.kc_endpoint = config["kc_endpoint"]
        self.kc_api_key = config["kc_api_key"]
        self.kc_api_secret = config["kc_api_secret"]
        self.sr_endpoint = config["sr_endpoint"]
        self.sr_api_key = config["sr_api_key"]
        self.sr_api_secret = config["sr_api_secret"]


# Load the settings once; later cells read endpoints and credentials from `c`.
c = Config()

In [3]:
from confluent_kafka import Producer
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroSerializer
from confluent_kafka.serialization import MessageField, SerializationContext

import datagen

# Serializer setup: a Schema Registry client authenticated with the API
# key/secret pair from the config, joined as "key:secret".
sr_client = SchemaRegistryClient(
    {
        "url": c.sr_endpoint,
        "basic.auth.user.info": f"{c.sr_api_key}:{c.sr_api_secret}",
    }
)

# Read the schema inside a context manager so the file handle is closed
# immediately rather than left for the garbage collector.
with open(schema_file_path) as schema_file:
    schema_str = schema_file.read()

serializer = AvroSerializer(
    schema_registry_client=sr_client,
    schema_str=schema_str,
    # Identity hook: the value passed to the serializer is used unchanged.
    # NOTE(review): confluent-kafka documents `to_dict` as optional, so this
    # argument can probably be dropped -- confirm against the installed version.
    to_dict=lambda x, ctx: x,
)

# Producer setup: SASL_SSL with the PLAIN mechanism, using the cluster API
# key as the username and the API secret as the password.
conf = {
    "bootstrap.servers": c.kc_endpoint,
    "security.protocol": "SASL_SSL",
    "sasl.mechanisms": "PLAIN",
    "sasl.username": c.kc_api_key,
    "sasl.password": c.kc_api_secret,
}
producer = Producer(conf)


def produce_messages(num_messages: int = 5):
    """Generate, serialize and publish records to ``topic_name``.

    For every record a key, an Avro value and headers are obtained from
    ``datagen``. The value is serialized with the module-level
    ``serializer`` and handed to the module-level ``producer``. The
    function calls ``producer.flush()`` before returning.

    Args:
        num_messages: Number of records to produce.

    Returns:
        A list with one dict per produced record, holding its ``key``,
        ``value`` (before serialization), ``headers``, ``topic`` and
        ``serialized_value``.
    """

    def report_failure(err, msg):
        # Delivery callback: only failed deliveries are printed.
        if err:
            print(f"{err=}")

    # The context is identical for every record, so build it once.
    value_context = SerializationContext(topic_name, MessageField.VALUE)
    messages_sent = []

    for _ in range(num_messages):
        key = datagen.generate_key()
        value = datagen.generate_avro_all_datatypes()
        headers = datagen.generate_headers()
        serialized_value = serializer(value, value_context)
        producer.produce(
            topic_name,
            key=key,
            value=serialized_value,
            headers=headers,
            on_delivery=report_failure,
        )
        # Serve delivery callbacks for earlier sends as we go, so reports do
        # not pile up until the final flush when num_messages is large.
        producer.poll(0)
        messages_sent.append(
            {
                "key": key,
                "value": value,
                "headers": headers,
                "topic": topic_name,
                "serialized_value": serialized_value,
            }
        )

    # Wait for the remaining in-flight messages and their callbacks.
    producer.flush()

    return messages_sent


# Produce five records and keep what was sent for inspection in later cells.
messages = produce_messages(5)

msg=<cimpl.Message object at 0x10cbf9840>
msg=<cimpl.Message object at 0x10cbf9840>
msg=<cimpl.Message object at 0x10cbf9740>
msg=<cimpl.Message object at 0x10cbf9740>
msg=<cimpl.Message object at 0x10cbf9740>


In [5]:
# Show the length of the first sent record's string form, then the record itself.
len(str(messages[0])), messages[0]

(977,
 {'key': 'f684f92b-d8e9-4dc6-af1f-fa88377a8f3c',
  'value': {'nullField': None,
   'booleanField': False,
   'intField': 13,
   'floatField': 579340535,
   'doubleField': 67798,
   'bytesField': b'\xb9h{\x16\xc4aoE\xd2o',
   'stringField': 'Say despite of whole.',
   'arrayField': ['up', 'such', 'teach', 'right', 'style'],
   'mapField': {'situation': 6561,
    'leg': 5032,
    'control': 9522,
    'room': 8301,
    'gun': 1849},
   'enumField': 'A',
   'fixedField': b'\x80\xd3*I\x9aa/>o\x17[]09R\xfc',
   'unionField': None,
   'recordField': {'nestedField': 'least'}},
  'headers': {'foo': 'election',
   'bar': '5615',
   'baz': '457699652',
   'schema_id': '100001',
   'schema_version': '1'},
  'topic': 'all_datatypes_avro',
  'serialized_value': b'\x00\x00\x01\x86\xa9\x00\x1a$ \nN\x00\x00\x00\x00`\x8d\xf0@\x14\xb9h{\x16\xc4aoE\xd2o*Say despite of whole.\n\x04up\x08such\nteach\nright\nstyle\x00\n\x12situation\xc2f\x06leg\xd0N\x0econtrol\xe4\x94\x01\x08room\xda\x81\x01\x06gun\xf2