In [1]:
from pathlib import Path
from confluent_kafka import SerializingProducer
from confluent_kafka.serialization import StringSerializer
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.protobuf import ProtobufSerializer
from faker import Faker
import time
import random
import os
import sys

# Load environment variables from .env file
from dotenv import load_dotenv
dotenv_path = Path('/resources/.env')
load_dotenv(dotenv_path=dotenv_path)

# Access environment variables
project_name = os.getenv('COMPOSE_PROJECT_NAME')
kafka_host = os.getenv('KAFKA_HOST')
topic_name = os.getenv('KAFKA_TOPIC_NAME')
replication_factor = int(os.getenv('KAFKA_REPLICATION'))
num_partitions = int(os.getenv('KAFKA_PARTITION'))
schema_registry_host = os.getenv('SCHEMA_REG_HOST')

# Protobuf schema import
sys.path.append('./protobuf')
import protobuf_schema_pb2 

# Kafka Configuration
bootstrap_servers = f'{kafka_host}:9092' 
schema_registry_url = f'http://{schema_registry_host}:8081' 

# Producer Configuration
producer_conf = {
    'bootstrap.servers': bootstrap_servers,
    'key.serializer': StringSerializer('utf_8'),
}

# Schema Registry and Protobuf Serializer
schema_registry_client = SchemaRegistryClient({'url': schema_registry_url})

protobuf_serializer = ProtobufSerializer(protobuf_schema_pb2.EmployeeSalary, 
                                         schema_registry_client, 
                                         {'use.deprecated.format': False})

producer_conf['value.serializer'] = protobuf_serializer

# Kafka Producer
producer = SerializingProducer(producer_conf)
fake = Faker()

def generate_salary_data():
    salary_data = protobuf_schema_pb2.EmployeeSalary()
    salary_data.employee_id = fake.uuid4()
    salary_data.employee_name = fake.name()
    salary_data.salary = round(random.uniform(30000, 100000), 2) 
    salary_data.department = fake.random_element(elements=('HR', 'IT', 'Finance', 'Marketing', 'Sales'))
    return salary_data

while True:
    salary_data = generate_salary_data()
    print(salary_data)
    producer.produce(topic=topic_name, key=salary_data.employee_id, value=salary_data)
    producer.flush()
    time.sleep(5) 

employee_id: "410a0bb7-57f6-4a11-bdd9-6ffadf2f709c"
employee_name: "Steven Walker"
salary: 32188.04
department: "IT"

employee_id: "9ad2e6d0-2533-4057-bc53-4df09cb078a1"
employee_name: "Aaron Marquez"
salary: 50587.96
department: "HR"

employee_id: "8163ae9e-6d98-4590-8d6b-57277c7a9ce6"
employee_name: "Justin Orozco"
salary: 58461.64
department: "Marketing"

employee_id: "7839a5c3-adc5-48fa-8c2f-29941dca2028"
employee_name: "Dalton Boyd"
salary: 98559.27
department: "Finance"

employee_id: "8b4b7ee7-3228-432e-9a21-b2e817af4719"
employee_name: "Robin Martin"
salary: 69884.6
department: "IT"

employee_id: "5ab46cf4-b9fa-4281-8597-fe952bf8319d"
employee_name: "Emily Murillo"
salary: 66204.15
department: "HR"

employee_id: "ce2b3937-797b-47f6-b448-0ec363dfd0d6"
employee_name: "John Chavez"
salary: 92322.2
department: "Sales"

employee_id: "7c5c820d-e926-4ca2-8f34-abbe68e775a5"
employee_name: "Darrell Green"
salary: 79241.1
department: "Sales"

employee_id: "7a6727be-ae4b-4ed0-a1b9-13b76e2d

KeyboardInterrupt: 