In [1]:
import json
import uuid
import os
from dotenv import load_dotenv
from pathlib import Path
from kafka import KafkaProducer
from kafka.admin import KafkaAdminClient, NewPartitions
from faker import Faker
from time import sleep
import random

In [2]:
# Load environment variables from the .env file at the absolute path /resources/.env.
# The Kafka settings read in the following cell (KAFKA_HOST, KAFKA_TOPIC_NAME) come from
# the process environment, which this call is meant to populate.
dotenv_path = Path('/resources/.env')
# Left as the cell's final expression so the notebook displays load_dotenv's return value.
load_dotenv(dotenv_path=dotenv_path)

True

In [3]:
# Kafka connection settings, taken from the process environment.
# Either value is None when its variable is not set.
kafka_host = os.environ.get('KAFKA_HOST')
kafka_topic = os.environ.get('KAFKA_TOPIC_NAME')

In [4]:
# Echo the resolved settings, one per line, so a missing variable (None) is visible early.
print(
    f"Kafka Host: {kafka_host}",
    f"Kafka Topic: {kafka_topic}",
    sep="\n",
)

Kafka Host: dataeng-kafka
Kafka Topic: test-topic


In [5]:
# Initialize Kafka Producer
# Connects to the broker at <kafka_host>:9092 (the port is hard-coded here).
# No value serializer is passed; the send loop encodes each payload to UTF-8 bytes itself.
producer = KafkaProducer(bootstrap_servers=f'{kafka_host}:9092')
# Shared Faker instance; DataGenerator.get_data reads this module-level name.
faker = Faker()

In [6]:
# Initialize Kafka Admin Client
# Points at the same <kafka_host>:9092 address as the producer. It is used below to
# describe the topic and, if needed, to create additional partitions.
admin_client = KafkaAdminClient(bootstrap_servers=f"{kafka_host}:9092")

In [7]:
# Ensure the topic has at least `min_partitions` partitions (2 by default)
def ensure_two_partitions(topic_name, min_partitions=2):
    """Grow ``topic_name`` to at least ``min_partitions`` partitions if needed.

    Uses the module-level ``admin_client`` to read the topic's current
    partition list. When that list is shorter than ``min_partitions``, a
    request is made to raise the topic's total partition count to
    ``min_partitions``. Progress and failures are reported with ``print``;
    the function returns ``None``.

    Args:
        topic_name: Name of the Kafka topic to inspect.
        min_partitions: Minimum partition count to guarantee. Defaults to 2,
            which reproduces the previously hard-coded behaviour and messages.

    Note:
        Every exception raised inside the function is caught and printed
        rather than propagated, so a failure here does not stop the notebook.
    """
    try:
        # Fetch topic metadata
        topic_metadata = admin_client.describe_topics([topic_name])
        partitions = topic_metadata[0]['partitions']
        partition_count = len(partitions)
        print(f"Current partition count for topic '{topic_name}': {partition_count}")

        # Add partitions only when the topic is below the requested minimum
        if partition_count < min_partitions:
            print(f"Adding partitions to ensure topic '{topic_name}' has {min_partitions} partitions...")
            # total_count is the desired total for the topic, not the number to add
            admin_client.create_partitions(
                topic_partitions={topic_name: NewPartitions(total_count=min_partitions)}
            )
            print(f"Successfully added partitions to topic '{topic_name}'.")
        else:
            print(f"Topic '{topic_name}' already has {min_partitions} or more partitions.")
    except Exception as e:
        print(f"Error while ensuring partitions for topic '{topic_name}': {e}")

In [8]:
# Call the function to ensure the topic has at least 2 partitions.
# The function prints its progress and catches its own errors, so this cell
# does not raise even if the partition check fails.
ensure_two_partitions(kafka_topic)

Current partition count for topic 'test-topic': 2
Topic 'test-topic' already has 2 or more partitions.


In [9]:
# Validate partitions again to get the final count.
# partition_count is a module-level name that the send loop later uses to pick a
# target partition, so it is computed here after ensure_two_partitions has run.
topic_metadata = admin_client.describe_topics([kafka_topic])
# Presumably one metadata entry is returned per requested topic; only index 0 is read.
partition_count = len(topic_metadata[0]['partitions'])
print(f"Final partition count for topic '{kafka_topic}': {partition_count}")

Final partition count for topic 'test-topic': 2


In [10]:
class DataGenerator:
    """Builds fake consumer records using the notebook-level ``faker`` instance."""

    # Choice pools and the date layout shared by both date fields.
    _GENDER_CHOICES = ["Male", "Female"]
    _MARITAL_CHOICES = ["Single", "Married", "Divorced", "Widowed"]
    _DATE_FORMAT = "%Y-%m-%d"

    @staticmethod
    def get_data():
        """Generate random consumer data.

        Returns:
            dict: One record with the keys ``consumer_id``, ``name``,
            ``address``, ``email``, ``phone_number``, ``gender``,
            ``birth_date``, ``marital_status``, ``annual_income`` and
            ``signup_date``. Dates are formatted as ``YYYY-MM-DD`` strings and
            newlines in the address are replaced with ``", "``.
        """
        date_format = DataGenerator._DATE_FORMAT

        # Fields are filled in the same order as the keys are listed above,
        # so the sequence of Faker calls is unchanged.
        record = {"consumer_id": str(uuid.uuid4())}
        record["name"] = faker.name()

        raw_address = faker.address()
        record["address"] = raw_address.replace("\n", ", ")

        record["email"] = faker.email()
        record["phone_number"] = faker.phone_number()
        record["gender"] = faker.random_element(elements=DataGenerator._GENDER_CHOICES)

        born_on = faker.date_of_birth(minimum_age=18, maximum_age=80)
        record["birth_date"] = born_on.strftime(date_format)

        record["marital_status"] = faker.random_element(elements=DataGenerator._MARITAL_CHOICES)
        record["annual_income"] = faker.random_int(min=20000, max=200000)

        signed_up_on = faker.date_this_decade()
        record["signup_date"] = signed_up_on.strftime(date_format)
        return record

In [13]:
# Number of events to send
num_events = 10  # Change this to the number of events you want to send
send_interval_seconds = 5  # Pause between consecutive events
delivery_timeout_seconds = 10  # Max time to wait for the broker to acknowledge one event

# Send a fixed number of events to Kafka
for i in range(num_events):
    # Generate random consumer data
    data = DataGenerator.get_data()
    payload = json.dumps(data).encode("utf-8")

    # Randomly assign partition based on available partitions
    partition = random.randrange(partition_count)

    try:
        # send() is asynchronous: it only queues the record and returns a future.
        response = producer.send(topic=kafka_topic, value=payload, partition=partition)
        # Block on the future so delivery failures are raised here and reported below,
        # and so "Sent" is only printed once the send has actually completed.
        record_metadata = response.get(timeout=delivery_timeout_seconds)
        print(f"Sent to partition {partition}: {data}")
    except Exception as e:
        print(f"Error sending to partition {partition}: {e}")

    # Sleep before sending the next event; nothing follows the last one, so skip it there
    if i < num_events - 1:
        sleep(send_interval_seconds)

Sent to partition 0: {'consumer_id': 'd9f81eeb-9466-4b2f-b260-daa427c85bd6', 'name': 'Jeffrey Reed', 'address': '513 Ballard Prairie Apt. 327, New Carrie, IL 19679', 'email': 'jenniferlopez@example.com', 'phone_number': '7376981391', 'gender': 'Female', 'birth_date': '1950-03-18', 'marital_status': 'Divorced', 'annual_income': 71824, 'signup_date': '2023-07-15'}
Sent to partition 1: {'consumer_id': '19ab9524-bcf7-48da-bebd-ec1be9d72a88', 'name': 'Jonathan Morgan', 'address': 'USNS Davis, FPO AA 41982', 'email': 'padillakelly@example.net', 'phone_number': '886-839-7158x46026', 'gender': 'Female', 'birth_date': '1945-12-18', 'marital_status': 'Divorced', 'annual_income': 150935, 'signup_date': '2022-11-14'}
Sent to partition 0: {'consumer_id': '029c034f-bc51-43fc-8018-f33cf20ca328', 'name': 'Tonya Turner', 'address': '74431 Tamara Key, West Robertville, ND 40040', 'email': 'qsmith@example.com', 'phone_number': '342-705-8856x9884', 'gender': 'Male', 'birth_date': '1982-08-05', 'marital_st