In [1]:
# Libraries
from kafka import KafkaProducer
import json, time, random, uuid
from datetime import datetime, timedelta

In [2]:
# Kafka producer shared by the cells below.
def _to_json_bytes(payload):
    """Serialise a message value to UTF-8 encoded JSON bytes."""
    return json.dumps(payload).encode('utf-8')


# Every value passed to producer.send() goes through _to_json_bytes first.
producer = KafkaProducer(
    bootstrap_servers='kafka:9092',
    value_serializer=_to_json_bytes,
)

In [None]:
# Orders Data (Transactional Facts)
def generate_orders(n=1000):
    """Generate ``n`` random order records (transactional facts).

    Each record is a JSON-serialisable dict with the keys ``order_id``
    (random UUID4 string), ``customer_id`` (``CUST0001`` .. ``CUST0200``),
    ``product_id``, ``product_name``, ``quantity`` (1-5), ``price``,
    ``total_value`` (``quantity * price``) and ``order_date``
    (``YYYY-MM-DD``, between 2024-01-01 and 180 days later).

    Args:
        n: Number of records to build. ``n <= 0`` yields an empty list.

    Returns:
        list[dict]: ``n`` independently generated order records.
    """
    customers = [f"CUST{str(i).zfill(4)}" for i in range(1, 201)]
    products = [
        {"id": "P001", "name": "Laptop", "price": 1200},
        {"id": "P002", "name": "Phone", "price": 800},
        {"id": "P003", "name": "Headphones", "price": 150},
        {"id": "P004", "name": "Monitor", "price": 300},
        {"id": "P005", "name": "Keyboard", "price": 60},
    ]
    start_date = datetime(2024, 1, 1)

    orders = []
    for _ in range(n):
        cust = random.choice(customers)
        product = random.choice(products)
        qty = random.randint(1, 5)
        order_date = start_date + timedelta(days=random.randint(0, 180))
        # Append instead of rebinding the accumulator, so every generated
        # record is kept rather than only the last one.
        orders.append({
            "order_id": str(uuid.uuid4()),
            "customer_id": cust,
            "product_id": product["id"],
            "product_name": product["name"],
            "quantity": qty,
            "price": product["price"],
            "total_value": qty * product["price"],
            "order_date": order_date.strftime("%Y-%m-%d"),
        })
    return orders


# Customer Events (Behavioral Data)
def generate_customer_events(n=1000):
    """Generate ``n`` random customer behaviour events.

    Each record is a JSON-serialisable dict with the keys ``event_id``
    (random UUID4 string), ``customer_id`` (``CUST0001`` .. ``CUST0200``),
    ``product_id``, ``action`` and ``timestamp`` (integer epoch seconds,
    at most one hour before the moment the function was called).

    Args:
        n: Number of events to build. ``n <= 0`` yields an empty list.

    Returns:
        list[dict]: ``n`` independently generated event records.
    """
    customers = [f"CUST{str(i).zfill(4)}" for i in range(1, 201)]
    actions = ["view_product", "add_to_cart", "remove_from_cart", "checkout", "wishlist"]
    products = ["P001", "P002", "P003", "P004", "P005"]

    events = []
    # Sample the clock once so all events of a batch share one reference time.
    current_time = int(time.time())
    for _ in range(n):
        cust = random.choice(customers)
        action = random.choice(actions)
        prod = random.choice(products)
        timestamp = current_time - random.randint(0, 3600)

        events.append({
            "event_id": str(uuid.uuid4()),
            "customer_id": cust,
            "product_id": prod,
            "action": action,
            "timestamp": timestamp,
        })
    return events


def stream_messages(producer, interval_seconds=5, max_rounds=None):
    """Alternately publish one order and one customer event until stopped.

    Every round sends one order to the ``order_data`` topic, sleeps, sends
    one event to the ``customer_event`` topic and sleeps again. Each record
    is sent as its own message.

    Args:
        producer: Object exposing ``send(topic, value=...)`` and ``flush()``
            (the notebook passes its ``KafkaProducer``).
        interval_seconds: Pause after each send, in seconds.
        max_rounds: Stop after this many rounds; ``None`` runs until the
            kernel is interrupted.
    """
    rounds = 0
    try:
        while max_rounds is None or rounds < max_rounds:
            # Send data to "order_data" topic
            for order in generate_orders(n=1):
                producer.send("order_data", value=order)
                print("Sent:", order)
            time.sleep(interval_seconds)
            # Send data to "customer_event" topic
            for event in generate_customer_events(n=1):
                producer.send("customer_event", value=event)
                print("Sent:", event)
            time.sleep(interval_seconds)
            rounds += 1
    except KeyboardInterrupt:
        # Interrupting the kernel is the expected way to stop an endless run.
        print("Stopped by user.")
    finally:
        # Push out anything still buffered. The producer is left open because
        # it is created in another cell and may be reused.
        producer.flush()


# __name__ is "__main__" inside a Jupyter kernel, so the stream still starts
# when this cell runs, while importing the code elsewhere does not block.
if __name__ == "__main__":
    stream_messages(producer)

Sent: {'order_id': '00e91b79-ba0b-48d4-bfe5-dbe9b993a234', 'customer_id': 'CUST0099', 'product_id': 'P005', 'product_name': 'Keyboard', 'quantity': 4, 'price': 60, 'total_value': 240, 'order_date': '2024-03-14'}
Sent: {'event_id': '9bb5a546-15fd-4505-b47d-0dfd0d96a7c9', 'customer_id': 'CUST0030', 'product_id': 'P004', 'action': 'view_product', 'timestamp': 1755700050}
Sent: {'order_id': '7e6042af-9abb-4096-b147-8e22814230fb', 'customer_id': 'CUST0168', 'product_id': 'P001', 'product_name': 'Laptop', 'quantity': 4, 'price': 1200, 'total_value': 4800, 'order_date': '2024-02-22'}
Sent: {'event_id': '5e2be872-7853-4083-80a5-e548b1f05c79', 'customer_id': 'CUST0114', 'product_id': 'P004', 'action': 'wishlist', 'timestamp': 1755701433}
Sent: {'order_id': '8fa473c7-fc77-4ff2-a814-993ea1a243db', 'customer_id': 'CUST0077', 'product_id': 'P003', 'product_name': 'Headphones', 'quantity': 3, 'price': 150, 'total_value': 450, 'order_date': '2024-05-06'}
Sent: {'event_id': 'adf14b61-e8ca-4cae-8bcf-89