# <center> <img src="../../labs/img/ITESOLogo.png" alt="ITESO" width="480" height="130"> </center>
# <center> **Departamento de Electrónica, Sistemas e Informática** </center>
---
## <center> **Big Data** </center>
---
### <center> **Spring 2025** </center>
---
### <center> **Kafka Producer: Video Streaming Event Generator** </center>

---
**Professor**: Dr. Pablo Camarillo Ramirez

**Team members**: 
- Miguel Alberto Torres Dueñas
- Juan Pablo Cortez Navarro
- Luther Williams Sandria 
- Ferdinand Bierbaum

### Invoke the data generator

In [1]:
# from grandeInformacion.streaming_generator import generate_streaming_event
import threading
import logging
import time
import json
import random
from kafka import KafkaProducer
from grandeInformacion.streaming_generator import generate_streaming_event


# Bootstrap address handed to KafkaProducer in create_producer().
# NOTE(review): the host part looks like a Docker container ID, which would
# change whenever the broker container is recreated -- confirm.
KAFKA_SERVER = 'ed69dac0a4e4:9093'
# Number of producer threads launched by the __main__ block (ids 0..TEAM_SIZE-1).
TEAM_SIZE = 4
# Inclusive lower/upper bounds for the random message count drawn per producer.
MIN_MESSAGES = 30
MAX_MESSAGES = 150

def create_producer():
    """Build a KafkaProducer bound to KAFKA_SERVER.

    Message values are serialized to JSON text and then encoded as UTF-8
    bytes before being handed to the producer.

    Returns:
        A new ``KafkaProducer`` instance.
    """
    def _to_json_bytes(payload):
        # Same transformation the producer applies to every outgoing value.
        return json.dumps(payload).encode('utf-8')

    producer = KafkaProducer(
        bootstrap_servers=KAFKA_SERVER,
        value_serializer=_to_json_bytes,
    )
    return producer

def run_producer(producer_id):
    """Send a random number of generated events to a per-producer Kafka topic.

    The topic is ``kafka-spark-example-<producer_id>`` and the number of
    messages is drawn uniformly from ``[MIN_MESSAGES, MAX_MESSAGES]``.
    Each message is produced by ``generate_streaming_event()``, sent, logged
    to a per-producer log file, echoed to stdout, and followed by a
    2-second pause.

    Args:
        producer_id: Identifier used to derive the topic name, the logger
            name and the log file name.

    Raises:
        Exception: Any error raised while creating the producer or sending
            messages is written to the log file and then re-raised.
    """
    topic_name = f'kafka-spark-example-{producer_id}'
    log_filename = f'/home/jovyan/notebooks/data/producer_{producer_id}_log.txt'
    num_messages = random.randint(MIN_MESSAGES, MAX_MESSAGES)

    logger = logging.getLogger(f'Producer-{producer_id}')
    logger.setLevel(logging.INFO)
    handler = logging.FileHandler(log_filename)
    handler.setFormatter(
        logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    )
    logger.addHandler(handler)

    producer = None
    try:
        # Created inside the try so that a connection failure is logged and
        # the file handler is still cleaned up.
        producer = create_producer()

        logger.info(f"Starting producer {producer_id} with {num_messages} messages to topic '{topic_name}'")
        print(f"[Producer-{producer_id}] Sending {num_messages} messages to topic '{topic_name}'")

        for i in range(num_messages):
            data = generate_streaming_event()
            producer.send(topic_name, data)
            logger.info(f"Sent message {i+1}: {data}")
            print(f"[Producer-{producer_id}] Sent: {data}")
            time.sleep(2)

        # send() only enqueues the record; flush so the "Finished" entry is
        # written after the buffered messages have actually been handed off.
        producer.flush()
        logger.info(f"Finished. Total messages sent: {num_messages}")
    except KeyboardInterrupt:
        # NOTE(review): this function is launched in worker threads, where
        # KeyboardInterrupt is normally not raised -- confirm this branch is
        # reachable in practice.
        logger.warning("Interrupted by user")
    except Exception:
        # Without this, a failure inside the thread never reaches the log file.
        logger.exception(f"Producer {producer_id} failed")
        raise
    finally:
        try:
            if producer is not None:
                producer.close()
        finally:
            # getLogger() returns the same logger object for the same name,
            # so a handler left attached would duplicate every line on the
            # next call and keep the log file open.
            logger.removeHandler(handler)
            handler.close()

# Create and launch one thread per producer, then wait for all of them.
if __name__ == "__main__":
    # One worker thread per producer id in 0..TEAM_SIZE-1.
    threads = [
        threading.Thread(target=run_producer, args=(producer_idx,))
        for producer_idx in range(TEAM_SIZE)
    ]

    for worker in threads:
        worker.start()

    # Block until every producer has finished sending.
    for worker in threads:
        worker.join()

    print("Todos los productores han terminado.")


[Producer-3] Sending 37 messages to topic 'kafka-spark-example-3'
[Producer-2] Sending 94 messages to topic 'kafka-spark-example-2'
[Producer-0] Sending 30 messages to topic 'kafka-spark-example-0'
[Producer-1] Sending 30 messages to topic 'kafka-spark-example-1'
[Producer-3] Sent: {'user_id': 'user_2151', 'video_id': 'vid_007', 'watch_time_seconds': 959, 'resolution': '4K', 'bitrate_kbps': 2520, 'buffering_events': 4, 'paused': False, 'skipped': False, 'genre': 'Documentary', 'region': 'JP', 'recommended': True, 'timestamp': '05/14/2025'}
[Producer-2] Sent: {'user_id': 'user_2870', 'video_id': 'vid_067', 'watch_time_seconds': 76, 'resolution': '720p', 'bitrate_kbps': 4592, 'buffering_events': 5, 'paused': False, 'skipped': False, 'genre': 'Documentary', 'region': 'US', 'recommended': True, 'timestamp': '05/14/2025'}
[Producer-0] Sent: {'user_id': 'user_2964', 'video_id': 'vid_017', 'watch_time_seconds': 751, 'resolution': '720p', 'bitrate_kbps': 963, 'buffering_events': 4, 'paused': F