In [16]:
# Imported libraries
import time
import json
import pandas as pd
from kafka3 import KafkaProducer
from datetime import datetime as dt

In [17]:
# --- Configuration: every value a reader might want to tune ---
CSV_PATH = 'data/camera_event_B.csv'  # CSV file holding the camera events
KAFKA_SERVER = '172.22.32.1'          # Kafka bootstrap server address
TOPIC = 'camera_events_b'             # Kafka topic the events are published to
BATCH_INTERVAL = 5                    # pause between two batches, in seconds
PRODUCER_ID = 'B'                     # tag added to each message to name this producer

In [18]:
def connect_kafka_producer(bootstrap_server: str) -> KafkaProducer:
    """
    Create and return a KafkaProducer connected to ``bootstrap_server``.

    The producer is configured with:
      * ``api_version=(0, 10)`` to match the broker (0.10.x+);
      * a value serializer that JSON-encodes the message (so dicts can be
        sent directly) and converts it to UTF-8 bytes;
      * a key serializer that UTF-8 encodes string keys.

    Args:
        bootstrap_server: Address of the Kafka bootstrap server.

    Returns:
        The connected KafkaProducer.

    Raises:
        Exception: Whatever KafkaProducer raised while connecting; the error
            is printed and then re-raised unchanged.
    """
    def _encode_key(key):
        # A record may legitimately have no key, and a caller may hand over
        # bytes that are already encoded; calling .encode() on either would
        # raise AttributeError, so both are passed through untouched.
        if key is None or isinstance(key, bytes):
            return key
        return key.encode('utf-8')

    try:
        producer = KafkaProducer(
            bootstrap_servers=[bootstrap_server],
            api_version=(0, 10),
            value_serializer=lambda v: json.dumps(v).encode('utf-8'),
            key_serializer=_encode_key
        )
        return producer
    except Exception as e:
        print(f"[ERROR] Failed to connect to Kafka at {bootstrap_server}: {e}")
        raise

In [19]:
def load_events(csv_path: str) -> pd.DataFrame:
    """
    Read the camera events CSV at ``csv_path`` and return it as a DataFrame.

    The ``timestamp`` column is parsed into datetimes (so it can later be
    converted back to ISO format) and ``batch_id`` is read as an integer.
    """
    read_options = {
        'sep': ',',
        'parse_dates': ['timestamp'],
        'dtype': {'batch_id': int},
    }
    return pd.read_csv(csv_path, **read_options)

In [20]:
def produce_batches(df: pd.DataFrame,
                    producer: "KafkaProducer",
                    topic: str,
                    interval: float,
                    producer_id: str):
    """
    Publish the events in ``df`` to Kafka, one ``batch_id`` at a time.

    Batches are processed in ascending ``batch_id`` order. Each row becomes a
    dict with its ``timestamp`` converted to an ISO string, plus two extra
    fields: ``producer_id`` and ``sent_at`` (local time at which the record
    was handed to the producer). The record is keyed by ``car_plate``.
    After each batch the producer is flushed and the function sleeps for
    ``interval`` seconds.

    Args:
        df: Events to publish; must contain ``batch_id``, ``timestamp`` and
            ``car_plate`` columns.
        producer: Producer used to send the records. It is always closed
            before this function returns or raises.
        topic: Kafka topic to publish to.
        interval: Seconds to sleep after each batch.
        producer_id: Value stored in each message's ``producer_id`` field.

    Notes:
        A record that cannot be sent is reported with a ``[WARN]`` line and
        skipped; it does not stop the run.
    """
    def _delivery_errback(event_id):
        # send() only queues the record, so a delivery failure surfaces later
        # on the returned future rather than as an exception at the call
        # site. Bind the event id now so the warning names the right record.
        def _on_error(exc):
            print(f"[WARN] Delivery failed for event {event_id}: {exc}")
        return _on_error

    try:
        for batch_id in sorted(df['batch_id'].unique()):
            batch_df = df[df['batch_id'] == batch_id]
            print(f"[INFO] Publishing batch #{batch_id} ({len(batch_df)} records)...")
            for _, row in batch_df.iterrows():
                event = row.to_dict()
                # Ensure timestamp is serializable
                event['timestamp'] = event['timestamp'].isoformat()
                # Tag with producer identity
                event['producer_id'] = producer_id
                # Record when this event is handed to the producer
                event['sent_at'] = dt.now().isoformat()
                # .get() so a missing column cannot raise while reporting
                event_id = event.get('event_id')

                try:
                    future = producer.send(
                        topic,
                        key=event['car_plate'],
                        value=event,
                        timestamp_ms=int(time.time()*1000)
                    )
                    future.add_errback(_delivery_errback(event_id))
                except Exception as e:
                    print(f"[WARN] Failed to send event {event_id}: {e}")

            # Force all buffered messages out
            producer.flush()
            print(f"[INFO] Batch #{batch_id} sent. Sleeping {interval}s...")
            # can comment the below line if you don't need to see the output
            print(f"[DATA] Batch #{batch_id}:\n{batch_df}\n")
            time.sleep(interval)

        # Loop has finished
        producer.flush()
    finally:
        # Release the connection even when the run is interrupted (e.g. the
        # cell is stopped during the sleep) or an unexpected error occurs.
        producer.close()

In [None]:
def main():
    """
    Load the camera events and publish them to Kafka in time-spaced batches.

    The CSV is loaded before the broker connection is opened, so a missing
    or malformed file fails fast without leaving an open producer behind.
    """
    # 1) Load all events from CSV (before any network resource is acquired)
    df = load_events(CSV_PATH)

    # 2) Connect to Kafka broker
    producer = connect_kafka_producer(KAFKA_SERVER)

    # 3) Produce them in time-spaced batches (produce_batches closes the producer)
    produce_batches(df, producer, TOPIC, BATCH_INTERVAL, PRODUCER_ID)

    print("[DONE] All batches published.")

if __name__ == '__main__':
    main()

[INFO] Publishing batch #1 (5 records)...
[INFO] Batch #1 sent. Sleeping 5s...
[DATA] Batch #1:
                               event_id  batch_id car_plate  camera_id  \
0  b5b7dae7-3bf9-4f75-aea8-398d3bde3a41         1   CJW 924          2   
1  1649c03b-cd6f-47bd-b4f2-c44fcf60524d         1    CZ 592          2   
2  a9c6a4b7-a6b6-4bd7-9719-88bd895814f4         1   UTT 229          2   
3  4e13ec51-02db-442c-b7aa-3a2050dd1ad1         1   WA 0712          2   
4  74248dd9-c9f8-48d2-94fb-dc5610a0f735         1      AH 8          2   

                   timestamp  speed_reading  
0 2024-01-01 08:00:26.029536          156.8  
1 2024-01-01 08:00:26.729115          163.9  
2 2024-01-01 08:00:26.887170          134.1  
3 2024-01-01 08:00:27.601617          133.8  
4 2024-01-01 08:00:28.125701          158.9  

[INFO] Publishing batch #2 (1 records)...
[INFO] Batch #2 sent. Sleeping 5s...
[DATA] Batch #2:
                               event_id  batch_id car_plate  camera_id  \
5  c14b050b-

[INFO] Publishing batch #19 (4 records)...
[INFO] Batch #19 sent. Sleeping 5s...
[DATA] Batch #19:
                                event_id  batch_id car_plate  camera_id  \
35  315c64ac-0f96-4a67-98c4-7d7b85cfa455        19   CIY 810          2   
36  d40dc066-ba11-4d44-a4e5-48f7038544a7        19     ZQ 22          2   
37  40d72911-cdde-4af8-9a18-68aa908a46a5        19     EC 40          2   
38  1622ac4e-d0d1-4713-85fe-e5b5f01d1c28        19    NGP 66          2   

                    timestamp  speed_reading  
35 2024-01-01 08:13:34.334415          136.3  
36 2024-01-01 08:13:36.902545          113.0  
37 2024-01-01 08:13:37.027065          144.7  
38 2024-01-01 08:13:38.833509          107.2  

[INFO] Publishing batch #20 (2 records)...
[INFO] Batch #20 sent. Sleeping 5s...
[DATA] Batch #20:
                                event_id  batch_id car_plate  camera_id  \
39  2ae298d2-5496-4732-95ef-82f4b57ef4b7        20   KWO 421          2   
40  61ffc37e-73c0-4fa9-a604-e2596e8f6289

[INFO] Publishing batch #38 (1 records)...
[INFO] Batch #38 sent. Sleeping 5s...
[DATA] Batch #38:
                                event_id  batch_id car_plate  camera_id  \
71  84d8dbf6-322d-422f-ad4e-72fbcbefc63a        38     ZCL 3          2   

                    timestamp  speed_reading  
71 2024-01-01 08:20:23.108400          101.1  

[INFO] Publishing batch #39 (2 records)...
[INFO] Batch #39 sent. Sleeping 5s...
[DATA] Batch #39:
                                event_id  batch_id car_plate  camera_id  \
72  686f079d-3e40-4e3f-80a7-3b450f9a2e86        39     WX 49          2   
73  9160a62b-341c-4c59-937c-381ba9c45765        39  HLZ 1649          2   

                    timestamp  speed_reading  
72 2024-01-01 08:20:29.626095           77.7  
73 2024-01-01 08:20:29.641873           85.8  

[INFO] Publishing batch #40 (1 records)...
[INFO] Batch #40 sent. Sleeping 5s...
[DATA] Batch #40:
                                event_id  batch_id car_plate  camera_id  \
74  31f0d1d0-8