In [23]:
# Imported libraries
import time
import json
import pandas as pd
from kafka3 import KafkaProducer
from datetime import datetime as dt

In [24]:
# ----------------------------------------------------------------
# Runtime settings for this producer notebook.
# Everything a reader may want to tune lives in this one cell.
# ----------------------------------------------------------------
CSV_PATH = 'data/camera_event_C.csv'  # CSV file holding the camera events
KAFKA_SERVER = '172.22.32.1'          # Kafka bootstrap server (no port given here)
TOPIC = 'camera_events_c'             # Kafka topic the events are published to
BATCH_INTERVAL = 5                    # pause between batches, in seconds
PRODUCER_ID = 'C'                     # producer name used to tag each message

In [25]:
def connect_kafka_producer(bootstrap_server: str) -> KafkaProducer:
    """
    Build and return a KafkaProducer bound to one bootstrap server.

    The producer is configured with api_version (0, 10), a value
    serializer that JSON-encodes the payload to UTF-8 bytes (so dicts
    can be sent directly), and a key serializer that UTF-8 encodes the
    key (so keys must offer ``.encode``, i.e. be strings).

    If constructing the producer raises, the error is printed with an
    [ERROR] prefix and the same exception is re-raised to the caller.
    """
    def _encode_value(payload):
        # dict -> JSON text -> bytes
        return json.dumps(payload).encode('utf-8')

    def _encode_key(key):
        # str -> bytes
        return key.encode('utf-8')

    try:
        return KafkaProducer(
            bootstrap_servers=[bootstrap_server],
            api_version=(0, 10),
            value_serializer=_encode_value,
            key_serializer=_encode_key,
        )
    except Exception as err:
        print(f"[ERROR] Failed to connect to Kafka at {bootstrap_server}: {err}")
        raise

In [26]:
def load_events(csv_path: str) -> pd.DataFrame:
    """
    Read the camera-event CSV at ``csv_path`` into a DataFrame.

    The file is read as comma-separated text. The ``timestamp`` column
    is parsed as dates (so it can later be rendered back to ISO format)
    and the ``batch_id`` column is read with an integer dtype.
    """
    read_options = {
        'sep': ',',
        'parse_dates': ['timestamp'],
        'dtype': {'batch_id': int},
    }
    events = pd.read_csv(csv_path, **read_options)
    return events

In [27]:
def produce_batches(df: pd.DataFrame,
                    producer: KafkaProducer,
                    topic: str,
                    interval: float,
                    producer_id: str):
    """
    Publish the rows of ``df`` to Kafka one batch at a time.

    Rows are grouped by ``batch_id`` and the groups are processed in
    ascending ``batch_id`` order. Each row is turned into a dict, its
    ``timestamp`` is replaced by its ISO-format string, and two fields
    are added: ``producer_id`` and ``sent_at`` (local wall-clock time
    taken just before the send call). The message key is the row's
    ``car_plate``. After every batch the producer is flushed, the batch
    is printed, and the function sleeps for ``interval`` seconds.

    The producer is closed before this function returns *or raises*
    (including when the run is interrupted mid-sleep), so the caller
    must not reuse it afterwards.

    Args:
        df: events to publish; must contain ``batch_id``, ``timestamp``
            and ``car_plate`` columns.
        producer: object exposing ``send``, ``flush`` and ``close``.
        topic: destination topic name.
        interval: seconds to sleep after each batch.
        producer_id: tag written into every message.
    """
    try:
        # A single sorted groupby visits the same batches, in the same
        # order, as filtering the frame once per unique batch_id, but
        # without rescanning the whole frame for every batch.
        for batch_id, batch_df in df.groupby('batch_id', sort=True):
            print(f"[INFO] Publishing batch #{batch_id} ({len(batch_df)} records)...")
            for _, row in batch_df.iterrows():
                event = row.to_dict()
                # Ensure timestamp is serializable
                event['timestamp'] = event['timestamp'].isoformat()
                # Tag with producer identity
                event['producer_id'] = producer_id
                # Record the moment this individual event is handed to the producer
                event['sent_at'] = dt.now().isoformat()

                try:
                    producer.send(
                        topic,
                        key=event['car_plate'],
                        value=event,
                        timestamp_ms=int(time.time()*1000)
                    )
                except Exception as e:
                    # NOTE(review): this only sees errors raised by the send()
                    # call itself; per the kafka-python docs send() is
                    # asynchronous, so later delivery failures are not reported here.
                    # .get() keeps the warning itself from raising KeyError
                    # when the frame has no event_id column.
                    print(f"[WARN] Failed to send event {event.get('event_id')}: {e}")

            # Force all buffered messages out
            producer.flush()
            print(f"[INFO] Batch #{batch_id} sent. Sleeping {interval}s...")
            # can comment the below line if you don't need to see the output
            print(f"[DATA] Batch #{batch_id}:\n{batch_df}\n")
            time.sleep(interval)

        # Loop has finished
        producer.flush()
    finally:
        # Always release the producer, even if a batch failed or the
        # cell was interrupted while sleeping.
        producer.close()

In [None]:
def main():
    """
    Replay the configured CSV of camera events into Kafka.

    Reads CSV_PATH, connects to KAFKA_SERVER, then publishes the events
    to TOPIC in batches spaced BATCH_INTERVAL seconds apart, tagging
    each message with PRODUCER_ID.
    """
    # 1) Load all events from CSV first: if the file is missing or
    #    malformed we fail before a producer has been opened, so no
    #    connection is left dangling.
    df = load_events(CSV_PATH)

    # 2) Connect to Kafka broker
    producer = connect_kafka_producer(KAFKA_SERVER)

    # 3) Produce them in time-spaced batches
    #    (produce_batches closes the producer when it is done)
    produce_batches(df, producer, TOPIC, BATCH_INTERVAL, PRODUCER_ID)

    print("[DONE] All batches published.")

if __name__ == '__main__':
    main()

[INFO] Publishing batch #1 (1 records)...
[INFO] Batch #1 sent. Sleeping 5s...
[DATA] Batch #1:
                               event_id  batch_id car_plate  camera_id  \
0  d86e8cdb-c387-4ccb-b35a-346302238824         1   UTT 229          3   

                   timestamp  speed_reading  
0 2024-01-01 08:00:54.958092          130.8  

[INFO] Publishing batch #2 (1 records)...
[INFO] Batch #2 sent. Sleeping 5s...
[DATA] Batch #2:
                               event_id  batch_id car_plate  camera_id  \
1  f804c4e8-022d-4243-b43e-9ec0860ce6ff         2   CJW 924          3   

                   timestamp  speed_reading  
1 2024-01-01 08:00:47.168827          164.5  

[INFO] Publishing batch #3 (2 records)...
[INFO] Batch #3 sent. Sleeping 5s...
[DATA] Batch #3:
                               event_id  batch_id car_plate  camera_id  \
2  a246b391-fb7c-44a3-8f8b-ddf40bba3d05         3   WA 0712          3   
3  e0dae388-6036-4b7d-ba58-bed0808c1611         3     KZE 5          3   

     

[INFO] Publishing batch #23 (2 records)...
[INFO] Batch #23 sent. Sleeping 5s...
[DATA] Batch #23:
                                event_id  batch_id car_plate  camera_id  \
28  4154c60d-205c-4b9a-b6f5-3fce0de4602f        23    WB 418          3   
29  81b00cc5-6329-4799-a1ef-90084142d5c7        23   UQV 232          3   

                    timestamp  speed_reading  
28 2024-01-01 08:08:49.468866          135.5  
29 2024-01-01 08:08:49.576904          169.8  

[INFO] Publishing batch #24 (2 records)...
[INFO] Batch #24 sent. Sleeping 5s...
[DATA] Batch #24:
                                event_id  batch_id car_plate  camera_id  \
30  6929adcc-9074-432d-b802-2bff7bc183eb        24   DWT 789          3   
31  a47f07b1-cf3e-45a5-8ce9-fe6457c4d960        24    VWM 13          3   

                    timestamp  speed_reading  
30 2024-01-01 08:09:00.148574          130.4  
31 2024-01-01 08:09:01.054869          110.9  

[INFO] Publishing batch #25 (2 records)...
[INFO] Batch #25 sent. 

[INFO] Publishing batch #44 (1 records)...
[INFO] Batch #44 sent. Sleeping 5s...
[DATA] Batch #44:
                                event_id  batch_id car_plate  camera_id  \
58  78858d73-7233-4d39-8c20-2891278930a9        44      BU 9          3   

                    timestamp  speed_reading  
58 2024-01-01 08:21:01.968054           92.8  

[INFO] Publishing batch #45 (2 records)...
[INFO] Batch #45 sent. Sleeping 5s...
[DATA] Batch #45:
                                event_id  batch_id car_plate  camera_id  \
59  b4cba9e5-4be3-4a81-a70f-f78372052199        45    VLY 61          3   
60  f7791bb7-92e6-4842-9c83-0512cda7f529        45  HLZ 1649          3   

                    timestamp  speed_reading  
59 2024-01-01 08:21:08.699219           80.9  
60 2024-01-01 08:21:10.109678           89.3  

[INFO] Publishing batch #46 (1 records)...
[INFO] Batch #46 sent. Sleeping 5s...
[DATA] Batch #46:
                                event_id  batch_id car_plate  camera_id  \
61  b1a191f8-6