In [1]:
import sys
import os
import threading
import time
import json
from kafka3 import KafkaProducer, KafkaConsumer
from datetime import datetime as dt
from pymongo import MongoClient
from pyspark.sql import Row
import pandas as pd
import os
# add the folder where util.py lives
#sys.path.append(os.path.join(os.getcwd(), 'fit3182-a2'))
from util import kafkaProducer

In [2]:
from kafka3.admin import KafkaAdminClient, NewTopic
import time
def recreate_topics(bootstrap_servers:"str | list[str]", topics:"str | list[str]",
                    num_partitions:int=1, replication_factor:int=1,
                    settle_seconds:float=2):
    """
    Purge Kafka topics by deleting them and then creating them again.

    Parameters
    ----------
    bootstrap_servers : str or list of str
        A single "host:port" string (wrapped into a one-element list) or an
        already-built list, passed to KafkaAdminClient as-is.
    topics : str or iterable of str
        Topic name(s) to recreate. A single name given as a plain string is
        treated as one topic (it is NOT iterated character by character).
    num_partitions : int
        Partition count for every recreated topic.
    replication_factor : int
        Replication factor for every recreated topic.
    settle_seconds : float
        Pause between the delete request and the create request. This is a
        fixed wait, not a confirmation that deletion finished; if the topics
        still exist the create call fails and that error is re-raised.

    Raises
    ------
    Exception
        Whatever admin.create_topics raises is logged and re-raised.
        Failures of the delete step are only logged (best effort), so a
        topic that does not exist yet does not abort the recreate.
    """
    # Ensure we pass a list of host:port strings
    if isinstance(bootstrap_servers, str):
        bs_list = [bootstrap_servers]
    else:
        bs_list = bootstrap_servers

    # Normalise topic names the same way: a bare string is ONE topic.
    if isinstance(topics, str):
        topic_list = [topics]
    else:
        topic_list = list(topics)

    if not topic_list:
        # Nothing to purge; avoid opening a broker connection for a no-op.
        print("[WARN] No topics given; nothing to recreate.")
        return

    admin = KafkaAdminClient(bootstrap_servers=bs_list)
    # A single outer try/finally guarantees the admin connection is closed on
    # every exit path, including an interrupt during the settle pause.
    try:
        try:
            admin.delete_topics(topic_list)
            print(f"[INFO] Deletion of topics {topic_list} initiated.")
        except Exception as e:
            print(f"[WARN] Could not delete topics: {e}")

        # time.sleep rejects negative values, so clamp at zero.
        time.sleep(max(0, settle_seconds))

        new_topics = [
            NewTopic(name=t, num_partitions=num_partitions, replication_factor=replication_factor)
            for t in topic_list
        ]
        try:
            admin.create_topics(new_topics)
            print(f"[INFO] Recreated topics {topic_list}.")
        except Exception as e:
            print(f"[ERROR] Failed to create topics {topic_list}: {e}")
            raise
    finally:
        admin.close()


In [3]:
# --- Shared settings ---------------------------------------------------------
kafka_server = "172.17.0.1:9092"
# Handed to every kafkaProducer as its last argument
# (presumably seconds between batches -- confirm in util.py).
batch_interval = 5

# --- Per-stream settings: CSV source, producer id, destination topic ---------
csv_path_a, producer_id_a, topic_a = "data/camera_event_A.csv", "A", "camera_event_a"
csv_path_b, producer_id_b, topic_b = "data/camera_event_B.csv", "B", "camera_event_b"
csv_path_c, producer_id_c, topic_c = "data/camera_event_C.csv", "C", "camera_event_c"

topics = [topic_a, topic_b, topic_c]

# Start from empty topics: drop and recreate all three before producing.
recreate_topics(
    kafka_server,
    topics,
    num_partitions=1,
    replication_factor=1,
)

# One producer object per camera stream.
producer_a = kafkaProducer(csv_path_a, kafka_server, producer_id_a, topic_a, batch_interval)
producer_b = kafkaProducer(csv_path_b, kafka_server, producer_id_b, topic_b, batch_interval)
producer_c = kafkaProducer(csv_path_c, kafka_server, producer_id_c, topic_c, batch_interval)

# Run the three producers concurrently, one daemon thread each.
threads = [
    threading.Thread(target=producer.produce_batches, daemon=True)
    for producer in (producer_a, producer_b, producer_c)
]
for worker in threads:
    worker.start()

# Block this cell until every producer thread has finished.
for worker in threads:
    worker.join()

[INFO] Publishing batch #809 (1 records)...
[INFO] Publishing batch #810 (1 records)...
[INFO] Batch #809 sent. Sleeping 5s...
[DATA] Batch #809:
                                  event_id  batch_id car_plate  camera_id  \
1537  621f11e0-af78-492b-91ef-90b2df6d5812       809   QS 3306          2   

                      timestamp  speed_reading  
1537 2024-01-02 09:03:10.104034           91.5  

[INFO] Batch #810 sent. Sleeping 5s...
[DATA] Batch #810:
                                  event_id  batch_id car_plate  camera_id  \
1080  dc9615b3-575f-4db9-adc8-a69596a7387d       810    YP 332          3   

                      timestamp  speed_reading  
1080 2024-01-01 14:39:27.713090          141.5  

[INFO] Publishing batch #804 (20 records)...
[INFO] Batch #804 sent. Sleeping 5s...
[DATA] Batch #804:
                                   event_id  batch_id car_plate  camera_id  \
16060  0b0fc71c-f895-4979-87d5-998d29ca612d       804    QDR 31          1   
16061  7145d90b-02ee-462b-86a

<BrokerConnection node_id=1 host=172.17.0.1:9092 <connected> [IPv4 ('172.17.0.1', 9092)]> timed out after 30000 ms. Closing connection.
<BrokerConnection node_id=1 host=172.17.0.1:9092 <connected> [IPv4 ('172.17.0.1', 9092)]> timed out after 30000 ms. Closing connection.
<BrokerConnection node_id=1 host=172.17.0.1:9092 <connected> [IPv4 ('172.17.0.1', 9092)]> timed out after 30000 ms. Closing connection.
<BrokerConnection node_id=1 host=172.17.0.1:9092 <connected> [IPv4 ('172.17.0.1', 9092)]> timed out after 30000 ms. Closing connection.
Node 1 connection failed -- refreshing metadata
Node 1 connection failed -- refreshing metadata
<BrokerConnection node_id=1 host=172.17.0.1:9092 <connected> [IPv4 ('172.17.0.1', 9092)]> timed out after 30000 ms. Closing connection.
Node 1 connection failed -- refreshing metadata


[INFO] Batch #6 sent. Sleeping 5s...
[INFO] Batch #6 sent. Sleeping 5s...
[INFO] Batch #6 sent. Sleeping 5s...


<BrokerConnection node_id=1 host=172.17.0.1:9092 <connected> [IPv4 ('172.17.0.1', 9092)]> timed out after 30000 ms. Closing connection.


[INFO] Batch #821 sent. Sleeping 5s...


<BrokerConnection node_id=1 host=172.17.0.1:9092 <connected> [IPv4 ('172.17.0.1', 9092)]> timed out after 30000 ms. Closing connection.
Node 1 connection failed -- refreshing metadata
Node 1 connection failed -- refreshing metadata
Node 1 connection failed -- refreshing metadata


[DATA] Batch #6:
                               event_id  batch_id car_plate  camera_id  \
7  052e725b-3098-4f7d-9670-ebcdf30f022c         6   QE 1820          3   

                   timestamp  speed_reading  
7 2024-01-01 08:01:50.367291           65.6  

[DATA] Batch #6:
                                event_id  batch_id car_plate  camera_id  \
15  8916338e-f0f5-4fac-8d8d-cdb6c03d1d07         6     KRN 7          2   
16  6626efb2-32f3-4bda-8a34-d79eedf615c8         6   QE 1820          2   

                    timestamp  speed_reading  
15 2024-01-01 08:00:52.541902           75.5  
16 2024-01-01 08:00:57.368642           66.1  

[DATA] Batch #6:
                                 event_id  batch_id car_plate  camera_id  \
100  0b08e2f7-9b0d-4ce6-b846-105e39b80d77         6     KNZ 1          1   
101  4fb23ec1-4956-466b-9ad9-07d534e0872b         6    WK 223          1   
102  8044f404-e5e5-496d-bd76-7e92fb9ad4ff         6   IF 7805          1   
103  b668da0a-8cd1-4baa-90c8-078d2e

<BrokerConnection node_id=1 host=172.17.0.1:9092 <connected> [IPv4 ('172.17.0.1', 9092)]> timed out after 30000 ms. Closing connection.


[INFO] Publishing batch #1102 (2 records)...[INFO] Publishing batch #1101 (1 records)...

[INFO] Publishing batch #283 (20 records)...
[INFO] Publishing batch #287 (1 records)...
[INFO] Publishing batch #287 (1 records)...
[INFO] Batch #1092 sent. Sleeping 5s...[INFO] Batch #287 sent. Sleeping 5s...

[INFO] Batch #1102 sent. Sleeping 5s...
[INFO] Batch #283 sent. Sleeping 5s...
[INFO] Batch #1101 sent. Sleeping 5s...
[DATA] Batch #287:
                                 event_id  batch_id car_plate  camera_id  \
557  e08be35b-5b84-4eed-9660-3fac2cde1fc4       287  DQG 6317          2   

                     timestamp  speed_reading  
557 2024-01-01 11:35:01.528288           70.3  

[DATA] Batch #1102:
                                  event_id  batch_id car_plate  camera_id  \
2067  b639b674-e55c-491d-8cf2-bbc2917b6377      1102   UHI 504          2   
2068  bf6baee8-e29a-423d-81a3-c7fb34d5fd31      1102     RE 95          2   

                      timestamp  speed_reading  
2067 2024

<BrokerConnection node_id=1 host=172.17.0.1:9092 <connected> [IPv4 ('172.17.0.1', 9092)]> timed out after 30000 ms. Closing connection.
Node 1 connection failed -- refreshing metadata
<BrokerConnection node_id=1 host=172.17.0.1:9092 <connected> [IPv4 ('172.17.0.1', 9092)]> timed out after 30000 ms. Closing connection.
<BrokerConnection node_id=1 host=172.17.0.1:9092 <connected> [IPv4 ('172.17.0.1', 9092)]> timed out after 30000 ms. Closing connection.


[INFO] Batch #293 sent. Sleeping 5s...
[INFO] Batch #1108 sent. Sleeping 5s...


Node 1 connection failed -- refreshing metadata
Node 1 connection failed -- refreshing metadata


[DATA] Batch #294:
                                 event_id  batch_id car_plate  camera_id  \
570  52666907-0fe5-4c18-96e8-103a7dc8d304       294    RFQ 34          2   

                     timestamp  speed_reading  
570 2024-01-01 11:34:56.287956           83.4  

[DATA] Batch #293:
                                 event_id  batch_id car_plate  camera_id  \
391  4e4d1eb4-896d-483e-a640-8ca31a675492       293    PB 116          3   
392  ac24be2e-e316-4d21-a9f3-1a938efc0c69       293    VHM 34          3   

                     timestamp  speed_reading  
391 2024-01-01 10:37:23.418510          110.6  
392 2024-01-01 10:37:25.391770          110.8  

[DATA] Batch #1109:
                                  event_id  batch_id car_plate  camera_id  \
2083  252cf82c-6b27-4970-bced-184dc4ac0329      1109   OUN 083          2   
2084  113a4285-20f6-4e1a-8bab-5df5707111b7      1109      NQ 3          2   
2085  ccc46ad4-70e4-4188-b91e-f0f816a52ff8      1109     FQ 52          2   

         

<BrokerConnection node_id=1 host=172.17.0.1:9092 <connected> [IPv4 ('172.17.0.1', 9092)]> timed out after 30000 ms. Closing connection.


[INFO] Publishing batch #297 (1 records)...
[INFO] Publishing batch #1111 (1 records)...[INFO] Publishing batch #293 (20 records)...
[INFO] Publishing batch #1102 (20 records)...



<BrokerConnection node_id=1 host=172.17.0.1:9092 <connected> [IPv4 ('172.17.0.1', 9092)]> timed out after 30000 ms. Closing connection.
<BrokerConnection node_id=1 host=172.17.0.1:9092 <connected> [IPv4 ('172.17.0.1', 9092)]> timed out after 30000 ms. Closing connection.
Node 1 connection failed -- refreshing metadata
Node 1 connection failed -- refreshing metadata
Node 1 connection failed -- refreshing metadata
