In [1]:
import os
import pandas as pd
from kafka import KafkaProducer
from time import sleep

In [2]:
class KafkaDataStreamer:
    """
    Stream the rows of CSV files to a Kafka topic.

    Every CSV row is serialised to a JSON object (via ``Series.to_json``),
    encoded as UTF-8 and sent as one message to the configured topic.
    """
    def __init__(self, bootstrap_servers, topic):
        """
        Initialize the Kafka producer
        :param bootstrap_servers: bootstrap servers for the Kafka cluster
        :param topic: Kafka topic to which the data is sent
        """
        self.bootstrap_servers = bootstrap_servers
        self.topic = topic
        # Initialize the Kafka producer (the API version is pinned explicitly)
        self.producer = KafkaProducer(bootstrap_servers=self.bootstrap_servers, api_version=(0, 10, 1))

    def stream_data(self, data_paths, progress_every=10000):
        """
        Read each CSV file and send its rows, in order, to the Kafka topic
        :param data_paths: an iterable of CSV file paths; a single path
            (``str`` or ``os.PathLike``) is also accepted
        :param progress_every: print a progress line every this many rows
            (the counter restarts at 0 for each file); a falsy value
            disables progress output
        :return: None
        """
        # A bare string would otherwise be iterated character by character.
        if isinstance(data_paths, (str, os.PathLike)):
            data_paths = [data_paths]

        try:
            for data_path in data_paths:
                data = pd.read_csv(data_path)
                # Count rows positionally instead of relying on the DataFrame
                # index, which is not guaranteed to be a 0-based integer range.
                for row_number, (_, row) in enumerate(data.iterrows()):
                    message = row.to_json()
                    self.producer.send(self.topic, value=message.encode('utf-8'))
                    if progress_every and row_number % progress_every == 0:
                        print('Sent message #{}'.format(row_number))
        finally:
            # send() only enqueues the record; flush so that everything queued
            # so far is actually handed to the broker before returning, even
            # when reading a later file fails.
            self.producer.flush()

In [3]:
# Folder holding the test CSV files (relative path).
data_folder = '../../hai_dataset/hai/hai-21.03'
test_data_filenames = ['test{}.csv'.format(number) for number in range(1, 6)]
# Build one path per file, normalised to forward slashes on every platform.
data_paths = [
    os.path.join(data_folder, name).replace(os.sep, '/')
    for name in test_data_filenames
]

# Kafka connection settings -- update with your Kafka bootstrap servers.
bootstrap_servers = ['localhost:9092']
topic = 'hai-input'

# Stream the stored data to Kafka
streamer = KafkaDataStreamer(bootstrap_servers=bootstrap_servers, topic=topic)
streamer.stream_data(data_paths)

Sent message #0
Sent message #10000
Sent message #20000
Sent message #30000
Sent message #40000
Sent message #0
Sent message #10000
Sent message #20000
Sent message #30000
Sent message #40000
Sent message #50000
Sent message #60000
Sent message #70000
Sent message #80000
Sent message #90000
Sent message #100000
Sent message #110000
Sent message #0
Sent message #10000
Sent message #20000
Sent message #30000
Sent message #40000
Sent message #50000
Sent message #60000
Sent message #70000
Sent message #80000
Sent message #90000
Sent message #100000
Sent message #0
Sent message #10000
Sent message #20000
Sent message #30000
Sent message #0
Sent message #10000
Sent message #20000
Sent message #30000
Sent message #40000
Sent message #50000
Sent message #60000
Sent message #70000
Sent message #80000
Sent message #90000
