In [1]:
# Prerequisite (run once, then re-comment): %pip install confluent_kafka

In [2]:
from confluent_kafka import Producer
import csv

# Kafka connection settings: broker address and destination topic
broker = 'localhost:9092'
topic = 'csv_topic'  # Change to your Kafka topic

# Build the producer client from an explicit configuration mapping
producer_config = {'bootstrap.servers': broker}
producer = Producer(producer_config)

# Define a function to read the CSV file and send its data to Kafka
def send_data_to_kafka(file_path):
    """Publish every row of a CSV file to Kafka, one message per row.

    Each row is serialised as comma-separated ``column:value`` pairs and is
    keyed by the value of its ``Datetime`` column. Messages are produced with
    the module-level ``producer`` to the module-level ``topic``.

    Args:
        file_path: Path of the CSV file to read. The first line must be a
            header row that includes a ``Datetime`` column.

    Raises:
        KeyError: If a row has no ``Datetime`` column.
        OSError: If the file cannot be opened.
    """
    # newline='' is what the csv module expects, so that newlines embedded in
    # quoted fields are parsed correctly.
    with open(file_path, 'r', newline='') as csv_file:
        csv_reader = csv.DictReader(csv_file)
        for row in csv_reader:
            # Convert the CSV row to a message that can be sent to Kafka
            message_value = ','.join(f"{key}:{value}" for key, value in row.items())
            try:
                producer.produce(topic=topic, value=message_value, key=row['Datetime'])
            except BufferError:
                # The local producer queue is full: wait for the in-flight
                # messages to be delivered, then retry this row once.
                producer.flush()
                producer.produce(topic=topic, value=message_value, key=row['Datetime'])
            # Serve delivery reports so the local queue keeps draining while
            # we are still producing.
            producer.poll(0)

    # Block until every queued message has been delivered (or has failed).
    producer.flush()

# Location of the input CSV (a path relative to the current working directory)
csv_file_path = '../data/powerconsumption.csv'

# Publish every row of that file to Kafka
send_data_to_kafka(csv_file_path)

In [3]:
import json

import pandas as pd
from confluent_kafka import Consumer, KafkaError, KafkaException

In [4]:
def msg_dict(data_string):
    """Parse a ``key:value,key:value`` message string into a dictionary.

    Each comma-separated pair is split on its FIRST colon only, so a value
    that itself contains colons (for example ``Datetime:1/1/2017 0:00``) is
    kept intact regardless of what the key is called.

    Args:
        data_string: Message text made of comma-separated ``key:value`` pairs.

    Returns:
        A dict mapping each key to its value (both strings). If a key occurs
        more than once, the last occurrence wins. Empty segments (an empty
        message or a stray comma) are ignored.

    Raises:
        ValueError: If a non-empty pair contains no ``:`` separator.
    """
    data_dict = {}
    for pair in data_string.split(','):
        if not pair:
            # Nothing between two commas (or an empty message): no data here.
            continue
        key, separator, value = pair.partition(':')
        if not separator:
            raise ValueError(f"Malformed key:value pair: {pair!r}")
        data_dict[key] = value

    return data_dict

In [5]:
def kafka_consumer_dataframe(idle_timeout=None):
    """Consume messages from the ``csv_topic`` topic into a DataFrame.

    Connects to the broker at ``localhost:9092`` as consumer group
    ``processed-data-group``, starting from the earliest offset when the group
    has no committed position. Every message payload is decoded as UTF-8 and
    parsed with ``msg_dict``; progress is printed every 1000 rows.

    Polling stops when the kernel is interrupted (``KeyboardInterrupt``), when
    a Kafka error other than end-of-partition is received, or, if
    ``idle_timeout`` is given, when no message has arrived for that long.

    Args:
        idle_timeout: Optional number of seconds without any new message after
            which consumption stops. It is measured in whole poll intervals of
            one second. ``None`` (the default) polls until interrupted or
            until a Kafka error occurs.

    Returns:
        pandas.DataFrame with one row per consumed message and one column per
        key produced by ``msg_dict``.
    """
    consumer_conf = {'bootstrap.servers': 'localhost:9092',
                     'group.id': 'processed-data-group',
                     'auto.offset.reset': 'earliest'}
    poll_interval = 1.0
    idle_seconds = 0.0
    list_msg = []

    c = Consumer(consumer_conf)
    try:
        # Subscribing inside the try guarantees the consumer is closed even if
        # the subscription itself fails.
        c.subscribe(['csv_topic'])
        while True:
            msg = c.poll(poll_interval)
            if msg is None:
                # Nothing arrived within this poll interval.
                idle_seconds += poll_interval
                if idle_timeout is not None and idle_seconds >= idle_timeout:
                    break
                continue
            if msg.error():
                # The end-of-partition code is defined on KafkaError, not on
                # KafkaException; it is informational, so keep polling.
                if msg.error().code() == KafkaError._PARTITION_EOF:
                    continue
                print(msg.error())
                break

            idle_seconds = 0.0
            payload = msg.value()
            if payload is None:
                # A message without a value carries no row data.
                continue
            list_msg.append(msg_dict(payload.decode('utf-8')))

            if len(list_msg) % 1000 == 0:
                print(f"Rows Consumed: {len(list_msg)}")
    except KeyboardInterrupt:
        # A manual interrupt is the normal way to stop an open-ended run.
        pass
    finally:
        c.close()
    print(f"Total:{len(list_msg)}")
    return pd.DataFrame(list_msg)


In [6]:
# Called without arguments, this keeps polling until the kernel is interrupted
# or a Kafka error occurs, then returns everything consumed so far.
df_stream = kafka_consumer_dataframe()

Rows Consumed: 1000
Rows Consumed: 2000
Rows Consumed: 3000
Rows Consumed: 4000
Rows Consumed: 5000
Rows Consumed: 6000
Rows Consumed: 7000
Rows Consumed: 8000
Rows Consumed: 9000
Rows Consumed: 10000
Rows Consumed: 11000
Rows Consumed: 12000
Rows Consumed: 13000
Rows Consumed: 14000
Rows Consumed: 15000
Rows Consumed: 16000
Rows Consumed: 17000
Rows Consumed: 18000
Rows Consumed: 19000
Rows Consumed: 20000
Rows Consumed: 21000
Rows Consumed: 22000
Rows Consumed: 23000
Rows Consumed: 24000
Rows Consumed: 25000
Rows Consumed: 26000
Rows Consumed: 27000
Rows Consumed: 28000
Rows Consumed: 29000
Rows Consumed: 30000
Rows Consumed: 31000
Rows Consumed: 32000
Rows Consumed: 33000
Rows Consumed: 34000
Rows Consumed: 35000
Rows Consumed: 36000
Rows Consumed: 37000
Rows Consumed: 38000
Rows Consumed: 39000
Rows Consumed: 40000
Rows Consumed: 41000
Rows Consumed: 42000
Rows Consumed: 43000
Rows Consumed: 44000
Rows Consumed: 45000
Rows Consumed: 46000
Rows Consumed: 47000
Rows Consumed: 48000
R

In [7]:
# Show the consumed rows; the rendered output abbreviates the middle of a long frame with "..."
df_stream

Unnamed: 0,Datetime,Temperature,Humidity,WindSpeed,GeneralDiffuseFlows,DiffuseFlows,PowerConsumption_Zone1,PowerConsumption_Zone2,PowerConsumption_Zone3
0,1/1/2017 0:00,6.559,73.8,0.083,0.051,0.119,34055.6962,16128.87538,20240.96386
1,1/1/2017 0:10,6.414,74.5,0.083,0.07,0.085,29814.68354,19375.07599,20131.08434
2,1/1/2017 0:20,6.313,74.5,0.08,0.062,0.1,29128.10127,19006.68693,19668.43373
3,1/1/2017 0:30,6.121,75,0.083,0.091,0.096,28228.86076,18361.09422,18899.27711
4,1/1/2017 0:40,5.921,75.7,0.081,0.048,0.085,27335.6962,17872.34043,18442.40964
...,...,...,...,...,...,...,...,...,...
52411,12/30/2017 23:10,7.01,72.4,0.08,0.04,0.096,31160.45627,26857.3182,14780.31212
52412,12/30/2017 23:20,6.947,72.6,0.082,0.051,0.093,30430.41825,26124.57809,14428.81152
52413,12/30/2017 23:30,6.9,72.8,0.086,0.084,0.074,29590.87452,25277.69254,13806.48259
52414,12/30/2017 23:40,6.758,73,0.08,0.066,0.089,28958.1749,24692.23688,13512.60504
