## **Task 1.2: Producer 2**

In [1]:
from time import sleep
from kafka3 import KafkaProducer
import random
import pandas as pd
from datetime import time
import json


ip_address = "10.192.68.151" 

topic_name = 'climate_hotspot'  #use the same topic for all producers
producer_identifier = "producer_2"


def prepare_streaming_data():
        
    #read csv file and extract rows
    hotspot_aqua_csv = pd.read_csv("hotspot_AQUA_streaming.csv").iterrows()

    hotspot_aqua_data = []

    #get all streaming data rows as dictionaries (json) and store them
    for id, data_row in hotspot_aqua_csv:
        row_dict = data_row.to_dict()
        hotspot_aqua_data.append(row_dict)

    return hotspot_aqua_data

#add additional attributes to each data document
def add_additional_data(data_document):

    #generate random time
    hour = random.randint(0, 23)
    minutes = random.randint(0, 59)
    seconds = random.randint(0, 59)
    random_time = time(hour, minutes, seconds)

    #add random time and producer identifier label
    data_document["created_time"] = random_time.strftime("%H:%M:%S")    #convert to string to make it json serialisable
    data_document["producer_identifier"] = producer_identifier

    return data_document


def publish_message(producer_instance, kafka_topic, data):
    try:
        producer_instance.send(kafka_topic, value=data)  #send the message to the specified topic
        producer_instance.flush()  #make sure that the message is actually sent to Kafka before moving on

        print('Success publishing message. Data: ' + str(data))
        
    except Exception as e:
        print('Error! Message could not be published')
        print(str(e))
        
def connect_kafka_producer():
    _producer = None
    try:
        #create producer at given address port
        _producer = KafkaProducer(bootstrap_servers=[f'{ip_address}:9092'],
                                  value_serializer=lambda x: json.dumps(x).encode('ascii'),  #encode as bytes because Kafka messages are sent as byte arrays
                                  api_version=(0, 10))
        
    except Exception as e:
        print('Could not create producer. Error connecting to Kafka.')
        print(str(e))

    finally:
        return _producer
    
if __name__ == '__main__':
   
    streaming_climate_data = prepare_streaming_data()  #get all streaming data as dictionary objects (like json)
    print('Publishing records..')
    producer = connect_kafka_producer()
    
    while True:
        #publish data in random intervals (1 to 20 seconds)
        publish_interval = random.randint(1, 20)  

        #choose document randomly
        random_doc_id = random.randint(0, len(streaming_climate_data)-1) 
        random_data_doc = streaming_climate_data[random_doc_id]

        data_doc = add_additional_data(random_data_doc)
        publish_message(producer, topic_name, data_doc)  #publish chosen data document

        sleep(publish_interval)  

Publishing records..
Success publishing message. Data: {'latitude': -37.7126, 'longitude': 141.6103, 'confidence': 76.0, 'surface_temperature_celcius': 50.0, 'created_time': '04:35:37', 'producer_identifier': 'producer_2'}
Success publishing message. Data: {'latitude': -36.6248, 'longitude': 142.2042, 'confidence': 71.0, 'surface_temperature_celcius': 46.0, 'created_time': '14:58:08', 'producer_identifier': 'producer_2'}
Success publishing message. Data: {'latitude': -36.9175, 'longitude': 142.6639, 'confidence': 77.0, 'surface_temperature_celcius': 50.0, 'created_time': '22:08:19', 'producer_identifier': 'producer_2'}
Success publishing message. Data: {'latitude': -37.34, 'longitude': 149.3668, 'confidence': 67.0, 'surface_temperature_celcius': 74.0, 'created_time': '09:06:35', 'producer_identifier': 'producer_2'}
Success publishing message. Data: {'latitude': -37.8167, 'longitude': 142.1718, 'confidence': 86.0, 'surface_temperature_celcius': 61.0, 'created_time': '03:41:47', 'produce

KeyboardInterrupt: 