# FIT3182 Assignment 3 Part B

- Name: Ong Di Sheng
- Student ID: 31109667
- Email: dong0009@student.monash.edu

## Task 1: Processing Data Stream 

### ***a. Event Producer 1*** ###

*Write a Python program that loads all the data from climate_streaming.csv and randomly (with replacement) feeds the data to the stream every 10 seconds. You will need to append additional information, such as producer information to identify the producer, and the creation date.*

In [None]:
# import libraries 
from time import sleep
from json import dumps
from kafka3 import KafkaProducer
import random
import datetime as dt
import pandas as pd
from pymongo import MongoClient

# Host IP shared by the Kafka broker and MongoDB connections made below
# (replace with your own IP address)
hostip = '192.168.1.110'

def publish_message(producer_instance, topic_name, key, value):
    """Send one key/value record to a Kafka topic and report the outcome.

    ``key`` and ``value`` are UTF-8 encoded before sending, and the producer
    is flushed immediately after the send. Any exception raised along the way
    is caught and printed instead of propagating, so the function always
    returns ``None``.

    Args:
        producer_instance: object exposing ``send(topic, key=..., value=...)``
            and ``flush()``.
        topic_name: topic the record is published to.
        key: record key, as text.
        value: record payload, as text.
    """
    try:
        # the producer is handed raw bytes, so encode both parts up front
        payload = {
            'key': bytes(key, encoding='utf-8'),
            'value': bytes(value, encoding='utf-8'),
        }

        # send to the topic, then flush so the record leaves the buffer now
        producer_instance.send(topic_name, **payload)
        producer_instance.flush()
        print(f'Message published successfully. Data: {value} Key: {key}')

    except Exception as error:
        # best effort: report the failure and carry on
        print('Exception in publishing message.')
        print(str(error))

def connect_kafka_producer():
    """Create a Kafka producer connected to the broker at ``hostip:9092``.

    Returns:
        The ``KafkaProducer`` instance, or ``None`` when creating it raised
        an exception (the error is printed rather than propagated).
    """
    # NOTE: the result is returned from the try/except branches rather than
    # from a `finally` clause. A `return` inside `finally` would silently
    # discard any in-flight exception, including KeyboardInterrupt.
    try:
        # create kafka producer instance
        return KafkaProducer(bootstrap_servers=[f'{hostip}:9092'],
                             api_version=(0, 10))

    except Exception as ex:
        print('Exception while connecting Kafka.')
        print(str(ex))
        return None

def find_latest_date():
    """Return the most recent ``date`` stored in the Part A collection.

    Connects to MongoDB at ``hostip:27017`` and reads the
    ``embedded_climate_hotspot`` collection of ``fit3182_assignment_db``.

    Returns:
        The ``date`` field of the document that sorts first by ``date`` in
        descending order, or ``None`` when the collection is empty.
        Presumably a ``datetime`` (the caller adds a ``timedelta`` to it) --
        confirm against the Part A loader.
    """
    # create connection with mongodb
    # replace with your own IP address and port number
    client = MongoClient(hostip, 27017)
    try:
        # access collection created in Part A
        collection = client.fit3182_assignment_db.embedded_climate_hotspot

        # sort by date in decreasing order and keep only the first document
        newest = collection.find_one(sort=[('date', -1)])
        return newest['date'] if newest is not None else None
    finally:
        # always release the connection, even if the query fails
        client.close()

def get_climate_streaming_data(csv_path='climate_streaming.csv'):
    """Load the climate streaming CSV as a list of cleaned record dicts.

    The raw precipitation column holds a number with a one-character flag
    appended (for example `` 0.12G``). Each record has that column replaced by
    two keys added at the end of the dict: ``precipitation`` (the numeric part
    as ``float``) and ``precipitation_flag`` (the final character).

    Args:
        csv_path: path of the CSV file to read; defaults to the assignment's
            ``climate_streaming.csv`` in the working directory.

    Returns:
        A list with one dict per CSV row.
    """
    # read climate streaming csv file and convert to a list of dictionaries
    records = pd.read_csv(csv_path).to_dict('records')

    for record in records:
        # the supplied file's header has a trailing space ('precipitation ');
        # accept the clean spelling too, so a corrected file still loads
        raw_key = 'precipitation ' if 'precipitation ' in record else 'precipitation'
        raw_value = record.pop(raw_key)

        # separate the raw value into numeric value and flag
        record['precipitation'] = float(raw_value[:-1])
        record['precipitation_flag'] = raw_value[-1]

    return records

if __name__ == '__main__':
    # Event Producer 1: every 10 seconds, publish one randomly chosen climate
    # record (sampled with replacement) to the 'climate' topic. Each record is
    # stamped with a date one day later than the previous one, starting from
    # the day after the latest date stored in MongoDB.

    # define topic name
    topic = 'climate'

    # connect to kafka producer; without one, every publish would fail, so stop
    climate_producer = connect_kafka_producer()
    if climate_producer is None:
        raise SystemExit('Unable to connect to Kafka; no records published.')

    try:
        # retrieve latest date from db created in Part A
        latest_date = find_latest_date()
        if latest_date is None:
            raise SystemExit('No dated document found in MongoDB; '
                             'cannot derive the next date.')

        # retrieve climate streaming data
        climate_streaming = get_climate_streaming_data()
        if not climate_streaming:
            raise SystemExit('Climate streaming data is empty; nothing to publish.')

        print('Publishing records..')

        while True:

            # generate new date
            latest_date += dt.timedelta(days=1)

            # pick a random record; copy it so the loaded data is not mutated
            rand_climate_data = dict(random.choice(climate_streaming))

            # include date information as a dd-mm-YYYY string
            rand_climate_data['date'] = latest_date.strftime('%d-%m-%Y')

            # include producer information as key
            # publish key value pair
            publish_message(climate_producer, topic, 'climate', dumps(rand_climate_data))

            # publish data every 10 seconds
            sleep(10)

    except KeyboardInterrupt:
        # stopping the cell/script is the normal way to end the stream
        print('Producer stopped.')

    finally:
        # release the broker connection on every exit path
        climate_producer.close()


Publishing records..
Message published successfully. Data: {"latitude": -36.0714, "longitude": 145.7665, "air_temperature_celcius": 18, "relative_humidity": 54.6, "windspeed_knots": 12.0, "max_wind_speed": 25.1, "GHI_w/m2": 148, "precipitation": 0.12, "precipitation_flag": "G", "date": "02-01-2023"} Key: climate
Message published successfully. Data: {"latitude": -36.152, "longitude": 143.578, "air_temperature_celcius": 15, "relative_humidity": 56.1, "windspeed_knots": 5.1, "max_wind_speed": 9.9, "GHI_w/m2": 122, "precipitation": 0.0, "precipitation_flag": "I", "date": "03-01-2023"} Key: climate
Message published successfully. Data: {"latitude": -36.575, "longitude": 146.6668, "air_temperature_celcius": 18, "relative_humidity": 53.6, "windspeed_knots": 7.9, "max_wind_speed": 15.9, "GHI_w/m2": 150, "precipitation": 0.0, "precipitation_flag": "G", "date": "04-01-2023"} Key: climate
Message published successfully. Data: {"latitude": -36.9194, "longitude": 143.6131, "air_temperature_celcius

Message published successfully. Data: {"latitude": -37.461, "longitude": 148.105, "air_temperature_celcius": 10, "relative_humidity": 45.6, "windspeed_knots": 4.5, "max_wind_speed": 7.0, "GHI_w/m2": 89, "precipitation": 0.0, "precipitation_flag": "I", "date": "31-01-2023"} Key: climate
Message published successfully. Data: {"latitude": -37.368, "longitude": 148.05, "air_temperature_celcius": 10, "relative_humidity": 41.4, "windspeed_knots": 9.4, "max_wind_speed": 14.0, "GHI_w/m2": 92, "precipitation": 0.0, "precipitation_flag": "I", "date": "01-02-2023"} Key: climate
Message published successfully. Data: {"latitude": -37.1104, "longitude": 141.828, "air_temperature_celcius": 11, "relative_humidity": 46.2, "windspeed_knots": 10.5, "max_wind_speed": 15.0, "GHI_w/m2": 97, "precipitation": 0.28, "precipitation_flag": "G", "date": "02-02-2023"} Key: climate
Message published successfully. Data: {"latitude": -37.749, "longitude": 148.297, "air_temperature_celcius": 17, "relative_humidity": 5

Message published successfully. Data: {"latitude": -37.758, "longitude": 144.693, "air_temperature_celcius": 20, "relative_humidity": 58.8, "windspeed_knots": 11.5, "max_wind_speed": 15.9, "GHI_w/m2": 159, "precipitation": 0.0, "precipitation_flag": "I", "date": "01-03-2023"} Key: climate
Message published successfully. Data: {"latitude": -37.479, "longitude": 143.358, "air_temperature_celcius": 23, "relative_humidity": 60.6, "windspeed_knots": 10.1, "max_wind_speed": 26.0, "GHI_w/m2": 180, "precipitation": 0.0, "precipitation_flag": "I", "date": "02-03-2023"} Key: climate
Message published successfully. Data: {"latitude": -37.945, "longitude": 144.354, "air_temperature_celcius": 24, "relative_humidity": 55.6, "windspeed_knots": 5.9, "max_wind_speed": 13.0, "GHI_w/m2": 196, "precipitation": 0.0, "precipitation_flag": "I", "date": "03-03-2023"} Key: climate
Message published successfully. Data: {"latitude": -37.583, "longitude": 149.316, "air_temperature_celcius": 25, "relative_humidity

Message published successfully. Data: {"latitude": -37.0669, "longitude": 141.0556, "air_temperature_celcius": 12, "relative_humidity": 47.5, "windspeed_knots": 15.0, "max_wind_speed": 18.1, "GHI_w/m2": 105, "precipitation": 0.2, "precipitation_flag": "G", "date": "30-03-2023"} Key: climate
Message published successfully. Data: {"latitude": -36.8835, "longitude": 142.2098, "air_temperature_celcius": 11, "relative_humidity": 41.6, "windspeed_knots": 7.9, "max_wind_speed": 15.0, "GHI_w/m2": 101, "precipitation": 0.01, "precipitation_flag": "G", "date": "31-03-2023"} Key: climate
Message published successfully. Data: {"latitude": -37.294, "longitude": 141.232, "air_temperature_celcius": 12, "relative_humidity": 49.0, "windspeed_knots": 4.6, "max_wind_speed": 8.9, "GHI_w/m2": 104, "precipitation": 0.0, "precipitation_flag": "G", "date": "01-04-2023"} Key: climate
Message published successfully. Data: {"latitude": -37.59, "longitude": 149.31, "air_temperature_celcius": 19, "relative_humidit

Message published successfully. Data: {"latitude": -36.1, "longitude": 143.767, "air_temperature_celcius": 20, "relative_humidity": 53.5, "windspeed_knots": 7.2, "max_wind_speed": 15.9, "GHI_w/m2": 166, "precipitation": 0.31, "precipitation_flag": "G", "date": "28-04-2023"} Key: climate
Message published successfully. Data: {"latitude": -37.477, "longitude": 143.352, "air_temperature_celcius": 18, "relative_humidity": 50.6, "windspeed_knots": 6.0, "max_wind_speed": 13.0, "GHI_w/m2": 154, "precipitation": 0.0, "precipitation_flag": "G", "date": "29-04-2023"} Key: climate
Message published successfully. Data: {"latitude": -36.2111, "longitude": 141.505, "air_temperature_celcius": 24, "relative_humidity": 55.5, "windspeed_knots": 7.9, "max_wind_speed": 15.0, "GHI_w/m2": 196, "precipitation": 0.0, "precipitation_flag": "I", "date": "30-04-2023"} Key: climate
Message published successfully. Data: {"latitude": -37.245, "longitude": 143.426, "air_temperature_celcius": 10, "relative_humidity":

Message published successfully. Data: {"latitude": -36.098, "longitude": 143.74, "air_temperature_celcius": 17, "relative_humidity": 59.3, "windspeed_knots": 4.2, "max_wind_speed": 7.0, "GHI_w/m2": 134, "precipitation": 0.01, "precipitation_flag": "G", "date": "27-05-2023"} Key: climate
Message published successfully. Data: {"latitude": -37.635, "longitude": 149.303, "air_temperature_celcius": 24, "relative_humidity": 53.4, "windspeed_knots": 9.9, "max_wind_speed": 15.9, "GHI_w/m2": 200, "precipitation": 0.0, "precipitation_flag": "I", "date": "28-05-2023"} Key: climate
Message published successfully. Data: {"latitude": -36.1704, "longitude": 144.0433, "air_temperature_celcius": 11, "relative_humidity": 41.7, "windspeed_knots": 8.7, "max_wind_speed": 19.0, "GHI_w/m2": 101, "precipitation": 0.0, "precipitation_flag": "I", "date": "29-05-2023"} Key: climate
Message published successfully. Data: {"latitude": -37.293, "longitude": 141.245, "air_temperature_celcius": 10, "relative_humidity"

Message published successfully. Data: {"latitude": -34.282, "longitude": 142.121, "air_temperature_celcius": 15, "relative_humidity": 49.1, "windspeed_knots": 9.6, "max_wind_speed": 16.9, "GHI_w/m2": 130, "precipitation": 0.01, "precipitation_flag": "G", "date": "25-06-2023"} Key: climate
Message published successfully. Data: {"latitude": -37.538, "longitude": 148.895, "air_temperature_celcius": 6, "relative_humidity": 37.1, "windspeed_knots": 5.8, "max_wind_speed": 9.9, "GHI_w/m2": 57, "precipitation": 0.0, "precipitation_flag": "I", "date": "26-06-2023"} Key: climate
Message published successfully. Data: {"latitude": -37.976, "longitude": 145.649, "air_temperature_celcius": 22, "relative_humidity": 62.0, "windspeed_knots": 7.9, "max_wind_speed": 11.1, "GHI_w/m2": 170, "precipitation": 0.0, "precipitation_flag": "I", "date": "27-06-2023"} Key: climate
Message published successfully. Data: {"latitude": -37.8276, "longitude": 142.7573, "air_temperature_celcius": 9, "relative_humidity": 

Message published successfully. Data: {"latitude": -37.485, "longitude": 148.095, "air_temperature_celcius": 11, "relative_humidity": 45.4, "windspeed_knots": 5.2, "max_wind_speed": 8.9, "GHI_w/m2": 98, "precipitation": 0.0, "precipitation_flag": "A", "date": "24-07-2023"} Key: climate
Message published successfully. Data: {"latitude": -36.6732, "longitude": 142.5162, "air_temperature_celcius": 19, "relative_humidity": 53.9, "windspeed_knots": 8.3, "max_wind_speed": 14.0, "GHI_w/m2": 158, "precipitation": 0.0, "precipitation_flag": "I", "date": "25-07-2023"} Key: climate
Message published successfully. Data: {"latitude": -35.2881, "longitude": 142.5679, "air_temperature_celcius": 17, "relative_humidity": 38.7, "windspeed_knots": 16.8, "max_wind_speed": 22.9, "GHI_w/m2": 159, "precipitation": 0.0, "precipitation_flag": "I", "date": "26-07-2023"} Key: climate
Message published successfully. Data: {"latitude": -35.937, "longitude": 145.607, "air_temperature_celcius": 10, "relative_humidit