##### **Getting the latest date from the climate collection in MongoDB:**

In [2]:
from pymongo import MongoClient
from datetime import datetime

#setup mongo client
ip_address = '10.192.68.151'
mongo_client = MongoClient(ip_address, 27017)

#create or get database 
db = mongo_client["fit3182_assignment_db"]

#get a handle on the historic climate collection
climate_col = db.climates_historic

#pipeline that keeps only the date field of every climate document
#(_id is switched off because it is never read below)
pipeline_stages = [{"$project": {"_id": 0, "date": 1}}]
latest_climate_doc = climate_col.aggregate(pipeline_stages)  #cursor over all projected documents

#the dates are parsed with the "%d/%m/%Y" format into datetime objects so they can be
#ordered chronologically rather than as plain text
dates = [datetime.strptime(doc["date"], "%d/%m/%Y") for doc in latest_climate_doc]

#fail with a clear message instead of an IndexError when the collection has no documents
if not dates:
    raise ValueError("climates_historic has no documents, so there is no latest date to continue from")

dates.sort(reverse=True)  #sort dates in descending order (newest first)
latest_date = dates[0].date()

print(latest_date)

2024-01-01


## **Task 1.1: Producer 1**

In [3]:
from time import sleep
from kafka3 import KafkaProducer
import random
import pandas as pd
from datetime import timedelta as td
import json


#settings for this producer
producer_identifier = "producer_1"  #label attached to every message this producer publishes
topic_name = 'climate_hotspot'  #use the same topic for all producers
ip_address = "10.192.68.151"  #host part of the Kafka bootstrap server address

#running date of the stream; it starts at the latest date found in the climate collection
new_date = latest_date


def prepare_streaming_data(csv_path="climate_streaming.csv"):
    """Load the streaming climate records from a CSV file.

    Args:
        csv_path: Path of the CSV file to read. Defaults to
            "climate_streaming.csv", resolved against the current working directory.

    Returns:
        list[dict]: One dictionary per CSV row, in file order, keyed by the
        column names exactly as they appear in the header.
    """
    #let pandas build the row dictionaries in one call; unlike iterrows(), this does not
    #squeeze each row into a single-dtype Series, so integer columns stay integers
    return pd.read_csv(csv_path).to_dict(orient="records")

#add additional attributes to each data document
def add_additional_data(data_document):
    """Return a copy of a climate record stamped with the next date and the producer label.

    Every call moves the module-level ``new_date`` forward by one day, so
    successive messages carry consecutive dates.

    Args:
        data_document: Dictionary holding one climate record. It is not modified.

    Returns:
        dict: A shallow copy of ``data_document`` with two extra keys: "date"
        (the advanced date formatted as "YYYY-MM-DD") and "producer_identifier".
    """
    global new_date  #ensure date is incremented sequentially by using global variable

    new_date = new_date + td(days=1)  #increment date by 1 day

    #work on a copy so the caller's record (an element of the shared list of streaming
    #rows) does not keep the date and label written for an earlier message
    stamped_document = dict(data_document)
    stamped_document["date"] = new_date.strftime("%Y-%m-%d")    #convert to string to make it json serialisable
    stamped_document["producer_identifier"] = producer_identifier

    return stamped_document


def publish_message(producer_instance, kafka_topic, data, timeout=10):
    """Send one message to a Kafka topic and report whether it was delivered.

    Args:
        producer_instance: Producer object exposing ``send`` and ``flush``.
        kafka_topic: Name of the topic to publish to.
        data: Message value; it is handed to the producer unchanged.
        timeout: Maximum number of seconds to wait for the delivery result.

    Returns:
        None. The outcome is printed; any exception is caught and printed
        rather than raised, so a failed message does not stop the caller.
    """
    try:
        delivery = producer_instance.send(kafka_topic, value=data)  #send the message to the specified topic
        producer_instance.flush()  #make sure that the message is actually sent to Kafka before moving on

        #send() only queues the message and hands back a future; resolving it raises if the
        #delivery failed, so "Success" is no longer printed for a message that was lost
        delivery.get(timeout=timeout)

        print('Success publishing message. Data: ' + str(data))

    except Exception as e:
        print('Error! Message could not be published')
        print(str(e))
        
def connect_kafka_producer():
    """Create a Kafka producer for the broker at ``ip_address`` on port 9092.

    Message values are serialised as JSON and encoded to ASCII bytes.

    Returns:
        The KafkaProducer instance, or None when creating it raised an
        Exception (the error is printed in that case).
    """
    try:
        #create producer at given address port
        return KafkaProducer(bootstrap_servers=[f'{ip_address}:9092'],
                             value_serializer=lambda x: json.dumps(x).encode('ascii'),  #encode as bytes because Kafka messages are sent as byte arrays
                             api_version=(0, 10))

    except Exception as e:
        print('Could not create producer. Error connecting to Kafka.')
        print(str(e))

    #reached only after a handled connection error; returning here instead of inside a
    #finally clause lets KeyboardInterrupt and SystemExit propagate instead of being swallowed
    return None
    
if __name__ == '__main__':

    streaming_climate_data = prepare_streaming_data()  #get all streaming data as dictionary objects (like json)
    print('Publishing records..')
    producer = connect_kafka_producer()

    if producer is None:
        #connect_kafka_producer() has already printed why the connection failed; without this
        #check the loop below would report a publish error every 10 seconds forever
        print('No Kafka producer available, so no records were published.')
    else:
        try:
            while True:

                #choose document randomly
                random_data_doc = random.choice(streaming_climate_data)

                data_doc = add_additional_data(random_data_doc)
                publish_message(producer, topic_name, data_doc)  #publish chosen data document

                sleep(10)  #publish data every 10 seconds
        finally:
            #the loop only ends through an exception (e.g. interrupting the cell);
            #close the producer on the way out so it is not left open
            producer.close()

Publishing records..
Success publishing message. Data: {'latitude': -36.6851, 'longitude': 141.6125, 'air_temperature_celcius': 17, 'relative_humidity': 59.0, 'windspeed_knots': 9.4, 'max_wind_speed': 15.0, 'precipitation ': ' 0.75G', 'GHI_w/m2': 135, 'date': '2024-01-02', 'producer_identifier': 'producer_1'}
Success publishing message. Data: {'latitude': -37.461, 'longitude': 148.109, 'air_temperature_celcius': 14, 'relative_humidity': 50.7, 'windspeed_knots': 8.6, 'max_wind_speed': 13.0, 'precipitation ': ' 0.12G', 'GHI_w/m2': 119, 'date': '2024-01-03', 'producer_identifier': 'producer_1'}
Success publishing message. Data: {'latitude': -37.634, 'longitude': 149.237, 'air_temperature_celcius': 16, 'relative_humidity': 48.4, 'windspeed_knots': 8.1, 'max_wind_speed': 15.9, 'precipitation ': ' 0.00G', 'GHI_w/m2': 139, 'date': '2024-01-04', 'producer_identifier': 'producer_1'}
Success publishing message. Data: {'latitude': -38.495, 'longitude': 146.944, 'air_temperature_celcius': 8, 'rela

KeyboardInterrupt: 