# Part B: Task 1

**(b)**

In [1]:
# import needed library
import json
import pandas as pd
from pprint import pprint
from datetime import datetime, timedelta
import pymongo
from pymongo import MongoClient

#kafka required libraries
from time import sleep
from json import dumps
from kafka3 import KafkaProducer
import random
import datetime as dt

Since the question want us to find the latest date in climate data that was inserted into the MongoDB database i had created in Part A, Task 2, so i will connect to the database first

In [2]:
# Method 1: specify the host and port explicitly
client = MongoClient('192.168.1.27', 27017) # 27017 number of ports


In [3]:
# to get all the databases in client 
result = client.list_database_names()
print(result)

['admin', 'config', 'fit3182_assignment_db', 'fit3182_db', 'local']


In [4]:
# connect to database
db = client.fit3182_assignment_db

In [5]:
# list collections
print(db.list_collection_names())

['climate_hotspot', 'hotspot', 'climate', 'clihot']


In [6]:
# connect to the collection 
cli_hot = db.clihot

In [7]:
#find the latest climate date

data = cli_hot.aggregate([{"$sort": {"date": 1}},{"$limit": 1}, {"$project": {"date": {"$dateToString": {"format": "%Y-%m-%d", "date": "$date"}}}}])
for res in data:
    pprint(res)

{'_id': ObjectId('647f53babeaf2b433c177939'), 'date': '2021-12-31'}


so we can see from the result the latest date would be 31/12/2021

read the data first

In [8]:
terrastream = pd.read_csv("./hotspot_TERRA_streaming.csv")

**Data inspection**<br>
inspect on the data and see how the data looks like, what fields it contains

In [9]:
terrastream.head()

Unnamed: 0,latitude,longitude,confidence,surface_temperature_celcius
0,-37.966,145.051,78,68
1,-35.541,143.311,82,63
2,-35.554,143.307,67,53
3,-35.543,143.316,86,67
4,-37.708,145.1,80,54


**Json format**<br>
convert the csv data into json format

In [10]:
terrastreamjs = terrastream.to_json(orient='records')
terrastreamjs = json.loads(terrastreamjs)

In [11]:
pprint(terrastreamjs)

[{'confidence': 78,
  'latitude': -37.966,
  'longitude': 145.051,
  'surface_temperature_celcius': 68},
 {'confidence': 82,
  'latitude': -35.541,
  'longitude': 143.311,
  'surface_temperature_celcius': 63},
 {'confidence': 67,
  'latitude': -35.554,
  'longitude': 143.307,
  'surface_temperature_celcius': 53},
 {'confidence': 86,
  'latitude': -35.543,
  'longitude': 143.316,
  'surface_temperature_celcius': 67},
 {'confidence': 80,
  'latitude': -37.708,
  'longitude': 145.1,
  'surface_temperature_celcius': 54},
 {'confidence': 65,
  'latitude': -35.646,
  'longitude': 142.282,
  'surface_temperature_celcius': 32},
 {'confidence': 72,
  'latitude': -38.075,
  'longitude': 143.76,
  'surface_temperature_celcius': 46},
 {'confidence': 94,
  'latitude': -37.636,
  'longitude': 149.33,
  'surface_temperature_celcius': 43},
 {'confidence': 69,
  'latitude': -37.624,
  'longitude': 149.332,
  'surface_temperature_celcius': 33},
 {'confidence': 62,
  'latitude': -37.82,
  'longitude': 14

# Kafka producer 

**To publish the message**

**IP address**

In [12]:
host_ip = "192.168.1.27"

**Function to publish the message**

In [13]:
def publish_message(producer_instance, topic_name, key, data):
    try:
        key_bytes = bytes(key, encoding='utf-8')
        producer_instance.send(topic_name, key=key_bytes, value=data)
        producer_instance.flush()
        print('Message published successfully. Data: ' + str(data))
    except Exception as ex:
        print('Exception in publishing message.')
        print(str(ex))



**connect to kafka producer**

In [14]:
def connect_kafka_producer():
    _producer = None
    try:
        _producer = KafkaProducer(bootstrap_servers=[f'{host_ip}:9092'],
                                  # kafka only accept bytes
                                  # convert message to bytes before sending
                                  value_serializer=lambda x:dumps(x).encode('ascii'),
                                  api_version=(0, 10))
    except Exception as ex:
        print('Exception while connecting Kafka.')
        print(str(ex))
    finally:
        return _producer



**basically to run the message publishing**

In [15]:
 # this code will terminate only if we interupt the kernel
if __name__ == '__main__':
   
    #topic = 'terra'
    topic = 'climate'
    
    print('Publishing records..')
    producer = connect_kafka_producer()
    
    current_datetime = datetime.now()
    # identified from previous the lastest date would be 31/12/2021
    new_date = datetime.strptime("2021-12-31", "%Y-%m-%d")
    time_date = new_date
    while True:
        data = random.choice(terrastreamjs)  # Randomly select data from aquastreamjs
        
        if datetime.now() - current_datetime >= timedelta(seconds=2):
            #according to faq, every second = 4.8hours
            time_date = time_date + timedelta(hours=4.8)
        
        # check if it is already after 10 seconds then add one day to new date
        if datetime.now() - current_datetime >= timedelta(seconds=10):
            current_datetime = datetime.now()
            new_date = new_date+timedelta(days=1)
            time_date = new_date
            
        

        data["producer"] = "terra_streaming"
        data["newdate"] = str(new_date)
        data["datetime"] = str(time_date.time())
        print(data)
        publish_message(producer, topic, 'jsondata', data)
        
        sleep(2)
        


Publishing records..
{'latitude': -35.4591, 'longitude': 142.062, 'confidence': 74, 'surface_temperature_celcius': 48, 'producer': 'terra_streaming', 'newdate': '2021-12-31 00:00:00', 'datetime': '00:00:00'}
Message published successfully. Data: {'latitude': -35.4591, 'longitude': 142.062, 'confidence': 74, 'surface_temperature_celcius': 48, 'producer': 'terra_streaming', 'newdate': '2021-12-31 00:00:00', 'datetime': '00:00:00'}
{'latitude': -36.0828, 'longitude': 145.873, 'confidence': 94, 'surface_temperature_celcius': 75, 'producer': 'terra_streaming', 'newdate': '2021-12-31 00:00:00', 'datetime': '04:48:00'}
Message published successfully. Data: {'latitude': -36.0828, 'longitude': 145.873, 'confidence': 94, 'surface_temperature_celcius': 75, 'producer': 'terra_streaming', 'newdate': '2021-12-31 00:00:00', 'datetime': '04:48:00'}
{'latitude': -37.333, 'longitude': 148.099, 'confidence': 94, 'surface_temperature_celcius': 43, 'producer': 'terra_streaming', 'newdate': '2021-12-31 00:0

KeyboardInterrupt: 