In [25]:
# Import library
import time
import datetime
from random import randint
from kafka3 import KafkaProducer
import pandas as pd
import pymongo
from pymongo import MongoClient
import json

In [26]:
# Load TERRA streaming data
terra_streaming = pd.read_csv('../Datasets/hotspot_TERRA_streaming.csv')

In [27]:
# Change the ip address to host ip address
ip_address = "192.168.224.1"

In [28]:
# Get Latest Climate Date
client = MongoClient(ip_address, 27017) 
db = client['A3_db'] # Using A3_db
fire_collection = db['fire_historic'] # Using collection fire_historic
response = fire_collection.find({},{'date':1}).sort('date',-1).limit(1) # Get latest document
latest_time = response[0]['date']

In [29]:
# Function to turn string into ISO date
def date_to_iso(date, date_format='%d/%m/%Y'):
    return datetime.strptime(str(date), date_format)

# Map function to value, if error is raised return None
# If value is classified as null also return None
def val_or_null(value, func, null=None):
    try:
        value = func(value)
        if value == null:
            return None
        else:
            return value
    except:
        return None

In [30]:
def publish_message(producer_instance, topic_name, message):
    try:
        data = message.encode('utf-8')
        producer_instance.send(topic_name, data)
        producer_instance.flush()
        print('Message published successfully. Data:\n ' + str(data) + '\n')
    except Exception as ex:
        print('Exception in publishing message.')
        print(str(ex))

In [32]:
# Create Kafka producer
producer = KafkaProducer(bootstrap_servers=f'{ip_address}:9092', 
                         api_version=(0, 10))

# We will make the data being send every 1/5 days + some random minutes (within 1/5 days)
# This to ensure the hotspot data being send is scattered across different hours
minute_window = (60*24)/5 # Number of minutes in a day / number of time hotspot data being daily

event_time = latest_time

# Iterate over the TERRA data and send messages to Kafka topic
while True:
    # Select row with replacement
    row = terra_streaming.sample(n=1, replace=True) 
    
    # Increment created_time event_time (1/5 day) + some random minutes
    created_time = pd.to_datetime(event_time) + datetime.timedelta(minutes=randint(0,minute_window))
    created_date = created_time.strftime("%Y/%m/%d") # Convert date to string
    created_time = created_time.strftime("%Y/%m/%d %H:%M:%S") # Convert datetime to string
    event_time = event_time + datetime.timedelta(days=1/5) # Increment event_time by 1/5 day 
    
    # Get the value for everything. Get null if value is invalid
    latitude = val_or_null(row.latitude, float)
    longitude = val_or_null(row.longitude, float)
    confidence = val_or_null(row.confidence, float)
    surface_temperature = val_or_null(row.surface_temperature_celcius, float)

    # Produce the document
    document = {
        'date': created_date,
        'datetime': created_time,
        'latitude': latitude,
        'longitude': longitude,
        'confidence': confidence,
        'surface_temperature': surface_temperature,
        'producer': 'Producer3'
    }
    
    # Convert dict (document) into json
    message = json.dumps(document)
    
    # Send message to Kafka topic
    publish_message(producer, 'hotspot_topic', message)
    
    # Sleep for 2 seconds (1/5 day)
    time.sleep(2)

SyntaxError: invalid syntax (777968473.py, line 12)