In [None]:
# import statements
import uuid
from time import sleep
import datetime
from kafka import KafkaProducer
import random
import copy as copy_util


def publish_message(producer_instance, topic, key, value):
    """
    Publish a single message to Kafka and report the outcome on stdout.

    Publishing is best-effort: any exception raised while handing the record
    to the producer is caught and printed instead of being propagated.

    NOTE(review): per the kafka-python documentation send() is asynchronous,
    so the success line presumably means "accepted for sending", not
    "acknowledged by the broker" - confirm if delivery guarantees matter.

    :param producer_instance: Object exposing ``send(topic, value=..., key=...)``
    :param topic: Topic to publish the record to
    :param key: Should be unique to achieve parallelism
    :param value: record to send
    """
    try:
        producer_instance.send(topic, value=value, key=key)
    except Exception as ex:
        print('Exception in publishing message.')
        print(str(ex))
    else:
        # str() keeps the log line working for non-string values; plain
        # concatenation raised TypeError *after* a successful send, which was
        # then misreported as a publishing failure.
        print('Message published successfully. ' + str(value))


def connect_kafka_producer():
    """
    Create a Kafka producer for the broker at localhost:9092.

    Keys and values are passed through ``.encode('utf-8')`` by the configured
    serializers, so callers must supply strings for both.

    :return: The KafkaProducer instance, or None if creating it raised an
             exception (the error is printed to stdout)
    """
    # Return from the try/except branches rather than from a ``finally``
    # clause: a ``return`` inside ``finally`` silently discards any in-flight
    # exception, including KeyboardInterrupt and SystemExit.
    try:
        return KafkaProducer(bootstrap_servers=['localhost:9092'],
                             value_serializer=lambda x: x.encode('utf-8'),
                             key_serializer=lambda x: x.encode('utf-8'),
                             api_version=(0, 10))
    except Exception as ex:
        print('Exception while connecting Kafka.')
        print(str(ex))
        return None


def get_record(header, data_row):
    """
    Build a single record by pairing each field name with its converted value.

    Field names are stripped of surrounding whitespace before being matched
    and used as keys. Values are converted per field: coordinates, humidity
    and wind speeds to float, air temperature to int, precipitation to str.

    :param header: A list of field names that denote the attributes of the record.
    :param data_row: A list of raw values, in the same order as ``header``.
    :return: A dictionary mapping each stripped field name to its converted value
    :raises ValueError: if the lengths differ, a field name is not recognised,
                        or a value cannot be converted to its numeric type
    """
    if len(header) != len(data_row):
        raise ValueError('Data might be corrupted -> header length does not match record length')

    # Converter applied to the raw value of each recognised field.
    converters = {
        "latitude": float,
        "longitude": float,
        "relative_humidity": float,
        "windspeed_knots": float,
        "max_wind_speed": float,
        "air_temperature_celcius": int,
        "precipitation": str,
    }

    record = {}
    for raw_name, raw_value in zip(header, data_row):
        field = raw_name.strip()
        convert = converters.get(field)
        if convert is None:
            raise ValueError("Failed to catch all the data fields")
        record[field] = convert(raw_value)

    return record


def parse_file(file_location):
    """
    Read a comma-separated file and parse it into a list of record dictionaries.

    The first line is treated as the header (field names); every following
    line is split on commas and converted with ``get_record``.

    :param file_location: Path of the CSV file to read

    :return: A list of dictionaries, one per data row, in file order
    :raises IndexError: if the file contains no lines at all (no header)
    :raises ValueError: propagated from ``get_record`` for malformed rows
    """
    # The context manager guarantees the handle is closed; the file is read
    # fully before parsing so it is not held open while rows are converted.
    with open(file_location) as source_file:
        file_as_list = source_file.readlines()

    header = file_as_list[0].replace("\n", "").split(",")  # get a list of field name

    # Convert every row except the header.
    return [get_record(header, raw_data.replace("\n", "").split(","))
            for raw_data in file_as_list[1:]]


def start_streaming(producer, data_list, topic, interval, sender_id):
    """
    Stream randomly chosen records to Kafka forever, one per interval.

    Each iteration picks a random record from ``data_list``, shallow-copies it
    so the source list is left unmodified, tags the copy with the sender id
    and the current local time, publishes its ``str()`` form under a fresh
    UUID key, then sleeps. The loop never terminates on its own.

    :param producer: Producer object handed to ``publish_message``
    :param data_list: A list of records to stream. Records are randomly selected to push to kafka
    :param topic: The topic of the record
    :param interval: Seconds to sleep between two published records
    :param sender_id: A id for the consumer to identify the producers
    """
    record_count = len(data_list)
    while True:
        position = random.randrange(0, record_count)
        outgoing = copy_util.copy(data_list[position])

        timestamp = datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
        outgoing["sender_id"] = sender_id
        outgoing["created_time"] = timestamp

        # push the data to kafka
        message_key = str(uuid.uuid4())
        publish_message(producer_instance=producer,
                        topic=topic,
                        key=message_key,
                        value=str(outgoing))

        sleep(interval)



print("Publishing climate steaming...")

# get data from the file
data_list_to_stream = parse_file("/Users/frank/Desktop/Sem3/FIT5148/Assignment/data/climate_streaming.csv")

producer1 = connect_kafka_producer()  # connect to kafka as a producer

# connect_kafka_producer() yields None when the connection attempt fails.
# Streaming with a None producer would loop forever, printing a publishing
# exception on every interval, so stop here instead.
if producer1 is None:
    print('Kafka producer is unavailable; streaming not started.')
else:
    start_streaming(producer1, data_list_to_stream, "temperature_analysis", 5, "climate")  # initiate the streaming process



Publishing climate steaming...
Message published successfully. {'latitude': -36.0856, 'longitude': 144.233, 'air_temperature_celcius': 11, 'relative_humidity': 43.2, 'windspeed_knots': 5.9, 'max_wind_speed': 14.0, 'precipitation': ' 0.00I', 'sender_id': 'climate', 'created_time': '2019-05-24T10:38:40'}
Message published successfully. {'latitude': -37.425, 'longitude': 148.107, 'air_temperature_celcius': 15, 'relative_humidity': 41.2, 'windspeed_knots': 13.8, 'max_wind_speed': 16.9, 'precipitation': ' 0.00G', 'sender_id': 'climate', 'created_time': '2019-05-24T10:38:45'}
Message published successfully. {'latitude': -37.856, 'longitude': 143.416, 'air_temperature_celcius': 15, 'relative_humidity': 53.7, 'windspeed_knots': 6.8, 'max_wind_speed': 11.1, 'precipitation': ' 0.51G', 'sender_id': 'climate', 'created_time': '2019-05-24T10:38:50'}
Message published successfully. {'latitude': -36.6833, 'longitude': 141.6347, 'air_temperature_celcius': 15, 'relative_humidity': 56.2, 'windspeed_knot

Message published successfully. {'latitude': -37.3583, 'longitude': 143.0203, 'air_temperature_celcius': 13, 'relative_humidity': 43.3, 'windspeed_knots': 9.4, 'max_wind_speed': 14.0, 'precipitation': ' 0.00G', 'sender_id': 'climate', 'created_time': '2019-05-24T10:41:10'}
Message published successfully. {'latitude': -38.231, 'longitude': 147.172, 'air_temperature_celcius': 24, 'relative_humidity': 61.6, 'windspeed_knots': 7.7, 'max_wind_speed': 14.0, 'precipitation': ' 0.00I', 'sender_id': 'climate', 'created_time': '2019-05-24T10:41:15'}
Message published successfully. {'latitude': -37.091, 'longitude': 145.362, 'air_temperature_celcius': 12, 'relative_humidity': 48.8, 'windspeed_knots': 4.2, 'max_wind_speed': 7.0, 'precipitation': ' 0.00I', 'sender_id': 'climate', 'created_time': '2019-05-24T10:41:20'}
Message published successfully. {'latitude': -37.758, 'longitude': 144.693, 'air_temperature_celcius': 20, 'relative_humidity': 58.8, 'windspeed_knots': 11.5, 'max_wind_speed': 15.9, 

Message published successfully. {'latitude': -36.0856, 'longitude': 144.233, 'air_temperature_celcius': 11, 'relative_humidity': 43.2, 'windspeed_knots': 5.9, 'max_wind_speed': 14.0, 'precipitation': ' 0.00I', 'sender_id': 'climate', 'created_time': '2019-05-24T10:43:45'}
Message published successfully. {'latitude': -38.226, 'longitude': 147.167, 'air_temperature_celcius': 10, 'relative_humidity': 43.7, 'windspeed_knots': 7.2, 'max_wind_speed': 11.1, 'precipitation': ' 0.00I', 'sender_id': 'climate', 'created_time': '2019-05-24T10:43:50'}
Message published successfully. {'latitude': -35.962, 'longitude': 143.791, 'air_temperature_celcius': 14, 'relative_humidity': 49.9, 'windspeed_knots': 6.8, 'max_wind_speed': 15.9, 'precipitation': ' 0.00G', 'sender_id': 'climate', 'created_time': '2019-05-24T10:43:55'}
Message published successfully. {'latitude': -36.851, 'longitude': 148.117, 'air_temperature_celcius': 9, 'relative_humidity': 45.3, 'windspeed_knots': 2.5, 'max_wind_speed': 6.0, 'pr

Message published successfully. {'latitude': -37.368, 'longitude': 148.05, 'air_temperature_celcius': 10, 'relative_humidity': 41.4, 'windspeed_knots': 9.4, 'max_wind_speed': 14.0, 'precipitation': ' 0.00I', 'sender_id': 'climate', 'created_time': '2019-05-24T10:46:20'}
Message published successfully. {'latitude': -38.0427, 'longitude': 141.0271, 'air_temperature_celcius': 19, 'relative_humidity': 52.9, 'windspeed_knots': 8.1, 'max_wind_speed': 15.0, 'precipitation': ' 0.00I', 'sender_id': 'climate', 'created_time': '2019-05-24T10:46:25'}
Message published successfully. {'latitude': -37.3847, 'longitude': 142.8935, 'air_temperature_celcius': 20, 'relative_humidity': 43.6, 'windspeed_knots': 10.3, 'max_wind_speed': 15.0, 'precipitation': ' 0.00I', 'sender_id': 'climate', 'created_time': '2019-05-24T10:46:30'}
Message published successfully. {'latitude': -37.1875, 'longitude': 146.8024, 'air_temperature_celcius': 28, 'relative_humidity': 58.3, 'windspeed_knots': 9.3, 'max_wind_speed': 15

Message published successfully. {'latitude': -37.609, 'longitude': 149.32, 'air_temperature_celcius': 16, 'relative_humidity': 48.3, 'windspeed_knots': 9.4, 'max_wind_speed': 14.0, 'precipitation': ' 0.01G', 'sender_id': 'climate', 'created_time': '2019-05-24T10:48:55'}
Message published successfully. {'latitude': -37.477, 'longitude': 148.097, 'air_temperature_celcius': 8, 'relative_humidity': 42.6, 'windspeed_knots': 2.0, 'max_wind_speed': 6.0, 'precipitation': ' 0.00I', 'sender_id': 'climate', 'created_time': '2019-05-24T10:49:00'}
Message published successfully. {'latitude': -37.0884, 'longitude': 141.0357, 'air_temperature_celcius': 12, 'relative_humidity': 40.4, 'windspeed_knots': 5.9, 'max_wind_speed': 8.9, 'precipitation': ' 0.00G', 'sender_id': 'climate', 'created_time': '2019-05-24T10:49:05'}
Message published successfully. {'latitude': -36.6511, 'longitude': 143.915, 'air_temperature_celcius': 13, 'relative_humidity': 46.2, 'windspeed_knots': 5.6, 'max_wind_speed': 12.0, 'pr

Message published successfully. {'latitude': -36.942, 'longitude': 143.292, 'air_temperature_celcius': 18, 'relative_humidity': 51.1, 'windspeed_knots': 8.1, 'max_wind_speed': 15.0, 'precipitation': ' 0.00I', 'sender_id': 'climate', 'created_time': '2019-05-24T10:51:30'}
Message published successfully. {'latitude': -34.282, 'longitude': 142.121, 'air_temperature_celcius': 15, 'relative_humidity': 49.1, 'windspeed_knots': 9.6, 'max_wind_speed': 16.9, 'precipitation': ' 0.01G', 'sender_id': 'climate', 'created_time': '2019-05-24T10:51:35'}
Message published successfully. {'latitude': -36.2212, 'longitude': 143.1666, 'air_temperature_celcius': 11, 'relative_humidity': 44.2, 'windspeed_knots': 5.8, 'max_wind_speed': 9.9, 'precipitation': ' 0.00I', 'sender_id': 'climate', 'created_time': '2019-05-24T10:51:40'}
Message published successfully. {'latitude': -35.6374, 'longitude': 142.3787, 'air_temperature_celcius': 14, 'relative_humidity': 41.6, 'windspeed_knots': 13.1, 'max_wind_speed': 18.1

Message published successfully. {'latitude': -37.477, 'longitude': 148.097, 'air_temperature_celcius': 8, 'relative_humidity': 42.6, 'windspeed_knots': 2.0, 'max_wind_speed': 6.0, 'precipitation': ' 0.00I', 'sender_id': 'climate', 'created_time': '2019-05-24T10:54:06'}
Message published successfully. {'latitude': -37.4437, 'longitude': 143.4924, 'air_temperature_celcius': 13, 'relative_humidity': 50.7, 'windspeed_knots': 6.1, 'max_wind_speed': 13.0, 'precipitation': ' 0.00I', 'sender_id': 'climate', 'created_time': '2019-05-24T10:54:11'}
Message published successfully. {'latitude': -37.605, 'longitude': 149.326, 'air_temperature_celcius': 23, 'relative_humidity': 49.6, 'windspeed_knots': 10.0, 'max_wind_speed': 19.0, 'precipitation': ' 0.00I', 'sender_id': 'climate', 'created_time': '2019-05-24T10:54:16'}
Message published successfully. {'latitude': -35.6374, 'longitude': 142.3787, 'air_temperature_celcius': 14, 'relative_humidity': 41.6, 'windspeed_knots': 13.1, 'max_wind_speed': 18.1