# Task 1. Processing Data Stream (45%)
## Streaming Application

In [1]:
import json
from pymongo import MongoClient
from pyspark.sql import SparkSession
import geohash2
from pprint import pprint
import csv
from datetime import datetime
from collections import defaultdict

import os
os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages org.apache.spark:spark-streaming-kafka-0-10_2.12:3.3.0,org.apache.spark:spark-sql-kafka-0-10_2.12:3.3.0 pyspark-shell'

host_ip = "192.168.100.7"

client = MongoClient(host_ip, 27017)

db = client["fit3182_asgn3"]
db.stream_data.drop()
collection = db["stream_data"]

In [2]:
spark = (
    SparkSession.builder
    .master('local[*]')
    .appName('Streaming Climate and Hotspot Data')
    .getOrCreate()
)

In [3]:
topic = 'Climate_Hotspot_Stream'

In [4]:
kafka_sdf = (
    spark.readStream
    .format('kafka')
    .option('kafka.bootstrap.servers', f'{host_ip}:9092')
    .option('subscribe', topic)
    .option('startingOffsets', 'latest') #start consuming messages from the latest offset available in the topic when the streaming query starts
    .load()
)

In [5]:
climate_sdf = kafka_sdf.select('value')

In [6]:
def process_batch(df, epoch_id):
    
    # Collect streaming data
    print("Batch " + str(epoch_id))
    batch_data_list = []
    data = df.collect()

    # Loop through streaming dataframe and decode data
    for row in data:
        json_dict = json.loads(row.value.decode("utf-8"))
        json_data = json.dumps(json_dict)
        batch_data_list.append(json_data)

    # Compute geohash of precision 3 and 5
    result = []
    for json_str in batch_data_list:
        data = json.loads(json_str)
        latitude = data['latitude']
        longitude = data['longitude']
        geohash3 = geohash2.encode(float(latitude), float(longitude), precision=3)
        geohash5 = geohash2.encode(float(latitude), float(longitude), precision=5)
        data['geohash3'] = geohash3
        data['geohash5'] = geohash5
        result.append(data)

    # Append data as json object into geohash_result json document
    geohash_result = [json.dumps(data) for data in result]

    climate_geohash3 = ''
    climate_record = {}
    fire_cause = ''

    # Loop through all json strings in geohash_result
    for json_str in geohash_result:
        
        # Load json string as json object
        data = json.loads(json_str)
        
        # If data is from climate_streaming dataset
        if data['producer'] == 'Climate':
            
            # Climate record's geohash3 value
            climate_geohash3 = data['geohash3'] 
            
            # Get the current climate record
            climate_record = data
            
            # Check fire cause
            if int(climate_record['air_temperature_celcius']) > 20 and int(climate_record['GHI_w/m2']) > 180:
                fire_cause ='natural'
            else:
                fire_cause ='other'
            
            # Break if found one climate record as we only get one per batch
            break

    # If at least one climate record exists in the batch
    if climate_geohash3 != '':
        
        # Add hotspot list for the climate record
        climate_record['hotspots'] = []

        # If producer is not Climate, append the hotspot record (from producer AQUA and TERRA) into hotspot list
        for json_str in geohash_result:
            data = json.loads(json_str)
            if data['producer'] != "Climate":
                climate_record['hotspots'].append(data)
    
    # Check if climate record exists
    if climate_geohash3 != '':
        grouped_hotspots = defaultdict(list)
        
        # if climate's hotspot list is not empty, append all hotspots with the same geohash3 as the climate record,
        # group the hotspots with the same geohash5 together
        for hotspot in climate_record['hotspots']:
            if hotspot['geohash3']==climate_geohash3:
                geohash5 = hotspot['geohash5']
                grouped_hotspots[geohash5].append(hotspot)

        # Add into fire_event list if hotspot has the same geohash3 value as climate, 
        # for hotspot records that has the same geohash5 value, we assume that they are the same fire event, 
        # hence get their average (for confidence and surface_temperature_celsius) and 
        # get the last record's latitude, llongitude and datetime
        # finallt append to the fire_event list
        fire_event = []
        for geohash5, hotspots in grouped_hotspots.items():
            if len(hotspots) > 1:
                total_confidence = 0
                total_surface_temperature = 0
                for hotspot in hotspots:
                    total_confidence += int(hotspot['confidence'])
                    total_surface_temperature += int(hotspot['surface_temperature_celcius'])

                average_confidence = total_confidence / len(hotspots)
                average_surface_temperature = total_surface_temperature / len(hotspots)
                
                fire_event.append({
                    'hotspot_geohash5': geohash5,
                    'confidence': average_confidence,
                    'surface_temperature_celcius': average_surface_temperature,
                    'latitude': hotspots[-1]['latitude'],
                    'longitude': hotspots[-1]['longitude'],
                    'datetime': hotspots[-1]['datetime'],
                    'fire_cause': fire_cause
                })
            else:
                fire_event.append({
                    'hotspot_geohash5': geohash5,
                    'confidence': hotspots[0]['confidence'], #hotspots[0] is used to access the first hotspot document in the group when there is only one hotspot with a specific geohash5 value.
                    'surface_temperature_celcius': hotspots[0]['surface_temperature_celcius'],
                    'latitude': hotspots[0]['latitude'],
                    'longitude': hotspots[0]['longitude'],
                    'datetime': hotspots[0]['datetime'],
                    'fire_cause': fire_cause
                })

        climate_record['fire_event'] = fire_event

    pprint(climate_record)
    
    # Insert into mongoDB
    if climate_record != {}:
        collection.insert_one(climate_record)


In [7]:
writer = (
    climate_sdf.writeStream.format("Console")
    .option("checkpointLocation", "./climate_sdf_checkpoints")
    .outputMode('append')
    .trigger(processingTime='10 seconds')
    .foreachBatch(process_batch)
)

In [8]:
try:
    query = writer.start()
    query.awaitTermination()
except KeyboardInterrupt:
    print('Interrupted by CTRL-C. Stopping query.')
finally:
    query.stop()

Batch 3789
{'GHI_w/m2': '200',
 'air_temperature_celcius': '24',
 'date': '18/01/2023',
 'fire_event': [{'confidence': 81.91666666666667,
                 'datetime': '2023-01-08T14:24:00',
                 'fire_cause': 'natural',
                 'hotspot_geohash5': 'r364j',
                 'latitude': '-37.605',
                 'longitude': '149.302',
                 'surface_temperature_celcius': 55.083333333333336},
                {'confidence': 64.6,
                 'datetime': '2023-01-06T19:12:00',
                 'fire_cause': 'natural',
                 'hotspot_geohash5': 'r361v',
                 'latitude': '-37.623',
                 'longitude': '149.323',
                 'surface_temperature_celcius': 49.8},
                {'confidence': 75.25,
                 'datetime': '2023-01-06T14:24:00',
                 'fire_cause': 'natural',
                 'hotspot_geohash5': 'r361y',
                 'latitude': '-37.658',
                 'longitude': '149.339',


Batch 3790
{'GHI_w/m2': '154',
 'air_temperature_celcius': '19',
 'date': '10/01/2023',
 'fire_event': [{'confidence': 85.71428571428571,
                 'datetime': '2023-01-09T19:12:00',
                 'fire_cause': 'other',
                 'hotspot_geohash5': 'r1xct',
                 'latitude': '-36.291',
                 'longitude': '146.159',
                 'surface_temperature_celcius': 52.42857142857143},
                {'confidence': '84',
                 'datetime': '2023-01-08T14:24:00',
                 'fire_cause': 'other',
                 'hotspot_geohash5': 'r1xcw',
                 'latitude': '-36.277',
                 'longitude': '146.165',
                 'surface_temperature_celcius': '58'}],
 'geohash3': 'r1x',
 'geohash5': 'r1xct',
 'hotspots': [{'confidence': '81',
               'datetime': '2023-01-08T19:12:00',
               'geohash3': 'r36',
               'geohash5': 'r361u',
               'latitude': '-37.627',
               'longitude': 

Batch 3793
{'GHI_w/m2': '179',
 'air_temperature_celcius': '22',
 'date': '13/01/2023',
 'fire_event': [],
 'geohash3': 'r1x',
 'geohash5': 'r1xct',
 'hotspots': [{'confidence': '100',
               'datetime': '2023-01-12T04:48:00',
               'geohash3': 'r36',
               'geohash5': 'r364j',
               'latitude': '-37.606',
               'longitude': '149.312',
               'producer': 'AQUA',
               'surface_temperature_celcius': '87'},
              {'confidence': '64',
               'datetime': '2023-01-11T19:12:00',
               'geohash3': 'r36',
               'geohash5': 'r364j',
               'latitude': '-37.598',
               'longitude': '149.29',
               'producer': 'TERRA',
               'surface_temperature_celcius': '32'},
              {'confidence': '64',
               'datetime': '2023-01-12T09:36:00',
               'geohash3': 'r36',
               'geohash5': 'r364j',
               'latitude': '-37.605',
               'l

Batch 3796
{'GHI_w/m2': '136',
 'air_temperature_celcius': '16',
 'date': '16/01/2023',
 'fire_event': [],
 'geohash3': 'r1x',
 'geohash5': 'r1xct',
 'hotspots': [{'confidence': '64',
               'datetime': '2023-01-15T04:48:00',
               'geohash3': 'r1m',
               'geohash5': 'r1mgw',
               'latitude': '-37.332',
               'longitude': '143.375',
               'producer': 'AQUA',
               'surface_temperature_celcius': '46'},
              {'confidence': '87',
               'datetime': '2023-01-14T19:12:00',
               'geohash3': 'r36',
               'geohash5': 'r361v',
               'latitude': '-37.62',
               'longitude': '149.294',
               'producer': 'TERRA',
               'surface_temperature_celcius': '63'},
              {'confidence': '87',
               'datetime': '2023-01-15T09:36:00',
               'geohash3': 'r1q',
               'geohash5': 'r1q1b',
               'latitude': '-37.621',
               'lo

Batch 3799
{'GHI_w/m2': '141',
 'air_temperature_celcius': '16',
 'date': '19/01/2023',
 'fire_event': [],
 'geohash3': 'r36',
 'geohash5': 'r364n',
 'hotspots': [{'confidence': '68',
               'datetime': '2023-01-18T04:48:00',
               'geohash3': 'r1r',
               'geohash5': 'r1rnz',
               'latitude': '-36.759',
               'longitude': '145.179',
               'producer': 'AQUA',
               'surface_temperature_celcius': '52'},
              {'confidence': '50',
               'datetime': '2023-01-17T19:12:00',
               'geohash3': 'r33',
               'geohash5': 'r333q',
               'latitude': '-37.749',
               'longitude': '148.297',
               'producer': 'TERRA',
               'surface_temperature_celcius': '42'},
              {'confidence': '62',
               'datetime': '2023-01-18T09:36:00',
               'geohash3': 'r1r',
               'geohash5': 'r1rnz',
               'latitude': '-36.758',
               'l

Batch 3802
{'GHI_w/m2': '143',
 'air_temperature_celcius': '17',
 'date': '22/01/2023',
 'fire_event': [],
 'geohash3': 'r36',
 'geohash5': 'r364n',
 'hotspots': [{'confidence': '89',
               'datetime': '2023-01-21T04:48:00',
               'geohash3': 'r1m',
               'geohash5': 'r1mg9',
               'latitude': '-37.329',
               'longitude': '143.136',
               'producer': 'AQUA',
               'surface_temperature_celcius': '62'},
              {'confidence': '51',
               'datetime': '2023-01-20T19:12:00',
               'geohash3': 'r1n',
               'geohash5': 'r1nqc',
               'latitude': '-38.167',
               'longitude': '143.841',
               'producer': 'TERRA',
               'surface_temperature_celcius': '38'},
              {'confidence': '81',
               'datetime': '2023-01-21T09:36:00',
               'geohash3': 'r1m',
               'geohash5': 'r1mg8',
               'latitude': '-37.34',
               'lo

Batch 3805
{'GHI_w/m2': '139',
 'air_temperature_celcius': '16',
 'date': '25/01/2023',
 'fire_event': [],
 'geohash3': 'r36',
 'geohash5': 'r365q',
 'hotspots': [{'confidence': '65',
               'datetime': '2023-01-24T04:48:00',
               'geohash3': 'r1w',
               'geohash5': 'r1w4x',
               'latitude': '-36.1',
               'longitude': '143.772',
               'producer': 'AQUA',
               'surface_temperature_celcius': '35'},
              {'confidence': '72',
               'datetime': '2023-01-23T19:12:00',
               'geohash3': 'r1m',
               'geohash5': 'r1mfy',
               'latitude': '-37.467',
               'longitude': '143.351',
               'producer': 'TERRA',
               'surface_temperature_celcius': '47'},
              {'confidence': '92',
               'datetime': '2023-01-24T09:36:00',
               'geohash3': 'r1w',
               'geohash5': 'r1w4w',
               'latitude': '-36.098',
               'lon

Batch 3808
{'GHI_w/m2': '155',
 'air_temperature_celcius': '19',
 'date': '28/01/2023',
 'fire_event': [],
 'geohash3': 'r36',
 'geohash5': 'r364j',
 'hotspots': [{'confidence': '82',
               'datetime': '2023-01-27T04:48:00',
               'geohash3': 'r1q',
               'geohash5': 'r1q8b',
               'latitude': '-37.805',
               'longitude': '144.15',
               'producer': 'AQUA',
               'surface_temperature_celcius': '55'},
              {'confidence': '86',
               'datetime': '2023-01-26T19:12:00',
               'geohash3': 'r1q',
               'geohash5': 'r1qwb',
               'latitude': '-36.759',
               'longitude': '144.158',
               'producer': 'TERRA',
               'surface_temperature_celcius': '60'},
              {'confidence': '62',
               'datetime': '2023-01-27T09:36:00',
               'geohash3': 'r1q',
               'geohash5': 'r1qev',
               'latitude': '-37.288',
               'lo

Batch 3811
{'GHI_w/m2': '163',
 'air_temperature_celcius': '20',
 'date': '31/01/2023',
 'fire_event': [],
 'geohash3': 'r36',
 'geohash5': 'r361u',
 'hotspots': [{'confidence': '80',
               'datetime': '2023-01-30T04:48:00',
               'geohash3': 'r33',
               'geohash5': 'r336g',
               'latitude': '-37.466',
               'longitude': '148.143',
               'producer': 'AQUA',
               'surface_temperature_celcius': '46'},
              {'confidence': '65',
               'datetime': '2023-01-29T19:12:00',
               'geohash3': 'r1q',
               'geohash5': 'r1qev',
               'latitude': '-37.296',
               'longitude': '144.386',
               'producer': 'TERRA',
               'surface_temperature_celcius': '54'},
              {'confidence': '85',
               'datetime': '2023-01-30T09:36:00',
               'geohash3': 'r33',
               'geohash5': 'r336f',
               'latitude': '-37.472',
               'l

Batch 3814
{'GHI_w/m2': '150',
 'air_temperature_celcius': '18',
 'date': '03/02/2023',
 'fire_event': [],
 'geohash3': 'r36',
 'geohash5': 'r365m',
 'hotspots': [{'confidence': '71',
               'datetime': '2023-02-02T00:00:00',
               'geohash3': 'r33',
               'geohash5': 'r3372',
               'latitude': '-37.367',
               'longitude': '148.04',
               'producer': 'AQUA',
               'surface_temperature_celcius': '55'},
              {'confidence': '99',
               'datetime': '2023-02-01T14:24:00',
               'geohash3': 'r1k',
               'geohash5': 'r1k7y',
               'latitude': '-37.293',
               'longitude': '141.245',
               'producer': 'TERRA',
               'surface_temperature_celcius': '85'},
              {'confidence': '71',
               'datetime': '2023-02-02T04:48:00',
               'geohash3': 'r33',
               'geohash5': 'r336c',
               'latitude': '-37.471',
               'lo

Batch 3817
{'GHI_w/m2': '158',
 'air_temperature_celcius': '21',
 'date': '06/02/2023',
 'fire_event': [],
 'geohash3': 'r36',
 'geohash5': 'r361v',
 'hotspots': [{'confidence': '100',
               'datetime': '2023-02-05T00:00:00',
               'geohash3': 'r33',
               'geohash5': 'r336f',
               'latitude': '-37.453',
               'longitude': '148.111',
               'producer': 'AQUA',
               'surface_temperature_celcius': '54'},
              {'confidence': '69',
               'datetime': '2023-02-04T14:24:00',
               'geohash3': 'r33',
               'geohash5': 'r336f',
               'latitude': '-37.461',
               'longitude': '148.109',
               'producer': 'TERRA',
               'surface_temperature_celcius': '33'},
              {'confidence': '77',
               'datetime': '2023-02-05T04:48:00',
               'geohash3': 'r33',
               'geohash5': 'r336f',
               'latitude': '-37.461',
               '

Batch 3820
{'GHI_w/m2': '239',
 'air_temperature_celcius': '31',
 'date': '09/02/2023',
 'fire_event': [],
 'geohash3': 'r32',
 'geohash5': 'r321b',
 'hotspots': [{'confidence': '79',
               'datetime': '2023-02-08T00:00:00',
               'geohash3': 'r1s',
               'geohash5': 'r1s76',
               'latitude': '-35.957',
               'longitude': '141.088',
               'producer': 'AQUA',
               'surface_temperature_celcius': '49'},
              {'confidence': '87',
               'datetime': '2023-02-07T14:24:00',
               'geohash3': 'r33',
               'geohash5': 'r336f',
               'latitude': '-37.469',
               'longitude': '148.103',
               'producer': 'TERRA',
               'surface_temperature_celcius': '59'},
              {'confidence': '64',
               'datetime': '2023-02-08T04:48:00',
               'geohash3': 'r1r',
               'geohash5': 'r1rm5',
               'latitude': '-37.087',
               'l

Batch 3823
{'GHI_w/m2': '158',
 'air_temperature_celcius': '19',
 'date': '12/02/2023',
 'fire_event': [],
 'geohash3': 'r36',
 'geohash5': 'r361u',
 'hotspots': [{'confidence': '82',
               'datetime': '2023-02-11T00:00:00',
               'geohash3': 'r33',
               'geohash5': 'r3379',
               'latitude': '-37.336',
               'longitude': '148.073',
               'producer': 'AQUA',
               'surface_temperature_celcius': '38'},
              {'confidence': '89',
               'datetime': '2023-02-10T14:24:00',
               'geohash3': 'r33',
               'geohash5': 'r336f',
               'latitude': '-37.467',
               'longitude': '148.127',
               'producer': 'TERRA',
               'surface_temperature_celcius': '61'},
              {'confidence': '100',
               'datetime': '2023-02-11T04:48:00',
               'geohash3': 'r33',
               'geohash5': 'r3379',
               'latitude': '-37.332',
               '

Batch 3826
{'GHI_w/m2': '122',
 'air_temperature_celcius': '14',
 'date': '15/02/2023',
 'fire_event': [],
 'geohash3': 'r36',
 'geohash5': 'r364j',
 'hotspots': [{'confidence': '70',
               'datetime': '2023-02-14T00:00:00',
               'geohash3': 'r32',
               'geohash5': 'r32dx',
               'latitude': '-37.497',
               'longitude': '147.266',
               'producer': 'AQUA',
               'surface_temperature_celcius': '37'},
              {'confidence': '100',
               'datetime': '2023-02-13T14:24:00',
               'geohash3': 'r33',
               'geohash5': 'r336f',
               'latitude': '-37.446',
               'longitude': '148.102',
               'producer': 'TERRA',
               'surface_temperature_celcius': '65'},
              {'confidence': '86',
               'datetime': '2023-02-14T04:48:00',
               'geohash3': 'r33',
               'geohash5': 'r3362',
               'latitude': '-37.57',
               'l

Batch 3829
{'GHI_w/m2': '155',
 'air_temperature_celcius': '18',
 'date': '18/02/2023',
 'fire_event': [],
 'geohash3': 'r36',
 'geohash5': 'r364h',
 'hotspots': [{'confidence': '61',
               'datetime': '2023-02-17T00:00:00',
               'geohash3': 'r33',
               'geohash5': 'r3378',
               'latitude': '-37.347',
               'longitude': '148.019',
               'producer': 'AQUA',
               'surface_temperature_celcius': '36'},
              {'confidence': '100',
               'datetime': '2023-02-16T14:24:00',
               'geohash3': 'r33',
               'geohash5': 'r3374',
               'latitude': '-37.406',
               'longitude': '148.123',
               'producer': 'TERRA',
               'surface_temperature_celcius': '88'},
              {'confidence': '87',
               'datetime': '2023-02-17T04:48:00',
               'geohash3': 'r33',
               'geohash5': 'r3362',
               'latitude': '-37.569',
               '

Batch 3832
{'GHI_w/m2': '130',
 'air_temperature_celcius': '15',
 'date': '21/02/2023',
 'fire_event': [{'confidence': '80',
                 'datetime': '2023-02-20T04:48:00',
                 'fire_cause': 'other',
                 'hotspot_geohash5': 'r1vj5',
                 'latitude': '-34.2695',
                 'longitude': '142.2025',
                 'surface_temperature_celcius': '54'},
                {'confidence': 85.0,
                 'datetime': '2023-02-20T14:24:00',
                 'fire_cause': 'other',
                 'hotspot_geohash5': 'r1vhg',
                 'latitude': '-34.2803',
                 'longitude': '142.1964',
                 'surface_temperature_celcius': 59.0},
                {'confidence': '81',
                 'datetime': '2023-02-20T19:12:00',
                 'fire_cause': 'other',
                 'hotspot_geohash5': 'r1vhu',
                 'latitude': '-34.2782',
                 'longitude': '142.2253',
                 'surface_te

Batch 3835
{'GHI_w/m2': '147',
 'air_temperature_celcius': '18',
 'date': '24/02/2023',
 'fire_event': [],
 'geohash3': 'r36',
 'geohash5': 'r364j',
 'hotspots': [{'confidence': '70',
               'datetime': '2023-02-22T14:24:00',
               'geohash3': 'r33',
               'geohash5': 'r33mt',
               'latitude': '-36.996',
               'longitude': '148.228',
               'producer': 'TERRA',
               'surface_temperature_celcius': '40'},
              {'confidence': '63',
               'datetime': '2023-02-23T00:00:00',
               'geohash3': 'r1u',
               'geohash5': 'r1udj',
               'latitude': '-34.793',
               'longitude': '141.5898',
               'producer': 'AQUA',
               'surface_temperature_celcius': '42'},
              {'confidence': '77',
               'datetime': '2023-02-23T04:48:00',
               'geohash3': 'r1u',
               'geohash5': 'r1udn',
               'latitude': '-34.8006',
               

Batch 3838
{'GHI_w/m2': '173',
 'air_temperature_celcius': '21',
 'date': '27/02/2023',
 'fire_event': [],
 'geohash3': 'r1q',
 'geohash5': 'r1q8h',
 'hotspots': [{'confidence': '61',
               'datetime': '2023-02-26T00:00:00',
               'geohash3': 'r1j',
               'geohash5': 'r1juz',
               'latitude': '-38.5233',
               'longitude': '143.433',
               'producer': 'AQUA',
               'surface_temperature_celcius': '41'},
              {'confidence': '81',
               'datetime': '2023-02-25T14:24:00',
               'geohash3': 'r1v',
               'geohash5': 'r1vhu',
               'latitude': '-34.2782',
               'longitude': '142.2253',
               'producer': 'TERRA',
               'surface_temperature_celcius': '54'},
              {'confidence': '83',
               'datetime': '2023-02-26T04:48:00',
               'geohash3': 'r1m',
               'geohash5': 'r1mbx',
               'latitude': '-37.8662',
             

Batch 3841
{'GHI_w/m2': '169',
 'air_temperature_celcius': '21',
 'date': '02/03/2023',
 'fire_event': [{'confidence': 61.0,
                 'datetime': '2023-03-01T09:36:00',
                 'fire_cause': 'other',
                 'hotspot_geohash5': 'r1ryu',
                 'latitude': '-36.779',
                 'longitude': '146.108',
                 'surface_temperature_celcius': 32.0}],
 'geohash3': 'r1r',
 'geohash5': 'r1rjf',
 'hotspots': [{'confidence': '74',
               'datetime': '2023-02-28T14:24:00',
               'geohash3': 'r1k',
               'geohash5': 'r1kfm',
               'latitude': '-37.5537',
               'longitude': '141.9264',
               'producer': 'TERRA',
               'surface_temperature_celcius': '63'},
              {'confidence': '53',
               'datetime': '2023-03-01T00:00:00',
               'geohash3': 'r1k',
               'geohash5': 'r1kt1',
               'latitude': '-37.062',
               'longitude': '141.373',
   

Batch 3844
{'GHI_w/m2': '170',
 'air_temperature_celcius': '22',
 'date': '05/03/2023',
 'fire_event': [],
 'geohash3': 'r1p',
 'geohash5': 'r1pxf',
 'hotspots': [{'confidence': '100',
               'datetime': '2023-03-04T00:00:00',
               'geohash3': 'r1w',
               'geohash5': 'r1w1n',
               'latitude': '-36.3799',
               'longitude': '143.7149',
               'producer': 'AQUA',
               'surface_temperature_celcius': '89'},
              {'confidence': '69',
               'datetime': '2023-03-03T14:24:00',
               'geohash3': 'r30',
               'geohash5': 'r30td',
               'latitude': '-38.4031',
               'longitude': '147.0701',
               'producer': 'TERRA',
               'surface_temperature_celcius': '45'},
              {'confidence': '70',
               'datetime': '2023-03-04T04:48:00',
               'geohash3': 'r1m',
               'geohash5': 'r1mpj',
               'latitude': '-36.7179',
           

Batch 3847
{'GHI_w/m2': '161',
 'air_temperature_celcius': '19',
 'date': '08/03/2023',
 'fire_event': [],
 'geohash3': 'r1n',
 'geohash5': 'r1nhc',
 'hotspots': [{'confidence': '86',
               'datetime': '2023-03-06T19:12:00',
               'geohash3': 'r1m',
               'geohash5': 'r1mr5',
               'latitude': '-36.701',
               'longitude': '142.538',
               'producer': 'AQUA',
               'surface_temperature_celcius': '56'},
              {'confidence': '78',
               'datetime': '2023-03-06T09:36:00',
               'geohash3': 'r1m',
               'geohash5': 'r1mq7',
               'latitude': '-36.834',
               'longitude': '142.524',
               'producer': 'TERRA',
               'surface_temperature_celcius': '44'},
              {'confidence': '76',
               'datetime': '2023-03-07T00:00:00',
               'geohash3': 'r1m',
               'geohash5': 'r1mrv',
               'latitude': '-36.5775',
               '

Batch 3850
{'GHI_w/m2': '149',
 'air_temperature_celcius': '19',
 'date': '11/03/2023',
 'fire_event': [{'confidence': '74',
                 'datetime': '2023-03-10T14:24:00',
                 'fire_cause': 'other',
                 'hotspot_geohash5': 'r1q2r',
                 'latitude': '-37.9243',
                 'longitude': '144.1064',
                 'surface_temperature_celcius': '48'}],
 'geohash3': 'r1q',
 'geohash5': 'r1q40',
 'hotspots': [{'confidence': '71',
               'datetime': '2023-03-09T19:12:00',
               'geohash3': 'r1k',
               'geohash5': 'r1kxq',
               'latitude': '-36.6851',
               'longitude': '141.6125',
               'producer': 'AQUA',
               'surface_temperature_celcius': '46'},
              {'confidence': '66',
               'datetime': '2023-03-09T09:36:00',
               'geohash3': 'r1m',
               'geohash5': 'r1meq',
               'latitude': '-37.3583',
               'longitude': '143.0203',


Batch 3853
{'GHI_w/m2': '154',
 'air_temperature_celcius': '20',
 'date': '14/03/2023',
 'fire_event': [],
 'geohash3': 'r1n',
 'geohash5': 'r1nqb',
 'hotspots': [{'confidence': '68',
               'datetime': '2023-03-12T09:36:00',
               'geohash3': 'r1s',
               'geohash5': 'r1s9u',
               'latitude': '-36.2111',
               'longitude': '141.505',
               'producer': 'TERRA',
               'surface_temperature_celcius': '44'},
              {'confidence': '65',
               'datetime': '2023-03-12T19:12:00',
               'geohash3': 'r1t',
               'geohash5': 'r1tu2',
               'latitude': '-35.779',
               'longitude': '143.1057',
               'producer': 'AQUA',
               'surface_temperature_celcius': '42'},
              {'confidence': '53',
               'datetime': '2023-03-12T14:24:00',
               'geohash3': 'r1m',
               'geohash5': 'r1mru',
               'latitude': '-36.5794',
              

ERROR:root:KeyboardInterrupt while sending command.
Traceback (most recent call last):
  File "/opt/conda/lib/python3.8/site-packages/py4j/java_gateway.py", line 1038, in send_command
    response = connection.send_command(command)
  File "/opt/conda/lib/python3.8/site-packages/py4j/clientserver.py", line 511, in send_command
    answer = smart_decode(self.stream.readline()[:-1])
  File "/opt/conda/lib/python3.8/socket.py", line 669, in readinto
    return self._sock.recv_into(b)
KeyboardInterrupt


Interrupted by CTRL-C. Stopping query.


In [9]:
# Print all documents in hist_data collection for checking (if required)
# cursor = collection.find({})
# for document in cursor: 
#     pprint(document)