#### Import Libraries

In [21]:
# !pip install pandas
# !pip install pymongo
# !pip install pprint
import pandas as pd
import pprint
from datetime import datetime
import re
import pymongo
from pymongo import MongoClient

#### Read CSVs

In [22]:
climate_historic = pd.read_csv('../Datasets/climate_historic.csv')
hotspot_historic = pd.read_csv('../Datasets/hotspot_historic.csv')

In [23]:
climate_historic.head()

Unnamed: 0,station,date,air_temperature_celcius,relative_humidity,windspeed_knots,max_wind_speed,precipitation,GHI_w/m2
0,948700,31/12/2021,19,56.8,7.9,11.1,0.00I,154
1,948700,2/1/2022,15,50.7,9.2,13.0,0.02G,128
2,948700,3/1/2022,16,53.6,8.1,15.0,0.00G,133
3,948700,4/1/2022,24,61.6,7.7,14.0,0.00I,186
4,948700,5/1/2022,24,62.3,7.0,13.0,0.00I,185


In [24]:
hotspot_historic.head()

Unnamed: 0,latitude,longitude,datetime,confidence,date,surface_temperature_celcius
0,-37.966,145.051,2022-12-27T04:16:51,78,27/12/2022,68
1,-35.541,143.311,2022-12-27T00:02:15,82,27/12/2022,63
2,-35.554,143.307,2022-12-27T00:02:15,67,27/12/2022,53
3,-35.543,143.316,2022-12-27T00:02:14,86,27/12/2022,67
4,-37.708,145.1,2022-12-25T04:29:08,80,25/12/2022,54


Based on the two data sets provided i.e. hotspot_historic.csv and climate_historic.csv, design a suitable data model to support the efficient querying of the two data sets in MongoDB. Justify your data model design.   
The output of this task should be 
- An example of the data model.
- The justification for choosing that data model.


## Data Model
Based on the two datasets given, embedded document seems to be best choice as we can embed hotspot data within the climate data. Climate data contain document a day, while hotspot data can have more than 1 document a day. It will make a lot of sense, to have multiple hotspot embedded into climate data due to several reason:

1. Simplicity: Embedded data are easier to understand and work with. All the relevant and related data is stored within one document instead of spread out across multiple documents. This make query and manipulation on the data cleaner and simpler.
2. Performance: Embedded can provide better time performance in certain situation, especially when data are regularly accessed together. Since, all the relevant and related data is stored within one document, query such as update and join will be much faster as we don't have to reference multiple documents.
3. Atomicity: Embedded data allow for operation on single document. When deleting, updating or modifying a document, we only need to perform the operation on one document instead of multiple referenced document. This is a property of atomicity. 


#### Example of Embedded Document

In [25]:
"""
{
    _id: ObjectId("64806e495dfd959c2ee65a26"),
    date: '2021-12-31T00:00:00.000Z',
    station: 948700,
    air_temperature: 19,
    relative_humidity: 56.8,
    windspeed_knots: 7.9,
    max_wind_speed: 11.1,
    precipitation: 0,
    precipitation_flag: 'I',
    GHI: 154,
    hotspot: [
      {
        datetime: '2021-12-31T04:42:00.000Z',
        hotspot_latitude: -37.3803,
        hotspot_longitude: 145.611,
        confidence: 91,
        surface_temperature: 68
      },
      {
        datetime: '2021-12-31T03:55:00.000Z',
        hotspot_latitude: -36.4355,
        hotspot_longitude: 141.5901,
        confidence: 72,
        surface_temperature: 46
      },
    ]
}
""";

## Task 2.1 - Load Data From CSVs Into MongoDB

In [26]:
# Specify host and port
ip_address = '192.168.0.221' # Change the ip_adress to host
client = MongoClient(ip_address, 27017) 

In [27]:
# Using A3_db
db = client['A3_db'] 

# Using collection fire_historic
fire_collection = db['fire_historic'] # Collection of Climate Historic and Hotspot Historic (embedded)

# fire_collection.drop()

In [28]:
# Function to turn string into ISO date
def date_to_iso(date, date_format='%d/%m/%Y'):
    return datetime.strptime(str(date), date_format)

# Map function to value, if error is raised return None
# If value is classified as null also return None
def val_or_null(value, func, null=None):
    try:
        value = func(value)
        if value == null:
            return None
        else:
            return value
    except:
        return None

In [29]:
# Parse Climate Historic
for index, row in climate_historic.iterrows():
    date = date_to_iso(row.date)
    precipitation = str(row['precipitation ']).strip()
    p = re.match(r'([0-9.]+)([a-z]+)', precipitation, re.I)

    if p:
        p = p.groups()
        precipitation_flag = p[1]
        precipitation = float(p[0])
    
    station = val_or_null(row.station, int)
    air_temperature = val_or_null(row.air_temperature_celcius, float)
    relative_humidity = val_or_null(row.relative_humidity, float)
    windspeed_knots = val_or_null(row.windspeed_knots, float)
    max_wind_speed = val_or_null(row.max_wind_speed, float)
    GHI = val_or_null(row['GHI_w/m2'], float)

    document = {
        'station' : station,
        'date' : date,
        'hotspot':[],
        'air_temperature': air_temperature,
        'relative_humidity': relative_humidity,
        'windspeed_knots': windspeed_knots,
        'max_wind_speed': max_wind_speed,
        'precipitation_flag': precipitation_flag,
        'precipitation': precipitation,
        'GHI' : GHI
    }
    
    fire_collection.insert_one(document)

In [30]:
# Parse Hotspot Historic
for index, row in hotspot_historic.iterrows():
    latitude = val_or_null(row.latitude, float)
    longitude = val_or_null(row.longitude, float)
    dtime = val_or_null(row.datetime, lambda x: datetime.fromisoformat(str(x)))
    confidence = val_or_null(row.confidence, float)
    date = val_or_null(row.date, date_to_iso)
    surface_temperature = val_or_null(row.surface_temperature_celcius, float)


    document = {
#         'location': {'type': 'Point', 'coordinates': [longitude, latitude]}, # Store longitude and latitude as location
        'longitude': longitude,
        'latitude': latitude,
        'datetime': dtime, 
        'confidence': confidence,
        'surface_temperature': surface_temperature
    }
    
    fire_collection.update_one({'date':date}, {'$push':{'hotspot':document}}, upsert=True)

## Task 2.2 - Writing Queries

In [31]:
# Function for printing query
def print_response(response):
    for res in response:
        pprint.pp(res)
        print()

a) Find climate data on 12th December 2022. 


In [32]:
response = fire_collection.find({'date':date_to_iso('12/12/2022')})

In [33]:
print_response(response)

{'_id': ObjectId('64809eaa99470914613da023'),
 'station': 948702,
 'date': datetime.datetime(2022, 12, 12, 0, 0),
 'hotspot': [{'longitude': 145.25,
              'latitude': -37.903,
              'datetime': datetime.datetime(2022, 12, 12, 0, 45, 38),
              'confidence': 53.0,
              'surface_temperature': 44.0}],
 'air_temperature': 19.0,
 'relative_humidity': 55.3,
 'windspeed_knots': 6.2,
 'max_wind_speed': 12.0,
 'precipitation_flag': 'I',
 'precipitation': 0.0,
 'GHI': 156.0}



b) Find the latitude, longitude, surface temperature (°C), and confidence when the surface temperature (°C) was between 65 °C and 100 °C.

In [34]:
response = fire_collection.aggregate([
    {'$unwind':'$hotspot'},
    {'$match':{
        'hotspot.surface_temperature':{'$gte':65, '$lte':100}
    }},
    {'$project':{'_id':0, 
                 'hotspot.latitude':1, 
                 'hotspot.longitude':1, 
                 'hotspot.surface_temperature':1,
                 'hotspot.confidence':1
                }}
]
)

In [35]:
print_response(response)

{'hotspot': {'longitude': 147.9187,
             'latitude': -37.2284,
             'confidence': 94.0,
             'surface_temperature': 73.0}}

{'hotspot': {'longitude': 142.0703,
             'latitude': -37.6572,
             'confidence': 97.0,
             'surface_temperature': 80.0}}

{'hotspot': {'longitude': 148.1459,
             'latitude': -37.0193,
             'confidence': 84.0,
             'surface_temperature': 71.0}}

{'hotspot': {'longitude': 147.027,
             'latitude': -37.4229,
             'confidence': 100.0,
             'surface_temperature': 99.0}}

{'hotspot': {'longitude': 148.1582,
             'latitude': -37.0055,
             'confidence': 80.0,
             'surface_temperature': 68.0}}

{'hotspot': {'longitude': 147.0242,
             'latitude': -37.4128,
             'confidence': 85.0,
             'surface_temperature': 98.0}}

{'hotspot': {'longitude': 141.5361,
             'latitude': -34.357,
             'confidence': 90.0,
         

c) Find date, surface temperature (°C), air temperature (°C), relative humidity and max wind speed on 15th and 16th of December 2022. 

In [36]:
response = fire_collection.find(
    {'date':{
        '$in':[date_to_iso('15/12/2022'), date_to_iso('16/12/2022')]
    }},
    {'_id':0, 
     'date':1, 
     'hotspot.datetime':1,
     'hotspot.surface_temperature':1,
     'air_temperature':1,
     'relative_humidity':1, 
     'max_wind_speed':1}
)

In [37]:
print_response(response)

{'date': datetime.datetime(2022, 12, 15, 0, 0),
 'hotspot': [{'datetime': datetime.datetime(2022, 12, 15, 13, 17, 17),
              'surface_temperature': 42.0},
             {'datetime': datetime.datetime(2022, 12, 15, 13, 17, 17),
              'surface_temperature': 36.0},
             {'datetime': datetime.datetime(2022, 12, 15, 13, 17, 17),
              'surface_temperature': 38.0},
             {'datetime': datetime.datetime(2022, 12, 15, 1, 16, 23),
              'surface_temperature': 40.0}],
 'air_temperature': 18.0,
 'relative_humidity': 52.0,
 'max_wind_speed': 14.0}

{'date': datetime.datetime(2022, 12, 16, 0, 0),
 'hotspot': [{'datetime': datetime.datetime(2022, 12, 16, 15, 38, 39),
              'surface_temperature': 43.0},
             {'datetime': datetime.datetime(2022, 12, 16, 15, 38, 39),
              'surface_temperature': 33.0},
             {'datetime': datetime.datetime(2022, 12, 16, 4, 35, 15),
              'surface_temperature': 54.0},
             {'datet

d) Find datetime, air temperature (°C), surface temperature (°C) and confidence when the confidence is between 80 and 100. 

In [38]:
response = fire_collection.aggregate([
    {'$unwind':'$hotspot'},
    {'$match':{'hotspot.confidence':{'$gte':80, '$lte':100}}},
    {'$project':{
        '_id':0,
        'hotspot.datetime':1,
        'air_temperature':1,
        'hotspot.surface_temperature':1,
        'hotspot.confidence':1
    }}
])

In [39]:
print_response(response)

{'hotspot': {'datetime': datetime.datetime(2022, 3, 6, 5, 6, 30),
             'confidence': 87.0,
             'surface_temperature': 62.0},
 'air_temperature': 20.0}

{'hotspot': {'datetime': datetime.datetime(2022, 3, 6, 5, 6, 20),
             'confidence': 85.0,
             'surface_temperature': 59.0},
 'air_temperature': 20.0}

{'hotspot': {'datetime': datetime.datetime(2022, 3, 7, 4, 16, 10),
             'confidence': 88.0,
             'surface_temperature': 64.0},
 'air_temperature': 19.0}

{'hotspot': {'datetime': datetime.datetime(2022, 3, 9, 13, 23, 40),
             'confidence': 86.0,
             'surface_temperature': 41.0},
 'air_temperature': 23.0}

{'hotspot': {'datetime': datetime.datetime(2022, 3, 10, 4, 48, 40),
             'confidence': 100.0,
             'surface_temperature': 105.0},
 'air_temperature': 19.0}

{'hotspot': {'datetime': datetime.datetime(2022, 3, 10, 4, 46, 20),
             'confidence': 100.0,
             'surface_temperature': 109.0},
 '

{'hotspot': {'datetime': datetime.datetime(2022, 4, 4, 4, 37, 10),
             'confidence': 88.0,
             'surface_temperature': 64.0},
 'air_temperature': 16.0}

{'hotspot': {'datetime': datetime.datetime(2022, 4, 4, 4, 36, 10),
             'confidence': 88.0,
             'surface_temperature': 63.0},
 'air_temperature': 16.0}

{'hotspot': {'datetime': datetime.datetime(2022, 4, 4, 4, 36, 10),
             'confidence': 87.0,
             'surface_temperature': 62.0},
 'air_temperature': 16.0}

{'hotspot': {'datetime': datetime.datetime(2022, 4, 4, 4, 36),
             'confidence': 90.0,
             'surface_temperature': 76.0},
 'air_temperature': 16.0}

{'hotspot': {'datetime': datetime.datetime(2022, 4, 4, 4, 35),
             'confidence': 100.0,
             'surface_temperature': 95.0},
 'air_temperature': 16.0}

{'hotspot': {'datetime': datetime.datetime(2022, 4, 4, 4, 34, 50),
             'confidence': 83.0,
             'surface_temperature': 57.0},
 'air_temperat

{'hotspot': {'datetime': datetime.datetime(2022, 5, 10, 4, 8, 10),
             'confidence': 81.0,
             'surface_temperature': 55.0},
 'air_temperature': 10.0}

{'hotspot': {'datetime': datetime.datetime(2022, 5, 10, 4, 8, 10),
             'confidence': 86.0,
             'surface_temperature': 60.0},
 'air_temperature': 10.0}

{'hotspot': {'datetime': datetime.datetime(2022, 5, 10, 4, 8, 10),
             'confidence': 80.0,
             'surface_temperature': 53.0},
 'air_temperature': 10.0}

{'hotspot': {'datetime': datetime.datetime(2022, 5, 10, 4, 8, 10),
             'confidence': 81.0,
             'surface_temperature': 55.0},
 'air_temperature': 10.0}

{'hotspot': {'datetime': datetime.datetime(2022, 5, 10, 4, 8, 10),
             'confidence': 87.0,
             'surface_temperature': 62.0},
 'air_temperature': 10.0}

{'hotspot': {'datetime': datetime.datetime(2022, 5, 10, 4, 8, 10),
             'confidence': 84.0,
             'surface_temperature': 58.0},
 'air_t

e) Find the top 10 records with the highest surface temperature (°C).

In [40]:
response = fire_collection.aggregate([
    {'$unwind':'$hotspot'},
    {'$sort':{'hotspot.surface_temperature':-1}},
    {'$limit':10}
    ])

In [41]:
print_response(response)

{'_id': ObjectId('64809ea999470914613d9f35'),
 'station': 948701,
 'date': datetime.datetime(2022, 4, 18, 0, 0),
 'hotspot': {'longitude': 143.062,
             'latitude': -38.1665,
             'datetime': datetime.datetime(2022, 4, 18, 4, 52),
             'confidence': 100.0,
             'surface_temperature': 124.0},
 'air_temperature': 15.0,
 'relative_humidity': 56.1,
 'windspeed_knots': 5.1,
 'max_wind_speed': 9.9,
 'precipitation_flag': 'I',
 'precipitation': 0.0,
 'GHI': 122.0}

{'_id': ObjectId('64809ea999470914613d9f27'),
 'station': 948701,
 'date': datetime.datetime(2022, 4, 4, 0, 0),
 'hotspot': {'longitude': 142.1986,
             'latitude': -36.343,
             'datetime': datetime.datetime(2022, 4, 4, 4, 32, 50),
             'confidence': 100.0,
             'surface_temperature': 123.0},
 'air_temperature': 16.0,
 'relative_humidity': 47.5,
 'windspeed_knots': 5.4,
 'max_wind_speed': 12.0,
 'precipitation_flag': 'I',
 'precipitation': 0.0,
 'GHI': 140.0}

{'_id':

f) Find the number of fires each day. You are required to only display the total number of fires and the date in the output.

In [42]:
response = fire_collection.aggregate([
    {'$project':{'_id':0, 'date':1, 'num_of_fires':{'$size':'$hotspot'}}},
    {'$group':{'_id':'$date', 'num_of_fires':{'$sum':'$num_of_fires'}}},
    {'$project':{'_id':0, 'date': '$_id', 'num_of_fires':1}},
    {'$sort':{'date':1}}
])

In [43]:
print_response(response)

{'num_of_fires': 0, 'date': datetime.datetime(2021, 12, 31, 0, 0)}

{'num_of_fires': 0, 'date': datetime.datetime(2022, 1, 2, 0, 0)}

{'num_of_fires': 0, 'date': datetime.datetime(2022, 1, 3, 0, 0)}

{'num_of_fires': 0, 'date': datetime.datetime(2022, 1, 4, 0, 0)}

{'num_of_fires': 0, 'date': datetime.datetime(2022, 1, 5, 0, 0)}

{'num_of_fires': 0, 'date': datetime.datetime(2022, 1, 6, 0, 0)}

{'num_of_fires': 0, 'date': datetime.datetime(2022, 1, 7, 0, 0)}

{'num_of_fires': 0, 'date': datetime.datetime(2022, 1, 8, 0, 0)}

{'num_of_fires': 0, 'date': datetime.datetime(2022, 1, 9, 0, 0)}

{'num_of_fires': 0, 'date': datetime.datetime(2022, 1, 10, 0, 0)}

{'num_of_fires': 0, 'date': datetime.datetime(2022, 1, 11, 0, 0)}

{'num_of_fires': 0, 'date': datetime.datetime(2022, 1, 12, 0, 0)}

{'num_of_fires': 0, 'date': datetime.datetime(2022, 1, 13, 0, 0)}

{'num_of_fires': 0, 'date': datetime.datetime(2022, 1, 14, 0, 0)}

{'num_of_fires': 0, 'date': datetime.datetime(2022, 1, 15, 0, 0)}

{'

g) Find the records of fires where the confidence is below 70.

In [44]:
response = fire_collection.aggregate([
    {'$unwind':'$hotspot'},
    {'$match':{'hotspot.confidence':{'$lt':70}}},
    {'$project':{
        '_id':0,
        'hotspot':1,
        'date':1
    }}
])

In [45]:
print_response(response)

{'date': datetime.datetime(2022, 3, 8, 0, 0),
 'hotspot': {'longitude': 141.9352,
             'latitude': -37.7885,
             'datetime': datetime.datetime(2022, 3, 8, 4, 51),
             'confidence': 68.0,
             'surface_temperature': 55.0}}

{'date': datetime.datetime(2022, 3, 9, 0, 0),
 'hotspot': {'longitude': 147.5866,
             'latitude': -37.7171,
             'datetime': datetime.datetime(2022, 3, 9, 3, 57),
             'confidence': 54.0,
             'surface_temperature': 44.0}}

{'date': datetime.datetime(2022, 3, 10, 0, 0),
 'hotspot': {'longitude': 148.0353,
             'latitude': -36.2544,
             'datetime': datetime.datetime(2022, 3, 10, 4, 43),
             'confidence': 55.0,
             'surface_temperature': 42.0}}

{'date': datetime.datetime(2022, 3, 10, 0, 0),
 'hotspot': {'longitude': 147.9621,
             'latitude': -37.2197,
             'datetime': datetime.datetime(2022, 3, 10, 4, 42, 30),
             'confidence': 54.0,
        

 'hotspot': {'longitude': 143.4796,
             'latitude': -35.7866,
             'datetime': datetime.datetime(2022, 4, 18, 4, 49),
             'confidence': 59.0,
             'surface_temperature': 46.0}}

{'date': datetime.datetime(2022, 4, 18, 0, 0),
 'hotspot': {'longitude': 143.9798,
             'latitude': -36.925,
             'datetime': datetime.datetime(2022, 4, 18, 4, 49),
             'confidence': 57.0,
             'surface_temperature': 41.0}}

{'date': datetime.datetime(2022, 4, 18, 0, 0),
 'hotspot': {'longitude': 142.8528,
             'latitude': -37.9049,
             'datetime': datetime.datetime(2022, 4, 18, 4, 48, 30),
             'confidence': 59.0,
             'surface_temperature': 47.0}}

{'date': datetime.datetime(2022, 4, 18, 0, 0),
 'hotspot': {'longitude': 142.7434,
             'latitude': -36.1581,
             'datetime': datetime.datetime(2022, 4, 18, 4, 48, 30),
             'confidence': 65.0,
             'surface_temperature': 43.0}}

{'da

{'date': datetime.datetime(2022, 5, 13, 0, 0),
 'hotspot': {'longitude': 143.7526,
             'latitude': -36.3883,
             'datetime': datetime.datetime(2022, 5, 13, 4, 40, 20),
             'confidence': 56.0,
             'surface_temperature': 39.0}}

{'date': datetime.datetime(2022, 5, 13, 0, 0),
 'hotspot': {'longitude': 144.1473,
             'latitude': -36.5513,
             'datetime': datetime.datetime(2022, 5, 13, 4, 40, 10),
             'confidence': 66.0,
             'surface_temperature': 43.0}}

{'date': datetime.datetime(2022, 5, 13, 0, 0),
 'hotspot': {'longitude': 144.5124,
             'latitude': -36.6361,
             'datetime': datetime.datetime(2022, 5, 13, 4, 39, 10),
             'confidence': 66.0,
             'surface_temperature': 43.0}}

{'date': datetime.datetime(2022, 5, 13, 0, 0),
 'hotspot': {'longitude': 143.6425,
             'latitude': -37.1999,
             'datetime': datetime.datetime(2022, 5, 13, 4, 38, 40),
             'confidence'

h) Find the average surface temperature (°C) for each day. You are required to only display average surface temperature (°C) and the date in the output. 

In [46]:
response = fire_collection.aggregate([
    {'$unwind':'$hotspot'},
    {'$group':{'_id':'$date', 'avg_surface_temperature':{'$avg':'$hotspot.surface_temperature'}}},
    {'$project':{'_id':0, 'date': '$_id', 'avg_surface_temperature':1}},
    {'$sort':{'date':1}}
])

In [47]:
print_response(response)

{'avg_surface_temperature': 60.5, 'date': datetime.datetime(2022, 3, 6, 0, 0)}

{'avg_surface_temperature': 64.0, 'date': datetime.datetime(2022, 3, 7, 0, 0)}

{'avg_surface_temperature': 51.5, 'date': datetime.datetime(2022, 3, 8, 0, 0)}

{'avg_surface_temperature': 46.666666666666664,
 'date': datetime.datetime(2022, 3, 9, 0, 0)}

{'avg_surface_temperature': 69.375,
 'date': datetime.datetime(2022, 3, 10, 0, 0)}

{'avg_surface_temperature': 88.2, 'date': datetime.datetime(2022, 3, 12, 0, 0)}

{'avg_surface_temperature': 38.5, 'date': datetime.datetime(2022, 3, 13, 0, 0)}

{'avg_surface_temperature': 65.6, 'date': datetime.datetime(2022, 3, 14, 0, 0)}

{'avg_surface_temperature': 46.0, 'date': datetime.datetime(2022, 3, 15, 0, 0)}

{'avg_surface_temperature': 59.5, 'date': datetime.datetime(2022, 3, 17, 0, 0)}

{'avg_surface_temperature': 79.33333333333333,
 'date': datetime.datetime(2022, 3, 18, 0, 0)}

{'avg_surface_temperature': 65.57142857142857,
 'date': datetime.datetime(2022, 3

i) Find the top 10 records with the lowest GHI.

In [48]:
response = fire_collection.find().sort('GHI', -1).limit(10)

In [49]:
print_response(response)

{'_id': ObjectId('64809ea999470914613d9ed0'),
 'station': 948700,
 'date': datetime.datetime(2022, 1, 7, 0, 0),
 'hotspot': [],
 'air_temperature': 32.0,
 'relative_humidity': 54.1,
 'windspeed_knots': 12.8,
 'max_wind_speed': 19.0,
 'precipitation_flag': 'I',
 'precipitation': 0.0,
 'GHI': 265.0}

{'_id': ObjectId('64809eaa99470914613da007'),
 'station': 948702,
 'date': datetime.datetime(2022, 11, 14, 0, 0),
 'hotspot': [{'longitude': 144.175,
              'latitude': -37.862,
              'datetime': datetime.datetime(2022, 11, 14, 4, 35, 4),
              'confidence': 87.0,
              'surface_temperature': 65.0},
             {'longitude': 143.493,
              'latitude': -38.527,
              'datetime': datetime.datetime(2022, 11, 14, 0, 21, 26),
              'confidence': 64.0,
              'surface_temperature': 45.0},
             {'longitude': 143.375,
              'latitude': -37.332,
              'datetime': datetime.datetime(2022, 11, 14, 0, 21, 7),
         

j) Find the records with a 24-hour precipitation recorded between 0.20 to 0.35

In [50]:
response = fire_collection.find(
    {'precipitation_flag':'G', 'precipitation':{'$gte':0.2, '$lte':0.35}}
)

In [51]:
print_response(response)

{'_id': ObjectId('64809ea999470914613d9ed6'),
 'station': 948700,
 'date': datetime.datetime(2022, 1, 13, 0, 0),
 'hotspot': [],
 'air_temperature': 19.0,
 'relative_humidity': 54.1,
 'windspeed_knots': 11.2,
 'max_wind_speed': 18.1,
 'precipitation_flag': 'G',
 'precipitation': 0.31,
 'GHI': 157.0}

{'_id': ObjectId('64809ea999470914613d9f21'),
 'station': 948701,
 'date': datetime.datetime(2022, 3, 29, 0, 0),
 'hotspot': [{'longitude': 141.6325,
              'latitude': -34.2648,
              'datetime': datetime.datetime(2022, 3, 29, 0, 48, 40),
              'confidence': 69.0,
              'surface_temperature': 51.0}],
 'air_temperature': 17.0,
 'relative_humidity': 49.9,
 'windspeed_knots': 12.2,
 'max_wind_speed': 21.0,
 'precipitation_flag': 'G',
 'precipitation': 0.24,
 'GHI': 146.0}

{'_id': ObjectId('64809ea999470914613d9f37'),
 'station': 948701,
 'date': datetime.datetime(2022, 4, 20, 0, 0),
 'hotspot': [{'longitude': 145.1536,
              'latitude': -36.8871,
     

# Task 2.3 - Indexing

From Task 2, the most frequently used attributes are date, surface_temperature, and confidence. Since date is the most used attributes in query, we should use it as an index to increase performance of queries using it. On the other hand, surface_temperature and confidence are attributes within the embedded array, and it will take much longer to find them as we have to go through all the document and the embedded documents. So we will also use surface_temperature and confidence as index. 

In [66]:
# Drop index
fire_collection.drop_indexes()

In [71]:
# -1 is descending
response = fire_collection.create_index([
    ('date', -1), # Latest data play more importance
    ('hotspot.surface_temperature', -1),  # Hotter surface temperature is more important
    ('hotspot.confidence', -1) # Higher confidence is more important
], name="climate_index")

In [72]:
fire_collection.index_information()

{'_id_': {'v': 2, 'key': [('_id', 1)]},
 'climate_index': {'v': 2,
  'key': [('date', -1),
   ('hotspot.surface_temperature', -1),
   ('hotspot.confidence', -1)]}}