## Dependencies

In [1]:
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings('ignore')

from pathlib import Path
import os, zipfile
import shutil
import glob

from pymongo import MongoClient
import time

## Constants

In [28]:
MONGO_URI = 'mongodb+srv://phil:phil@cluster0-laoqs.mongodb.net/test?retryWrites=true&w=majority'
MONGO_DBNAME = 'australia_fire_db'

## Extract data/csvs from zip file

In [3]:
# unzip files in Resources folder.
extension = ".zip"
extracted_dir_name = "."

# Get the current working directory.
# Need to be in root directory of this project for this to work.
cwd_dir_name = os.getcwd()
print(f"The current working directory is {cwd_dir_name}.")

os.chdir("Resources") # change directory from working dir to dir with zip file.
# This should be the "Resources" folder.
dir_name = os.getcwd()
print(f"You are now in the following directory: {dir_name}.")

for item in os.listdir(dir_name): # loop through the items in the directory.
    if item.endswith(extension): # check for ".zip" extension"
        try:
            file_name = os.path.abspath(item) # get full path of files
            zip_ref = zipfile.ZipFile(file_name) # create zipfile object
            unzipped_directory = os.path.join(extracted_dir_name) # reference to the directory where the zip files will be extracted.
            zip_ref.extractall(unzipped_directory) # extract file to dir
            zip_ref.close() # close file
            print(f"Successfully unzipped {item} into the following folder:{dir_name}.")
        except:
            print(f"Error trying to unzip data file(s).")
            
# Go up one directory into the project root directory.
os.chdir(os.path.normpath(os.getcwd() + os.sep + os.pardir))
print(os.path.normpath(os.getcwd() + os.sep + os.pardir))

The current working directory is C:\Users\phili\Desktop\australia-fire-api-and-dashboard.
You are now in the following directory: C:\Users\phili\Desktop\australia-fire-api-and-dashboard\Resources.
Successfully unzipped fires-from-space-australia-and-new-zeland.zip into the following folder:C:\Users\phili\Desktop\australia-fire-api-and-dashboard\Resources.
C:\Users\phili\Desktop


## Import csv files and read into pandas

In [4]:
# Path to csv files.
path_to_csvs = os.path.join(".", "Resources")
all_files = glob.glob(os.path.join(path_to_csvs, "*.csv"))

df_from_each_file = []

for f in all_files:
    filename = os.path.basename(f)
    df = pd.read_csv(f, encoding ="ISO-8859-1")
    df_from_each_file.append(df)

# Concantenated dataframe
concatenated_df = pd.concat(df_from_each_file, ignore_index=True)
concatenated_df

Unnamed: 0,acq_date,acq_time,bright_t31,bright_ti4,bright_ti5,brightness,confidence,daynight,frp,instrument,latitude,longitude,satellite,scan,track,type,version
0,2019-08-01,56,297.3,,,313.0,48,D,6.6,MODIS,-11.80700,142.05830,Terra,1.00,1.00,0.0,6.3
1,2019-08-01,56,297.3,,,319.3,71,D,11.3,MODIS,-11.79240,142.08500,Terra,1.00,1.00,0.0,6.3
2,2019-08-01,57,298.7,,,311.6,42,D,23.1,MODIS,-12.83980,132.87440,Terra,3.10,1.70,0.0,6.3
3,2019-08-01,57,296.1,,,310.1,33,D,6.5,MODIS,-14.43060,143.30350,Terra,1.10,1.10,0.0,6.3
4,2019-08-01,57,298.8,,,310.3,36,D,27.6,MODIS,-12.49530,131.48970,Terra,4.00,1.90,0.0,6.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1360634,2020-01-11,1630,,306.9,288.6,,n,N,0.8,VIIRS,-32.66628,122.15253,N,0.39,0.44,,1.0NRT
1360635,2020-01-11,1630,,299.0,287.4,,n,N,0.7,VIIRS,-32.58616,123.39582,N,0.51,0.41,,1.0NRT
1360636,2020-01-11,1630,,309.7,291.7,,n,N,1.0,VIIRS,-33.37853,115.94735,N,0.40,0.60,,1.0NRT
1360637,2020-01-11,1806,,300.2,290.2,,n,N,1.1,VIIRS,-33.21827,115.75078,N,0.49,0.65,,1.0NRT


In [5]:
# Remove unneccessary columns
del concatenated_df["confidence"]
del concatenated_df["scan"]
del concatenated_df["track"]
del concatenated_df["version"]
del concatenated_df["type"]

In [6]:
concatenated_df

Unnamed: 0,acq_date,acq_time,bright_t31,bright_ti4,bright_ti5,brightness,daynight,frp,instrument,latitude,longitude,satellite
0,2019-08-01,56,297.3,,,313.0,D,6.6,MODIS,-11.80700,142.05830,Terra
1,2019-08-01,56,297.3,,,319.3,D,11.3,MODIS,-11.79240,142.08500,Terra
2,2019-08-01,57,298.7,,,311.6,D,23.1,MODIS,-12.83980,132.87440,Terra
3,2019-08-01,57,296.1,,,310.1,D,6.5,MODIS,-14.43060,143.30350,Terra
4,2019-08-01,57,298.8,,,310.3,D,27.6,MODIS,-12.49530,131.48970,Terra
...,...,...,...,...,...,...,...,...,...,...,...,...
1360634,2020-01-11,1630,,306.9,288.6,,N,0.8,VIIRS,-32.66628,122.15253,N
1360635,2020-01-11,1630,,299.0,287.4,,N,0.7,VIIRS,-32.58616,123.39582,N
1360636,2020-01-11,1630,,309.7,291.7,,N,1.0,VIIRS,-33.37853,115.94735,N
1360637,2020-01-11,1806,,300.2,290.2,,N,1.1,VIIRS,-33.21827,115.75078,N


In [7]:
# Filter for MODIS fires
modis_df = concatenated_df.loc[concatenated_df["instrument"] == "MODIS"]

modis_df

Unnamed: 0,acq_date,acq_time,bright_t31,bright_ti4,bright_ti5,brightness,daynight,frp,instrument,latitude,longitude,satellite
0,2019-08-01,56,297.3,,,313.0,D,6.6,MODIS,-11.8070,142.0583,Terra
1,2019-08-01,56,297.3,,,319.3,D,11.3,MODIS,-11.7924,142.0850,Terra
2,2019-08-01,57,298.7,,,311.6,D,23.1,MODIS,-12.8398,132.8744,Terra
3,2019-08-01,57,296.1,,,310.1,D,6.5,MODIS,-14.4306,143.3035,Terra
4,2019-08-01,57,298.8,,,310.3,D,27.6,MODIS,-12.4953,131.4897,Terra
...,...,...,...,...,...,...,...,...,...,...,...,...
404377,2020-01-11,1515,289.3,,,300.5,N,15.3,MODIS,-32.6750,122.1130,Terra
404378,2020-01-11,1645,280.8,,,300.5,N,2.9,MODIS,-16.7080,129.6460,Aqua
404379,2020-01-11,1645,277.9,,,301.8,N,3.7,MODIS,-16.7470,129.6530,Aqua
404380,2020-01-11,1650,291.9,,,306.5,N,11.6,MODIS,-32.4840,121.3190,Aqua


In [8]:
# Delete unnecessary columns from modis df.
del modis_df["bright_ti4"]
del modis_df["bright_ti5"]

In [9]:
modis_df

Unnamed: 0,acq_date,acq_time,bright_t31,brightness,daynight,frp,instrument,latitude,longitude,satellite
0,2019-08-01,56,297.3,313.0,D,6.6,MODIS,-11.8070,142.0583,Terra
1,2019-08-01,56,297.3,319.3,D,11.3,MODIS,-11.7924,142.0850,Terra
2,2019-08-01,57,298.7,311.6,D,23.1,MODIS,-12.8398,132.8744,Terra
3,2019-08-01,57,296.1,310.1,D,6.5,MODIS,-14.4306,143.3035,Terra
4,2019-08-01,57,298.8,310.3,D,27.6,MODIS,-12.4953,131.4897,Terra
...,...,...,...,...,...,...,...,...,...,...
404377,2020-01-11,1515,289.3,300.5,N,15.3,MODIS,-32.6750,122.1130,Terra
404378,2020-01-11,1645,280.8,300.5,N,2.9,MODIS,-16.7080,129.6460,Aqua
404379,2020-01-11,1645,277.9,301.8,N,3.7,MODIS,-16.7470,129.6530,Aqua
404380,2020-01-11,1650,291.9,306.5,N,11.6,MODIS,-32.4840,121.3190,Aqua


In [10]:
# Filter for VIIRS fires.
viirs_df = concatenated_df.loc[concatenated_df["instrument"] == "VIIRS"]

viirs_df

Unnamed: 0,acq_date,acq_time,bright_t31,bright_ti4,bright_ti5,brightness,daynight,frp,instrument,latitude,longitude,satellite
36011,2019-08-01,246,,326.5,290.7,,,5.8,VIIRS,-34.45902,150.88040,N
36012,2019-08-01,247,,367.0,291.9,,,10.4,VIIRS,-31.70724,151.07191,N
36013,2019-08-01,247,,325.2,292.7,,,5.6,VIIRS,-31.39626,149.65253,N
36014,2019-08-01,247,,347.8,287.6,,,9.1,VIIRS,-30.39774,152.06432,N
36015,2019-08-01,247,,350.7,292.3,,,9.4,VIIRS,-31.20248,151.89766,N
...,...,...,...,...,...,...,...,...,...,...,...,...
1360634,2020-01-11,1630,,306.9,288.6,,N,0.8,VIIRS,-32.66628,122.15253,N
1360635,2020-01-11,1630,,299.0,287.4,,N,0.7,VIIRS,-32.58616,123.39582,N
1360636,2020-01-11,1630,,309.7,291.7,,N,1.0,VIIRS,-33.37853,115.94735,N
1360637,2020-01-11,1806,,300.2,290.2,,N,1.1,VIIRS,-33.21827,115.75078,N


In [11]:
# Delete unnecessary columns from viirs df.
del viirs_df["bright_t31"]
del viirs_df["brightness"]
del viirs_df["daynight"]

In [12]:
viirs_df

Unnamed: 0,acq_date,acq_time,bright_ti4,bright_ti5,frp,instrument,latitude,longitude,satellite
36011,2019-08-01,246,326.5,290.7,5.8,VIIRS,-34.45902,150.88040,N
36012,2019-08-01,247,367.0,291.9,10.4,VIIRS,-31.70724,151.07191,N
36013,2019-08-01,247,325.2,292.7,5.6,VIIRS,-31.39626,149.65253,N
36014,2019-08-01,247,347.8,287.6,9.1,VIIRS,-30.39774,152.06432,N
36015,2019-08-01,247,350.7,292.3,9.4,VIIRS,-31.20248,151.89766,N
...,...,...,...,...,...,...,...,...,...
1360634,2020-01-11,1630,306.9,288.6,0.8,VIIRS,-32.66628,122.15253,N
1360635,2020-01-11,1630,299.0,287.4,0.7,VIIRS,-32.58616,123.39582,N
1360636,2020-01-11,1630,309.7,291.7,1.0,VIIRS,-33.37853,115.94735,N
1360637,2020-01-11,1806,300.2,290.2,1.1,VIIRS,-33.21827,115.75078,N


In [13]:
# Verify counts
print(modis_df.count())
print(viirs_df.count())

acq_date      219604
acq_time      219604
bright_t31    219604
brightness    219604
daynight      219604
frp           219604
instrument    219604
latitude      219604
longitude     219604
satellite     219604
dtype: int64
acq_date      1141035
acq_time      1141035
bright_ti4    1141035
bright_ti5    1141035
frp           1141035
instrument    1141035
latitude      1141035
longitude     1141035
satellite     1141035
dtype: int64


In [14]:
# Drop nas/null values (if any).
modis_df = modis_df.dropna(how="any")
viirs_df = viirs_df.dropna(how="any")

print(modis_df.count())
print(viirs_df.count())

acq_date      219604
acq_time      219604
bright_t31    219604
brightness    219604
daynight      219604
frp           219604
instrument    219604
latitude      219604
longitude     219604
satellite     219604
dtype: int64
acq_date      1141035
acq_time      1141035
bright_ti4    1141035
bright_ti5    1141035
frp           1141035
instrument    1141035
latitude      1141035
longitude     1141035
satellite     1141035
dtype: int64


In [15]:
# Check data types
print(modis_df.dtypes)
print(viirs_df.dtypes)

acq_date       object
acq_time        int64
bright_t31    float64
brightness    float64
daynight       object
frp           float64
instrument     object
latitude      float64
longitude     float64
satellite      object
dtype: object
acq_date       object
acq_time        int64
bright_ti4    float64
bright_ti5    float64
frp           float64
instrument     object
latitude      float64
longitude     float64
satellite      object
dtype: object


In [16]:
# Convert df to dict.
fires_modis_dict = modis_df.to_dict('range')
fires_viirs_dict = viirs_df.to_dict('range')

In [17]:
fires_modis_dict

[{'acq_date': '2019-08-01',
  'acq_time': 56,
  'bright_t31': 297.3,
  'brightness': 313.0,
  'daynight': 'D',
  'frp': 6.6,
  'instrument': 'MODIS',
  'latitude': -11.807,
  'longitude': 142.0583,
  'satellite': 'Terra'},
 {'acq_date': '2019-08-01',
  'acq_time': 56,
  'bright_t31': 297.3,
  'brightness': 319.3,
  'daynight': 'D',
  'frp': 11.3,
  'instrument': 'MODIS',
  'latitude': -11.7924,
  'longitude': 142.085,
  'satellite': 'Terra'},
 {'acq_date': '2019-08-01',
  'acq_time': 57,
  'bright_t31': 298.7,
  'brightness': 311.6,
  'daynight': 'D',
  'frp': 23.1,
  'instrument': 'MODIS',
  'latitude': -12.8398,
  'longitude': 132.8744,
  'satellite': 'Terra'},
 {'acq_date': '2019-08-01',
  'acq_time': 57,
  'bright_t31': 296.1,
  'brightness': 310.1,
  'daynight': 'D',
  'frp': 6.5,
  'instrument': 'MODIS',
  'latitude': -14.4306,
  'longitude': 143.3035,
  'satellite': 'Terra'},
 {'acq_date': '2019-08-01',
  'acq_time': 57,
  'bright_t31': 298.8,
  'brightness': 310.3,
  'daynight'

In [18]:
fires_viirs_dict

[{'acq_date': '2019-08-01',
  'acq_time': 246,
  'bright_ti4': 326.5,
  'bright_ti5': 290.7,
  'frp': 5.8,
  'instrument': 'VIIRS',
  'latitude': -34.45902,
  'longitude': 150.8804,
  'satellite': 'N'},
 {'acq_date': '2019-08-01',
  'acq_time': 247,
  'bright_ti4': 367.0,
  'bright_ti5': 291.9,
  'frp': 10.4,
  'instrument': 'VIIRS',
  'latitude': -31.707240000000002,
  'longitude': 151.07191,
  'satellite': 'N'},
 {'acq_date': '2019-08-01',
  'acq_time': 247,
  'bright_ti4': 325.2,
  'bright_ti5': 292.7,
  'frp': 5.6,
  'instrument': 'VIIRS',
  'latitude': -31.396259999999998,
  'longitude': 149.65253,
  'satellite': 'N'},
 {'acq_date': '2019-08-01',
  'acq_time': 247,
  'bright_ti4': 347.8,
  'bright_ti5': 287.6,
  'frp': 9.1,
  'instrument': 'VIIRS',
  'latitude': -30.397740000000002,
  'longitude': 152.06432,
  'satellite': 'N'},
 {'acq_date': '2019-08-01',
  'acq_time': 247,
  'bright_ti4': 350.7,
  'bright_ti5': 292.3,
  'frp': 9.4,
  'instrument': 'VIIRS',
  'latitude': -31.2024

## Load into database

In [29]:
client = MongoClient(MONGO_URI)

In [30]:
db = client[MONGO_DBNAME]

In [31]:
modis_collection = db.fires_modis
viirs_collection = db.fires_viirs

## For testing purposes - load 100 documents into database

In [32]:
def load_documents_into_db(documents, collection):

    # Loop through the list of fires and insert into database in chunks.
    print("Beginning load into database.")
    print("--------------------------------------------")
    count = 0
    set = 1
    for idx, fire in enumerate(documents):
        count = count + 1
        if count == 101:
            break
            count = 1
            set = set + 1
            time.sleep(5)
        print(f"Processing Record {count} of Set {set}")
        try:
            collection.insert_one(fire)
        except:
            print("Unable to insert fire data into database. Skipping...")

    print("------------------------------------------------")
    print("Loading data into database is complete")
    print("------------------------------------------------")

In [33]:
load_documents_into_db(fires_modis_dict, modis_collection)

Beginning load into database.
--------------------------------------------
Processing Record 1 of Set 1
Processing Record 2 of Set 1
Processing Record 3 of Set 1
Processing Record 4 of Set 1
Processing Record 5 of Set 1
Processing Record 6 of Set 1
Processing Record 7 of Set 1
Processing Record 8 of Set 1
Processing Record 9 of Set 1
Processing Record 10 of Set 1
Processing Record 11 of Set 1
Processing Record 12 of Set 1
Processing Record 13 of Set 1
Processing Record 14 of Set 1
Processing Record 15 of Set 1
Processing Record 16 of Set 1
Processing Record 17 of Set 1
Processing Record 18 of Set 1
Processing Record 19 of Set 1
Processing Record 20 of Set 1
Processing Record 21 of Set 1
Processing Record 22 of Set 1
Processing Record 23 of Set 1
Processing Record 24 of Set 1
Processing Record 25 of Set 1
Processing Record 26 of Set 1
Processing Record 27 of Set 1
Processing Record 28 of Set 1
Processing Record 29 of Set 1
Processing Record 30 of Set 1
Processing Record 31 of Set 1
Proc

In [None]:
load_documents_into_db(fires_viirs_dict, viirs_collection)

Beginning load into database.
--------------------------------------------
Processing Record 1 of Set 1
Processing Record 2 of Set 1
Processing Record 3 of Set 1
Processing Record 4 of Set 1
Processing Record 5 of Set 1
Processing Record 6 of Set 1
Processing Record 7 of Set 1
Processing Record 8 of Set 1
Processing Record 9 of Set 1
Processing Record 10 of Set 1
Processing Record 11 of Set 1
Processing Record 12 of Set 1
Processing Record 13 of Set 1
Processing Record 14 of Set 1
Processing Record 15 of Set 1
Processing Record 16 of Set 1
Processing Record 17 of Set 1
Processing Record 18 of Set 1
Processing Record 19 of Set 1
Processing Record 20 of Set 1
Processing Record 21 of Set 1
Processing Record 22 of Set 1
Processing Record 23 of Set 1
Processing Record 24 of Set 1
Processing Record 25 of Set 1
Processing Record 26 of Set 1
Processing Record 27 of Set 1
Processing Record 28 of Set 1
Processing Record 29 of Set 1
Processing Record 30 of Set 1
Processing Record 31 of Set 1
Proc