## Dependencies

In [None]:
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings('ignore')

from pathlib import Path
import os, zipfile
import shutil
import glob

from pymongo import MongoClient
import time

## Constants

In [None]:
# Connection string passed to MongoClient further down (MongoDB on localhost, port 27017).
MONGO_URI = 'mongodb://localhost:27017/'
# Name of the database that the fire collections are created in.
MONGO_DBNAME = 'australia_fire_db'

## Extract data/csvs from zip file

In [None]:
# Unzip every archive found in the "Resources" folder, extracting in place.
extension = ".zip"
extracted_dir_name = "."

# Remember the starting directory so it can be restored afterwards.
# The notebook must be started from the project root for "Resources" to resolve.
cwd_dir_name = os.getcwd()
print(f"The current working directory is {cwd_dir_name}.")

# Relative paths below (including extracted_dir_name) are resolved against the
# "Resources" folder, so switch into it for the duration of the extraction.
os.chdir("Resources")
try:
    dir_name = os.getcwd()
    print(f"You are now in the following directory: {dir_name}.")

    # Directory the archives are extracted into (relative to "Resources").
    unzipped_directory = extracted_dir_name

    for item in os.listdir(dir_name):
        if not item.endswith(extension):
            continue  # only ".zip" archives are of interest
        try:
            file_name = os.path.abspath(item)  # full path of the archive
            # The context manager closes the archive even when extraction fails.
            with zipfile.ZipFile(file_name) as zip_ref:
                zip_ref.extractall(unzipped_directory)
            print(f"Successfully unzipped {item} into the following folder:{dir_name}.")
        except Exception as e:
            # Best effort: report the failing archive and carry on with the rest.
            print("Error trying to unzip data file(s).")
            print(e)
finally:
    # Always return to the directory we started in, even if listing the folder
    # failed, so that re-running this cell keeps working.
    os.chdir(cwd_dir_name)

# Confirm which directory we are back in.
print(os.getcwd())

## Import csv files and read into pandas

In [None]:
# Path to csv files.
path_to_csvs = os.path.join(".", "Resources")

# glob returns matches in arbitrary order; sort so that the row order of the
# concatenated dataframe is the same on every run and every machine.
all_files = sorted(glob.glob(os.path.join(path_to_csvs, "*.csv")))

# Fail with an actionable message instead of pandas' "No objects to concatenate".
if not all_files:
    raise FileNotFoundError(
        f"No csv files found in {path_to_csvs}. Run the unzip step from the project root first."
    )

# One dataframe per csv file.
df_from_each_file = [pd.read_csv(f, encoding="ISO-8859-1") for f in all_files]

# Concatenated dataframe (fresh 0..n-1 index across all files).
concatenated_df = pd.concat(df_from_each_file, ignore_index=True)
concatenated_df

In [None]:
# Remove unnecessary columns, one at a time and in place.
for unwanted_column in ("confidence", "scan", "track", "version", "type"):
    del concatenated_df[unwanted_column]

In [None]:
# Display the dataframe as the cell output to inspect the remaining columns.
concatenated_df

In [None]:
# Filter for MODIS fires.
# .copy() makes modis_df an independent dataframe: its columns are deleted
# later on, and mutating a filtered selection of concatenated_df directly is
# the chained-assignment pattern pandas warns about (SettingWithCopyWarning).
modis_df = concatenated_df.loc[concatenated_df["instrument"] == "MODIS"].copy()

modis_df

In [None]:
# Delete unnecessary columns from modis df.
for unwanted_column in ("bright_ti4", "bright_ti5"):
    del modis_df[unwanted_column]

In [None]:
# Display the MODIS dataframe as the cell output.
modis_df

In [None]:
# Filter for VIIRS fires.
# .copy() makes viirs_df an independent dataframe: its columns are deleted
# later on, and mutating a filtered selection of concatenated_df directly is
# the chained-assignment pattern pandas warns about (SettingWithCopyWarning).
viirs_df = concatenated_df.loc[concatenated_df["instrument"] == "VIIRS"].copy()

viirs_df

In [None]:
# Delete unnecessary columns from viirs df.
for unwanted_column in ("bright_t31", "brightness", "daynight"):
    del viirs_df[unwanted_column]

In [None]:
# Display the VIIRS dataframe as the cell output.
viirs_df

In [None]:
# Verify counts: non-null values per column, MODIS first, then VIIRS.
for frame in (modis_df, viirs_df):
    print(frame.count())

In [None]:
# Drop every row that contains at least one na/null value (if any),
# then print the per-column counts again for comparison.
cleaned_frames = [frame.dropna(how="any") for frame in (modis_df, viirs_df)]
modis_df, viirs_df = cleaned_frames

for frame in cleaned_frames:
    print(frame.count())

In [None]:
# Check data types of each column, MODIS first, then VIIRS.
for frame in (modis_df, viirs_df):
    print(frame.dtypes)

In [None]:
# Convert each dataframe to a list of row dictionaries ({column: value}),
# i.e. one document per fire, which is the shape insert_one() expects.
# 'records' is the valid orient name; 'range' was only ever accepted by older
# pandas as an abbreviation of it and raises ValueError on pandas 2.x.
# NOTE: despite the *_dict names (kept for the cells below), these are lists.
fires_modis_dict = modis_df.to_dict('records')
fires_viirs_dict = viirs_df.to_dict('records')

In [None]:
# Display the converted MODIS data as the cell output.
fires_modis_dict

In [None]:
# Display the converted VIIRS data as the cell output.
fires_viirs_dict

## Load into database

In [None]:
# Create the MongoDB client from the connection string defined in the constants.
client = MongoClient(MONGO_URI)

In [None]:
# Handle to the database named by MONGO_DBNAME.
db = client[MONGO_DBNAME]

In [None]:
# One collection per instrument; item access is equivalent to attribute access.
modis_collection, viirs_collection = db["fires_modis"], db["fires_viirs"]

## For testing purposes - load 100 documents into database

In [None]:
def load_documents_into_db(documents, collection, limit=100, chunk_size=100, pause_seconds=5):
    """Insert documents into a collection one at a time, reporting progress.

    Documents are processed in "sets" of ``chunk_size`` records, with a pause
    of ``pause_seconds`` between sets. A document that fails to insert is
    reported and skipped; loading continues with the next one.

    Args:
        documents: Iterable of documents (dictionaries) to insert.
        collection: Object exposing ``insert_one(document)``.
        limit: Maximum number of documents to process. The default of 100
            keeps the test-sized load; pass ``None`` to load everything.
        chunk_size: Number of records per set (must be at least 1).
        pause_seconds: Seconds to sleep before starting each set after the
            first.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")

    print("Beginning load into database.")
    print("--------------------------------------------")
    for idx, fire in enumerate(documents):
        if limit is not None and idx >= limit:
            break

        # Zero-based set number and position of this record within its set.
        set_index, position = divmod(idx, chunk_size)
        if position == 0 and idx > 0:
            # A new set is starting: pause before continuing.
            time.sleep(pause_seconds)

        print(f"Processing Record {position + 1} of Set {set_index + 1}")
        try:
            collection.insert_one(fire)
        except Exception as e:
            # Best effort per record, but say why it failed. Unlike a bare
            # `except:`, this does not swallow KeyboardInterrupt/SystemExit.
            print("Unable to insert fire data into database. Skipping...")
            print(e)

    print("------------------------------------------------")
    print("Loading data into database is complete")
    print("------------------------------------------------")

In [None]:
# Load the MODIS fire records into the fires_modis collection.
load_documents_into_db(fires_modis_dict, modis_collection)

In [None]:
# Load the VIIRS fire records into the fires_viirs collection.
load_documents_into_db(fires_viirs_dict, viirs_collection)