# Monitoring NB

Run this notebook to monitor download tasks in progress on Google Earth Engine.

In [1]:
# Necessary imports
import json
from tqdm.notebook import tqdm
import random
import pandas as pd
import time
from db_utils import DB

**SET THE PATH TO THE JSON FILE HERE**

In [2]:
# Path to the JSON file output by the download script.
# The cells below load it as the list of task entries to monitor.
json_path = "../../2fac3e2c-6ae7-11ed-bfaf-0b4506ca505b.json"

**END USER INPUT**

In [3]:
# Connect to the database; this handle is used below to query the
# download status of each task.
# NOTE(review): which database is targeted is decided inside db_utils.DB
# (not visible here) - confirm it is the intended one before monitoring.
db_conn = DB()

[INFO] connected to dev-sample


In [4]:
# Load the JSON. The context manager closes the file handle as soon as
# parsing is done instead of leaving it open for the kernel's lifetime.
with open(json_path, "r") as json_file:
    task_list = json.load(json_file)
n_tasks = len(task_list)
print(f"JSON currently contains {n_tasks} task entries.")

# Convert to a DataFrame (one row per task entry)
tasks_df = pd.DataFrame(task_list)
# The grid name is the part of the task description before the first "_"
tasks_df["gridname"] = tasks_df["description"].str.split("_").str[0]

JSON currently contains 5 task entries.


In [5]:
# Query the DB for the download status of the images.
image_ids = tuple(tasks_df['description'].unique())
if not image_ids:
    raise ValueError("No task descriptions found; nothing to query in the DB.")

# Render every ID as an explicit SQL string literal, doubling embedded single
# quotes. Formatting the Python tuple directly relied on its repr, which
# switches to double quotes when an ID contains a single quote and thereby
# produces invalid SQL. An IN list also works for one ID, so no special case
# is needed.
# NOTE(review): if DB.run_query supports bound parameters, prefer those over
# string-built SQL - its signature is not visible from this notebook.
quoted_ids = ", ".join(
    "'{}'".format(str(image_id).replace("'", "''")) for image_id in image_ids
)
db_query = """SELECT image_id, in_progress
              FROM images_download
              WHERE image_id IN ({});""".format(quoted_ids)
image_db = db_conn.run_query(db_query, fetch = True)
print("Returned {:d} rows from the DB".format(len(image_db)))

Returned 5 rows from the DB


In [6]:
# Seconds to wait between two polls of the database. Every pass issues one
# DB query, so this is deliberately longer than a tight UI refresh loop;
# tune it to how quickly progress updates are needed.
POLL_INTERVAL_S = 5.0

# Initialise progress bar for all available tasks.
batch_bar = tqdm(total=len(tasks_df),
                 dynamic_ncols=True,
                 leave=False,
                 position=0,
                 desc="All Tasks",
                 colour="GREEN")

# Add a progress bar for every unique grid patch present in task list
grid_bars = {}
for name, gdf in tasks_df.groupby(by='gridname'):
    grid_bars[name] = tqdm(total=len(gdf),
                           dynamic_ncols=True,
                           leave=True,
                           position=0,
                           desc=name)

# Logic : Poll the database, and remove tasks from tasks_df as and when
# their in_progress flag is set to 0. The loop ends once no task is left.
try:
    while len(tasks_df) >= 1:

        # Refresh the statuses on every pass. Reusing the snapshot fetched
        # once before the loop could never observe a task finishing, so the
        # loop would spin forever if any task was still in progress then.
        image_db = db_conn.run_query(db_query, fetch = True)

        # Row labels of tasks that finished during this pass. They are
        # dropped after the iteration so tasks_df is not mutated while its
        # groupby is being walked.
        finished = []

        # Loop through the tasks grouped by gridname
        for name, gdf in tasks_df.groupby(by='gridname'):
            for i, task in gdf.iterrows():

                # Look up the in_progress flag of this task in the DB rows.
                # .item() raises if the image ID is missing or duplicated.
                desc = task['description']
                ip = image_db[image_db['image_id'] == desc]['in_progress'].item()

                # Mark as done if no longer in-progress; any other value
                # (e.g. 1) leaves the task in the list for the next poll.
                if ip == 0:
                    finished.append(i)
                    grid_bars[name].update()
                    batch_bar.update()

        tasks_df.drop(finished, inplace = True)

        # Wait before the next poll, but not after the last task finished
        if len(tasks_df) >= 1:
            time.sleep(POLL_INTERVAL_S)
finally:
    # Release the progress bars even if monitoring is interrupted
    batch_bar.close()
    for bar in grid_bars.values():
        bar.close()

All Tasks:   0%|                                          | 0/5 [00:00<?, ?it/s]

GRID11960:   0%|                                          | 0/1 [00:00<?, ?it/s]

GRID11961:   0%|                                          | 0/1 [00:00<?, ?it/s]

GRID12106:   0%|                                          | 0/1 [00:00<?, ?it/s]

GRID12107:   0%|                                          | 0/1 [00:00<?, ?it/s]

GRID12108:   0%|                                          | 0/1 [00:00<?, ?it/s]