# Monitor progress of mapping tasks

Run this notebook to visualise the progress of mapping tasks.

In [None]:
# Necessary imports
import os
os.environ['USE_PYGEOS'] = '0'
import pandas as pd
import geopandas as gpd
import time
import folium
import geemap.foliumap as geemap
import branca.colormap
from tqdm.notebook import tqdm
from datetime import datetime

from db_utils import DB
from dotenv import load_dotenv

## Load environment and project details

As with the other notebooks, we load credentials and project details from a hidden `.env` file.

In [None]:
# Load environment variables (including path to credentials) from '.env' file
env_file_path = "../.env"

# Uncomment for alternative version for Windows (r"" indicates raw string)
#env_file_path = r"C:/Users/User/floodmapper/.env"

# Fail fast if the '.env' file could not be loaded
assert load_dotenv(dotenv_path=env_file_path), "[ERR] Failed to load environment!"

# Google credentials: the variable must be set and point at an existing key file
assert "GOOGLE_APPLICATION_CREDENTIALS" in os.environ, "[ERR] Missing $GOOGLE_APPLICATION_CREDENTIALS!"
assert "GS_USER_PROJECT" in os.environ, "[ERR] Missing $GS_USER_PROJECT!"
key_file_path = os.environ["GOOGLE_APPLICATION_CREDENTIALS"]
assert os.path.exists(key_file_path), f"[ERR] Google credential key file does not exist: \n{key_file_path} "

# Base directory: the variable must be set and the directory must exist
assert "ML4FLOODS_BASE_DIR" in os.environ, "[ERR] Missing $ML4FLOODS_BASE_DIR!"
base_path = os.environ["ML4FLOODS_BASE_DIR"]
assert os.path.exists(base_path), f"[ERR] Base path does not exist: \n{base_path} "

# Bucket URI: check for presence first so that a missing variable gives the
# same style of message as the checks above instead of a bare KeyError
assert "BUCKET_URI" in os.environ, "[ERR] Missing $BUCKET_URI!"
bucket_name = os.environ["BUCKET_URI"]
assert bucket_name != "", f"Bucket name not defined {bucket_name}"
print("[INFO] Successfully loaded FloodMapper environment.")

## Retrieve the session parameters

**Set the name of the session here and run all remaining cells in order.**

In [None]:
# EDIT THE NAME OF THE SESSION
# The name is used by the cells below to look up the session's parameters
# and grid patches in the database.
session_name = "EMSR586"

In [None]:
# Connect to the database (point to the .env file for credentials)
# The resulting object is reused for every query in the cells below.
db_conn = DB(env_file_path)

In [None]:
# Fetch the session parameters from the database
query = ("SELECT flood_date_start, flood_date_end, "
         "ref_date_start, ref_date_end, bucket_uri "
         "FROM session_info "
         "WHERE session = %s")
data = (session_name,)
session_df = db_conn.run_query(query, data, fetch=True)

# An unknown session name returns no rows; report that clearly instead of
# letting the positional lookup below fail with an opaque IndexError
if len(session_df) == 0:
    raise ValueError(f"[ERR] Session '{session_name}' not found in the database!")

# Only the first matching row is used
session_row = session_df.iloc[0]
flood_start_date = session_row["flood_date_start"]
flood_end_date = session_row["flood_date_end"]
ref_start_date = session_row["ref_date_start"]
ref_end_date = session_row["ref_date_end"]
bucket_uri = session_row["bucket_uri"]

# Fetch the AoI grid patches from the database
query = ("SELECT DISTINCT patch_name "
         "FROM session_patches "
         "WHERE session = %s")
data = (session_name,)
aois_df = db_conn.run_query(query, data, fetch=True)
num_patches = len(aois_df)
print(f"[INFO] Found {num_patches} grid patches in map.")

# The patch list feeds an SQL 'IN' clause in the next cell, which cannot be
# built from an empty list, so stop here if the session has no patches
if num_patches == 0:
    raise ValueError(f"[ERR] Session '{session_name}' has no grid patches!")
aois_list = aois_df.patch_name.to_list()

## Query the progress of inference from database

In [None]:
# Assemble the query for the inference status and patch geometry of every
# successfully downloaded image (dl.status = 1) in the session's patches.
# The flood date window is always applied; the reference window is OR-ed
# in only when both of its bounds are defined.
has_ref_window = ref_start_date is not None and ref_end_date is not None
sql_parts = [
    "SELECT DISTINCT dl.image_id, dl.patch_name, inf.status, ST_AsText(gr.geometry) ",
    "FROM image_downloads dl ",
    "LEFT JOIN grid_loc gr ON dl.patch_name = gr.patch_name ",
    "LEFT JOIN (SELECT * FROM inference WHERE mode='vect') AS inf ",
    "ON dl.image_id = inf.image_id ",
    "WHERE dl.patch_name IN %s ",
    "AND dl.status = 1 ",
    "AND ((dl.date >= %s AND dl.date <= %s) ",
]
data = [tuple(aois_list), flood_start_date, flood_end_date]
if has_ref_window:
    sql_parts.append("OR (dl.date >= %s AND dl.date <= %s));")
    data.extend([ref_start_date, ref_end_date])
else:
    sql_parts.append(");")
query = "".join(sql_parts)

# Run the query and report how many image rows came back
inf_df = db_conn.run_query(query, data, fetch=True)
num_rows = len(inf_df)
print(f"[INFO] Entries for {num_rows} images in the DB.")

# Swap the WKT text column for a parsed geometry column, then wrap the
# table as a GeoDataFrame; missing values (e.g. the status of images with
# no matching inference row) become 0
inf_df['geometry'] = gpd.GeoSeries.from_wkt(inf_df.pop('st_astext'))
inf_gdf = gpd.GeoDataFrame(inf_df, geometry='geometry', crs="EPSG:4326").fillna(0)

# Merge all patch polygons into a single outline of the mapped area
aoi_outline = inf_gdf.geometry.unary_union
aoi_outline_gdf = gpd.GeoDataFrame(geometry=[aoi_outline], crs="EPSG:4326")

## Count the files processed by inference in each grid patch

In [None]:
# One polygon per grid patch, indexed by patch name
geom = inf_gdf[["patch_name", "geometry"]].drop_duplicates().set_index("patch_name")

# Split the images by inference status (1 = processed, 0 = unprocessed)
# and count the images per patch in each group
is_processed = inf_gdf.status == 1
is_unprocessed = inf_gdf.status == 0
grid_inf_gdf = inf_gdf.loc[is_processed]
grid_not_gdf = inf_gdf.loc[is_unprocessed]
processed = grid_inf_gdf.groupby("patch_name")["image_id"].count()
unprocessed = grid_not_gdf.groupby("patch_name")["image_id"].count()

# Attach the processed counts to the patch polygons; patches without any
# processed image have no count after the join and are filled with 0
processed_df = pd.concat([processed, geom], axis=1).rename(columns={"image_id": "count"})
processed_gdf = gpd.GeoDataFrame(
    processed_df, geometry='geometry', crs="EPSG:4326"
).fillna(0)

# Same again for the unprocessed counts
unprocessed_df = pd.concat([unprocessed, geom], axis=1).rename(columns={"image_id": "count"})
unprocessed_gdf = gpd.GeoDataFrame(
    unprocessed_df, geometry='geometry', crs="EPSG:4326"
).fillna(0)

## Plot the number of files processed by inference

In [None]:
# Scale a yellow-orange-red colour ramp to the range of processed counts
processed_counts = processed_gdf["count"]
cm = branca.colormap.linear.YlOrRd_07.scale(
    vmin=processed_counts.min(),
    vmax=processed_counts.max())


def style_fn(feature):
    """Return the style for one patch, coloured by its 'count' property."""
    shade = cm(feature['properties']['count'])
    return {
        'fillColor': shade,
        'color': shade,
        'weight': 0.5,
        "fillOpacity": 0.5
    }


# Start from the AoI outline, then overlay the patches colour-coded by
# their number of processed images
m = aoi_outline_gdf.explore(
    color="black",
    name="AoI Outline",
    highlight=False,
    style_kwds={"fillOpacity": 0.0, "weight": 3})
processed_layer = folium.GeoJson(
    processed_gdf,
    name="Processed Images",
    style_function=style_fn,
    tooltip=folium.features.GeoJsonTooltip(["count"]))
processed_layer.add_to(m)

# Add the colour legend and layer control, then display the map
m.add_child(cm)
folium.LayerControl(collapsed=False).add_to(m)
m

## Plot the number of files not yet processed by inference

In [None]:
# Scale a yellow-orange-red colour ramp to the range of unprocessed counts
unprocessed_counts = unprocessed_gdf["count"]
cm = branca.colormap.linear.YlOrRd_07.scale(
    vmin=unprocessed_counts.min(),
    vmax=unprocessed_counts.max())


def style_fn(feature):
    """Return the style for one patch, coloured by its 'count' property."""
    shade = cm(feature['properties']['count'])
    return {
        'fillColor': shade,
        'color': shade,
        'weight': 0.5,
        "fillOpacity": 0.5
    }


# Start from the AoI outline, then overlay the patches colour-coded by
# their number of unprocessed images
m = aoi_outline_gdf.explore(
    color="black",
    name="AoI Outline",
    highlight=False,
    style_kwds={"fillOpacity": 0.0, "weight": 3})
unprocessed_layer = folium.GeoJson(
    unprocessed_gdf,
    name="Unprocessed Images",
    style_function=style_fn,
    tooltip=folium.features.GeoJsonTooltip(["count"]))
unprocessed_layer.add_to(m)

# Add the colour legend and layer control, then display the map
m.add_child(cm)
folium.LayerControl(collapsed=False).add_to(m)
m

## Calculate the percentage processed (inference)

In [None]:
# Tally the images by inference status (1 = processed, 0 = unprocessed)
num_files = len(inf_gdf)
num_processed = int((inf_gdf.status == 1).sum())
num_unprocessed = int((inf_gdf.status == 0).sum())

# Guard against an empty table: dividing by zero rows would otherwise
# print 'nan%' along with a numpy runtime warning
if num_files > 0:
    percent_processed = round(num_processed * 100 / num_files, 2)
else:
    percent_processed = 0.0
print(f"[INFO] Inference is {percent_processed}% processed ({num_processed}/{num_files} files).")

## Query the progress of the temporal aggregation step

In [None]:
# Query the temporal aggregation status (mode 'flood') and the geometry
# of every patch in the session; the session name is bound twice, once
# in the sub-select and once in the outer WHERE clause
query = (
    "SELECT DISTINCT s.patch_name, t.status, ST_AsText(gr.geometry) "
    "FROM session_patches s "
    "LEFT JOIN grid_loc gr ON s.patch_name = gr.patch_name "
    "LEFT JOIN (SELECT * FROM postproc_temporal WHERE mode='flood' AND session = %s) AS t "
    "ON s.patch_name = t.patch_name "
    "WHERE s.session = %s ;"
)
data = [session_name] * 2
tmp_df = db_conn.run_query(query, data, fetch=True)
num_rows = len(tmp_df)
print(f"[INFO] Entries for {num_rows} patches in the DB.")

# Swap the WKT text column for a parsed geometry column, then wrap the
# table as a GeoDataFrame; missing values (e.g. the status of patches with
# no matching aggregation row) become 0
tmp_df['geometry'] = gpd.GeoSeries.from_wkt(tmp_df.pop('st_astext'))
tmp_gdf = gpd.GeoDataFrame(tmp_df, geometry='geometry', crs="EPSG:4326").fillna(0)

## Plot the progress of temporal aggregation

In [None]:
# Two-colour ramp from red to blue, applied to each patch's status value
cm = branca.colormap.LinearColormap(['red', 'blue'])


def style_fn(feature):
    """Return the style for one patch, coloured by its 'status' property."""
    shade = cm(feature['properties']['status'])
    return {
        'fillColor': shade,
        'color': shade,
        'weight': 0.5,
        "fillOpacity": 0.5
    }


# Start from the AoI outline, then overlay the patches colour-coded by
# their temporal aggregation status
m = aoi_outline_gdf.explore(
    color="black",
    name="AoI Outline",
    highlight=False,
    style_kwds={"fillOpacity": 0.0, "weight": 3})
status_layer = folium.GeoJson(
    tmp_gdf,
    name="Temporal Aggregation Done",
    style_function=style_fn,
    tooltip=folium.features.GeoJsonTooltip(["status"]))
status_layer.add_to(m)

# Add the colour legend and layer control, then display the map
m.add_child(cm)
folium.LayerControl(collapsed=False).add_to(m)
m