# Validate Selected LGAs

The workflow in this notebook demonstrates how to load a section of a generated flood map and plot it over the relevant satellite imagery.

For this tutorial, we will load the data for the EMSR586 Copernicus activation.

In [None]:
# Necessary imports
import os
# This variable is set before geopandas is imported below.
# NOTE(review): presumably selects Shapely over PyGEOS as the GeoPandas
# geometry backend, which would require it to be set first - confirm.
os.environ['USE_PYGEOS'] = '0'
import gc
import numpy as np
from itertools import product
from datetime import datetime
from zoneinfo import ZoneInfo
from dotenv import load_dotenv
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.colors
import ee
from georeader.readers import ee_query
import folium
import geemap.foliumap as geemap
import shapely
from shapely.geometry import box, Point, MultiPoint
from shapely.ops import nearest_points


from ml4floods.data import utils
from ml4floods.visualization import plot_utils

from db_utils import DB

# Suppress deprecation warnings (comment these lines out to see them again)
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 
from shapely.errors import ShapelyDeprecationWarning
warnings.filterwarnings("ignore", category=ShapelyDeprecationWarning) 

# Switch off the requester-pays default in the ml4floods utilities
utils.REQUESTER_PAYS_DEFAULT = False

## Load environment and project details

As with the other notebooks, we load credentials and project details from a hidden `.env` file.

In [None]:
# Load environment variables (including path to credentials) from '.env' file
env_file_path = "../.env"

# Uncomment for alternative version for Windows (r"" indicates raw string)
#env_file_path = r"C:/Users/User/floodmapper/.env"

# Each check stops the notebook with a readable message as soon as a
# required setting is missing.
assert load_dotenv(dotenv_path=env_file_path), "[ERR] failed to load environment!"
assert "GOOGLE_APPLICATION_CREDENTIALS" in os.environ, "[ERR] missing $GOOGLE_APPLICATION_CREDENTIALS!"
assert "GS_USER_PROJECT" in os.environ, "[ERR] missing $GS_USER_PROJECT!"
key_file_path = os.environ["GOOGLE_APPLICATION_CREDENTIALS"]
assert os.path.exists(key_file_path), f"[ERR] Google credential key file does not exist: \n{key_file_path} "
assert "ML4FLOODS_BASE_DIR" in os.environ, "[ERR] missing $ML4FLOODS_BASE_DIR!"
base_path = os.environ["ML4FLOODS_BASE_DIR"]
assert os.path.exists(base_path), f"[ERR] base path does not exist: \n{base_path} "
# Use .get() so that an unset variable reaches the assertion below and is
# reported with its message, rather than raising a bare KeyError here.
bucket_name = os.environ.get("BUCKET_URI")
assert bucket_name is not None and bucket_name != "", f"Bucket name not defined {bucket_name}"

print("[INFO] Successfully loaded FloodMapper environment.")

In [None]:
# Connect to the database (point to the .env file for credentials)
# The 'db_conn' handle created here runs every database query in the cells below.
db_conn = DB(env_file_path)

In [None]:
# Initialise the Google Earth Engine connection.
# Follow instructions on login prompt, if required.
# The 'ee' module is used again further down when filtering the image
# collection for the map layers.
ee.Initialize()

## Retrieve the session parameters from the database

Edit the following cell to set the session name and retrieve the flood map parameters, including mapping grid and affected LGAs.

In [None]:
# EDIT THIS CELL: All work is conducted under a unique session name
# This value is the lookup key for the session_info, session_patches and
# postproc_spatial queries in the cells below.
session_name = "EMSR586"

In [None]:
# Timezone and time-of-day used to turn the DB dates into aware datetimes
tz = ZoneInfo("UTC")
midnight = datetime.min.time()

# Query the floodmapping parameters from the DB
query = ("SELECT flood_date_start, flood_date_end, "
         "ref_date_start, ref_date_end, bucket_uri "
         "FROM session_info "
         "WHERE session = %s;")
data = (session_name,)
session_df = db_conn.run_query(query, data, fetch=True)
# Fail with a readable message instead of an IndexError from .iloc[0]
if session_df is None or len(session_df) == 0:
    raise ValueError(f"[ERR] session '{session_name}' not found in 'session_info'!")

# Both dates are combined with 00:00 and tagged as UTC.
# NOTE(review): flood_end_date therefore marks the *start* of the final
# day - confirm that this is the intended end of the flood window.
flood_start_date = session_df.iloc[0]["flood_date_start"]
flood_start_date = datetime.combine(flood_start_date, midnight).replace(tzinfo=tz)
flood_end_date = session_df.iloc[0]["flood_date_end"]
flood_end_date = datetime.combine(flood_end_date, midnight).replace(tzinfo=tz)
# The reference period is fetched by the query but not used in this notebook.
#ref_start_date = session_df.iloc[0]["ref_date_start"]
#ref_start_date = datetime.combine(ref_start_date, midnight).replace(tzinfo=tz)
#ref_end_date = session_df.iloc[0]["ref_date_end"]
#ref_end_date = datetime.combine(ref_end_date, midnight).replace(tzinfo=tz)
bucket_uri = session_df.iloc[0]["bucket_uri"]

# Query the selected grid positions and LGAs
query = ("SELECT sp.patch_name, ST_AsText(gr.geometry), gr.lga_name22 "
         "FROM session_patches sp "
         "INNER JOIN grid_loc gr "
         "ON sp.patch_name = gr.patch_name "
         "WHERE sp.session = %s ;")
data = (session_name,)
grid_sel_df = db_conn.run_query(query, data, fetch=True)
# Without patches there are no LGA names to look up, and the 'IN %s'
# query further down would be given an empty tuple.
if grid_sel_df is None or len(grid_sel_df) == 0:
    raise ValueError(f"[ERR] no grid patches found for session '{session_name}'!")

# Format the results into a correct GeoDataFrame
grid_sel_df['geometry'] = gpd.GeoSeries.from_wkt(grid_sel_df['st_astext'])
grid_sel_df = grid_sel_df.drop(columns=['st_astext'])
grid_sel_gdf = gpd.GeoDataFrame(grid_sel_df, geometry='geometry', crs="EPSG:4326")
# Keep one row per patch in the GeoDataFrame. 'grid_sel_df' is used below
# for the LGA names rather than this de-duplicated frame.
grid_sel_gdf.drop_duplicates(subset=["patch_name"], inplace=True)
print(f"[INFO] {len(grid_sel_gdf)} grid patches selected.")

# Query the affected LGA shapes
lgas_sel_lst = grid_sel_df.lga_name22.unique().tolist()
query = ("SELECT DISTINCT lga_name22, ST_AsText(geometry_col) "
         "FROM lgas_info "
         "WHERE lga_name22 IN %s ;")
data = (tuple(lgas_sel_lst),)
lgas_sel_df = db_conn.run_query(query, data, fetch=True)

# Format the results into a correct GeoDataFrame
lgas_sel_df['geometry'] = gpd.GeoSeries.from_wkt(lgas_sel_df['st_astext'])
lgas_sel_df = lgas_sel_df.drop(columns=['st_astext'])
lgas_sel_gdf = gpd.GeoDataFrame(lgas_sel_df, geometry='geometry', crs="EPSG:4326")
print(f"[INFO] {len(lgas_sel_gdf)} LGAs affected.")

## Plot the mapping grid and overlapping LGAs 

Use this map as an aid to choose which LGA to validate. Display LGA names by hovering the mouse cursor over the map.

In [None]:
# Overplot the LGAs on the selected mapping grid
# The grid patches are drawn first as unfilled red outlines; the object
# returned by explore() is kept as the map 'm'.
m = grid_sel_gdf.explore(style_kwds={"fillOpacity": 0.00}, color="red", 
                         name="Grid Patches", highlight=False)
# The LGA polygons are added to the same map with a near-transparent fill.
lgas_sel_gdf.explore(m=m, style_kwds={"fillOpacity": 0.01}, name="LGAs")
# Add the layer-control widget, shown expanded
folium.LayerControl(collapsed=False).add_to(m)
# Final expression of the cell: displays the map
m

## Choose the LGAs to validate

Edit the next cell to choose a subset of LGAs over which to load the flood extent map. Set an empty list to load the full flood map.

**WARNING: loading too large an area will likely fail due to memory limits.**

In [None]:
# EDIT THIS CELL: Set the list of LGAs to load or leave blank [] to display whole map
# Names are matched against the 'lga_name22' values of the affected LGAs.
selected_lgas = []
# Example selection:
#selected_lgas = ["Newcastle", "Maitland"]

In [None]:
# Extract the outer bounds of the chosen area
if not selected_lgas:
    # If no LGAs specified, select the bounds of all patches
    print("[INFO] Selecting bounds of all patches.")
    boundary_box = box(*grid_sel_gdf.total_bounds)
else:
    # Or select the bounds of chosen LGAs
    print("[INFO] Selecting bounds of chosen LGAs.")
    lgas_filtered_gdf = lgas_sel_gdf[lgas_sel_gdf.lga_name22.isin(selected_lgas)]

    # Report names that matched nothing (e.g. a typo) instead of dropping
    # them silently.
    unknown_lgas = sorted(set(selected_lgas) - set(lgas_filtered_gdf.lga_name22))
    if unknown_lgas:
        print(f"[WARN] LGAs not found in this session: {unknown_lgas}")

    # An empty selection has NaN bounds, which would give an invalid box.
    if lgas_filtered_gdf.empty:
        raise ValueError("[ERR] none of the selected LGAs are part of this session!")
    boundary_box = box(*lgas_filtered_gdf.total_bounds)

# Wrap the box in a GeoDataFrame that shares the CRS of the grid
boundary_gdf = gpd.GeoDataFrame(geometry=[boundary_box], crs=grid_sel_gdf.crs)

**Select the patch nearest the centre to plot a time-series.**

In [None]:
# Use patch centroids to select patch_name
# NOTE: this adds a 'cents' column to grid_sel_gdf, i.e. the frame created
# in an earlier cell is modified here.
grid_sel_gdf["cents"] = grid_sel_gdf.centroid
# Gather every patch centroid into one MultiPoint geometry
patch_cents = MultiPoint(grid_sel_gdf["cents"].to_list())
# Element [1] of the returned pair belongs to the second argument, i.e. it is
# the patch centroid closest to the centre of the boundary box.
# NOTE(review): a GeoSeries is passed as the first argument and the result is
# read with '.values[0]' below, so this relies on nearest_points() returning a
# series-like object - confirm against the installed Shapely version.
nearest_point = nearest_points(boundary_gdf.centroid, patch_cents)[1]
# Find the row whose centroid equals that point, then read its patch name
nearest_idx = grid_sel_gdf["cents"] == nearest_point.values[0]
nearest_patch_name = grid_sel_gdf[nearest_idx].patch_name.values[0]
print(f"[INFO] Closest patch to centre is {nearest_patch_name}")

## Visualise data availability and cloud-cover

In [None]:
# Select the date and cloud cover of available imagery
query = ("SELECT image_id, satellite, cloud_probability, solarday, status "
         "FROM image_downloads "
         "WHERE patch_name = %s;")
data = (nearest_patch_name,)
downloads_df = db_conn.run_query(query, data, fetch=True)
print(f"[INFO] Found {len(downloads_df)} images for {nearest_patch_name}.")

# The query has no ORDER BY, so sort chronologically here; otherwise the
# connecting line drawn below jumps back and forth in time.
downloads_df = downloads_df.sort_values("solarday")

# Plot the time-series: one scatter series per (satellite, status) pair
fig, ax = plt.subplots(1, 1, figsize=(15, 5))
for sat, downloaded in product(["Landsat", "S2"], [1, 0]):
    label = f"{sat} ({'' if downloaded else 'NOT '}Downloaded)"
    color = "C0" if sat == "S2" else "C1"
    marker = "o" if downloaded else "x"
    selection_mask = (downloads_df.satellite == sat) & (downloads_df.status == downloaded)
    ax.scatter(x=downloads_df[selection_mask].solarday,
               y=downloads_df[selection_mask].cloud_probability,
               label=label, c=color, marker=marker, s=100)

# Format plot to look nice
# The legend is created before the connecting line is drawn, so the line
# does not get a legend entry of its own.
ax.legend()
downloads_df.plot(x="solarday", y="cloud_probability", ax=ax, legend=False)
# Shade the flood period and zoom to it with a 10% margin on either side
ax.axvspan(flood_start_date, flood_end_date, alpha=0.05)
datespan = abs(flood_end_date - flood_start_date)
ax.set_xlim(flood_start_date - datespan * .1, flood_end_date + datespan * .1)
ax.set_ylabel("Mean Cloud Coverage (%)")
ax.set_xlabel("Date")
ax.set_title(f"Available Data During Flood for Patch {nearest_patch_name}.")
ax.grid()

## Load the floodmap of the selected area

First load the floodmap and clip to the selected area.

In [None]:
# Query the floodmap name on GCP
query = ("SELECT data_path FROM postproc_spatial "
         "WHERE session = %s "
         "AND mode = %s;")
data = (session_name, "flood")
tmp_df = db_conn.run_query(query, data, fetch=True)
# Fail with a readable message instead of an IndexError from .iloc[0]
if tmp_df is None or len(tmp_df) == 0:
    raise ValueError(f"[ERR] no 'flood' map path found for session '{session_name}'!")
floodmap_path = tmp_df.iloc[0]["data_path"]

# Load the flood map from GCP
print(f"[INFO] Loading the floodmap:\n\t{floodmap_path}")
floodmap = utils.read_geojson_from_gcp(floodmap_path)

# Clip the floodmap at the boundary and delete the original to release
# the memory held by the full map
floodmap_clip = floodmap.clip(boundary_gdf)
del floodmap
gc.collect()

# Plot the floodmap
plot_utils.plot_floodmap(floodmap_clip)

## Query the available satellite imagery during the flood period

In [None]:
# Merge the boundary geometries into a single shapely geometry
region = boundary_gdf.unary_union

# Ask GEE for the Landsat and Sentinel-2 images over the region between
# the flood start and end dates.
#  producttype can be 'both', 'S2', "Landsat", "L8" or "L9".
#  add_s2cloudless adds a column that indicates if the s2cloudless image is available.
#  return_collection=True returns the image collection alongside the table.
flood_images_gee, flood_collection = ee_query.query(
    area=region,
    date_start=flood_start_date,
    date_end=flood_end_date,
    producttype="both",
    add_s2cloudless=True,
    return_collection=True,
)

# Report how many images were found (one table row per image)
num_images = len(flood_images_gee)
print(f"[INFO] Found {num_images} flooding images on archive.")

## Plot the floodmap over the satellite data

**Building the map in the next cell may take some time.**

In [None]:
# Keep only the 'flood_trace' and 'water' polygons of the clipped map
floodmap = floodmap_clip.loc[floodmap_clip['class'].isin(['flood_trace', 'water'])]
categories = floodmap["class"].unique()
print("[INFO] Categories in file: ", categories)

# Colour for each class; the colourmap follows the order of 'categories'
COLORS = {
    'cloud': "gray",
    'flood_trace': "turquoise",
    'water': "blue"
}
cmap = matplotlib.colors.ListedColormap([COLORS[name] for name in categories])

# Initialise the OpenStreetMap base layer, centred on the region.
# The location is given as (y, x) of the region centroid.
m = geemap.Map(location=(region.centroid.y, region.centroid.x), zoom_start=10)

# Add one (initially hidden) satellite layer per day and satellite
for (day, satellite), images_day in flood_images_gee.groupby(["solarday", "satellite"]):
    # Band combination and stretch differ between Sentinel-2 and Landsat
    if satellite.startswith("S2"):
        bands, max_value = ["B11", "B8", "B4"], 3000
    else:
        bands, max_value = ["B6", "B5", "B4"], 0.3

    # Restrict the collection to the images of this group, matched by title
    image_col_day_sat = flood_collection.filter(
        ee.Filter.inList("title", images_day.index.tolist())
    )
    m.addLayer(
        image_col_day_sat,
        {"min": 0, "max": max_value, "bands": bands},
        f"{satellite}: {day}",
        False,
    )

# Overlay the floodmap and the boundary
floodmap.explore(
    m=m,
    column="class",
    cmap=cmap,
    categories=categories,
    name="Flood Extent Map",
    style_kwds={"fillOpacity": 0.3},
)
boundary_gdf.explore(
    m=m,
    style_kwds={"fillOpacity": 0.0},
    color="black",
    name="Boundary",
    highlight=False,
)

# Add the layer control
folium.LayerControl(collapsed=False).add_to(m)
print("[INFO] Map ready: execute next cell to display.")

In [None]:
# Show the map
# (the map built in the previous cell is the last expression, so it is rendered as the cell output)
m