# Query and Visualise Available Satellite Data

This notebook presents a workflow to query and visualise the Sentinel-2 and Landsat data available in Google Earth Engine over an area of interest (AOI) within a specified time range.

In [1]:
# Necessary Python modules
import sys
import os
os.environ['USE_PYGEOS'] = '0'

from ml4floods.data import utils
import geopandas as gpd
from georeader.readers import ee_query, scihubcopernicus_query
import folium
from datetime import datetime, timezone, timedelta
import ee
import geopandas as gpd
import pandas as pd
import folium
from zoneinfo import ZoneInfo
import geemap.foliumap as geemap
from georeader.readers import query_utils
import folium
from georeader.readers import S2_SAFE_reader

from dotenv import load_dotenv

## Load environment and project details

As with the other notebooks, we load credentials and project details from a hidden `.env` file.

In [2]:
# Load environment variables (including path to credentials) from '.env' file.
# Every prerequisite is checked up front so that a mis-configured environment
# fails here, with a specific message, rather than deep inside a later cell.
env_file_path = "../../.env"

# load_dotenv() already reports success as a boolean, so it is tested directly.
assert load_dotenv(dotenv_path=env_file_path), "[ERR] Failed to load environment!"

# Google credentials: the variable must be set AND point at an existing key file.
assert "GOOGLE_APPLICATION_CREDENTIALS" in os.environ, "[ERR] Missing $GOOGLE_APPLICATION_CREDENTIALS!"
assert "GS_USER_PROJECT" in os.environ, "[ERR] Missing $GS_USER_PROJECT!"
key_file_path = os.environ["GOOGLE_APPLICATION_CREDENTIALS"]
assert os.path.exists(key_file_path), f"[ERR] Google credential key file does not exist: \n{key_file_path} "

# Base directory named by $ML4FLOODS_BASE_DIR: must be set and must exist on disk.
assert "ML4FLOODS_BASE_DIR" in os.environ, "[ERR] Missing $ML4FLOODS_BASE_DIR!"
base_path = os.environ["ML4FLOODS_BASE_DIR"]
assert os.path.exists(base_path), f"[ERR] Base path does not exist: \n{base_path} "
print("[INFO] Successfully loaded FloodMapper environment.")

[INFO] Successfully loaded FloodMapper environment.


Set the details of the event and mapping session here.

In [3]:
# Unique session name: all work for this event is conducted under it
session_name = "EMSR586"

# Flooding date range (UTC), as (start, end) "YYYY-MM-DD" strings.
# The start may need to be a day or two earlier than the reported onset.
flood_start_date, flood_end_date = "2022-07-01", "2022-07-24"

# Pre-flood date range, as (start, end) "YYYY-MM-DD" strings.
# A time period before the flood event, used to inspect reference data.
preflood_start_date, preflood_end_date = "2022-06-15", "2022-06-25"

## Parse and check date information

We assume the UTC timezone for all date queries.

In [4]:
# Parse the pre-flood and flood date strings into timezone-aware datetimes
tz = ZoneInfo("UTC")


def _parse_period(start_date, end_date, tzinfo):
    """Parse two "YYYY-MM-DD" strings into an ordered pair of aware datetimes.

    Args:
        start_date: First date of the period, formatted as "%Y-%m-%d".
        end_date: Second date of the period, formatted as "%Y-%m-%d".
        tzinfo: Timezone attached to both parsed datetimes.

    Returns:
        Tuple ``(earlier, later)``; the two dates are sorted, so the result is
        chronological even if the arguments were supplied in reverse order.

    Raises:
        ValueError: If either string does not match the "%Y-%m-%d" format.
    """
    bounds = [
        datetime.strptime(date_str, "%Y-%m-%d").replace(tzinfo=tzinfo)
        for date_str in (start_date, end_date)
    ]
    earlier, later = sorted(bounds)
    return earlier, later


# Flood search period
flood_start_period, flood_end_period = _parse_period(flood_start_date, flood_end_date, tz)
flood_duration = flood_end_period - flood_start_period
print(f"[INFO] Flood search period: \n\t{flood_start_period} to \n\t{flood_end_period}")
print(f"[INFO] Flood duration = {flood_duration}\n")

# Pre-flood (reference) search period
preflood_start_period, preflood_end_period = _parse_period(preflood_start_date, preflood_end_date, tz)
preflood_duration = preflood_end_period - preflood_start_period
print(f"[INFO] Pre-flood search period: \n\t{preflood_start_period} to \n\t{preflood_end_period}")
print(f"[INFO] Pre-flood duration = {preflood_duration}\n")

# Gap between the end of the reference period and the start of the flood period
margin = flood_start_period - preflood_end_period
print(f"[INFO] Margin before flood = {margin}\n")

# A negative margin means the reference window runs into the flood window,
# so "pre-flood" imagery could already show flooding. Warn, but do not stop.
if margin < timedelta(0):
    print("[WARN] Pre-flood period overlaps the flood period - check the dates above.")

[INFO] Flood search period: 
	2022-07-01 00:00:00+00:00 to 
	2022-07-24 00:00:00+00:00
[INFO] Flood duration = 23 days, 0:00:00

[INFO] Pre-flood search period: 
	2022-06-15 00:00:00+00:00 to 
	2022-06-25 00:00:00+00:00
[INFO] Pre-flood duration = 10 days, 0:00:00

[INFO] Margin before flood = 6 days, 0:00:00



## Load the gridded AOIs to be mapped

Here we load the gridded AoIs from the GCP bucket. We created this file in the previous notebook.

In [5]:
# Bucket Name
bucket_name = "gs://ml4floods_nema"

# Gridded AoI filename
grid_aoi_file = "patches_to_map.geojson"

# Form the session path and output path on the GCP bucket.
# These are bucket URLs, not local filesystem paths, so the parts are joined
# with an explicit "/" instead of os.path.join (which would insert the
# platform's separator, i.e. a backslash on Windows).
session_path = "/".join([bucket_name, "0_DEV/1_Staging/operational", session_name])
grid_aoi_path = "/".join([session_path, grid_aoi_file])

# Read the gridded AoIs from the bucket and display the resulting table
grid_aois = utils.read_geojson_from_gcp(grid_aoi_path)
print(f"[INFO] Loaded gridded_aois from the following file:\n\t{grid_aoi_path}")
grid_aois

[INFO] Loaded gridded_aois from the following file:
	gs://ml4floods_nema/0_DEV/1_Staging/operational/EMSR586/patches_to_map.geojson


Unnamed: 0,name,geometry
0,GRID09881,"POLYGON ((150.47329 -33.10516, 150.47329 -32.8..."
1,GRID09881,"POLYGON ((150.47329 -33.10516, 150.47329 -32.8..."
2,GRID09882,"POLYGON ((150.47329 -32.90516, 150.47329 -32.6..."
3,GRID09882,"POLYGON ((150.47329 -32.90516, 150.47329 -32.6..."
4,GRID09882,"POLYGON ((150.47329 -32.90516, 150.47329 -32.6..."
...,...,...
60,GRID11058,"POLYGON ((152.07329 -32.90516, 152.07329 -32.6..."
61,GRID11059,"POLYGON ((152.07329 -32.70516, 152.07329 -32.4..."
62,GRID11059,"POLYGON ((152.07329 -32.70516, 152.07329 -32.4..."
63,GRID11205,"POLYGON ((152.27329 -32.90516, 152.27329 -32.6..."


In [6]:
# Dissolve all grid patches into one outline (MultiPolygon).
# NOTE: despite the `_df` suffix, `aoi_outline_df` holds the merged geometry
# itself; the GeoDataFrame wrapper built from it is `aoi_outline_gdf`.
aoi_outline_df = grid_aois.geometry.unary_union
aoi_outline_gdf = gpd.GeoDataFrame(geometry=[aoi_outline_df], crs="EPSG:4326")

# Leaflet map: semi-transparent grid patches with the unfilled red outline on top
patch_style = {"fillOpacity": 0.3}
outline_style = {"fillOpacity": 0.0}
m = grid_aois.explore(name="Grid Patches", style_kwds=patch_style)
aoi_outline_gdf.explore(m=m, name="AoI Outline", color="red", style_kwds=outline_style)

# Expanded layer control so both layers can be toggled
layer_control = folium.LayerControl(collapsed=False)
layer_control.add_to(m)
m

  return lib.unary_union(collections, **kwargs)


In [7]:
# Flag rows of the grid table that repeat an earlier row, then report
# whether at least one such repeat exists
duplicate_mask = grid_aois.duplicated()
are_duplicates = duplicate_mask.any()
print(f"Are any grid names duplicates? -> {are_duplicates}")

Are any grid names duplicates? -> True


In [8]:
# Drop any duplicates now.
# The name is rebound to the de-duplicated result instead of using
# inplace=True, so the frame object already displayed and mapped by earlier
# cells is not mutated behind the reader's back.
grid_aois = grid_aois.drop_duplicates()

# Confirm that no duplicated rows remain (displayed as the cell output)
grid_aois.duplicated().any()

False

## Query what images are available in Google Earth Engine

In [9]:
%%time

# Query GEE for the Landsat and Sentinel-2 images over the flood period.
#  producttype can be 'both', 'S2', "Landsat", "L8" or "L9".
#  add_s2cloudless adds a column that indicates if the s2cloudless image is available.
#  return_collection=True makes the query return the collection alongside the table.
flood_query_kwargs = dict(
    area=aoi_outline_df,
    date_start=flood_start_period,
    date_end=flood_end_period,
    producttype="both",
    return_collection=True,
    add_s2cloudless=True,
)
flood_images_gee, flood_collection = ee_query.query(**flood_query_kwargs)

# Report how many images the query returned
num_images = len(flood_images_gee)
print(f"[INFO] Found {num_images} flooding images on archive.")

[INFO] Found 54 flooding images on archive.
CPU times: user 591 ms, sys: 32.9 ms, total: 624 ms
Wall time: 9.64 s


  return lib.intersection(a, b, **kwargs)
  return lib.difference(a, b, **kwargs)


Show selected columns from the table for context.

In [10]:
# Display only the columns that give context on the flood-period images
flood_summary_columns = [
    "overlappercentage",
    "cloudcoverpercentage",
    "utcdatetime",
    "localdatetime",
    "solardatetime",
    "solarday",
    "satellite",
]
flood_images_gee[flood_summary_columns]

Unnamed: 0_level_0,overlappercentage,cloudcoverpercentage,utcdatetime,localdatetime,solardatetime,solarday,satellite
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
S2B_MSIL1C_20220703T000229_N0400_R030_T56HLJ_20220703T010154,48.302627,99.243546,2022-07-03 00:05:58.729000+00:00,2022-07-03 00:05:58.729000+00:00,2022-07-03 10:10:49.627669+00:00,2022-07-03,S2B
S2B_MSIL1C_20220703T000229_N0400_R030_T56HKJ_20220703T010154,29.014953,96.160172,2022-07-03 00:06:02.179000+00:00,2022-07-03 00:06:02.179000+00:00,2022-07-03 10:10:53.077669+00:00,2022-07-03,S2B
S2B_MSIL1C_20220703T000229_N0400_R030_T56HLK_20220703T010154,25.238639,64.80522,2022-07-03 00:05:44.350000+00:00,2022-07-03 00:05:44.350000+00:00,2022-07-03 10:10:35.248669+00:00,2022-07-03,S2B
S2B_MSIL1C_20220703T000229_N0400_R030_T56HKK_20220703T010154,7.825712,93.82663,2022-07-03 00:05:47.617000+00:00,2022-07-03 00:05:47.617000+00:00,2022-07-03 10:10:38.515669+00:00,2022-07-03,S2B
S2B_MSIL1C_20220703T000229_N0400_R030_T56HMJ_20220703T010154,5.271817,99.195938,2022-07-03 00:05:53.409000+00:00,2022-07-03 00:05:53.409000+00:00,2022-07-03 10:10:44.307669+00:00,2022-07-03,S2B
S2B_MSIL1C_20220703T000229_N0400_R030_T56HMK_20220703T010154,2.135637,30.491144,2022-07-03 00:05:40.567000+00:00,2022-07-03 00:05:40.567000+00:00,2022-07-03 10:10:31.465669+00:00,2022-07-03,S2B
S2A_MSIL1C_20220704T235241_N0400_R130_T56HLJ_20220705T011811,23.388043,99.99966,2022-07-04 23:56:10.834000+00:00,2022-07-04 23:56:10.834000+00:00,2022-07-05 10:01:01.732669+00:00,2022-07-05,S2A
S2A_MSIL1C_20220704T235241_N0400_R130_T56HMJ_20220705T011811,8.382048,99.998089,2022-07-04 23:56:07.687000+00:00,2022-07-04 23:56:07.687000+00:00,2022-07-05 10:00:58.585669+00:00,2022-07-05,S2A
S2A_MSIL1C_20220704T235241_N0400_R130_T56HLK_20220705T011811,7.109774,99.917626,2022-07-04 23:55:56.176000+00:00,2022-07-04 23:55:56.176000+00:00,2022-07-05 10:00:47.074669+00:00,2022-07-05,S2A
S2A_MSIL1C_20220704T235241_N0400_R130_T56HMK_20220705T011811,2.835072,99.994446,2022-07-04 23:55:52.988000+00:00,2022-07-04 23:55:52.988000+00:00,2022-07-05 10:00:43.886669+00:00,2022-07-05,S2A


In [11]:
# Repeat the GEE query, this time over the pre-flood (reference) period
preflood_query_kwargs = dict(
    area=aoi_outline_df,
    date_start=preflood_start_period,
    date_end=preflood_end_period,
    producttype="both",
    return_collection=True,
    add_s2cloudless=True,
)
preflood_images_gee, preflood_collection = ee_query.query(**preflood_query_kwargs)

# Report how many images the query returned
num_images = len(preflood_images_gee)
print(f"[INFO] Found {num_images} pre-flood images on archive.")

[INFO] Found 27 pre-flood images on archive.


  return lib.intersection(a, b, **kwargs)
  return lib.difference(a, b, **kwargs)


Show selected columns from the table for context.

In [12]:
# Display only the columns that give context on the pre-flood images
preflood_summary_columns = [
    "overlappercentage",
    "cloudcoverpercentage",
    "utcdatetime",
    "localdatetime",
    "solardatetime",
    "solarday",
    "satellite",
]
preflood_images_gee[preflood_summary_columns]

Unnamed: 0_level_0,overlappercentage,cloudcoverpercentage,utcdatetime,localdatetime,solardatetime,solarday,satellite
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
LC09_L1TP_089083_20220615_20220616_02_T1,75.232919,1.52,2022-06-15 23:43:21.074000+00:00,2022-06-15 23:43:21.074000+00:00,2022-06-16 09:48:11.972669+00:00,2022-06-16,LC09
LC09_L1TP_089082_20220615_20220616_02_T1,19.259487,0.87,2022-06-15 23:42:57.124000+00:00,2022-06-15 23:42:57.124000+00:00,2022-06-16 09:47:48.022669+00:00,2022-06-16,LC09
S2A_MSIL1C_20220618T000231_N0400_R030_T56HLJ_20220618T022755,48.302613,48.123743,2022-06-18 00:06:05.037000+00:00,2022-06-18 00:06:05.037000+00:00,2022-06-18 10:10:55.935669+00:00,2022-06-18,S2A
S2A_MSIL1C_20220618T000231_N0400_R030_T56HKJ_20220618T022755,29.014953,0.741019,2022-06-18 00:06:08.482000+00:00,2022-06-18 00:06:08.482000+00:00,2022-06-18 10:10:59.380669+00:00,2022-06-18,S2A
S2A_MSIL1C_20220618T000231_N0400_R030_T56HLK_20220618T022755,25.238639,86.092418,2022-06-18 00:05:50.656000+00:00,2022-06-18 00:05:50.656000+00:00,2022-06-18 10:10:41.554669+00:00,2022-06-18,S2A
S2A_MSIL1C_20220618T000231_N0400_R030_T56HKK_20220618T022755,7.825712,20.411389,2022-06-18 00:05:53.928000+00:00,2022-06-18 00:05:53.928000+00:00,2022-06-18 10:10:44.826669+00:00,2022-06-18,S2A
S2A_MSIL1C_20220618T000231_N0400_R030_T56HMJ_20220618T022755,5.340741,77.138643,2022-06-18 00:05:59.758000+00:00,2022-06-18 00:05:59.758000+00:00,2022-06-18 10:10:50.656669+00:00,2022-06-18,S2A
S2A_MSIL1C_20220618T000231_N0400_R030_T56HMK_20220618T022755,2.160807,81.630733,2022-06-18 00:05:46.866000+00:00,2022-06-18 00:05:46.866000+00:00,2022-06-18 10:10:37.764669+00:00,2022-06-18,S2A
S2B_MSIL1C_20220619T235239_N0400_R130_T56HLJ_20220620T004608,23.24618,35.061012,2022-06-19 23:56:02.117000+00:00,2022-06-19 23:56:02.117000+00:00,2022-06-20 10:00:53.015669+00:00,2022-06-20,S2B
S2B_MSIL1C_20220619T235239_N0400_R130_T56HMJ_20220620T004608,8.382048,87.412374,2022-06-19 23:55:58.971000+00:00,2022-06-19 23:55:58.971000+00:00,2022-06-20 10:00:49.869669+00:00,2022-06-20,S2B


## Visualise the available Landsat and S2 tile footprints

Here we can visualise the footprints of the Sentinel-2 and Landsat tiles within each overpass of the satellite. Turn on individual footprint annotations by toggling the checkboxes on the legend of the map.

In [13]:
# Both query tables get the same treatment, pre-flood first and then flood
image_tables = (preflood_images_gee, flood_images_gee)

# Add a plain-string version of the local acquisition time to each table
for images_gee in image_tables:
    images_gee["localdatetime_str"] = images_gee["localdatetime"].dt.strftime("%Y-%m-%d %H:%M:%S")

# Columns to show for each footprint, and the colour cycle for the layers
showcolumns = ["geometry","overlappercentage","cloudcoverpercentage", "localdatetime_str","solarday","satellite"]
colors = ["#ff7777", "#fffa69", "#8fff84", "#52adf1", "#ff6ac2","#1b6d52", "#fce5cd","#705334"]

# Start the map with the AoI outline
m = aoi_outline_gdf.explore(style_kwds={"fillOpacity": 0.1}, color="black", name="AoI Outline")

# One initially-hidden layer per (solar day, satellite) group.
# The colour index restarts at zero for each table, and wraps around when
# there are more groups than colours.
for images_gee in image_tables:
    overpasses = images_gee.groupby(["solarday","satellite"])
    for idx, ((day, satellite), images_day) in enumerate(overpasses):
        layer_color = colors[idx % len(colors)]
        m = images_day[showcolumns].explore(
            m=m,
            name=f"{satellite}: {day}",
            color=layer_color,
            show=False)

# Add the layer control and show
folium.LayerControl(collapsed=False).add_to(m)
m

## Visualise the available Landsat and S2 imagery

We can also directly visualise the imagery for each satellite overpass. This will help make a selection on which days to include in the flood mapping operation.

Once the map loads, click on individual Satellite + date combinations to show the optical imagery. 

*Note: the imagery can take a few seconds to load.*

In [None]:
%%time
# Initialise the OpenStreetMap base layer, centred on the AoI centroid.
# The centroid coordinate pair is reversed before being passed as `location`
# (presumably (x, y) = (lon, lat) -> (lat, lon); confirm against the map API).
centroid_xy = aoi_outline_df.centroid.coords[0]
m = geemap.Map(location=centroid_xy[::-1], zoom_start=8)

# Each table of query results is paired with the collection it came from:
# pre-flood first, then flood
periods = (
    (preflood_images_gee, preflood_collection),
    (flood_images_gee, flood_collection),
)

# One initially-hidden image layer per (solar day, satellite) overpass
for images_gee, collection in periods:
    for (day, satellite), images_day in images_gee.groupby(["solarday", "satellite"]):
        # Keep only the images of this overpass, matched on their title
        titles = images_day.index.tolist()
        image_col_day_sat = collection.filter(ee.Filter.inList("title", titles))

        # Band triplet and stretch differ between Sentinel-2 and the other satellites
        is_s2 = satellite.startswith("S2")
        bands = ["B11","B8","B4"] if is_s2 else ["B6","B5","B4"]
        vis_params = {"min":0, "max":3000 if is_s2 else 0.3, "bands": bands}
        m.addLayer(image_col_day_sat, vis_params, f"{satellite}: {day}", False)

# Overlay the AoI outline, add the layer control and show the map
aoi_outline_gdf.explore(style_kwds={"fillOpacity": 0.1}, color="black", name="AoI", m=m)
folium.LayerControl(collapsed=False).add_to(m)
m

For EMSR586 we can see that the images 'S2B: 2022-06-23' and 'S2A: 2022-06-25' have low cloud cover and provide a great view of the AoI before the flood event.

The imagery 'S2A: 2022-07-08' provides a clear view of the land immediately after flooding.