In [None]:
# Necessary imports
import os
import pathlib
os.environ['USE_PYGEOS'] = '0'
import numpy as np
from itertools import product
from datetime import datetime
from zoneinfo import ZoneInfo
from dotenv import load_dotenv
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.colors
import ee
import pandas as pd
from georeader.readers import ee_query
import folium
import geemap.foliumap as geemap

import rasterio
from rasterio.io import MemoryFile
from rasterio import Affine as A
from rasterio import CRS
#from rasterio.warp import reproject, Resampling
from rasterio.warp import calculate_default_transform, reproject, Resampling
from rasterio.transform import from_origin

from ml4floods.data import utils
from db_utils import DB

# Suppress deprecation warnings (comment out the lines below to see them)
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 
from shapely.errors import ShapelyDeprecationWarning
warnings.filterwarnings("ignore", category=ShapelyDeprecationWarning) 

# Access the bucket without requester-pays by default
utils.REQUESTER_PAYS_DEFAULT = False

## Load environment and project details

As with the other notebooks, we load credentials and project details from a hidden ```.env``` file.

In [None]:
# Load environment variables (including path to credentials) from '.env' file
env_file_path = "../.env"

# Uncomment for alternative version for Windows (r"" indicates raw string)
#env_file_path = r"C:/Users/User/floodmapper/.env"

# load_dotenv() returns a bool, so it can be asserted on directly.
assert load_dotenv(dotenv_path=env_file_path), "[ERR] failed to load environment!"

# Fail early, with a message naming the variable, if a required setting is absent.
assert "GOOGLE_APPLICATION_CREDENTIALS" in os.environ, "[ERR] missing $GOOGLE_APPLICATION_CREDENTIALS!"
assert "GS_USER_PROJECT" in os.environ, "[ERR] missing $GS_USER_PROJECT!"
key_file_path = os.environ["GOOGLE_APPLICATION_CREDENTIALS"]
assert os.path.exists(key_file_path), f"[ERR] Google credential key file does not exist: \n{key_file_path} "
assert "ML4FLOODS_BASE_DIR" in os.environ, "[ERR] missing $ML4FLOODS_BASE_DIR!"
base_path = os.environ["ML4FLOODS_BASE_DIR"]
assert os.path.exists(base_path), f"[ERR] base path does not exist: \n{base_path} "

# Use .get() so that an unset variable reaches the assertion below instead of
# raising a bare KeyError on the lookup itself.
bucket_name = os.environ.get("BUCKET_URI")
assert bucket_name, "[ERR] missing or empty $BUCKET_URI!"

print("[INFO] Successfully loaded FloodMapper environment.")

In [None]:
# Connect to the database (point to the .env file for credentials)
# NOTE(review): DB comes from the project-local db_utils module; presumably it
# reads the connection settings from this file - confirm against db_utils.
db_conn = DB(env_file_path)

In [None]:
# Initialise the Google Earth Engine connection.
# Follow instructions on login prompt, if required.
# NOTE(review): no credentials are passed explicitly here, so this presumably
# relies on the environment loaded earlier or on cached credentials - confirm.
ee.Initialize()

In [None]:
# EDIT THIS CELL: All work is conducted under a unique session name
# The name is the key used to look up the session's parameters and selected
# grid patches in the database queries further down.
session_name = "boulia_test"

## Retrieve the session parameters from the database

The following cell uses the session name set above to retrieve the flood map parameters, including the mapping grid and affected LGAs.

In [None]:
tz = ZoneInfo("UTC")
midnight = datetime.min.time()


def _as_utc_midnight(day):
    """Return the timezone-aware datetime for 00:00 UTC on the date `day`."""
    return datetime.combine(day, midnight).replace(tzinfo=tz)


# Query the floodmapping parameters from the DB
query = ("SELECT flood_date_start, flood_date_end, "
         "ref_date_start, ref_date_end, bucket_uri "
         "FROM session_info "
         "WHERE session = %s;")
data = (session_name,)
session_df = db_conn.run_query(query, data, fetch=True)

# Without this check an unknown session name surfaces as an opaque IndexError
# from .iloc[0] below.
if session_df is None or len(session_df) == 0:
    raise ValueError(f"[ERR] session '{session_name}' not found in the database!")

# The session's date columns are converted to UTC datetimes at midnight.
session_row = session_df.iloc[0]
flood_start_date = _as_utc_midnight(session_row["flood_date_start"])
flood_end_date = _as_utc_midnight(session_row["flood_date_end"])
ref_start_date = _as_utc_midnight(session_row["ref_date_start"])
ref_end_date = _as_utc_midnight(session_row["ref_date_end"])
bucket_uri = session_row["bucket_uri"]

# Query the selected grid positions and LGAs
query = ("SELECT sp.patch_name, ST_AsText(gr.geometry), gr.lga_name22 "
         "FROM session_patches sp "
         "INNER JOIN grid_loc gr "
         "ON sp.patch_name = gr.patch_name "
         "WHERE sp.session = %s ;")
data = (session_name,)
grid_sel_df = db_conn.run_query(query, data, fetch=True)

# Format the results into a correct GeoDataFrame: parse the WKT text into a
# geometry column and drop the text column (new frames, no in-place mutation).
grid_sel_df = (
    grid_sel_df
    .assign(geometry=gpd.GeoSeries.from_wkt(grid_sel_df["st_astext"]))
    .drop(columns=["st_astext"])
)
# Keep one row per patch in the GeoDataFrame; grid_sel_df keeps every row so
# that all LGA names remain available below.
grid_sel_gdf = (
    gpd.GeoDataFrame(grid_sel_df, geometry="geometry", crs="EPSG:4326")
    .drop_duplicates(subset=["patch_name"])
)
print(f"[INFO] {len(grid_sel_gdf)} grid patches selected.")

# Query the affected LGA shapes
lgas_sel_lst = grid_sel_df.lga_name22.unique().tolist()
# An empty list cannot be used with the IN clause below, so stop here with a
# clear message instead of failing inside the database call.
if not lgas_sel_lst:
    raise ValueError(f"[ERR] no grid patches / LGAs found for session '{session_name}'!")
query = ("SELECT DISTINCT lga_name22, ST_AsText(geometry_col) "
         "FROM lgas_info "
         "WHERE lga_name22 IN %s ;")
data = (tuple(lgas_sel_lst),)
lgas_sel_df = db_conn.run_query(query, data, fetch=True)

# Format the results into a correct GeoDataFrame
lgas_sel_df = (
    lgas_sel_df
    .assign(geometry=gpd.GeoSeries.from_wkt(lgas_sel_df["st_astext"]))
    .drop(columns=["st_astext"])
)
lgas_sel_gdf = gpd.GeoDataFrame(lgas_sel_df, geometry="geometry", crs="EPSG:4326")
print(f"[INFO] {len(lgas_sel_gdf)} LGAs affected.")

## Select the patch and grid

In [None]:
# Patch to be processed.
# This patch has blank pixels in the first S2 image.
patch_name = "GRID22412"

# Fetch the successful prediction flood maps for this patch that fall inside
# the flood date range. Sorting by satellite (descending) and then by date is
# intended to list the S2 maps first.
query = " ".join([
    "SELECT DISTINCT satellite, date, data_path",
    "FROM inference",
    "WHERE patch_name = %s",
    "AND date >= %s",
    "AND date <= %s",
    "AND mode = %s",
    "AND status = %s",
    "ORDER BY satellite DESC, date ASC",
])
# Values bound to the %s placeholders, in order of appearance
data = [
    patch_name,
    flood_start_date,
    flood_end_date,
    'pred',
    1,
]
geojsons_df = db_conn.run_query(query, data, fetch=True)

# Report how many maps were found and display the table
num_files = len(geojsons_df)
print(f"[INFO] Found {num_files} flood maps in the database.")
geojsons_df

In [None]:
# Select the geometry of the patch
query = ("SELECT patch_name, ST_AsText(geometry) "
         "FROM grid_loc "
         "WHERE patch_name = %s;")
data = [patch_name]
grid_df = db_conn.run_query(query, data, fetch=True)
print(f"[INFO] Returned {len(grid_df)} rows.")

# Stop here if the patch is unknown; otherwise the failure only shows up later
# as a KeyError when the first row's geometry is read.
if len(grid_df) == 0:
    raise ValueError(f"[ERR] patch '{patch_name}' not found in the grid_loc table!")

# Format the results into a correct GeoDataFrame: parse the WKT text into a
# geometry column and drop the text column (new frame, no in-place mutation).
grid_df = (
    grid_df
    .assign(geometry=gpd.GeoSeries.from_wkt(grid_df["st_astext"]))
    .drop(columns=["st_astext"])
)
grid_gdf = gpd.GeoDataFrame(grid_df, geometry="geometry", crs="EPSG:4326")
grid_gdf.head(3)

In [None]:
# Build the affine transform for an npix x npix raster covering the patch
npix = 2500
geom = grid_gdf['geometry'].loc[0]

# The bounds tuple is read as (west, south, east, north)
west, south, east, north = geom.bounds

# Pixel size along each axis: patch extent divided by the number of pixels
xsize = abs(east - west) / npix
ysize = abs(north - south) / npix

# Anchor the transform at the upper-left (west, north) corner of the patch
transform = from_origin(west, north, xsize, ysize)

In [None]:
# Create a template raster file
# https://gis.stackexchange.com/questions/279953/numpy-array-to-gtiff-using-rasterio-without-source-raster
# Placeholder pixel values: random integers in [0, 5), stored as uint16
arr = np.random.randint(5, size=(npix, npix)).astype(np.uint16)

# Open the GeoTIFF in a `with` block so the dataset is closed even if the
# write raises; the previous explicit close() was skipped on error.
# NOTE(review): the file name is hard-coded and does not follow `patch_name`.
with rasterio.open('GRID22412_template.tif', 'w',
                   driver='GTiff',
                   height=arr.shape[0],
                   width=arr.shape[1],
                   count=1,
                   dtype=str(arr.dtype),
                   crs=CRS.from_epsg("4326"),
                   transform=transform) as new_dataset:
    # Write the array into band 1 (the only band)
    new_dataset.write(arr, 1)