In [1]:
import os

import numpy as np
import pandas as pd
# Google Cloud credentials file and project picked up by the GCS filesystem used below
# (presumably a requester-pays bucket, judging by the key file name - TODO confirm).
# setdefault keeps any value already exported in the environment, so the notebook also
# runs on machines where this user-specific absolute path does not exist.
os.environ.setdefault(
    "GOOGLE_APPLICATION_CREDENTIALS",
    "/home/gonzalo/git/ml4floods/notebooks/data/ml4cc-general-access_request_pays.json",
)
os.environ.setdefault("GS_USER_PROJECT", "ml4cc-general")

In [2]:
from ml4floods.data import utils

# Folder of the activation; the glob below expects one sub-folder per AoI inside it.
path_to_aois = "gs://ml4cc_data_lake/0_DEV/1_Staging/operational/EMSR570/"
fs = utils.get_filesystem(path_to_aois)

# Vectorised floodmaps laid out as <AoI>/WF2_unet_rbgiswirs_vec/<satellite>/<date>.geojson.
# The "gs://" scheme is prepended to every glob match (presumably the matches come back
# without it - TODO confirm).
vec_pattern = f"{path_to_aois}*/WF2_unet_rbgiswirs_vec/*/*.geojson"
geojsons = sorted(f"gs://{match}" for match in fs.glob(vec_pattern))
len(geojsons)

1125

In [49]:
# Compute pre-flood water
from shapely.ops import unary_union
import geopandas as gpd

# ISO (YYYY-MM-DD) dates delimiting the event. They are compared as plain strings, which
# orders them chronologically: floodmaps strictly before flooding_date_pre are pre-flood
# candidates and floodmaps from flooding_date_post onwards are post-flood.
flooding_date_pre = "2022-02-26"
flooding_date_post = "2022-03-31"

# Dates with at least one floodmap, taken from the file names (<date>.geojson). They are
# derived from `geojsons` directly so this cell does not depend on a variable that is only
# defined in a later cell.
unique_dates2 = sorted({os.path.splitext(os.path.basename(g))[0] for g in geojsons})

# Previously filtered with the undefined name `flooding_date`.
pre_flood_dates = [u for u in unique_dates2 if u < flooding_date_pre]

# The AoI name is the 4th path component from the end:
# .../<AoI>/WF2_unet_rbgiswirs_vec/<satellite>/<date>.geojson
aois = np.unique([g.split("/")[-4] for g in geojsons])


def compute_cloud_coverage(path_to_file):
    """
    Fraction of the imaged area of a vectorised floodmap that is labelled as cloud.

    Args:
        path_to_file: location of the floodmap GeoJSON, read with
            `utils.read_geojson_from_gcp`.

    Returns:
        float: summed area of the "cloud" polygons divided by the summed area of the
        "area_imaged" polygons. 1.0 is returned when the file has no imaged area, so
        that such a file is never preferred over one with valid data.
    """
    data = utils.read_geojson_from_gcp(path_to_file)

    # Areas are expressed in the units of the CRS of the file; only their ratio is used.
    area_total = data[data["class"] == "area_imaged"].geometry.area.sum()
    if not area_total > 0:
        # Covers both a zero and a NaN total: avoid a 0-division yielding NaN/inf.
        return 1.0

    area_clouds = data[data["class"] == "cloud"].geometry.area.sum()
    return float(area_clouds / area_total)


def compute_flood_water(floodmap_post_data, best_pre_flood_data):
    """
    Split the water of a post-flood floodmap into pre-flood and flood water.

    Args:
        floodmap_post_data: GeoDataFrame of the post-flood floodmap with a "class" column.
        best_pre_flood_data: GeoDataFrame of the reference pre-flood floodmap with a
            "class" column. It is not modified; a reprojected or copied frame is used.

    Returns:
        GeoDataFrame (in the CRS of `floodmap_post_data`) concatenating, with a fresh index:
          - the post-flood "water" polygons minus the pre-flood water, labelled
            "water-post-flood";
          - every non-"water" polygon of `floodmap_post_data`, unchanged;
          - every polygon of `best_pre_flood_data`, with its "water" class relabelled
            "water-pre-flood".
    """
    # Either branch yields a new frame, so the relabelling below never touches the caller's data.
    if floodmap_post_data.crs != best_pre_flood_data.crs:
        best_pre_flood_data = best_pre_flood_data.to_crs(floodmap_post_data.crs)
    else:
        best_pre_flood_data = best_pre_flood_data.copy()

    is_water_pre = best_pre_flood_data["class"] == "water"
    is_water_post = floodmap_post_data["class"] == "water"

    # Single geometry with all the pre-flood water, subtracted from each post-flood water polygon.
    pre_flood_water = unary_union(best_pre_flood_data[is_water_pre].geometry)
    geoms_flood = floodmap_post_data[is_water_post].geometry.difference(pre_flood_water)

    # A polygon fully covered by pre-flood water becomes an EMPTY geometry, which isna()
    # does not flag. Drop both missing and empty geometries so they are not written out.
    geoms_flood = geoms_flood[~(geoms_flood.isna() | geoms_flood.is_empty)]

    data_post_flood = gpd.GeoDataFrame(geometry=geoms_flood, crs=floodmap_post_data.crs)
    data_post_flood["class"] = "water-post-flood"

    best_pre_flood_data.loc[is_water_pre, "class"] = "water-pre-flood"

    # NOTE(review): all the classes of the pre-flood frame are appended, not only its
    # water - confirm this is intended.
    return pd.concat(
        [data_post_flood, floodmap_post_data[~is_water_post], best_pre_flood_data],
        ignore_index=True,
    )


for aoi in aois:
    aoi_tag = f"/{aoi}/"

    # Single pass over the floodmaps of this AoI: keep the pre-flood one with the lowest
    # cloud coverage (strictly below 1; the first one wins on ties) and collect every
    # post-flood one. Floodmaps dated between both thresholds are ignored.
    floodmaps_post_aoi = []
    best_floodmap_pre, cloud_cover = None, 1
    for g in geojsons:
        if aoi_tag not in g:
            continue

        # The file name (without extension) is the acquisition date
        date_iter = os.path.splitext(os.path.basename(g))[0]
        if date_iter < flooding_date_pre:
            cc_iter = compute_cloud_coverage(g)
            if cc_iter < cloud_cover:
                best_floodmap_pre, cloud_cover = g, cc_iter
        elif date_iter >= flooding_date_post:
            floodmaps_post_aoi.append(g)

    # Without a pre-flood reference there is nothing to subtract: skip the AoI
    if best_floodmap_pre is None:
        print(f"No pre-flood image found for aoi:{aoi}")
        continue

    # The pre-flood reference is read once and reused for every post-flood map of the AoI
    best_pre_flood_data = utils.read_geojson_from_gcp(best_floodmap_pre)

    for floodmap_post in floodmaps_post_aoi:
        # Results go to a sibling "..._vec_prepost" folder with the same file name
        filename_out = floodmap_post.replace("_vec/","_vec_prepost/")

        floodmap_post_data = utils.read_geojson_from_gcp(floodmap_post)
        floodmap_post_data_pre_post = compute_flood_water(floodmap_post_data, best_pre_flood_data)

        print(f"Saving {filename_out}")
        utils.write_geojson_to_gcp(filename_out, floodmap_post_data_pre_post)


Saving gs://ml4cc_data_lake/0_DEV/1_Staging/operational/EMSR570/AOI01/WF2_unet_rbgiswirs_vec_prepost/Landsat/2022-04-04.geojson
Saving gs://ml4cc_data_lake/0_DEV/1_Staging/operational/EMSR570/AOI01/WF2_unet_rbgiswirs_vec_prepost/Landsat/2022-04-12.geojson
Saving gs://ml4cc_data_lake/0_DEV/1_Staging/operational/EMSR570/AOI01/WF2_unet_rbgiswirs_vec_prepost/S2/2022-03-31.geojson
Saving gs://ml4cc_data_lake/0_DEV/1_Staging/operational/EMSR570/AOI01/WF2_unet_rbgiswirs_vec_prepost/S2/2022-04-05.geojson
Saving gs://ml4cc_data_lake/0_DEV/1_Staging/operational/EMSR570/AOI01/WF2_unet_rbgiswirs_vec_prepost/S2/2022-04-10.geojson
Saving gs://ml4cc_data_lake/0_DEV/1_Staging/operational/EMSR570/AOI01/WF2_unet_rbgiswirs_vec_prepost/S2/2022-04-15.geojson
Saving gs://ml4cc_data_lake/0_DEV/1_Staging/operational/EMSR570/AOI02/WF2_unet_rbgiswirs_vec_prepost/Landsat/2022-04-04.geojson
Saving gs://ml4cc_data_lake/0_DEV/1_Staging/operational/EMSR570/AOI02/WF2_unet_rbgiswirs_vec_prepost/S2/2022-03-31.geojson
S

In [55]:
import numpy as np
import os
import pandas as pd

# List the pre/post-flood products. Note that `geojsons` is rebound here: from this cell
# on it refers to the "_vec_prepost" files, not to the original "_vec" ones.
geojsons = sorted(f"gs://{f}" for f in fs.glob(f"{path_to_aois}*/WF2_unet_rbgiswirs_vec_prepost/*/*.geojson"))
print(len(geojsons))
os.makedirs("daily_EMSR570_prepost", exist_ok=True)

# File names (<date>.geojson) with at least one product in any AoI
unique_dates = np.unique([os.path.basename(f) for f in geojsons])

for date in unique_dates:
    files_date = [f for f in geojsons if os.path.basename(f) == date]

    # There can be several geojsons of the same date and AoI (one per satellite folder).
    # Only one is kept per AoI: the first found walking the sorted list backwards.
    file_save = []
    aois_processed = set()
    for f in reversed(files_date):
        aoi = f.split("/")[-4]
        if aoi in aois_processed:
            continue
        # Previously the AoI was never recorded, so the check above never skipped anything.
        aois_processed.add(aoi)

        data = utils.read_geojson_from_gcp(f).to_crs("EPSG:7844")
        # Rows without geometry are dropped before merging
        data = data[~data.geometry.isna()].copy()

        file_save.append(data)

    # One shapefile per date merging all the AoIs
    file_save = pd.concat(file_save,ignore_index=True)
    name_file = date.replace("geojson","shp")
    print(f"Saving {name_file}")
    file_save.to_file(f"daily_EMSR570_prepost/{name_file}")

432
Saving 2022-03-31.shp
Saving 2022-04-03.shp
Saving 2022-04-04.shp
Saving 2022-04-05.shp
Saving 2022-04-09.shp
Saving 2022-04-10.shp
Saving 2022-04-11.shp
Saving 2022-04-12.shp
Saving 2022-04-13.shp
Saving 2022-04-14.shp
Saving 2022-04-15.shp
Saving 2022-04-19.shp
Saving 2022-04-20.shp


In [54]:
# Number of rows without geometry in `data`, i.e. the last floodmap bound to that name
# by the export loop (this cell was executed out of order).
data.geometry.isna().sum()

9