# Demo - Generating Groundtruth from S2 images and FloodMaps

In [1]:
import sys, os
from pyprojroot import here

# spyder up to find the root
root = here(project_files=[".here"])

# append to path
sys.path.append(str(here()))

import logging
import json
from src.data.create_gt import (
    generate_water_cloud_binary_gt,
    generate_land_water_cloud_gt,
)
import pandas as pd
import numpy as np
from rasterio import features
import rasterio
import geopandas as gpd
import os
from shapely.ops import cascaded_union
from src.data.utils import filter_pols, filter_land
from typing import Optional, Dict, Tuple
from src.data.config import BANDS_S2, CODES_FLOODMAP, UNOSAT_CLASS_TO_TXT

from google.cloud import storage
import json
import rasterio.windows


from rasterio.plot import show as rasterio_plot
import matplotlib.pyplot as plt

%load_ext autoreload
%autoreload 2

## PseudoCode

1. Get the Name of the Files in Directory
2. Loop through the files
    * Get the S2 Image
    * Get the floodmap
    * Get the metadata
3. Generate the FloodMap
    * WorldFloods 1.1 - 3-class
    * WorldFloods 2.0 - dual channel, binary (cloud, water)
4. Save them in the bucket
    * declare ML stuffs (train,test,val)
    * save the S2 Image
    * save the groundtruth mask
    * save the floodmap
    * save the meta


## 1 - Get the Name of the Files in Directory

In [2]:
from src.data.utils import get_files_in_bucket_directory, add_gcp_prefix, parse_gcp_path, copy_file_between_gcpbuckets
from collections import namedtuple
from pathlib import Path

In [3]:
path_loop = namedtuple("path_loop", ["bucket_id", "parent_path", "ml_path", "file_type"])


def create_full_parent_path(path_loop, gcp_prefix: bool=False):
    
    full_path = Path(path_loop.parent_path).joinpath(path_loop.ml_path).joinpath(path_loop.file_type)
    
    if gcp_prefix:
        full_path = add_gcp_prefix(full_path, path_loop.bucket_id)
    
    return str(full_path)

In [4]:
ml_paths = ["test", "train", "val"]

demo_path = path_loop("ml4floods", "worldfloods/public", "test", "S2")

full_parent_path = create_full_parent_path(demo_path, True)
print(full_parent_path)

gs://ml4floods/worldfloods/public/test/S2


In [5]:
from pprint import pprint

files_in_bucket = get_files_in_bucket_directory("ml4floods", directory="worldfloods/public/test/S2", suffix=".tif")

pprint(files_in_bucket)

['worldfloods/public/test/S2/EMSR286_08ITUANGONORTH_DEL_MONIT02_v1_observed_event_a.tif',
 'worldfloods/public/test/S2/EMSR286_09ITUANGOSOUTH_DEL_MONIT02_v1_observed_event_a.tif',
 'worldfloods/public/test/S2/EMSR333_01RATTALORO_DEL_MONIT01_v1_observed_event_a.tif',
 'worldfloods/public/test/S2/EMSR333_02PORTOPALO_DEL_MONIT01_v1_observed_event_a.tif',
 'worldfloods/public/test/S2/EMSR333_13TORRECOLONNASPERONE_DEL_MONIT01_v2_observed_event_a.tif',
 'worldfloods/public/test/S2/EMSR342_06NORTHNORMANTON_DEL_v1_observed_event_a.tif',
 'worldfloods/public/test/S2/EMSR342_07SOUTHNORMANTON_DEL_MONIT03_v2_observed_event_a.tif',
 'worldfloods/public/test/S2/EMSR347_06MWANZA_DEL_v1_observed_event_a.tif',
 'worldfloods/public/test/S2/EMSR347_07ZOMBA_DEL_MONIT01_v1_observed_event_a.tif',
 'worldfloods/public/test/S2/EMSR347_07ZOMBA_DEL_v2_observed_event_a.tif',
 'worldfloods/public/test/S2/EMSR9284_01YLITORNIONORTHERN_DEL_MONIT01_v1_observed_event_a.tif']


# 0.1 - Plumbing

In [6]:
!gsutil ls gs://ml4floods/worldfloods/public/test/S2/EMSR286_08ITUANGONORTH_DEL_MONIT02_v1_observed_event_a.tif
!gsutil ls gs://ml4floods/worldfloods/public/test/floodmaps/EMSR286_08ITUANGONORTH_DEL_MONIT02_v1_observed_event_a.shp
!gsutil ls gs://ml4floods/worldfloods/public/test/meta/EMSR286_08ITUANGONORTH_DEL_MONIT02_v1_observed_event_a.json

gs://ml4floods/worldfloods/public/test/S2/EMSR286_08ITUANGONORTH_DEL_MONIT02_v1_observed_event_a.tif
gs://ml4floods/worldfloods/public/test/floodmaps/EMSR286_08ITUANGONORTH_DEL_MONIT02_v1_observed_event_a.shp
gs://ml4floods/worldfloods/public/test/meta/EMSR286_08ITUANGONORTH_DEL_MONIT02_v1_observed_event_a.json


In [None]:
# from dataclasses import dataclass, field

# def parse_gcp_path(full_path) -> Tuple[str]:
#     """Parse the bucket"""
#     # parse the components
#     bucket_id = str(Path(full_path.split("gs://")[1]).parts[0])
#     file_path = str(Path(full_path.split(bucket_id)[1]).parent)
#     file_name = str(Path(full_path).name)
#     return bucket_id, file_path, file_name

# @dataclass
# class GCPPath:
#     full_path: str
#     bucket_id : str = field(default=None)
#     parent_path : str = field(default=None)
#     file_name : str = field(default=None)
#     suffix : str = field(default=None)
    
        
#     def __init__(self, full_path):
        
        
#         self.full_path = full_path
# #         print(self.full_path)
#         self.bucket_id = str(Path(full_path.split("gs://")[1]).parts[0])
        
# #         print(self.bucket_id)
#         self.parent_path = str(Path(full_path.split(self.bucket_id)[1]).parent)[1:]
# #         print(self.parent_path)
#         self.file_name = str(Path(full_path).name)
# #         print(self.file_name)
#         self.suffix = self.file_name.split(".")[1]
# #         print(self.suffix)
        

        
#     def get_files_in_parent_directory(self,**kwargs):
#         # initialize client
#         client = storage.Client(**kwargs)
#         # get bucket
#         bucket = client.get_bucket(self.bucket_id)
#         # get blob
        
#         blobs = bucket.list_blobs(prefix=self.parent_path)
#         # check if it exists
        
#         files = ["gs://" + str(Path(self.bucket_id).joinpath(x.name)) for x in blobs ]
#         return files
#     def get_files_in_parent_directory_with_suffix(self, suffix=str, **kwargs):
#         # initialize client
#         client = storage.Client(**kwargs)
#         # get bucket
#         bucket = client.get_bucket(self.bucket_id)
#         # get blob
        
#         blobs = bucket.list_blobs(prefix=self.parent_path)
#         # check if it exists
#         files = ["gs://" + str(Path(self.bucket_id).joinpath(x.name)) for x in blobs if str(Path(x.name).suffix) == suffix]
#         return files
     
#     def transfer_file_to_bucket(self, destination_bucket_name: str, destination_file_path: str, **kwargs):
        
        
#         storage_client = storage.Client(**kwargs)
#         source_bucket = storage_client.get_bucket(self.bucket_id)
#         source_blob = source_bucket.blob(self.get_file_path())
        
#         destination_bucket = storage_client.get_bucket(destination_bucket_name)
        
#         destination_blob_name = str(Path(destination_file_path).joinpath(self.file_name))
#         # copy to new destination
#         new_blob = source_bucket.copy_blob(
#             source_blob, destination_bucket, destination_blob_name
#         )

        
#         return self
        
    
# #         return get_files_in_bucket_directory(self.bucket_id, directory=self.parent_path, suffix=self.suffix)
    
#     def get_file_path(self):
#         return str(Path(self.parent_path).joinpath(self.file_name))
    
#     def replace_bucket(self, bucket_id):
#         self.bucket_id = bucket_id
#         return self
    
#     def replace_file_name(self, file_name):
#         self.file_name = file_name
#         return self
    
#     def replace(self, original: str, replacement: str):
        
#         full_path =  self.full_path.replace(original, replacement)
        
# #         self.__init__(full_path)
        
#         return GCPPath(full_path)

In [None]:
save_file_to_bucket()

In [257]:
# from typing import Optional, Dict



# def save_groundtruth_tiff_rasterio(image_gt: np.ndarray, destination_path: str, gt_meta: Optional[Dict]=None, **kwargs)-> None:
#     """Save image as tiff with rasterio
    
#     Args:
#         image (np.ndarray): image to be saved
#             image should be of size (n_channels, height, width)
#         destination_path (str): path where the image is saved
#         gt_meta Optional[Dict]: a dictionary of extra metadata to be saved in 
#             the tags
#         **kwargs
    
#     Examples:
#         >>> destination_path = "./temp.tif"
#         >>> # path to image for coordinates
#         >>> with rasterio.open(s2_image_path) as src_s2:
#         >>>    crs = src_s2.crs
#         >>>    transform = src_s2.transform
#         >>> save_groundtruth_tiff_rasterio(
#             gt, destination_path,
#             transform=transform, crs=crs
#         )
#     """
    
#     # get image channels
#     if image_gt.ndim != 3:
#         image_gt = image_gt[None, ...]
        
#     n_channels, height, width = image_gt.shape
    
#     print(image_gt.shape)

#     # write the image
#     with rasterio.open(
#                 destination_path, 
#                 'w', 
#                 driver='COG', 
#                 height=height, 
#                 width=width,
#                 count=n_channels,
#                 dtype=image_gt.dtype, 
#                 **kwargs) as dst:
        
#         if gt_meta is not None:
#             dst.update_tags(gt_meta=gt_meta)
#         dst.write(image_gt)
#     return None

In [225]:
from rasterio.crs import CRS
from affine import Affine




In [278]:
import tqdm
from src.data.utils import save_file_to_bucket

# looping through the ML parts
ml_paths = [
    "test", 
#     "train", 
    "val"]

bucket_id = "ml4floods"
destination_bucket_id = "ml4cc_data_lake"

parent_path = "worldfloods/public"
destination_parent_path = "0_DEV/2_Mart/worldfloods_v1_1"
temp_file = "./temp_image.tif"


demo_image = "gs://ml4floods/worldfloods/public/test/S2/EMSR286_08ITUANGONORTH_DEL_MONIT02_v1_observed_event_a.tif"





for ipath in ml_paths:
    # want the appropate ml path
    demo_image_gcp = GCPPath(demo_image)
    
    # ensure path name is the same as ipath for the loooop
    demo_image_gcp = demo_image_gcp.replace("test", ipath)

    # get all files in the parent directory
    files_in_bucket = demo_image_gcp.get_files_in_parent_directory_with_suffix(".tif")

    
    
    # loop through files in the bucket
    with tqdm.tqdm(files_in_bucket[:3]) as pbar: 
        for s2_image_path in pbar:
            
            
            s2_image_path = GCPPath(s2_image_path)

            
            # create floodmap path
            floodmap_path = demo_image_gcp.replace("/S2/", "/floodmaps/")
            floodmap_path = floodmap_path.replace(".tif", ".shp")
            
            # create meta path
            meta_path = demo_image_gcp.replace("/S2/", "/meta/")
            meta_path = meta_path.replace(".tif", ".json")
            
            # ==============================
            # Generate GT Image
            # ==============================
            # generate gt and gt meta
            gt, gt_meta = generate_land_water_cloud_gt(s2_image_path.full_path, floodmap_path.full_path, keep_streams=True)
            

            # ==============================
            # SAVE GT Image
            # ==============================
            pbar.set_description("Saving GT data...")


            # replace bucket path

            
            # ==============================
            # SAVE S2 Image
            # ==============================
            pbar.set_description("Saving S2 image...")
            # get parent path name
            s2_image_parent_destination = Path(destination_parent_path).joinpath(ipath).joinpath("S2")
            s2_image_path.transfer_file_to_bucket(destination_bucket_id, s2_image_parent_destination)
            
            
            # ==============================
            # SAVE Meta Data
            # ==============================
            pbar.set_description("Saving meta data...")
            # get parent path name
            meta_parent_destination = Path(destination_parent_path).joinpath(ipath).joinpath("meta")
            meta_path.transfer_file_to_bucket(destination_bucket_id, meta_parent_destination)
            
            
            # ==============================
            # SAVE FloodMap Data
            # ==============================
            pbar.set_description("Saving meta data...")
            # get parent path name
            floodmap_parent_destination = Path(destination_parent_path).joinpath(ipath).joinpath("floodmap")
            floodmap_path.transfer_file_to_bucket(destination_bucket_id, floodmap_parent_destination)
            
            # ==============================
            # SAVE GT Data
            # ==============================
            pbar.set_description("Saving GT data...")
            
            # replace parent path
            gt_path = demo_image_gcp.replace(bucket_id, destination_bucket_id)
            gt_path = gt_path.replace("/S2/", "/gt/")
            gt_path = gt_path.replace(parent_path, destination_parent_path)


            # save ground truth
            save_groundtruth_tiff_rasterio(
                gt, f"./{gt_path.file_name}", gt_meta=gt_meta, 
                crs=gt_meta["crs"], transform=gt_meta["transform"]
            )
            save_file_to_bucket(gt_path.full_path, f"./{gt_path.file_name}")
            Path(f"./{gt_path.file_name}").unlink()

            break
            
    break


            
            
#             # ==============================
#             # Generate GT Image
#             # ==============================
#             # generate gt and gt meta
#             gt, gt_meta = generate_land_water_cloud_gt(s2_image_path, floodmap_path, keep_streams=True)
            
#             # get save path
#             gt_path = s2_image_path.replace("/S2/", "/gt/").replace(parent_path, destination_parent_path).replace(bucket_id, destination_bucket_id)
            
#             # get necessary geocoordinates
#             crs, transform = _get_image_geocoords(s2_image_path)
            
            
#             # ============================
#             # SAVE ALL THE THINGS
#             # ============================
            
#             # S2 Image
            
#             # 1 - remove gcp path
#             s2_image_path_destination = remove_gcp_prefix(s2_image_path, True)
#             # 2 - replace parent path
#             s2_image_path_destination = s2_image_path_destination.replace(parent_path, destination_parent_path)
            
#             copy_file_between_gcpbuckets(s2_image_path)

            
#             # save gt image locally
#             save_groundtruth_tiff_rasterio(gt, temp_file, crs=crs, transform=transform)
            
#             # upload to the bucket
#             save_file_to_bucket(gt_path, temp_file)
            
#             # delete localfile
#             Path(temp_file).unlink()
            
#             # save gt meta
# #             save_file(gt, gt_path, crs=crs, transform=transform)
            
#         break


  0%|          | 0/3 [00:00<?, ?it/s]

gs://ml4floods/worldfloods/public/test/S2/EMSR286_08ITUANGONORTH_DEL_MONIT02_v1_observed_event_a.tif
gs://ml4floods/worldfloods/public/test/floodmaps/EMSR286_08ITUANGONORTH_DEL_MONIT02_v1_observed_event_a.tif
gs://ml4floods/worldfloods/public/test/floodmaps/EMSR286_08ITUANGONORTH_DEL_MONIT02_v1_observed_event_a.shp
gs://ml4floods/worldfloods/public/test/meta/EMSR286_08ITUANGONORTH_DEL_MONIT02_v1_observed_event_a.tif
gs://ml4floods/worldfloods/public/test/meta/EMSR286_08ITUANGONORTH_DEL_MONIT02_v1_observed_event_a.json


Saving S2 image...:   0%|          | 0/3 [01:24<?, ?it/s]

0_DEV/2_Mart/worldfloods_v1_1/test/S2/EMSR286_08ITUANGONORTH_DEL_MONIT02_v1_observed_event_a.tif


Saving meta data...:   0%|          | 0/3 [01:27<?, ?it/s]

0_DEV/2_Mart/worldfloods_v1_1/test/meta/EMSR286_08ITUANGONORTH_DEL_MONIT02_v1_observed_event_a.json


Saving meta data...:   0%|          | 0/3 [01:28<?, ?it/s]

0_DEV/2_Mart/worldfloods_v1_1/test/floodmap/EMSR286_08ITUANGONORTH_DEL_MONIT02_v1_observed_event_a.shp


Saving GT data...:   0%|          | 0/3 [01:28<?, ?it/s]  

gs://ml4cc_data_lake/worldfloods/public/test/S2/EMSR286_08ITUANGONORTH_DEL_MONIT02_v1_observed_event_a.tif
gs://ml4cc_data_lake/worldfloods/public/test/gt/EMSR286_08ITUANGONORTH_DEL_MONIT02_v1_observed_event_a.tif
gs://ml4cc_data_lake/0_DEV/2_Mart/worldfloods_v1_1/test/gt/EMSR286_08ITUANGONORTH_DEL_MONIT02_v1_observed_event_a.tif
(1, 2643, 2170)


Saving GT data...:   0%|          | 0/3 [01:29<?, ?it/s]


In [277]:
# 


gs://ml4cc_data_lake/worldfloods/public/test/S2/EMSR286_08ITUANGONORTH_DEL_MONIT02_v1_observed_event_a.tif
gs://ml4cc_data_lake/worldfloods/public/test/gt/EMSR286_08ITUANGONORTH_DEL_MONIT02_v1_observed_event_a.tif
gs://ml4cc_data_lake/0_DEV/2_Mart/worldfloods_v1_1/test/gt/EMSR286_08ITUANGONORTH_DEL_MONIT02_v1_observed_event_a.tif
(1, 2643, 2170)
