## Shoreline extraction from Landsat MNDWI images for Vietnam (1986-2021)

This notebook extracts subpixel contours from MNDWI images, which have been processing on  Google Earth Engine (see: https://code.earthengine.google.com/9d450b3d0d9840a352cbd3ba6ddb6ca5) and quantifies coastline change along shore-perpendicular transects.

Content of the notebook:

1. Setup
2. Download MNDWI rasters from Google Drive
3. Reproject and quality check MNDWI rasters
4. Subpixel contours
5. Create minimum water extent polygon
6. Create transects
7. Calculate intersections between shorelines and transects
8. Coastline Change Analysis


### 1.| Setup

In [2]:
# Libraries
import os
import glob 
import numpy as np 
import shapely as shp
import pandas as pd 
import geopandas as gpd 
import rasterio as rio 
import matplotlib.pyplot as plt
from scipy import stats 
from skimage.filters import threshold_otsu
# my coastline methods 
from coasty import postprocess, analysis 

In [3]:
# Directories
data_dir = os.path.join(os.getcwd(),"data") # path to data-folder with aux data
plot_dir = os.path.join(os.getcwd(),"figures/plots")
proc_tiles_path = os.path.join(os.path.join(data_dir,"VN_processing_polygons")) # path to processing tiles
country_bounds_path = os.path.join(data_dir,"VN_country_bounds_gov") # path to country bounds
transects_path = os.path.join(data_dir,"VN_transects_gov_2km_200m")
osm_sl_path = os.path.join(os.path.join(data_dir,"VN_osm_coastline")) # path to reference shoreline
buffer_path = os.path.join(os.path.join(data_dir,"VN_buffer_5km"))

# Params
export_folder = "GEE_vietnam"       # folder on Google Drive with GEE images to download
crs = "EPSG:3857"                   # coordinate system code of a projected crs 
min_length = 3000                   # min length of shoreline to keep [m]
buffer_dist = 5000                  # buffer around reference shorelines to clip detected shorelines [m]
transect_len = 3000                 # length of transects [m]
transect_dist = 200                 # distance between transects [m]
transect_min_line_length = 10000    # min legnth of polygon outline at which to draw transects [m]
                                    # (for removing small islands) 

In [4]:
# read/ create aux data 
proc_tiles = gpd.read_file(proc_tiles_path).to_crs(crs)
#country_bounds = gpd.read_file(country_bounds_path).to_crs(crs)
osm_sl = gpd.read_file(osm_sl_path).to_crs(crs)

try:
    buffer = gpd.read_file(buffer_path)
    print("Everything successfully read.")
except:
    print("Create osm shoreline buffer:")
    buffer = osm_sl.buffer(buffer_dist)
    buffer.to_file(buffer_path,driver="GeoJSON")
    print("Buffer saved.")

Everything successfully read.


### 2.| Download MNDWI rasters from Google Drive

In [18]:
# Loop through processing tiles and save in separate folers 
for i in proc_tiles.id:
    tile_name = "P"+str(i).zfill(2) # name of processing tile
    folder_path = os.path.join(data_dir,tile_name) # path to save the rasters
    if not os.path.exists(folder_path):
        os.mkdir(folder_path)
    # download images of current tile form Drive 
    #postprocess.download_from_drive(export_folder,folder_path,tile_name) 
    # move downloaded images to out_path folder 
    for raster in os.listdir(os.getcwd()):
        if raster.endswith(tile_name+"*.tif"):
            os.replace(os.path.join(os.getcwd(),raster),os.path.join(folder_path,raster))
    print('Files moved to: data/',tile_name)

Files moved to: data/ P00
Files moved to: data/ P01
Files moved to: data/ P02
Files moved to: data/ P03
Files moved to: data/ P04
Files moved to: data/ P05
Files moved to: data/ P06
Files moved to: data/ P07
Files moved to: data/ P08
Files moved to: data/ P09
Files moved to: data/ P10


### 3.| Reproject and quality check MNDWI rasters

In [23]:
for i in proc_tiles.id[1:]:
    tile_name = "P"+str(i).zfill(2)
    print(("--")*10,"Treating",tile_name,("--")*10)
    folder_path = os.path.join(data_dir,tile_name)
    if os.path.exists(folder_path):
        raster_paths = glob.glob(os.path.join(data_dir,tile_name,tile_name+"_*.tif"))
        raster_paths.sort()
        for r in raster_paths:
            postprocess.reproject_raster(r,r,crs)
            try:
                postprocess.mask_single_observation_pixel(r)
                print(os.path.basename(r),'masked.')
            except:
                print(os.path.basename(r),'could not be masked')
                pass
    else:
        print(folder_path,"does not exist.")    

q.tif saved.
P05_1996.tif masked.
P05_1997.tif reprojected.
P05_1997.tif masked.
P05_1997_11avg_aq.tif saved.
P05_1997.tif masked.
P05_1998.tif reprojected.
P05_1998.tif masked.
P05_1998_11avg_aq.tif saved.
P05_1998.tif masked.
P05_1999.tif reprojected.
P05_1999.tif masked.
P05_1999_15avg_aq.tif saved.
P05_1999.tif masked.
P05_2000.tif reprojected.
P05_2000.tif masked.
P05_2000_14avg_aq.tif saved.
P05_2000.tif masked.
P05_2001.tif reprojected.
P05_2001.tif masked.
P05_2001_13avg_aq.tif saved.
P05_2001.tif masked.
P05_2002.tif reprojected.
P05_2002.tif masked.
P05_2002_11avg_aq.tif saved.
P05_2002.tif masked.
P05_2003.tif reprojected.
P05_2003.tif masked.
P05_2003_07avg_aq.tif saved.
P05_2003.tif masked.
P05_2004.tif reprojected.
P05_2004.tif masked.
P05_2004_19avg_aq.tif saved.
P05_2004.tif masked.
P05_2005.tif reprojected.
P05_2005.tif masked.
P05_2005_13avg_aq.tif saved.
P05_2005.tif masked.
P05_2006.tif reprojected.
P05_2006.tif masked.
P05_2006_16avg_aq.tif saved.
P05_2006.tif mask

### 4.| Subpixel contours

In [40]:
%%time 
#runtime: 10h 33min (with binary images)
for i in proc_tiles.id[2:]:
    tile_name = "P"+str(i).zfill(2)
    print(("--")*10,"Treating",tile_name,("--")*10)
    folder_path = os.path.join(data_dir,tile_name)

    #  Clip osm shoreline buffer to processing tile 
    buffer_clip_path = os.path.join(data_dir,tile_name,tile_name+"_buffer")
    if not os.path.exists(buffer_clip_path):
        buffer_clip = gpd.clip(buffer,proc_tiles[proc_tiles.index == i])
        buffer_clip.to_file(buffer_clip_path,driver="GeoJSON")
        print("Buffer has been saved.")
    else: 
        print("Buffer exists and has been loaded.")
        buffer_clip = gpd.read_file(buffer_clip_path)
    
    # Create shorelines
    shorelines_path = os.path.join(folder_path,tile_name+"_shorelines") 
    if not os.path.exists(shorelines_path):
        shorelines = []
        print("Process shorelines...")    
        raster_paths = glob.glob(os.path.join(data_dir,tile_name,"*aq.tif"))
        raster_paths.sort()
        for r in raster_paths:
            # create path for single shorelines 
            sl_folder_path = os.path.join(folder_path,tile_name+"_single_shorelines")
            sl_path = os.path.join(sl_folder_path,os.path.splitext(os.path.basename(r))[0]+"_shoreline")
            # save single shoreline without modifications as backup
            if not os.path.exists(sl_path):
                with rio.open(r,"r") as raster:
                    mndwi = raster.read(1)
                    if np.count_nonzero(mndwi) > 0 and np.count_nonzero(~np.isnan(mndwi)) > 0:                    
                        thres = threshold_otsu(mndwi[~np.isnan(mndwi)])
                        print(thres)
                        shoreline = postprocess.subpixel_contours(r,thres)
                        if not shoreline.empty:
                            if not os.path.exists(sl_folder_path): os.mkdir(sl_folder_path)
                            shoreline.to_file(os.path.join(sl_path),driver="GeoJSON")
            else:
                shoreline = gpd.read_file(sl_path)
            # postprocess raw shorelines
            shoreline = gpd.clip(shoreline,buffer_clip)
            cleaned = postprocess.remove_small_lines(shoreline, min_size=min_length)
            if not cleaned.empty:
                year = os.path.basename(r)[4:8]
                avg_aq = os.path.basename(r)[9:11]
                cleaned['id']=year
                cleaned = cleaned.dissolve(by=cleaned.id,aggfunc="sum")
                cleaned['year']=year
                cleaned['avg_aq']=avg_aq
                cleaned['otsu_thres']=str(thres)
                cleaned['proc_tile']=tile_name                        
                shorelines.append(cleaned)
                print(year+": shoreline processed.")
        shorelines_gdf = pd.concat(shorelines,ignore_index=True)    
        shorelines_gdf.to_file(os.path.join(shorelines_path),driver="GeoJSON")
        print("All shorelines have been created and saved.")
    else:
        print("Shorelines already exist.")

-------------------- Treating P02 --------------------
Buffer exists and has been loaded.
Process shorelines...
0.2645921
1988: shoreline processed.
0.2952692
1989: shoreline processed.
0.3003744
1990: shoreline processed.
0.27366972
1991: shoreline processed.
0.3121435
1992: shoreline processed.
0.2712528
1993: shoreline processed.
0.31029844
1994: shoreline processed.
0.2761091
1995: shoreline processed.
0.29218987
1996: shoreline processed.
0.27496082
1997: shoreline processed.
0.25287065
1998: shoreline processed.
0.28084764
1999: shoreline processed.
0.26580787
2000: shoreline processed.
0.2555839
2001: shoreline processed.
0.2268727
2002: shoreline processed.
0.23895589
2003: shoreline processed.
0.26517364
2004: shoreline processed.
0.23725927
2005: shoreline processed.
0.24913274
2006: shoreline processed.
0.23216835
2007: shoreline processed.
0.24991596
2008: shoreline processed.
0.2578678
2009: shoreline processed.
0.23982912
2010: shoreline processed.
0.21854526
2011: shorel

In [None]:
# Merge all shorelines to one GeoPackage
all_shorelines_file = os.path.join("VN_all_shorelines")
if not os.path.exists(all_shorelines_file):
    print("Merge all shorelines")
    all_shorelines = []
    for i in proc_tiles.id:
        tile_name = "P"+str(i).zfill(2)
        folder_path = os.path.join(data_dir,tile_name)
        shorelines_path = os.path.join(folder_path,tile_name+"_shorelines")
        print(shorelines_path)
        shorelines = gpd.read_file(shorelines_path)
        all_shorelines.append(shorelines)
    all_shorelines_gdf = pd.concat(all_shorelines, ignore_index=True)
    all_shorelines_gdf.to_file(all_shorelines_file),driver="GPKG")
    print("All shorelines merged and saved.")
else:
    all_shorelines = gpd.read_file(all_shorelines_file)
    # add a dissolve or merge function year to have only one multiline per year with mean Otsus and sum of length :) 

### 5.| Create minimum water extent polygon

Generate minimum and maximum water extent raster for each processing tile

In [7]:
# Calculate raster with min and max water extent 
for i in proc_tiles.id:
    tile_name = "P"+str(i).zfill(2)
    print(("--")*10,"Treating",tile_name,("--")*10)
    folder_path = os.path.join(data_dir,tile_name)

    if os.path.exists(folder_path):
        raster_paths = glob.glob(os.path.join(folder_path,"*aq.tif"))
        # make MNDWI images binary first
        # (this step should later be included to the shoreline extraction script, where the Otsu is already being calculated) 
        for r in raster_paths:
            # create binary raster using the Otsu threshold for min water raster                
            binary_file = os.path.join(folder_path,os.path.splitext(os.path.basename(r))[0]+"_bin.tif")
            if not os.path.exists(binary_file):
                with rio.open(r,"r") as raster:
                    mndwi = raster.read(1)
                    if np.count_nonzero(mndwi) > 0 and np.count_nonzero(~np.isnan(mndwi)) > 0:                    
                        meta = raster.meta
                        thres = threshold_otsu(mndwi[~np.isnan(mndwi)])
                        binary = mndwi.copy()
                        binary[binary > thres] = 1
                        binary[binary < thres] = 0
                        meta.update({
                            "compress":"LZW",
                            })
                        with rio.open(binary_file,'w',**meta) as dst:
                            dst.write(binary,1)
                        print(binary_file, "saved.")
            else:
                print(binary_file, "exists.")
        binary_paths = glob.glob(os.path.join(folder_path,"*aq_bin.tif"))
        min_water_file = os.path.join(data_dir,tile_name,tile_name+"_min_water_extent")
        max_water_file = os.path.join(data_dir,tile_name,tile_name+"_max_water_extent")
        if not os.path.exists(min_water_file):
            analysis.calc_water_extent(binary_paths,min_water_file,max_water_file)
        else:
            print("Files exist.")

ents/Master_thesis/Code/VN_coastline_dynamics/data/P08/P08_2017_17avg_aq_bin.tif
Eating file: /home/ronja/Documents/Master_thesis/Code/VN_coastline_dynamics/data/P08/P08_2018_19avg_aq_bin.tif
Eating file: /home/ronja/Documents/Master_thesis/Code/VN_coastline_dynamics/data/P08/P08_1989_08avg_aq_bin.tif
Eating file: /home/ronja/Documents/Master_thesis/Code/VN_coastline_dynamics/data/P08/P08_1990_04avg_aq_bin.tif
Eating file: /home/ronja/Documents/Master_thesis/Code/VN_coastline_dynamics/data/P08/P08_2003_08avg_aq_bin.tif
Eating file: /home/ronja/Documents/Master_thesis/Code/VN_coastline_dynamics/data/P08/P08_2016_14avg_aq_bin.tif
Eating file: /home/ronja/Documents/Master_thesis/Code/VN_coastline_dynamics/data/P08/P08_2020_21avg_aq_bin.tif
Eating file: /home/ronja/Documents/Master_thesis/Code/VN_coastline_dynamics/data/P08/P08_1995_05avg_aq_bin.tif
Eating file: /home/ronja/Documents/Master_thesis/Code/VN_coastline_dynamics/data/P08/P08_1997_07avg_aq_bin.tif
Eating file: /home/ronja/Docume

Generalize, vectorize and merge minimum water extent rasters and create transects

In [15]:
%%time
#remove small pixel cluster in min and max water extent rasters and merge rasters of all tiles
all_min_water_polys_file = os.path.join(data_dir,"VN_min_water_extent")
if not os.path.exists(all_min_water_polys_file):
    min_water_polys = []
    for i in proc_tiles.id:
        tile_name = "P"+str(i).zfill(2)
        print(("--")*10,"Treating",tile_name,("--")*10)
        folder_path = os.path.join(data_dir,tile_name)
        # define path for generalized min water extent raster 
        min_water_simple_file = os.path.join(data_dir,tile_name,tile_name+"_min_water_extent_simple")
        if not os.path.exists(min_water_simple_file+"_poly"):
            try:
                # read min water extent file
                min_water_file = os.path.join(folder_path,tile_name+"_min_water_extent")
            except FileNotFoundError:
                print('File does not exist.')
            else:
                # remove small objects from raster
                analysis.remove_pixel_cluster(min_water_file,min_water_simple_file,50000,100000,0)
                print("Pixel cluster removed.")
                # vectorize raster 
                min_water_poly = analysis.vectorize_raster(min_water_simple_file,0)
                if not min_water_poly.empty:
                    min_water_polys.append(min_water_poly)
                    min_water_poly.to_file(min_water_simple_file+"_poly",driver="GeoJSON")
                    print("Minimum water extent polygon created.\n")
        else:
            print("Minimum water extent polygon already exists.\n")
            min_water_poly = gpd.read_file(os.path.join(folder_path,tile_name+"_min_water_extent_simple_poly"))
            min_water_polys.append(min_water_poly)
    # Concatenate all polygons
    min_water_polys_gdf = pd.concat(min_water_polys,ignore_index=True)
    # Dissolve overlapping polygons
    geoms = min_water_polys_gdf.geometry.unary_union
    min_water_polys_gdf = gpd.GeoDataFrame(geometry=[geoms],crs=crs)
    min_water_polys_gdf = min_water_polys_gdf.explode().reset_index(drop=True)
    min_water_polys_gdf.to_file(all_min_water_polys_file,driver="GeoJSON")
    print("Minimum water extent raster for Vietnam has been saved.")
else:
    min_water_polys_gdf = gpd.read_file(all_min_water_polys_file)
    print("Minimum water extent raster for Vietnam exists and has been loaded.")


Minimum water extent raster for Vietnam exists and has been loaded.
CPU times: user 1.41 s, sys: 16.3 ms, total: 1.43 s
Wall time: 1.42 s


### 6.| Create transects

In [4]:
try:
    transects = gpd.read_file(transects_path)
    print("Transects exist and have been loaded.")
except FileNotFoundError:
    # offical country bounds by Vietnamese government from 2020
    # visual inspection and comparison with shorelines showed best suitability for coastline change quantificaion 
    # will be perepared for transect generation here...
    country_bounds = gpd.read_file(country_bounds)
    country_bounds = country_bounds.to_crs(crs)
    country_bounds = country_bounds.explode()
    # only take the land polygon to exclude islands etc.
    country_bounds['area'] = country_bounds.geometry.area
    country_bounds = country_bounds[country_bounds.area == np.max(country_bounds.area)]
    #country_bounds_gov.geometry = country_bounds_gov.geometry.simplify(500,preserve_topology=True)
    country_bounds.to_file(country_bounds_path+"_simple",driver="GeoJSON")
    # draw transects at country polygon 
    transects = postprocess.draw_transects_polygon(
        country_bounds,
        transect_len/2,
        transect_len/2,
        transect_dist,
        transect_min_line_length,
        sigma=3,
        out_path_poly=country_bounds_path+"_smooth"
        )
    # clip transects to buffer 
    transects = gpd.clip(transects,buffer)
    #transects = transects.dropna() # if transects have been created along a multipolygon
    #transects = transects.explode().reset_index(drop=True)
    transects.to_file(transects_path,driver="GeoJSON")
    print("Transects for Vietnam have been created and saved.")
else:
    # clip transects to min water extent raster
    if not os.path.exists(transects_path+"_clip"):
        print("Clip transects to min water extent...")
        min_water_buffer = min_water_polys_gdf.buffer(100)
        transects_clip = gpd.clip(transects,min_water_buffer)
        # convert all mutlilinestrings to single linestrings to treat transect pieces separately
        transects_clip = transects_clip.explode().reset_index()
        transects_clip.to_file(transects_path+"_clip",driver="GPKG")
        print("Transects haven been clipped to min water extent polygon and saved.")
    else:
        print("Clipped transects already exist.")

Transects exist and have been loaded.
Clipped transects already exist.


### 7.| Calculate intersections between shorelines and transects

In [5]:
# Calculate intersections
# Load transects 
try:
    transects_clip = gpd.read_file(transects_path+"_clip")
    print("Transects have been loaded.")
except FileNotFoundError:
    print("Transects file for Vietnam does not exist.")

# Intersections
for i, tile in proc_tiles.iterrows():
    tile_name = "P"+str(i).zfill(2)
    print(("--")*10,"Treating",tile_name,("--")*10)
    folder_path = os.path.join(data_dir,tile_name)
    intersections_file = os.path.join(data_dir,tile_name,tile_name+"_intersections")
    if not os.path.exists(intersections_file):
        try:
            shorelines = gpd.read_file(os.path.join(data_dir,tile_name,tile_name+"_shorelines"))
        except FileNotFoundError:
            print('Shorelines do not exist.')
        else:        
            print("Calcualte intersections...")
            #tile_poly = tile.geometry
            #transects = gpd.clip(transects,tile_poly)
            intersections = postprocess.compute_intersections(transects_clip,shorelines,remove_outliers=False)
            if not intersections.empty:
                intersections.to_file(intersections_file,driver="GPKG")
                print("Intersections have been created and saved.")
            else:
                print("No intersections available for",tile_name)
    else:
        print("Intersections already exist.")
print("Done!")

DriverError: /Users/Ronjamac/Documents/02_Studium/Masterarbeit/Code/VN_coastline_dynamics/data/VN_transects_gov_2km_200m_clip: No such file or directory

In [10]:
# merge all intersections to one dataframe 
all_intersections_file = os.path.join(data_dir,"VN_all_intersections")
if not os.path.exists(all_intersections_file):
    print("Merge all intersections")
    all_intersections = []
    for i in proc_tiles.id:
        tile_name = "P"+str(i).zfill(2)
        folder_path = os.path.join(data_dir,tile_name)
        intersections_path = os.path.join(folder_path,tile_name+"_intersections")
        if os.path.exists(intersections_path):
            print(intersections_path)
            intersections = gpd.read_file(intersections_path)
            all_intersections.append(intersections)
    all_intersections_gdf = pd.concat(all_intersections, ignore_index=True)
    all_intersections_gdf.to_file(all_intersections_file,driver="GPKG")
    print("All intersections merged and saved.")
else:
    all_intersections = gpd.read_file(all_intersections_file)
    print("All intersections exist and have been loaded.")

All intersections exist and have been loaded.


### 8.| Coastline Change Analysis

#### Hotspot analysis

In [6]:
# Create classification transects (Erosion, Accretion, Stable, etc.)
import warnings
warnings.filterwarnings('ignore')
classifications_path = os.path.join(data_dir,"VN_all_classifications")
if not os.path.exists(classifications_path):
    intersections = gpd.read_file(os.path.join(data_dir,"VN_all_intersections"))
    classifications = postprocess.calc_change_metrics(intersections,5)
    classification = gpd.read_file(classifications_path)
    classifications.to_file(classifications_path,driver="GPKG")
    print("Classifications saved.")
else:
    print("Classifications exist.")

Classifications exist.


In [5]:
# Calculate accretion and erosion hotspots 
erosion_hotspots_path = os.path.join(data_dir,"VN_erosion_hotspots")
accretion_hotspots_path = os.path.join(data_dir,"VN_accretion_hotspots")

if not os.path.exists(erosion_hotspots_path+"a"):
    # load an prepare classification file 
    classification = gpd.read_file(os.path.join(data_dir,"VN_all_classifications"))
    classification = classification.sort_values(by="Transect_id").reset_index(drop=True)
    #classification = classification.replace("nan",np.NaN)
    classification = classification[classification["class_L1"].notna()].reset_index(drop=True)

    # set up while loop to find clustered erosion and accretion classification transects
    yet_seen = []
    erosion_hotspots = []
    accretion_hotspots = []
    # iterate through all classification transects
    for t, transect in classification.iterrows():
            # check if transect has already been evaluated 
            if not t in yet_seen:
                cluster = []
                # create cluster if the next transect or the transect after has the same class 
                while (transect.class_L1 == classification.iloc[t].class_L1) or (transect.class_L1 == classification.iloc[t+1].class_L1) :
                    yet_seen.append(t)
                    cluster.append(classification.iloc[t])
                    t +=1
                    # exist while loop if last transect id +2 has been reached
                    if t+2 > len(classification):
                        break
                # if less than 20 transect share the same class, omit from hotspot analysis
                if len(cluster)>20:
                    # save only erosion and accretion clusters and convert to GeoDataFrame
                    if cluster[0].class_L1 == "Accretion":
                        gdf = gpd.GeoDataFrame(cluster)
                        accretion_hotspots.append(gdf)
                    elif cluster[0].class_L1 == "Erosion":
                        gdf = gpd.GeoDataFrame(cluster)
                        erosion_hotspots.append(gdf)
    print("All hotspots identified.")
    # add cluster number to hotspot transects
    for i, transect in enumerate(erosion_hotspots):
        transect['cluster_no'] = i
    for i, transect in enumerate(accretion_hotspots):
        transect['cluster_no'] = i 
    # merge all hotspot dataframes
    erosion_hotspots_gdf = pd.concat(erosion_hotspots)
    accretion_hotspots_gdf = pd.concat(accretion_hotspots)
    # save dataframes
    erosion_hotspots_gdf.to_file(erosion_hotspots_path,driver="GPKG")
    accretion_hotspots_gdf.to_file(accretion_hotspots_path,driver="GPKG")
    print("Hotspot files saved.")
else:
    print("Hotspot files exist.")

All hotspots identified.
Hotspot files saved.


In [9]:
# Filter extreme erosion and accretion hotspots
extreme_erosion_hotspots_path = os.path.join(data_dir,"VN_extreme_erosion_hotspots")
extreme_accretion_hotspots_path = os.path.join(data_dir,"VN_extreme_accretion_hotspots")
if not os.path.exists(extreme_erosion_hotspots_path):
    erosion_hotspots = gpd.read_file(os.path.join(data_dir,"VN_erosion_hotspots"))
    accretion_hotspots = gpd.read_file(os.path.join(data_dir,"VN_accretion_hotspots"))
    print("Mean erosion of extreme hotspots:")
    extreme_erosion_hotspots = postprocess.define_severe_hotspots(erosion_hotspots,-5,"smaller")
    print("Mean accretion of extreme hotspots:")
    extreme_accretion_hotspots = postprocess.define_severe_hotspots(accretion_hotspots,5,"bigger")
    # save files
    extreme_erosion_hotspots.to_file(extreme_erosion_hotspots_path,driver="GPKG")
    extreme_accretion_hotspots.to_file(extreme_accretion_hotspots_path,driver="GPKG")
else:
    print("Extreme hotspot files exist.")

Extreme hotspot files exist.


#### Land area change

In [10]:
# calcualte land area change in the coastal zone (5km buffer) as proportional change 
provinces_clip_path = os.path.join(data_dir,"VN_coastal_provinces_clip")
provinces_path = os.path.join(data_dir,"VN_coastal_provinces")
buffer_path = os.path.join(data_dir,"VN_buffer_5km")

# buffer coastal provinces by 5km and clip to buffer 
if not os.path.exists(provinces_clip_path):
    buffer = gpd.read_file(buffer_path)
    provinces = gpd.read_file(provinces_path)
    provinces = provinces.to_crs(crs)
    provinces['geometry'] = provinces.geometry.buffer(5000)
    provinces = gpd.clip(provinces,buffer)
    provinces.to_file(provinces_clip_path,driver="GeoJSON")
else:
    print("Coastal provinces are already clipped to buffer and read.")
    provinces = gpd.read_file(provinces_clip_path)

# calcualte proportional land area change for each province 
for i in provinces.index:
    name = provinces.ADM1_PCODE.iloc[i]
    print("Land area change in province: ",name)
    land_area_path = os.path.join(plot_dir,"land_area_change_"+name)
    if not os.path.exists(land_area_path):
        land_area = pd.DataFrame(columns=['year','land_area_percentage'])
        land_area['year'] = range(1987,2022)
        for year in range(1987,2022):
            print(year)
            binary_files = glob.glob(os.path.join("{path}/**/*{year}*bin.tif".format(path=data_dir,year=year)))
            binary_files.sort()
            land_pixels, valid_pixels = [],[]
            for file in binary_files:
                # crop raster to area
                area = provinces[provinces.index==i]
                with rio.open(file) as src:
                    try:
                        out_image, out_transform = rio.mask.mask(src,area.geometry,crop=True,nodata=np.nan)
                        out_meta = src.meta
                        out_meta.update({"driver": "GTiff",
                            "height": out_image.shape[1],
                            "width": out_image.shape[2],
                            "transform": out_transform,
                            "compress":"LZW"})
                        out_path = os.path.join(os.getcwd(),"test_data",str(year)+"_"+name+"_"+os.path.basename(file)[:3])
                        with rio.open(out_path,"w",**out_meta) as dst:
                            dst.write(out_image)
                        show(out_image)
                        im_rev = out_image.copy()
                        im_rev[im_rev==0]=2
                        im_rev[im_rev==1]=0
                        im_rev[im_rev==2]=1
                        valid_pixel = np.count_nonzero(~np.isnan(out_image))
                        land_pixel = np.nansum(im_rev)#*30*30
                        land_pixels.append(land_pixel)
                        valid_pixels.append(valid_pixel)
                    except ValueError:
                        pass
            land_pixel_percentage = np.sum(land_pixels)/np.sum(valid_pixels)*100
            print("\nLand pixel percentage in",str(year)+":", land_pixel_percentage,"\n")
            land_area.loc[land_area.year == year, "land_area_percentage"] = land_pixel_percentage
            land_area.to_csv(land_area_name)
    else:
        print("Land area file already exists.")

Coastal provinces are already clipped to buffer and read.
Land area change in province:  VN717
Land area file already exists.
Land area change in province:  VN821
Land area file already exists.
Land area change in province:  VN811
Land area file already exists.
Land area change in province:  VN507
Land area file already exists.
Land area change in province:  VN715
Land area file already exists.
Land area change in province:  VN823
Land area file already exists.
Land area change in province:  VN501
Land area file already exists.
Land area change in province:  VN713
Land area file already exists.
Land area change in province:  VN405
Land area file already exists.
Land area change in province:  VN103
Land area file already exists.
Land area change in province:  VN701
Land area file already exists.
Land area change in province:  VN511
Land area file already exists.
Land area change in province:  VN813
Land area file already exists.
Land area change in province:  VN113
Land area file alread