### Import packages

In [23]:
import os
import dask
import xarray as xr
import pyproj
import rioxarray
import pandas as pd
import geopandas as gpd
import numpy as np
import rasterio
from rasterio.features import shapes
from shapely.geometry import shape


### Define functions

In [24]:
def assign_projection(ds, epsg=None):
    """Store a pyproj CRS in ``ds.attrs['crs']``.

    Parameters
    ----------
    ds : object with an ``attrs`` mapping
        Dataset to annotate; it is modified in place. When ``epsg`` is
        omitted it must also expose ``ds.crs.values`` holding the EPSG code.
    epsg : int or str, optional
        EPSG code to assign. Defaults to the code read from ``ds.crs.values``.

    Returns
    -------
    The same ``ds`` object, with ``attrs['crs']`` set to the pyproj CRS.
    """
    # Identity test on purpose: None is a singleton, and ``== None`` can be
    # overridden by values that overload equality.
    if epsg is not None:
        code = epsg
    else:
        code = ds.crs.values.tolist()
    proj = pyproj.CRS.from_epsg(int(code))

    print(proj,"projection was assigned to the dataset attributes")
    ds.attrs['crs'] = proj
    return ds

def print_ds_properties(rds,epsg=None):
    """Print grid size, nodata value and CRS of a rioxarray object.

    When the object carries no CRS and ``epsg`` is given, that CRS is written
    to ``rds`` in place (``attrs['crs']`` and the ``rio`` accessor). When no
    CRS is present and ``epsg`` is omitted, the object is left untouched and
    the missing projection is only reported.

    Parameters
    ----------
    rds : object exposing the ``rio`` accessor and ``attrs``
        Raster to describe.
    epsg : int, optional
        EPSG code used as a fallback when ``rds`` has no CRS.

    Returns
    -------
    None
    """
    # Grid size as reported by the rio accessor
    print("Grid size:", rds.rio.resolution())

    # Nodata value registered on the raster
    print("no data:", rds.rio.nodata)

    # Projection information; fall back to the supplied EPSG code if any
    if rds.rio.crs is None:
        print("There is no projection")
        # Without this guard a missing epsg would be handed straight to
        # pyproj and abort what is meant to be a purely informative call.
        if epsg is not None:
            proj = pyproj.CRS.from_epsg(epsg)
            rds.attrs['crs'] = proj
            rds.rio.set_crs(proj, inplace=True)
    else:
        print("There is projection available")

    print("Projection EPSG code is:", rds.rio.crs, "\n")

def change_resolution(ds, new_resolution):
    """Resample a rioxarray object to a new resolution in its own CRS.

    Parameters
    ----------
    ds : object exposing the ``rio`` accessor
        Raster to resample; its CRS (``ds.rio.crs``) is kept.
    new_resolution : float or tuple of float
        Target resolution, forwarded to ``rio.reproject``.

    Returns
    -------
    The object returned by ``ds.rio.reproject``.
    """
    # Imported locally so the function does not depend on other cells.
    from rasterio.enums import Resampling

    # rioxarray documents ``resampling`` as a rasterio Resampling member;
    # the plain string "bilinear" is not one.
    return ds.rio.reproject(
        ds.rio.crs,
        resolution=new_resolution,
        resampling=Resampling.bilinear,
    )

def match_resolution(rds, rds_source):
    """Reproject ``rds`` onto the CRS and resolution of ``rds_source``.

    Parameters
    ----------
    rds : object exposing the ``rio`` accessor
        Raster to reproject.
    rds_source : object exposing the ``rio`` accessor
        Raster whose ``rio.crs`` and ``rio.resolution()`` are used as target.

    Returns
    -------
    The object returned by ``rds.rio.reproject``.
    """
    # The Resampling enum is defined by rasterio; taking it from there avoids
    # relying on a ``rioxarray.enums`` attribute.
    from rasterio.enums import Resampling

    return rds.rio.reproject(
        rds_source.rio.crs,
        resolution=rds_source.rio.resolution(),
        resampling=Resampling.bilinear,
    )

def redefine_null_for_nan(ds, new_null_value):
    """Replace every NaN in ``ds.values`` with ``new_null_value`` in place.

    Parameters
    ----------
    ds : object with a readable and assignable ``values`` array
        Raster whose NaN cells are overwritten.
    new_null_value : scalar
        Value written wherever the data is NaN.

    Returns
    -------
    The same ``ds`` object.
    """
    # Read the array once, patch it, then assign it back. Writing through
    # ``ds.values[...]`` alone is lost whenever ``.values`` hands out a fresh
    # array on each access (e.g. lazily evaluated data).
    data = ds.values
    data[np.isnan(data)] = new_null_value
    ds.values = data
    # ds.rio.update({'nodata': new_null_value})
    return ds

def create_gdf_from_geojson_files(input_aoi_data):
    """Load every ``*.geojson`` in a folder into one GeoDataFrame.

    Each file contributes its rows plus an ``aoi`` column (inserted at
    position 1) holding the part of the file name after the last underscore
    and before the first following dot. An ``id`` column, when present, is
    dropped from the combined frame.

    Parameters
    ----------
    input_aoi_data : str
        Directory that contains the GeoJSON files.

    Returns
    -------
    geopandas.GeoDataFrame
        Concatenation of all files with a fresh integer index.

    Raises
    ------
    ValueError
        If the directory holds no ``.geojson`` file.
    """
    aoi_frames = []

    # os.listdir gives no ordering guarantee; sorting keeps the row order
    # reproducible between runs and machines.
    for filename in sorted(os.listdir(input_aoi_data)):
        if not filename.endswith(".geojson"):
            continue
        gdf = gpd.read_file(os.path.join(input_aoi_data,filename))
        # "<anything>_<AOI>.geojson" -> "<AOI>"
        aoi_id = filename.split('_')[-1].split('.')[0]
        gdf.insert(1, "aoi", aoi_id)
        aoi_frames.append(gdf)

    if not aoi_frames:
        # Fail with the folder name instead of pandas' generic concat error.
        raise ValueError(f"No .geojson files found in {input_aoi_data}")

    aoi_gdf = gpd.GeoDataFrame(pd.concat(aoi_frames, ignore_index=True))
    # Not every GeoJSON carries an "id" property; tolerate its absence.
    return aoi_gdf.drop(columns=["id"], errors="ignore")

def clip_raster(rds, geometry):
    """Return ``rds`` clipped to ``geometry`` through the ``rio`` accessor."""
    return rds.rio.clip(geometry)

def clip_raster_with_gdf(rds, gdf):
    """Clip ``rds`` once per row of ``gdf`` and collect the results.

    Parameters
    ----------
    rds : object exposing the ``rio`` accessor
        Raster to clip.
    gdf : GeoDataFrame-like
        Must provide an ``aoi`` column and a ``geometry`` attribute; each row
        is clipped separately.

    Returns
    -------
    list
        One entry per row, in row order: the clipped raster, or the string
        ``"NaN"`` when clipping that row raised an exception (the error is
        printed and processing continues with the next row).
    """
    rds_clipped_list = []
    # iterrows yields index *labels*; iloc needs *positions*. Track the
    # position separately so frames without a 0..n-1 index are sliced
    # correctly.
    for position, (index, row) in enumerate(gdf.iterrows()):
        aoi = row["aoi"]
        try:
            geometry = gdf.iloc[position:position+1].geometry
            rds_clipped_list.append(rds.rio.clip(geometry))
            print(f"Successful processing row {index} {aoi}")
        except Exception as e:
            # Best effort: report the failure and keep a placeholder so the
            # output list stays aligned with the rows of gdf.
            print(f"Error processing row {index} {aoi}: {e}")
            rds_clipped_list.append("NaN")
    print("\n")
    return rds_clipped_list

def apply_lat_mask(raster1, raster2):
    """Label each pixel 7 or 9 by comparing ``raster1`` with ``raster2``.

    Pixels where ``raster1 >= raster2`` become 7, pixels where
    ``raster1 < raster2`` become 9, and pixels where neither comparison holds
    (NaN in either input) stay NaN. The result is a float32 copy of
    ``raster1`` carrying the labels.

    NOTE: a second ``apply_lat_mask`` with a different signature is defined
    further down in this cell and replaces this one at run time.
    """
    first, second = raster1.values, raster2.values

    # All-NaN float32 grid shaped like the first raster
    labels = np.full_like(first, fill_value=np.nan, dtype='float32')

    at_or_above = first >= second
    below = first < second
    labels[at_or_above] = 7
    labels[below] = 9

    # Wrap the labels in a copy of the first raster
    return raster1.copy(data=labels)

def apply_gebco_mask(raster1, raster2):
    """Mark pixels where ``raster2`` exceeds 10 with the value 11.

    The result is a float32 copy of ``raster1`` that is NaN everywhere except
    where ``raster2 > 10``, which is set to 11. The values of ``raster1`` only
    determine the shape of the result.

    NOTE: a second ``apply_gebco_mask`` with a different signature is defined
    further down in this cell and replaces this one at run time.
    """
    template, reference = raster1.values, raster2.values

    # All-NaN float32 grid shaped like the first raster
    flagged = np.full_like(template, fill_value=np.nan, dtype='float32')

    exceeds_ten = reference > 10
    flagged[exceeds_ten] = 11

    # Wrap the flags in a copy of the first raster
    return raster1.copy(data=flagged)

def apply_lat_hat_mask(depth, lat, hat):
    """Label each depth pixel 7 or 9 by comparing it with ``hat``.

    Pixels where ``hat >= depth`` become 7, pixels where ``hat < depth``
    become 9, and pixels where neither comparison holds (NaN in either input)
    stay NaN.

    Parameters
    ----------
    depth : raster exposing ``values`` and ``copy(data=...)``
        Raster being classified; also the template for the result.
    lat : unused
        Kept so existing calls keep working; the classification does not read
        it, so it is no longer loaded.
        NOTE(review): the function name suggests ``lat`` was meant to take
        part in the rule - confirm the intended logic.
    hat : raster exposing ``values``
        Raster compared against ``depth``.

    Returns
    -------
    A float32 copy of ``depth`` holding the labels.
    """
    depth_array = depth.values
    hat_array = hat.values

    # All-NaN float32 grid shaped like the depth raster
    result_data = np.full_like(depth_array, fill_value=np.nan, dtype='float32')

    result_data[hat_array >= depth_array] = 7
    result_data[hat_array < depth_array] = 9

    # Wrap the labels in a copy of the depth raster
    return depth.copy(data=result_data)

def apply_gebco_mask(gebco_rds,lat_mask):
    """Overwrite ``lat_mask`` with 11 wherever ``gebco_rds`` exceeds 10.

    Cells where ``gebco_rds > 10`` is false (including NaN cells) keep their
    ``lat_mask`` value. This definition supersedes the earlier function of
    the same name in this cell.
    """
    above_ten = gebco_rds > 10
    # where() keeps values where the condition is True, hence the negation
    return lat_mask.where(~above_ten, other=11)

def apply_lat_mask(depth, lat):
    """Blank out (NaN) every ``depth`` cell that lies below ``lat``.

    Cells where ``lat > depth`` is false (including NaN cells) are returned
    unchanged. This definition supersedes the earlier function of the same
    name in this cell.
    """
    lat_is_higher = lat > depth
    # where() keeps values where the condition is True, hence the negation
    return depth.where(~lat_is_higher, other=np.nan)

def apply_hat_mask(depth_lat, hat_rds):
    """Blank out (NaN) every ``depth_lat`` cell that exceeds ``hat_rds``.

    Cells where ``depth_lat > hat_rds`` is false (including NaN cells) are
    returned unchanged.
    """
    exceeds_hat = depth_lat > hat_rds
    # where() keeps values where the condition is True, hence the negation
    return depth_lat.where(~exceeds_hat, other=np.nan)


### Define data paths

In [25]:
# Project folders are resolved relative to the parent of the working directory.
repository_path = os.path.dirname(os.getcwd())
input_data_path = os.path.join(repository_path,"Data")
# NOTE(review): absolute drive path; it only resolves where this drive is mapped.
input_aoi_data = r"p:\11209821-cmems-global-sdb\00_miscellaneous\AOIs"
output_data_path = os.path.join(repository_path,"Output")

# Create the output folder on first use and report what happened.
if not os.path.exists(output_data_path):
    print("Output data path does not exist. Creating directory...")
    os.makedirs(output_data_path)
    print("Output data path created:", output_data_path)
else:
    # The message used to say "Input data path" although it reports the output folder.
    print("Output data path already exists:", output_data_path)

Input data path already exists: d:\Proyectos2024\Copernicus\Repository\Copernicus\Repository\eo-bathymetry\notebooks\Output


### Retrieve input data

In [26]:
# AOIs for all the sites
aoi_gdf = create_gdf_from_geojson_files(input_aoi_data)

# Swap every AOI polygon for its bounding box (envelope) and keep only the AOI
# name and the geometry. Replacing the geometry column directly, instead of
# adding a side column and renaming it, keeps the frame's active geometry.
aoi_gdf_bounding_box = aoi_gdf.assign(geometry=aoi_gdf.geometry.envelope)[['aoi', 'geometry']]

# Each per-site frame is re-indexed from 0 via a chained reset_index, so no
# filtered slice is modified in place.

# AOI for Sao Paulo
aoi_saopaulo_bounding_box = aoi_gdf_bounding_box[aoi_gdf_bounding_box['aoi'] == "SaoPaulo"].reset_index(drop=True)

# AOI for WaddenSea
aoi_wadden_bounding_box = aoi_gdf_bounding_box[aoi_gdf_bounding_box['aoi'] == "WaddenSea"].reset_index(drop=True)

# AOI for WoodyCape
aoi_woody_bounding_box = aoi_gdf_bounding_box[aoi_gdf_bounding_box['aoi'] == "WoodyCape"].reset_index(drop=True)

In [27]:
# aoi_saopaulo_bounding_box.to_file(os.path.join(output_data_path,'aoi_saopaulo_bounding_box.geojson'), driver='GeoJSON')
# aoi_wadden_bounding_box.to_file(os.path.join(output_data_path,'aoi_wadden_bounding_box.geojson'), driver='GeoJSON')
# aoi_woody_bounding_box.to_file(os.path.join(output_data_path,'aoi_woody_bounding_box.geojson'), driver='GeoJSON')

In [37]:
# From here on output_data_path points at the folder with the per-tile results.
output_data_path = os.path.join(repository_path, "AOI_results")

# Tile to load; swap in one of the commented alternatives for another tile.
coordinates_list = ['n90.0_s0.0_w0.0_e90.0']
# coordinates_list = ['n0.0_s-90.0_w-90.0_e0.0']
# coordinates_list = ['n0.0_s-90.0_w0.0_e90.0']
coordinates = coordinates_list[0]

# Open the result raster of the selected tile and report its properties;
# EPSG:4326 is the fallback CRS handed to print_ds_properties.
results = rioxarray.open_rasterio(
    os.path.join(output_data_path, f"gebco_2023_{coordinates}_result.tif")
)
print_ds_properties(results, 4326)

Grid size: (0.004166666666666666, -0.004166666666666667)
no data: None
There is no projection
Projection EPSG code is: EPSG:4326 



### Clip results

In [32]:
# results_clipped_wadden = clip_raster_with_gdf(results, aoi_wadden_bounding_box)
# results_clipped_wadden[0].rio.to_raster(os.path.join(output_data_path,f'gebco_2023_{coordinates}_results_clipped_wadden.tif'), driver='GTiff', compress='lzw')

# results_clipped_saopaulo = clip_raster_with_gdf(results, aoi_saopaulo_bounding_box)
# results_clipped_saopaulo[0].rio.to_raster(os.path.join(output_data_path,f'gebco_2023_{coordinates}_results_clipped_saopaulo.tif'), driver='GTiff', compress='lzw')

# results_clipped_woody = clip_raster_with_gdf(results, aoi_woody_bounding_box)
# results_clipped_woody[0].rio.to_raster(os.path.join(output_data_path,f'gebco_2023_{coordinates}_results_clipped_woody.tif'), driver='GTiff', compress='lzw')


Successful processing row 0 WoodyCape




In [38]:
# Polygonize the clipped Wadden Sea raster and export the polygons as GeoJSON.
clipped_basename = f"gebco_2023_{coordinates}_results_clipped_wadden"

# Open raster file using rasterio
with rasterio.open(os.path.join(output_data_path, f"{clipped_basename}.tif")) as src:
    # Read raster data into numpy array
    raster_array = src.read(1)  # Assuming it's a single band raster, adjust if necessary
    # Extract transformation metadata
    transform = src.transform
    # Polygonize only the valid pixels: NaN cells are discarded below anyway,
    # so masking them spares the polygonizer that work.
    valid_pixels = ~np.isnan(raster_array)
    polygons = list(shapes(raster_array, mask=valid_pixels, transform=transform))

# Convert polygons to Shapely geometries with their pixel values; the NaN
# check stays as a guard so the exported table never contains NaN values.
geometries_with_values = [
    (shape(polygon), value) for polygon, value in polygons if not np.isnan(value)
]

# Extract geometries and values into separate lists
geometries = [geometry for geometry, value in geometries_with_values]
values = [value for geometry, value in geometries_with_values]

# Define the EPSG code for the desired projection
epsg_code = 4326  # For example, EPSG code for WGS 84

# Build the GeoDataFrame with its CRS in one step; a freshly built frame
# already has a clean 0..n-1 index, so no in-place reset is needed.
geo_df = gpd.GeoDataFrame(
    data={'pixel_value': values}, geometry=geometries, crs=f"EPSG:{epsg_code}"
)

# Save GeoDataFrame to file
# geo_df.to_file(os.path.join(output_data_path,f"gebco_2023_{coordinates}_depth_lat_hat.shp"))
# The CRS is taken from geo_df itself; an extra crs= argument is redundant
# and is rejected by geopandas' pyogrio writer.
geo_df.to_file(os.path.join(output_data_path, f"{clipped_basename}.geojson"), driver="GeoJSON")
