# Prepare



This script process geotiff files in the following steps.
1. Reproject and resample these files to a consistent coordinate system and cell size
2. Create xarray data arrays from the resampled data for organized data handling
3. Check inconstistencies for any differences in data array shapes, providing awareness of potential inconstistencies.


In [1]:
import os
import rasterio
import xarray as xr
from rasterio.warp import calculate_default_transform, reproject, Resampling
import numpy as np

### Define parameters

In [2]:
# Define the target coordinate system (WGS84)
src_crs = {"init": "EPSG:32647"}
dst_crs = {"init": "EPSG:4326"}
cell_size = 0.001

#Specify the folder containing the geotiff files
folder_path = r"C:\Users\micha\Stichting Deltares\Tiaravanni Hermawan - Indonesia case for students\semarang\hydromt_data_1\to_clean"

### Read all the tif from the directory

In [3]:
file_list = []
# Walk through the directory and its subdirectories
for root, dirs, files in os.walk(folder_path):
    for file in files:
        if file.endswith('.tif') or file.endswith('.tiff'):
            file_list.append(os.path.join(root, file))
file_list

['C:\\Users\\micha\\Stichting Deltares\\Tiaravanni Hermawan - Indonesia case for students\\semarang\\hydromt_data_1\\to_clean\\cleaned\\basin_cleaned.tiff',
 'C:\\Users\\micha\\Stichting Deltares\\Tiaravanni Hermawan - Indonesia case for students\\semarang\\hydromt_data_1\\to_clean\\cleaned\\cn_avg_cleaned.tiff',
 'C:\\Users\\micha\\Stichting Deltares\\Tiaravanni Hermawan - Indonesia case for students\\semarang\\hydromt_data_1\\to_clean\\cleaned\\cn_dry_cleaned.tiff',
 'C:\\Users\\micha\\Stichting Deltares\\Tiaravanni Hermawan - Indonesia case for students\\semarang\\hydromt_data_1\\to_clean\\cleaned\\cn_wet_cleaned.tiff',
 'C:\\Users\\micha\\Stichting Deltares\\Tiaravanni Hermawan - Indonesia case for students\\semarang\\hydromt_data_1\\to_clean\\cleaned\\distance_to_river_cleaned.tiff',
 'C:\\Users\\micha\\Stichting Deltares\\Tiaravanni Hermawan - Indonesia case for students\\semarang\\hydromt_data_1\\to_clean\\cleaned\\elevation_subsidence_cleaned.tiff',
 'C:\\Users\\micha\\Stichtin

### For each tif in the list do the following steps: 
    1. Set nan values 
    2. Resample to given cell size
    3. Populate xarray data array from the reprojected data and store it in the dictionary

In [5]:
def get_data_from_original_raster(file):
    with rasterio.open(file_path) as src:
        src_data = src.read(1)
        src_data = np.where(
            src_data == -9999, np.nan, src_data
        )  # Replace -9999 with np.nan
        # no flood depth as 0
        if (
            file_name == "flooddepth_sfincs.tif"
            or file_name == "flood_depth_compound.tiff"
        ):
            src_data = np.where(np.isnan(src_data), 0, src_data)
        # no population as 0
        if file_name == "population.tif":
            src_data = np.where(src_data < 0, 0, src_data)
        return src.profile, src_data, src.bounds

In [7]:
def resample_to_cell_size(src_profile, dst_crs, bbox):
    #1. Calculate the default transform and dimensions of the output raster using bounding values
    dst_transform, dst_width, dst_height = calculate_default_transform(
        src_profile["crs"],
        dst_crs,
        src_profile["width"],
        src_profile["height"],
        left=bbox[0],
        bottom=bbox[1],
        right=bbox[2],
        top=bbox[3],
    )
    #2. Resample the data to a cell size
    target_transform = rasterio.transform.from_origin(
        dst_transform[2], dst_transform[5], cell_size, cell_size
    )
    target_width = int(dst_width * dst_transform[0] / cell_size)
    target_height = int(dst_height * abs(dst_transform[4]) / cell_size)

    dst_data = np.zeros((target_height, target_width), dtype=np.float32)
    reproject(
        source=src_data,
        destination=dst_data,
        src_transform=src_profile["transform"],
        src_crs=src_profile["crs"],
        dst_transform=target_transform,
        dst_crs=dst_crs,
        resampling=Resampling.bilinear,
    )
    
    #3. Update the metadata of the output raster
    dst_profile = src_profile.copy()
    dst_profile.update(
        {
            "crs": dst_crs,
            "transform": target_transform,  
            "width": target_width,  
            "height": target_height, 
        }
    )
    return dst_profile, dst_data

In [8]:
data_array = {}
for file_name in file_list:
    #1. Construct the input file path
    file_path = file_name

    #2. Remove file extension from the name
    data_name = os.path.splitext(file_name)[0].split("\\")[-1]
    
    src_profile, src_data = set_nan_values_raster(file_path)
    #print(src_profile, src_data)
    dst_profile, dst_data = resample_to_cell_size(src_profile, dst_crs, bbox)
    #data_array[data_name] = xr.DataArray(dst_data, dims=("y", "x"), name=data_name)

TypeError: cannot unpack non-iterable NoneType object

### Resize arrays to have the minimum size:
    1. Find the minimum size among all arrays
    2. Update the profile

In [None]:
min_rows = min([arr.shape[0] for arr in data_array.values()])
min_cols = min([arr.shape[1] for arr in data_array.values()])

In [2]:
dst_profile = src_profile.copy()
dst_profile.update(
    {
        "crs": dst_crs,
        "transform": target_transform,  # dst_transform,
        "width": min_cols,  # dst_width,
        "height": min_rows,  # dst_height
    }
)

NameError: name 'src_profile' is not defined

In [None]:
resized_arrays_tiff = {}

for key, arr in data_array.items():
    resized_arr = arr[:min_rows, :min_cols]
    resized_arrays_tiff[key] = xr.DataArray(resized_arr, dims=("y", "x"), name=key)

    # write resized rasters
    with rasterio.open(
        folder_path + "cleaned/" + key + "_cleaned.tiff", "w", **dst_profile
    ) as dst:
        dst.write(resized_arrays_tiff[key], indexes=1)


### Create netcdf per event
    1. Set event numbers
    2. Get coordinates from one of the tiff files
    3. Define the variables to exclude
    4. Loop through each event number and create a NetCDF file for it

In [None]:
event_numbers = [i for i in range(1, 7)]

In [None]:
coords_tiff = xr.open_dataset(
    folder_path + "cleaned/floodmaps_1_cleaned.tiff", engine="rasterio"
)

In [None]:
variable_to_exclude = "floodmaps" 


target_vars = [f"{variable_to_exclude}{event_number}" for event_number in event_numbers]
dynamic_vars = target_vars + [
    f"{variable_to_exclude}_gis_{event_number}" for event_number in event_numbers
]
static_vars = [
    var_name for var_name in resized_arrays_tiff.keys() if var_name not in dynamic_vars
]

In [None]:
for event_number in event_numbers:
    resized_arrays_nc = {}
    #1. Select the relevant DataArrays for the current event
    selected_vars = [
        [f"{variable_to_exclude}{event_number}", f"{variable_to_exclude}_gis_{event_number}"] + static_vars
    ][0]

    #2. Create a new DataArray containing only the selected variables
    dims = ("events", "y", "x")
    coords = [[event_number], coords_tiff.y.values, coords_tiff.x.values]

    for key, arr in data_array.items():
        resized_arr = arr[:min_rows, :min_cols].expand_dims({"events": 1})
        resized_arrays_nc[key] = xr.DataArray(
            resized_arr, dims=dims, coords=coords, name=key
        )

    resized_arrays_ds = xr.Dataset(resized_arrays_nc)
    resized_arrays_ds = resized_arrays_ds.rename(
        {
            f"floodmaps_{event_number}": "floodmaps",
            f"floodmaps_gis_{event_number}": "floodmaps_gis",
        }
    )

    #3. Define the filename for the NetCDF file
    filename = f"event_{event_number}.nc"

    #4. Save the DataArray to a NetCDF file
    resized_arrays_ds.to_netcdf(folder_path + filename)