# Create land-sea mask from Copernicus LUC data

Land-sea mask derived from Copernicus LUC data (original resolution: 100 m).\
The extent is limited to latitudes between -60 and 80 degrees (the areas further north and south are almost entirely ice).\
The target grid is taken from WorldClim data (resolution: 1 km).

---

In [None]:
# Libraries
import time
import xarray as xr
import rioxarray
import matplotlib.pyplot as plt

In [None]:
# Directories
# Paths are joined by plain string concatenation below, so keep the trailing slash.
# Root of the input data (the Copernicus LUC and WorldClim rasters are read from here)
dir_data =  '../data/'
# Output directory of this step (GeoTIFFs and zarr stores are written to and read from here)
dir01 = '../paper_deficit/output/01_prep/'

---

### Pre-processing

In [None]:
# Libraries
from dask_jobqueue import SLURMCluster
from dask.distributed import Client
import dask

# Initialize dask on SLURM
cluster = SLURMCluster(
    queue='compute',                      # SLURM queue to use
    cores=24,                             # Number of CPU cores per job
    memory='256 GB',                      # Memory per job
    account='bm0891',                     # Account allocation
    interface="ib0",                      # Network interface for communication
    walltime='00:30:00',                  # Maximum runtime per job
    local_directory='../dask/',           # Directory for local storage
    job_extra_directives=[                # Additional SLURM directives for logging
        '-o ../dask/LOG_worker_%j.o',     # Output log (%j is the SLURM job id)
        '-e ../dask/LOG_worker_%j.e',     # Error log, kept next to the output log
    ],
)

# Scale dask cluster to two SLURM jobs
cluster.scale(jobs=2)

# Configure the dashboard URL so that the link is routed through the
# JupyterHub proxy; set before the client is created, as in the original order.
dask.config.set(
    {'distributed.dashboard.link': '{JUPYTERHUB_SERVICE_PREFIX}/proxy/{port}/status'}
)

# Create client connected to the cluster
client = Client(cluster)

# Display the client summary as the cell output
client

In [None]:
def prep_copernicus_land_mask():
    """Build a land mask GeoTIFF from the Copernicus water and snow layers.

    Reads the 2019 permanent-water and snow cover-fraction rasters from
    ``dir_data``, adds them, and writes ``copernicus_land_mask.tif`` to
    ``dir01``. Cells whose summed value is zero are written as 100 (land),
    all other cells as 0, stored as int8.
    """
    # Shared file name prefix and read options of both cover-fraction layers
    f_prefix = dir_data + '/copernicus_luc/2019/PROBAV_LC100_global_v3.0.1_2019-nrt_'
    kw_read = dict(chunks=dict(y=5000, x=5000), lock=False)

    # Open both layers in 5000 x 5000 chunks
    da_water = rioxarray.open_rasterio(
        f_prefix + 'PermanentWater-CoverFraction-layer_EPSG-4326.tif', **kw_read)
    da_snow = rioxarray.open_rasterio(
        f_prefix + 'Snow-CoverFraction-layer_EPSG-4326.tif', **kw_read)

    # Summed water and snow layers; a value of zero is treated as land
    da_cover = da_water + da_snow

    # 100 for land, 0 otherwise
    # (presumably so that the regridded mean reads as percent land - confirm)
    da_land = xr.where(da_cover == 0, 100, 0).astype('int8')

    # Export as tif
    da_land.rio.to_raster(dir01 + 'copernicus_land_mask.tif')


# Call the function
prep_copernicus_land_mask()

In [None]:
def prep_target_grid():
    """Write the regridding target grid derived from WorldClim elevation.

    Opens the WorldClim elevation raster from ``dir_data``, masks cells equal
    to the raster's nodata value, keeps the rows with ``y`` between 80 and
    -60, and writes the result to ``dir01`` as ``target_grid.tif``.
    """
    # Open the WorldClim elevation file in 5000 x 5000 chunks
    f_wclim_elev = dir_data + 'worldclim/wc2.1_30s_elev/wc2.1_30s_elev.tif'
    da_elev = rioxarray.open_rasterio(f_wclim_elev, chunks=dict(y=5000, x=5000))

    # Exclude fill values, identified through rioxarray's nodata attribute
    da_grid = da_elev.where(da_elev != da_elev.rio.nodata)

    # Name the array after its role in the regridding step
    da_grid = da_grid.rename('target_grid')

    # Limit the latitude extent; the slice runs from 80 down to -60
    da_grid = da_grid.sel(y=slice(80, -60))

    # Drop the spatial_ref coordinate to simplify the output
    da_grid = da_grid.drop_vars('spatial_ref')

    # Save the result as a GeoTIFF file
    da_grid.rio.to_raster(dir01 + 'target_grid.tif')


# Call the function
prep_target_grid()

In [None]:
# Close the dask client first so it does not stay connected to a scheduler
# that is about to shut down, then close the dask cluster
client.close()
cluster.close()

In [None]:
# wait for 60s for dask client to completely disconnect
# NOTE(review): presumably needed because the regridding step below sets up
# its own dask cluster (regrid_high_res takes account/partition) - confirm
time.sleep(60)

---

### Regridding

In [None]:
# Import regridding function
from regrid_high_res_v1_01 import regrid_high_res, prep_tif

In [None]:
def regrid_da(f_source, dir_target, dir_source, dir_out, 
              size_tiles, fill_value=None, olap=1):  
    """Regrid a large source raster onto the target grid.

    Loads ``target_grid.tif`` and the source .tif through ``prep_tif`` and
    passes both to ``regrid_high_res``, which is asked to export as zarr and
    to keep its intermediate files.

    Args:
        f_source (str): File name (without extension) of the source .tif
            file to be regridded; also used as the name passed to prep_tif.
        dir_target (str): Directory containing ``target_grid.tif``.
        dir_source (str): Directory containing the source .tif file.
        dir_out (str): Directory handed to regrid_high_res for its output.
        size_tiles (int or float): Size of the regridding tiles in degrees.
        fill_value (float, optional): Fill value forwarded to the regridding
            routine. Defaults to None.
        olap (int or float, optional): Overlap of the regridding tiles in
            degrees. Defaults to 1.

    Returns:
        None. The result is exported by regrid_high_res (presumably into
        ``dir_out`` - confirm against that function).
    """
    # Directories are expected to end with a slash (plain string concatenation)
    f_target_tif = dir_target + 'target_grid.tif'
    f_source_tif = dir_source + f_source + '.tif'

    # Prepare the target and source data arrays from the tif files
    da_target = prep_tif(f_target_tif, 'target_grid')
    da_source = prep_tif(f_source_tif, f_source)

    # Fixed SLURM and export settings plus the caller's tiling options
    kw_regrid = dict(
        account='bm0891',
        partition='compute',
        size_tiles=size_tiles,
        olap=olap,
        fill_value=fill_value,
        type_export='zarr',
        del_interm=False,
    )

    # Regrid the source array to the target grid
    regrid_high_res(da_target, da_source, dir_out, **kw_regrid)

In [None]:
# Regrid
%time regrid_da('copernicus_land_mask', dir01, dir01, dir01, 15, 127, 0.2)
# takes about 22min

---

### Post-processing

In [None]:
def post_esa_land_mask():
    """Turn the regridded land mask into a boolean mask (True = land).

    Opens ``ds_regridded_copernicus_land_mask.zarr`` in ``dir01``, renames
    its variable to ``copernicus_land_mask``, marks cells with a value above
    50 as True and all others as False, and writes the result to
    ``ds_prep_copernicus_land_mask.zarr``, overwriting an existing store.
    """
    # Read the regridded land-sea mask
    ds_regridded = xr.open_zarr(dir01 + 'ds_regridded_copernicus_land_mask.zarr')

    # Drop the 'regridded_' prefix from the variable name
    ds_mask = ds_regridded.rename(
        {'regridded_copernicus_land_mask': 'copernicus_land_mask'})

    # Threshold for land grid cells: more than 50 (percent land)
    # NOTE(review): the regridding call uses fill_value=127, which would also
    # pass this threshold - confirm that fill cells cannot reach this point
    ds_land = xr.where(ds_mask > 50, True, False)

    # Export as zarr
    ds_land.to_zarr(dir01 + 'ds_prep_copernicus_land_mask.zarr', mode='w')


post_esa_land_mask()

---

### Check

In [None]:
# Plot each prepared mask as a visual check
for name in ['copernicus_land_mask']:
    fig, ax = plt.subplots(figsize=(10, 5), ncols=1, nrows=1)
    # Open the prepared zarr store and pick the variable of the same name
    da_check = xr.open_zarr(dir01 + 'ds_prep_' + name + '.zarr')[name]
    da_check.plot.imshow(ax=ax)
    ax.set_title(name)

---