In [1]:
import numpy as np
import xarray as xr
import time 
import os
from netCDF4 import Dataset
from pyproj import CRS
import rasterio
import rioxarray
from rasterio.enums import Resampling
from sklearn.preprocessing import StandardScaler
import rioxarray

In [3]:
# Open the SINMOD BioStates output for every year this notebook reads. Later cells
# use `biostates_2022` and `biostates_2023` as well as `biostates_2019`, so all
# three are opened here to keep the notebook runnable from a fresh kernel.
# NOTE(review): only the 2019 file name is known for certain; the 2022 and 2023
# names are assumed to follow the same `BioStates_<year>.nc` pattern - confirm.
biostates_dir = '/cluster/projects/itk-SINMOD/coral-mapping/midnor'
filename_biostates_2019 = os.path.join(biostates_dir, 'BioStates_2019.nc')
filename_biostates_2022 = os.path.join(biostates_dir, 'BioStates_2022.nc')
filename_biostates_2023 = os.path.join(biostates_dir, 'BioStates_2023.nc')

biostates_2019 = xr.open_dataset(filename_biostates_2019)
biostates_2022 = xr.open_dataset(filename_biostates_2022)
biostates_2023 = xr.open_dataset(filename_biostates_2023)

In [3]:
# List the variable names of the 2022 BioStates dataset. Requires `biostates_2022`
# to have been opened by an earlier cell.
print(list(biostates_2022.variables))

['time', 'grid_mapping', 'LayerDepths', 'xc', 'yc', 'zc', 'depth', 'DXxDYy', 'nitrate', 'silicate', 'ammonium', 'diatoms', 'flagellates', 'ciliates', 'HNANO', 'bacteria', 'calanus_finmarchicus', 'calanus_glacialis', 'detritus_slow', 'detritus_fast', 'DOC', 'cDOM', 'silicate_detritus', 'sediment_Si', 'sediment_N']


In [4]:
# Nitrate at the last zc index that holds data (presumably the layer nearest the
# sea floor - confirm zc ordering), for every remaining grid point.
nitrate_2022 = biostates_2022['nitrate']
valid_mask = nitrate_2022.notnull()

# argmax on the zc-reversed mask returns the offset of the first valid entry counted
# from the end of zc; subtracting it from the last index gives the original zc index.
reversed_valid_mask = valid_mask.isel(zc=slice(None, None, -1))
bottom_layer_idx_reversed = reversed_valid_mask.argmax(dim="zc")
bottom_layer_idx = (valid_mask.zc.size - 1) - bottom_layer_idx_reversed

# Points with no valid value in any layer get an arbitrary index from argmax, so
# they are explicitly reset to NaN after the selection.
no_valid_data_mask = valid_mask.sum(dim="zc") == 0
nitrate_bottom_layer_2022 = (
    nitrate_2022
    .isel(zc=bottom_layer_idx)
    .where(~no_valid_data_mask, np.nan)
)

In [5]:
# Persist the 2022 bottom-layer nitrate field as NetCDF.
nitrate_bottom_layer_2022.to_netcdf('/cluster/home/maikents/surface_chlorophyll/nitrate_22.nc')

In [5]:
# Silicate at the last zc index that holds data (presumably the layer nearest the
# sea floor - confirm zc ordering), for every remaining grid point.
silicate_2019 = biostates_2019['silicate']
valid_mask = silicate_2019.notnull()

# argmax on the zc-reversed mask returns the offset of the first valid entry counted
# from the end of zc; subtracting it from the last index gives the original zc index.
reversed_valid_mask = valid_mask.isel(zc=slice(None, None, -1))
bottom_layer_idx_reversed = reversed_valid_mask.argmax(dim="zc")
bottom_layer_idx = (valid_mask.zc.size - 1) - bottom_layer_idx_reversed

# Points with no valid value in any layer get an arbitrary index from argmax, so
# they are explicitly reset to NaN after the selection.
no_valid_data_mask = valid_mask.sum(dim="zc") == 0
silicate_bottom_layer_2019 = (
    silicate_2019
    .isel(zc=bottom_layer_idx)
    .where(~no_valid_data_mask, np.nan)
)

In [6]:
# Persist the 2019 bottom-layer silicate field as NetCDF.
silicate_bottom_layer_2019.to_netcdf('/cluster/home/maikents/surface_chlorophyll/silicate_19.nc')

In [8]:
# Sediment nitrogen for 2023. Requires `biostates_2023` to have been opened earlier.
sediment_nitrate_2023 = biostates_2023['sediment_N']
# Keeps every non-null value and writes NaN where the value is already null.
# NOTE(review): for floating-point data this looks like a no-op - confirm it is needed.
sediment_nitrate_2023 = sediment_nitrate_2023.where(sediment_nitrate_2023.notnull(), np.nan)

In [9]:
# Persist the 2023 sediment nitrogen field as NetCDF.
sediment_nitrate_2023.to_netcdf('/cluster/home/maikents/surface_chlorophyll/sediment_nitrate_23.nc')

In [10]:
# Sediment silicate for 2023. Requires `biostates_2023` to have been opened earlier.
sediment_silicate_2023 = biostates_2023['sediment_Si']
# Keeps every non-null value and writes NaN where the value is already null.
# NOTE(review): for floating-point data this looks like a no-op - confirm it is needed.
sediment_silicate_2023 = sediment_silicate_2023.where(sediment_silicate_2023.notnull(), np.nan)

In [11]:
# Persist the 2023 sediment silicate field as NetCDF.
sediment_silicate_2023.to_netcdf('/cluster/home/maikents/surface_chlorophyll/sediment_silicate_23.nc')

In [3]:
# Combine all three years: reopen the per-year sediment nitrogen files from disk.
# The *_19.nc and *_22.nc files must already exist (written by per-year runs of the
# extraction cells).
sediment_nitrate_2019 = xr.open_dataset('/cluster/home/maikents/surface_chlorophyll/sediment_nitrate_19.nc')
sediment_nitrate_2022 = xr.open_dataset('/cluster/home/maikents/surface_chlorophyll/sediment_nitrate_22.nc')
sediment_nitrate_2023 = xr.open_dataset('/cluster/home/maikents/surface_chlorophyll/sediment_nitrate_23.nc')

In [4]:
# Keep only the first 365 time steps of 2022.
# NOTE(review): presumably trims 2022 to the same length as the other years - confirm.
sediment_nitrate_2022 = sediment_nitrate_2022.isel(time=slice(0, 365))

In [5]:
# Stack the three years back to back along `time` and persist the combined series.
sediment_nitrate = xr.concat(
    [sediment_nitrate_2019, sediment_nitrate_2022, sediment_nitrate_2023],
    dim="time",
)

sediment_nitrate.to_netcdf('/cluster/home/maikents/surface_chlorophyll/sediment_nitrate_2019_2022_2023.nc')

In [6]:
# Reopen the per-year sediment silicate files from disk. The *_19.nc and *_22.nc
# files must already exist (written by per-year runs of the extraction cells).
sediment_silicate_2019 = xr.open_dataset('/cluster/home/maikents/surface_chlorophyll/sediment_silicate_19.nc')
sediment_silicate_2022 = xr.open_dataset('/cluster/home/maikents/surface_chlorophyll/sediment_silicate_22.nc')
sediment_silicate_2023 = xr.open_dataset('/cluster/home/maikents/surface_chlorophyll/sediment_silicate_23.nc')

In [7]:
# Keep only the first 365 time steps of 2022.
# NOTE(review): presumably trims 2022 to the same length as the other years - confirm.
sediment_silicate_2022 = sediment_silicate_2022.isel(time=slice(0, 365))

In [8]:
# Stack the three years back to back along `time` and persist the combined series.
sediment_silicate = xr.concat(
    [sediment_silicate_2019, sediment_silicate_2022, sediment_silicate_2023],
    dim="time",
)

sediment_silicate.to_netcdf('/cluster/home/maikents/surface_chlorophyll/sediment_silicate_2019_2022_2023.nc')

In [9]:
# Sanity check: show dimensions and variables of the combined silicate dataset.
print(sediment_silicate)

<xarray.Dataset> Size: 2GB
Dimensions:      (time: 1095, yc: 555, xc: 950)
Coordinates:
  * xc           (xc) float32 4kB 800.0 1.6e+03 2.4e+03 ... 7.592e+05 7.6e+05
  * yc           (yc) float32 2kB 800.0 1.6e+03 2.4e+03 ... 4.432e+05 4.44e+05
Dimensions without coordinates: time
Data variables:
    sediment_Si  (time, yc, xc) float32 2GB nan nan nan ... 59.41 59.48 59.58


In [2]:
# Reload the combined three-year series from disk so the statistics below can be
# computed without rerunning the extraction cells.
sediment_nitrate = xr.open_dataset('/cluster/home/maikents/surface_chlorophyll/sediment_nitrate_2019_2022_2023.nc')
sediment_silicate = xr.open_dataset('/cluster/home/maikents/surface_chlorophyll/sediment_silicate_2019_2022_2023.nc')

In [3]:
# Create per-pixel features across time: mean, 10th percentile and 90th percentile
# (the percentiles stand in for min/max in later cells).

data_var_ni = sediment_nitrate['sediment_N']
time_avg_layer = data_var_ni.mean(dim="time", skipna=True)
time_percentiles = data_var_ni.quantile([0.1, 0.9], dim="time", skipna=True)

# Stack the three layers along a new `stat` dimension. The scalar `quantile`
# coordinate is dropped from each percentile layer before concatenating with the mean.
stats_array = (
    xr.concat(
        [
            time_avg_layer,
            *(time_percentiles.sel(quantile=q).drop_vars("quantile") for q in (0.1, 0.9)),
        ],
        dim="stat",
    )
    .rename("sediment_nitrate_features")
    .assign_coords(stat=["mean", "10th_percentile", "90th_percentile"])
)

stats_array.to_netcdf('/cluster/home/maikents/surface_chlorophyll/sediment_nitrate_2019_2022_2023_features.nc', mode='w')

  return function_base._ureduce(a,


In [4]:
# Per-pixel silicate features across time: mean, 10th percentile and 90th percentile.
data_var_si = sediment_silicate['sediment_Si']
time_avg_layer = data_var_si.mean(dim="time", skipna=True)
time_percentiles = data_var_si.quantile([0.1, 0.9], dim="time", skipna=True)

# Stack the three layers along a new `stat` dimension. The scalar `quantile`
# coordinate is dropped from each percentile layer before concatenating with the mean.
stats_array = (
    xr.concat(
        [
            time_avg_layer,
            *(time_percentiles.sel(quantile=q).drop_vars("quantile") for q in (0.1, 0.9)),
        ],
        dim="stat",
    )
    .rename("sediment_silicate_features")
    .assign_coords(stat=["mean", "10th_percentile", "90th_percentile"])
)

stats_array.to_netcdf('/cluster/home/maikents/surface_chlorophyll/sediment_silicate_2019_2022_2023_features.nc', mode='w')

  return function_base._ureduce(a,


In [2]:
#Transform to emod grid
# Reload the mean / 10th / 90th percentile feature stacks written by the statistics cells.
nitrate = xr.open_dataset('/cluster/home/maikents/surface_chlorophyll/sediment_nitrate_2019_2022_2023_features.nc')
silicate = xr.open_dataset('/cluster/home/maikents/surface_chlorophyll/sediment_silicate_2019_2022_2023_features.nc')

In [4]:
def obtain_sinmod_crs(PhysStates_data):
    """Build the SINMOD grid CRS from a dataset's ``grid_mapping`` attributes.

    Args:
        PhysStates_data: Dataset containing a ``grid_mapping`` variable whose
            attributes hold the stereographic projection parameters.

    Returns:
        A ``pyproj.CRS`` created from a PROJ.4 stereographic definition.

    Raises:
        KeyError: If ``grid_mapping`` or one of the required projection
            attributes is missing.
    """
    grid_attrs = PhysStates_data['grid_mapping'].attrs

    # The resolution is informational only; fall back to 'unknown' when absent.
    resolution = grid_attrs.get('horizontal_resolution', 'unknown')
    print(f"\nHorizontal resolution: {resolution} meters")

    # Assemble the PROJ.4 definition piece by piece, then join with single spaces.
    proj4_parts = [
        "+proj=stere",
        f"+lat_0={grid_attrs['latitude_of_projection_origin']}",
        f"+lat_ts={grid_attrs['standard_parallel']}",
        f"+lon_0={grid_attrs['straight_vertical_longitude_from_pole']}",
        f"+x_0={grid_attrs['false_easting']}",
        f"+y_0={grid_attrs['false_northing']}",
        f"+a={grid_attrs['semi_major_axis']}",
        f"+b={grid_attrs['semi_minor_axis']}",
        "+units=m +no_defs",
    ]
    crs_sinmod = CRS.from_proj4(" ".join(proj4_parts))

    print(f"\nSINMOD CRS: {crs_sinmod}")
    return crs_sinmod

# Derive the CRS from the 2019 BioStates file. Requires `biostates_2019` to be
# loaded in the current kernel.
midnor_crs = obtain_sinmod_crs(biostates_2019)



Horizontal resolution: 800.0 meters

SINMOD CRS: +proj=stere +lat_0=90.0 +lat_ts=60.0 +lon_0=58.0 +x_0=2544800.0 +y_0=1918800.0 +a=6370000.0 +b=6370000.0 +units=m +no_defs +type=crs


In [5]:
#Attach the crs to the SINMOD dataset
# rio.write_crs returns datasets tagged with the SINMOD CRS so they can be reprojected.
nitrate_sinmod = nitrate.rio.write_crs(midnor_crs)
silicate_sinmod = silicate.rio.write_crs(midnor_crs)

In [10]:
#4: Align the SINMOD data with the bathymetry

# Bathymetry GeoTIFF whose grid the SINMOD features are reprojected onto.
tif_file = '/cluster/projects/itk-SINMOD/coral-mapping/data/raw_data/EMOD-tifs/bathymetry_32N_Clip_sample.tif'

def align_SINMOD_and_bathymetry(SINMOD_features, tif_file, resampling=Resampling.bilinear, feature_var=None):
    """Reproject SINMOD features onto the bathymetry grid and crop both to the valid SINMOD extent.

    Args:
        SINMOD_features: Dataset with a CRS attached through rioxarray, holding
            the feature variable to align.
        tif_file: Path to the bathymetry GeoTIFF that defines the target grid.
        resampling: ``rasterio.enums.Resampling`` method passed to
            ``reproject_match``.
        feature_var: Name of the data variable whose first layer decides where
            SINMOD has data. When ``None`` (default) the first data variable
            that has both ``x`` and ``y`` dimensions after reprojection is
            used, so the function works for nitrate and silicate datasets
            alike instead of only for ``'sediment_silicate_features'``.

    Returns:
        Tuple ``(clipped_SINMOD_features, clipped_tif)``: the reprojected
        features cropped to the bounding box of valid data, and the bathymetry
        masked to valid SINMOD pixels and clipped to (almost) the same box.

    Raises:
        ValueError: If ``feature_var`` is ``None`` and the reprojected dataset
            has no data variable with ``x`` and ``y`` dimensions.
        KeyError: If ``feature_var`` is given but not present in the dataset.
    """
    with rioxarray.open_rasterio(tif_file) as tif:

        # Work on a 2-D raster: drop the band dimension when present.
        if 'band' in tif.dims:
            tif = tif.isel(band=0)

        SINMOD_features_reprojected = SINMOD_features.rio.reproject_match(tif, resampling=resampling)

        print(SINMOD_features_reprojected.rio.bounds())

        if feature_var is None:
            spatial_vars = [
                name
                for name, variable in SINMOD_features_reprojected.data_vars.items()
                if {"x", "y"}.issubset(variable.dims)
            ]
            if not spatial_vars:
                raise ValueError(
                    "No data variable with 'x' and 'y' dimensions found; pass feature_var explicitly."
                )
            feature_var = spatial_vars[0]

        # The first layer along the leading dimension defines where SINMOD has data.
        reference_layer = SINMOD_features_reprojected[feature_var][0]
        valid_mask = reference_layer.notnull()

        #Apply the mask to the EMOD data
        tif = tif.where(valid_mask, np.nan)

        # Bounding box of valid data: first/last column and row containing any value.
        valid_columns = valid_mask.any(dim="y")
        min_col = valid_columns.argmax().item()
        max_col = valid_columns.shape[0] - valid_columns[::-1].argmax().item() - 1

        valid_rows = valid_mask.any(dim="x")
        min_row = valid_rows.argmax().item()
        max_row = valid_rows.shape[0] - valid_rows[::-1].argmax().item() - 1

        #Slice the raster to the bounding box of valid data
        clipped_SINMOD_features = SINMOD_features_reprojected.isel(
            x=slice(min_col, max_col + 1), y=slice(min_row, max_row + 1)
        )

        # Compute the bounds once instead of once per clip_box argument.
        minx, miny, maxx, maxy = clipped_SINMOD_features.rio.bounds()
        # NOTE(review): the +1 offset on minx is kept from the original code; presumably
        # it stops clip_box from picking up an extra column on the left edge - confirm.
        clipped_tif = tif.rio.clip_box(minx=minx + 1, miny=miny, maxx=maxx, maxy=maxy)

        clipped_tif = clipped_tif.reset_coords(drop=True)

    return clipped_SINMOD_features, clipped_tif

In [9]:
# Reproject the nitrate features onto the bathymetry grid. `tif` is reassigned by
# the silicate call in the following cell.
nitrate_features_reprojected, tif = align_SINMOD_and_bathymetry(nitrate_sinmod, tif_file)

(-148857.58868623473, 6580002.847672726, 866884.4113137652, 7621679.847672726)


In [11]:
# Reproject the silicate features onto the bathymetry grid.
silicate_features_reprojected, tif = align_SINMOD_and_bathymetry(silicate_sinmod, tif_file)

(-148857.58868623473, 6580002.847672726, 866884.4113137652, 7621679.847672726)


In [13]:
# Persist the reprojected, cropped nitrate features.
nitrate_features_reprojected.to_netcdf('/cluster/home/maikents/surface_chlorophyll/nitrate_features_reprojected_features_emod_grid.nc')

In [14]:
# Persist the reprojected, cropped silicate features.
silicate_features_reprojected.to_netcdf('/cluster/home/maikents/surface_chlorophyll/silicate_features_reprojected_features_emod_grid.nc')

In [2]:
#Standardize
# Reload the reprojected feature stacks from disk as input to the standardisation cells.
nitrate_features_reprojected = xr.open_dataset('/cluster/home/maikents/surface_chlorophyll/nitrate_features_reprojected_features_emod_grid.nc')
silicate_features_reprojected = xr.open_dataset('/cluster/home/maikents/surface_chlorophyll/silicate_features_reprojected_features_emod_grid.nc')

In [3]:
# Split the nitrate feature stack by position along `stat`: index 0 is the mean,
# index 1 the 10th percentile ("min") and index 2 the 90th percentile ("max").
nitrate_mean, nitrate_min, nitrate_max = (
    nitrate_features_reprojected['sediment_nitrate_features'].isel(stat=stat_idx)
    for stat_idx in range(3)
)

In [4]:
# Split the silicate feature stack by position along `stat`: index 0 is the mean,
# index 1 the 10th percentile ("min") and index 2 the 90th percentile ("max").
silicate_mean, silicate_min, silicate_max = (
    silicate_features_reprojected['sediment_silicate_features'].isel(stat=stat_idx)
    for stat_idx in range(3)
)

In [5]:
#Mean nitrate
# Standardise the time-mean nitrate layer (stat index 0) and store it in a new
# dataset, `nitrate_standardized`, that the min/max cells also write into.
# NOTE(review): the layer is 2-D, so reshape(shape[0], -1) leaves its shape unchanged
# and the loop fits a separate scaler for every raster column. If one mean/std over
# all valid pixels was intended, this needs changing - confirm.

nitrate_data_mean = nitrate_mean.astype(np.float32)

reshaped_nitrate_data_mean = nitrate_data_mean.values.reshape(nitrate_data_mean.shape[0], -1)

mask = np.isnan(reshaped_nitrate_data_mean)

nitrate_scaler = StandardScaler()
for i in range(reshaped_nitrate_data_mean.shape[1]):
    valid = ~mask[:, i]
    # Skip columns without any data; NaN cells stay NaN in the others.
    if valid.any():
        reshaped_nitrate_data_mean[valid, i] = nitrate_scaler.fit_transform(
            reshaped_nitrate_data_mean[valid, i].reshape(-1, 1)
        ).ravel()

reshaped_nitrate_data_mean_scaled = reshaped_nitrate_data_mean.reshape(nitrate_data_mean.shape)

nitrate_data_mean_standardized = xr.DataArray(reshaped_nitrate_data_mean_scaled, dims=nitrate_mean.dims, attrs=nitrate_mean.attrs)

print(f"Standardised nitrate mean - Mean: {nitrate_data_mean_standardized.mean():.2f}, Std Dev: {nitrate_data_mean_standardized.std():.2f}")

# Deep copy so the in-place write below cannot reach the arrays of
# nitrate_features_reprojected (a shallow copy may share them).
nitrate_standardized = nitrate_features_reprojected.copy(deep=True)

nitrate_standardized['sediment_nitrate_features'].data[0, :, :] = nitrate_data_mean_standardized.data


Standardised nitrate mean - Mean: 0.00, Std Dev: 1.00


In [6]:
#Min nitrate
# Standardise the "min" nitrate layer (stat index 1, i.e. the 10th percentile).
# NOTE(review): the layer is 2-D, so reshape(shape[0], -1) leaves its shape unchanged
# and the loop fits a separate scaler for every raster column. If one mean/std over
# all valid pixels was intended, this needs changing - confirm.

nitrate_data_min = nitrate_min.astype(np.float32)

reshaped_nitrate_data_min = nitrate_data_min.values.reshape(nitrate_data_min.shape[0], -1)

mask = np.isnan(reshaped_nitrate_data_min)

nitrate_scaler = StandardScaler()
for i in range(reshaped_nitrate_data_min.shape[1]):
    valid = ~mask[:, i]
    # Skip columns without any data; NaN cells stay NaN in the others.
    if valid.any():
        reshaped_nitrate_data_min[valid, i] = nitrate_scaler.fit_transform(
            reshaped_nitrate_data_min[valid, i].reshape(-1, 1)
        ).ravel()

reshaped_nitrate_data_min_scaled = reshaped_nitrate_data_min.reshape(nitrate_data_min.shape)

nitrate_data_min_standardized = xr.DataArray(reshaped_nitrate_data_min_scaled, dims=nitrate_min.dims, attrs=nitrate_min.attrs)

print(f"Standardised nitrate min - Mean: {nitrate_data_min_standardized.mean():.2f}, Std Dev: {nitrate_data_min_standardized.std():.2f}")

# Write into the dataset created by the mean-nitrate cell (run that cell first).
# Re-copying nitrate_features_reprojected here would rebind nitrate_standardized to
# a fresh copy and discard the standardised mean layer written earlier.
nitrate_standardized['sediment_nitrate_features'].data[1, :, :] = nitrate_data_min_standardized.data


Standardised nitrate min - Mean: -0.00, Std Dev: 1.00


In [7]:
#Max nitrate
# Standardise the "max" nitrate layer (stat index 2, i.e. the 90th percentile).
# NOTE(review): the layer is 2-D, so reshape(shape[0], -1) leaves its shape unchanged
# and the loop fits a separate scaler for every raster column. If one mean/std over
# all valid pixels was intended, this needs changing - confirm.

nitrate_data_max = nitrate_max.astype(np.float32)

reshaped_nitrate_data_max = nitrate_data_max.values.reshape(nitrate_data_max.shape[0], -1)

mask = np.isnan(reshaped_nitrate_data_max)

nitrate_scaler = StandardScaler()
for i in range(reshaped_nitrate_data_max.shape[1]):
    valid = ~mask[:, i]
    # Skip columns without any data; NaN cells stay NaN in the others.
    if valid.any():
        reshaped_nitrate_data_max[valid, i] = nitrate_scaler.fit_transform(
            reshaped_nitrate_data_max[valid, i].reshape(-1, 1)
        ).ravel()

reshaped_nitrate_data_max_scaled = reshaped_nitrate_data_max.reshape(nitrate_data_max.shape)

nitrate_data_max_standardized = xr.DataArray(reshaped_nitrate_data_max_scaled, dims=nitrate_max.dims, attrs=nitrate_max.attrs)

print(f"Standardised nitrate max - Mean: {nitrate_data_max_standardized.mean():.2f}, Std Dev: {nitrate_data_max_standardized.std():.2f}")

# Write into the existing nitrate_standardized dataset (run the mean/min cells first).
# Re-copying nitrate_features_reprojected here would rebind nitrate_standardized to
# a fresh copy and discard the layers standardised by the earlier cells, so the
# saved file would not have all three layers standardised.
nitrate_standardized['sediment_nitrate_features'].data[2, :, :] = nitrate_data_max_standardized.data


Standardised nitrate max - Mean: -0.00, Std Dev: 1.00


In [8]:
# Persist the standardised nitrate feature stack.
nitrate_standardized.to_netcdf('/cluster/home/maikents/surface_chlorophyll/nitrate_2019_2022_2023_features_emod_grid_standardized.nc')

In [9]:
#Mean silicate
# Standardise the time-mean silicate layer (stat index 0) and store it in a new
# dataset, `silicate_standardized`, that the min/max cells also write into.
# NOTE(review): the layer is 2-D, so reshape(shape[0], -1) leaves its shape unchanged
# and the loop fits a separate scaler for every raster column. If one mean/std over
# all valid pixels was intended, this needs changing - confirm.

silicate_data_mean = silicate_mean.astype(np.float32)

reshaped_silicate_data_mean = silicate_data_mean.values.reshape(silicate_data_mean.shape[0], -1)

mask = np.isnan(reshaped_silicate_data_mean)

silicate_scaler = StandardScaler()
for i in range(reshaped_silicate_data_mean.shape[1]):
    valid = ~mask[:, i]
    # Skip columns without any data; NaN cells stay NaN in the others.
    if valid.any():
        reshaped_silicate_data_mean[valid, i] = silicate_scaler.fit_transform(
            reshaped_silicate_data_mean[valid, i].reshape(-1, 1)
        ).ravel()

reshaped_silicate_data_mean_scaled = reshaped_silicate_data_mean.reshape(silicate_data_mean.shape)

silicate_data_mean_standardized = xr.DataArray(reshaped_silicate_data_mean_scaled, dims=silicate_mean.dims, attrs=silicate_mean.attrs)

print(f"Standardised silicate mean - Mean: {silicate_data_mean_standardized.mean():.2f}, Std Dev: {silicate_data_mean_standardized.std():.2f}")

# Deep copy so the in-place write below cannot reach the arrays of
# silicate_features_reprojected (a shallow copy may share them).
silicate_standardized = silicate_features_reprojected.copy(deep=True)

silicate_standardized['sediment_silicate_features'].data[0, :, :] = silicate_data_mean_standardized.data


Standardised silicate mean - Mean: -0.00, Std Dev: 1.00


In [10]:
#Min silicate
# Standardise the "min" silicate layer (stat index 1, i.e. the 10th percentile).
# NOTE(review): the layer is 2-D, so reshape(shape[0], -1) leaves its shape unchanged
# and the loop fits a separate scaler for every raster column. If one mean/std over
# all valid pixels was intended, this needs changing - confirm.

silicate_data_min = silicate_min.astype(np.float32)

reshaped_silicate_data_min = silicate_data_min.values.reshape(silicate_data_min.shape[0], -1)

mask = np.isnan(reshaped_silicate_data_min)

silicate_scaler = StandardScaler()
for i in range(reshaped_silicate_data_min.shape[1]):
    valid = ~mask[:, i]
    # Skip columns without any data; NaN cells stay NaN in the others.
    if valid.any():
        reshaped_silicate_data_min[valid, i] = silicate_scaler.fit_transform(
            reshaped_silicate_data_min[valid, i].reshape(-1, 1)
        ).ravel()

reshaped_silicate_data_min_scaled = reshaped_silicate_data_min.reshape(silicate_data_min.shape)

silicate_data_min_standardized = xr.DataArray(reshaped_silicate_data_min_scaled, dims=silicate_min.dims, attrs=silicate_min.attrs)

print(f"Standardised silicate min - Mean: {silicate_data_min_standardized.mean():.2f}, Std Dev: {silicate_data_min_standardized.std():.2f}")

# Write into the dataset created by the mean-silicate cell (run that cell first).
# Re-copying silicate_features_reprojected here would rebind silicate_standardized to
# a fresh copy and discard the standardised mean layer written earlier.
silicate_standardized['sediment_silicate_features'].data[1, :, :] = silicate_data_min_standardized.data


Standardised silicate min - Mean: -0.00, Std Dev: 1.00


In [11]:
#Max silicate
# Standardise the "max" silicate layer (stat index 2, i.e. the 90th percentile).
# NOTE(review): the layer is 2-D, so reshape(shape[0], -1) leaves its shape unchanged
# and the loop fits a separate scaler for every raster column. If one mean/std over
# all valid pixels was intended, this needs changing - confirm.

silicate_data_max = silicate_max.astype(np.float32)

reshaped_silicate_data_max = silicate_data_max.values.reshape(silicate_data_max.shape[0], -1)

mask = np.isnan(reshaped_silicate_data_max)

silicate_scaler = StandardScaler()
for i in range(reshaped_silicate_data_max.shape[1]):
    valid = ~mask[:, i]
    # Skip columns without any data; NaN cells stay NaN in the others.
    if valid.any():
        reshaped_silicate_data_max[valid, i] = silicate_scaler.fit_transform(
            reshaped_silicate_data_max[valid, i].reshape(-1, 1)
        ).ravel()

reshaped_silicate_data_max_scaled = reshaped_silicate_data_max.reshape(silicate_data_max.shape)

silicate_data_max_standardized = xr.DataArray(reshaped_silicate_data_max_scaled, dims=silicate_max.dims, attrs=silicate_max.attrs)

print(f"Standardised silicate max - Mean: {silicate_data_max_standardized.mean():.2f}, Std Dev: {silicate_data_max_standardized.std():.2f}")

# Write into the existing silicate_standardized dataset (run the mean/min cells first).
# Re-copying silicate_features_reprojected here would rebind silicate_standardized to
# a fresh copy and discard the layers standardised by the earlier cells, so the
# saved file would not have all three layers standardised.
silicate_standardized['sediment_silicate_features'].data[2, :, :] = silicate_data_max_standardized.data

Standardised silicate max - Mean: -0.00, Std Dev: 1.00


In [12]:
# Persist the standardised silicate feature stack.
silicate_standardized.to_netcdf('/cluster/home/maikents/surface_chlorophyll/silicate_2019_2022_2023_features_emod_grid_standardized.nc')