In [1]:
import numpy as np
import xarray as xr
import time 
import os
from netCDF4 import Dataset
from pyproj import CRS
import rasterio
import rioxarray
from rasterio.enums import Resampling
from sklearn.preprocessing import StandardScaler
import rioxarray

In [7]:
# Path to the 2023 BioStates model output on the cluster file system.
filename_biostates_2023 = (
    '/cluster/projects/itk-SINMOD/coral-mapping/midnor/BioStates_2023.nc'
)
# Open the file as an xarray Dataset; later cells pull the sediment variables from it.
biostates_2023 = xr.open_dataset(filename_or_obj=filename_biostates_2023)

In [3]:
# Show the names of every variable stored in the 2022 dataset.
# NOTE(review): `biostates_2022` is not opened by any cell shown in this notebook;
# it has to exist in the kernel already for this cell to run.
print([*biostates_2022.variables])

['time', 'grid_mapping', 'LayerDepths', 'xc', 'yc', 'zc', 'depth', 'DXxDYy', 'nitrate', 'silicate', 'ammonium', 'diatoms', 'flagellates', 'ciliates', 'HNANO', 'bacteria', 'calanus_finmarchicus', 'calanus_glacialis', 'detritus_slow', 'detritus_fast', 'DOC', 'cDOM', 'silicate_detritus', 'sediment_Si', 'sediment_N']


In [4]:
# Extract, for every remaining index (all dims except zc), the nitrate value in the
# last non-NaN layer along `zc`.
# NOTE(review): `biostates_2022` is not opened by any cell shown in this notebook.
nitrate_2022 = biostates_2022['nitrate']

# True wherever nitrate holds a value.
valid_mask = nitrate_2022.notnull()

# Flip `zc` so that argmax (index of the first True) locates the LAST valid layer
# of the original ordering.
reversed_valid_mask = valid_mask.isel(zc=slice(None, None, -1))
bottom_layer_idx_reversed = reversed_valid_mask.argmax(dim="zc")

# Translate the index in the flipped array back to an index in the original array.
bottom_layer_idx = (valid_mask.sizes["zc"] - 1) - bottom_layer_idx_reversed

# Columns with no valid layer at all: argmax returns 0 for them, so they are
# explicitly set to NaN after the lookup.
no_valid_data_mask = ~valid_mask.any(dim="zc")

nitrate_bottom_layer_2022 = (
    nitrate_2022
    .isel(zc=bottom_layer_idx)
    .where(~no_valid_data_mask, np.nan)
)

In [5]:
# Persist the 2022 bottom-layer nitrate field to NetCDF.
nitrate_bottom_layer_2022.to_netcdf(
    path='/cluster/home/maikents/surface_chlorophyll/nitrate_22.nc'
)

In [5]:
# Extract, for every remaining index (all dims except zc), the silicate value in the
# last non-NaN layer along `zc`.
# NOTE(review): `biostates_2019` is not opened by any cell shown in this notebook.
silicate_2019 = biostates_2019['silicate']

# True wherever silicate holds a value.
valid_mask = silicate_2019.notnull()

# Flip `zc` so that argmax (index of the first True) locates the LAST valid layer
# of the original ordering.
reversed_valid_mask = valid_mask.isel(zc=slice(None, None, -1))
bottom_layer_idx_reversed = reversed_valid_mask.argmax(dim="zc")

# Translate the index in the flipped array back to an index in the original array.
bottom_layer_idx = (valid_mask.sizes["zc"] - 1) - bottom_layer_idx_reversed

# Columns with no valid layer at all: argmax returns 0 for them, so they are
# explicitly set to NaN after the lookup.
no_valid_data_mask = ~valid_mask.any(dim="zc")

silicate_bottom_layer_2019 = (
    silicate_2019
    .isel(zc=bottom_layer_idx)
    .where(~no_valid_data_mask, np.nan)
)

In [6]:
# Persist the 2019 bottom-layer silicate field to NetCDF.
silicate_bottom_layer_2019.to_netcdf(
    path='/cluster/home/maikents/surface_chlorophyll/silicate_19.nc'
)

In [8]:
# Pull the sediment nitrogen variable out of the 2023 dataset.
sediment_nitrate_2023 = biostates_2023['sediment_N']
# Keep non-null values and fill the remaining (already null) cells with NaN;
# the values themselves are left unchanged by this step.
sediment_nitrate_2023 = sediment_nitrate_2023.where(
    sediment_nitrate_2023.notnull(), np.nan
)

In [9]:
# Persist the 2023 sediment nitrogen field to NetCDF.
sediment_nitrate_2023.to_netcdf(
    path='/cluster/home/maikents/surface_chlorophyll/sediment_nitrate_23.nc'
)

In [10]:
# Pull the sediment silicate variable out of the 2023 dataset.
sediment_silicate_2023 = biostates_2023['sediment_Si']
# Keep non-null values and fill the remaining (already null) cells with NaN;
# the values themselves are left unchanged by this step.
sediment_silicate_2023 = sediment_silicate_2023.where(
    sediment_silicate_2023.notnull(), np.nan
)

In [11]:
# Persist the 2023 sediment silicate field to NetCDF.
sediment_silicate_2023.to_netcdf(
    path='/cluster/home/maikents/surface_chlorophyll/sediment_silicate_23.nc'
)

In [3]:
# Combine all three years: start by opening the per-year sediment nitrogen files.
# NOTE(review): only the `_23` file is written by a cell shown in this notebook;
# the `_19` and `_22` files must already exist on disk.
sediment_nitrate_2019 = xr.open_dataset(
    '/cluster/home/maikents/surface_chlorophyll/sediment_nitrate_19.nc'
)
sediment_nitrate_2022 = xr.open_dataset(
    '/cluster/home/maikents/surface_chlorophyll/sediment_nitrate_22.nc'
)
sediment_nitrate_2023 = xr.open_dataset(
    '/cluster/home/maikents/surface_chlorophyll/sediment_nitrate_23.nc'
)

In [4]:
# Keep only the first 365 entries along `time` for 2022.
# NOTE(review): presumably trims the year to 365 daily steps so it matches the
# other years -- confirm against the source data.
sediment_nitrate_2022 = sediment_nitrate_2022.isel({"time": slice(365)})

In [5]:
# Stack the three yearly datasets end to end along `time` (2019, then 2022, then 2023).
sediment_nitrate = xr.concat(
    [
        sediment_nitrate_2019,
        sediment_nitrate_2022,
        sediment_nitrate_2023,
    ],
    dim="time",
)

# Write the combined dataset to a single NetCDF file.
sediment_nitrate.to_netcdf(
    path='/cluster/home/maikents/surface_chlorophyll/sediment_nitrate_2019_2022_2023.nc'
)

In [6]:
# Open the per-year sediment silicate files.
# NOTE(review): only the `_23` file is written by a cell shown in this notebook;
# the `_19` and `_22` files must already exist on disk.
sediment_silicate_2019 = xr.open_dataset(
    '/cluster/home/maikents/surface_chlorophyll/sediment_silicate_19.nc'
)
sediment_silicate_2022 = xr.open_dataset(
    '/cluster/home/maikents/surface_chlorophyll/sediment_silicate_22.nc'
)
sediment_silicate_2023 = xr.open_dataset(
    '/cluster/home/maikents/surface_chlorophyll/sediment_silicate_23.nc'
)

In [7]:
# Keep only the first 365 entries along `time` for 2022.
# NOTE(review): presumably trims the year to 365 daily steps so it matches the
# other years -- confirm against the source data.
sediment_silicate_2022 = sediment_silicate_2022.isel({"time": slice(365)})

In [8]:
# Stack the three yearly datasets end to end along `time` (2019, then 2022, then 2023).
sediment_silicate = xr.concat(
    [
        sediment_silicate_2019,
        sediment_silicate_2022,
        sediment_silicate_2023,
    ],
    dim="time",
)

# Write the combined dataset to a single NetCDF file.
sediment_silicate.to_netcdf(
    path='/cluster/home/maikents/surface_chlorophyll/sediment_silicate_2019_2022_2023.nc'
)

In [9]:
# Inspect the combined silicate dataset (dimensions, coordinates, variables).
print(str(sediment_silicate))

<xarray.Dataset> Size: 2GB
Dimensions:      (time: 1095, yc: 555, xc: 950)
Coordinates:
  * xc           (xc) float32 4kB 800.0 1.6e+03 2.4e+03 ... 7.592e+05 7.6e+05
  * yc           (yc) float32 2kB 800.0 1.6e+03 2.4e+03 ... 4.432e+05 4.44e+05
Dimensions without coordinates: time
Data variables:
    sediment_Si  (time, yc, xc) float32 2GB nan nan nan ... 59.41 59.48 59.58


In [2]:
# Re-open the combined three-year files written earlier, nitrate first, then silicate.
sediment_nitrate, sediment_silicate = (
    xr.open_dataset(combined_path)
    for combined_path in (
        '/cluster/home/maikents/surface_chlorophyll/sediment_nitrate_2019_2022_2023.nc',
        '/cluster/home/maikents/surface_chlorophyll/sediment_silicate_2019_2022_2023.nc',
    )
)

In [3]:
# Build per-grid-cell summary features across time for sediment nitrogen:
# the mean, the 10th percentile and the 90th percentile (NaNs skipped).

data_var_ni = sediment_nitrate['sediment_N']
time_avg_layer = data_var_ni.mean(dim="time", skipna=True)
time_percentiles = data_var_ni.quantile([0.1, 0.9], dim="time", skipna=True)

# Stack the three layers along a new `stat` dimension. The scalar `quantile`
# coordinate is dropped from each percentile slice before concatenating.
stats_array = xr.concat(
    [
        time_avg_layer,
        *(
            time_percentiles.sel(quantile=level).drop_vars("quantile")
            for level in (0.1, 0.9)
        ),
    ],
    dim="stat",
)
stats_array = stats_array.rename("sediment_nitrate_features")

# Label the entries of `stat` in the same order as they were stacked.
stats_array = stats_array.assign_coords(
    stat=["mean", "10th_percentile", "90th_percentile"]
)

stats_array.to_netcdf(
    '/cluster/home/maikents/surface_chlorophyll/sediment_nitrate_2019_2022_2023_features.nc',
    mode='w',
)

  return function_base._ureduce(a,


In [4]:
# Build per-grid-cell summary features across time for sediment silicate:
# the mean, the 10th percentile and the 90th percentile (NaNs skipped).
data_var_si = sediment_silicate['sediment_Si']
time_avg_layer = data_var_si.mean(dim="time", skipna=True)
time_percentiles = data_var_si.quantile([0.1, 0.9], dim="time", skipna=True)

# Stack the three layers along a new `stat` dimension. The scalar `quantile`
# coordinate is dropped from each percentile slice before concatenating.
stats_array = xr.concat(
    [
        time_avg_layer,
        *(
            time_percentiles.sel(quantile=level).drop_vars("quantile")
            for level in (0.1, 0.9)
        ),
    ],
    dim="stat",
)
stats_array = stats_array.rename("sediment_silicate_features")

# Label the entries of `stat` in the same order as they were stacked.
stats_array = stats_array.assign_coords(
    stat=["mean", "10th_percentile", "90th_percentile"]
)

stats_array.to_netcdf(
    '/cluster/home/maikents/surface_chlorophyll/sediment_silicate_2019_2022_2023_features.nc',
    mode='w',
)

  return function_base._ureduce(a,


In [None]:
# (Scratch cell left intentionally empty. It previously held a stray `l`, which
# raised NameError when the notebook was run top to bottom.)