## Prepare ROMS I4DVar observation file from cortadv5_FilledSST

In [None]:
from dataclasses import dataclass

import numpy as np
import xarray as xr

In [None]:
@dataclass
class Arguments:
    """Notebook configuration: the time window to extract and the files involved."""

    # Bounds of the time window selected from the input data.
    start_time: str = "2007-01-01"
    end_time: str = "2007-01-31"
    # ROMS grid file providing lon_rho / lat_rho / mask_rho.
    input_grid_file: str = "/cluster/projects/nn9490k/ROHO800/Grid/ROHO800_grid_fix5.nc"
    # Source data file containing the 'FilledSST' variable.
    input_data_file: str = "/cluster/projects/nn9297k/ROHO800+/InputData/4dvar/cortadv5_FilledSST.nc"
    # Destination path for the observation file.
    output_obs_file: str = "/cluster/projects/nn9297k/shmiak/roho800_data/input_data/roho800_obs_sst.nc"


# Single configuration instance used by the cells below.
args = Arguments()

In [None]:
# Open the ROMS grid file and the SST data file configured in `args`.
# Both datasets stay open because later cells keep reading from them.
ds_grid = xr.open_dataset(args.input_grid_file)
ds_data = xr.open_dataset(args.input_data_file)

In [None]:
# Display the grid dataset to inspect its variables and dimensions.
ds_grid

In [None]:
# Keep only the requested time window of the input data.
ds_data = ds_data.sel(time=slice(args.start_time, args.end_time))

# Target positions for the interpolation: the rho-point longitudes and
# latitudes of the ROMS grid, keyed by the data's 'lon' / 'lat' names.
coords = dict(lon=ds_grid.lon_rho, lat=ds_grid.lat_rho)

# Interpolate the SST field onto the ROMS grid points.
da_sst = ds_data['FilledSST'].interp(coords=coords)

In [None]:
# Exclude values outside the sea by setting them to NaN.
# Dividing by the mask is not sufficient: a finite SST divided by a zero
# mask value yields +/-inf rather than NaN, and the np.isnan-based filters
# in the following cells would then keep those land points as observations.
# Assumes mask_rho is 1 at sea points and 0 on land (ROMS convention) —
# TODO confirm against the grid file.
da_sst = da_sst.where(ds_grid.mask_rho > 0)

In [None]:
# Display the masked, interpolated SST field.
da_sst

In [None]:
# Boolean mask that is True where the FIRST time step has data (non-NaN).
# NOTE: only time step 0 is inspected; the cells below (obs_time, grid
# indices) rely on every time step having the same set of valid points.
mask = ~np.isnan(da_sst.isel(time=0).values)

# Number of valid grid points in one time step.
points_per_time = int(np.count_nonzero(mask))
print(f"The number of points per time: {points_per_time}")

#### obs_value

In [None]:
# Flatten in C (row-major) order: the last dimension varies fastest and the
# first dimension slowest, so (assuming time is the leading axis — TODO
# confirm) all points of one time step stay contiguous in the result.
np_sst = da_sst.values.ravel(order='C')
# Drop the entries without data.
np_sst = np_sst[np.logical_not(np.isnan(np_sst))]
np_sst.shape

#### obs_type

In [None]:
# One observation-type entry per observation value, all set to 6
# (presumably the ROMS state-variable index for temperature — verify).
# The array takes the shape and dtype of np_sst.
np_type = np.full(np_sst.shape, 6, dtype=np_sst.dtype)
np_type.shape

#### obs_provenance

In [None]:
# Every observation gets the same provenance value, 1; the array matches
# the shape and dtype of np_sst.
np_provenance = np.ones_like(np_sst)
np_provenance.shape

#### obs_time

In [None]:
# Repeat each time stamp once per valid grid point, keeping the repeats of
# one time step contiguous: t0, t0, ..., t1, t1, ...
# This matches the C-order flattening of the SST values when time is the
# leading axis.
np_time = np.repeat(da_sst.time.values, points_per_time)
np_time.shape

#### obs_depth

In [None]:
# Same depth value, 1, for every observation (shape and dtype of np_sst).
# NOTE(review): ROMS appears to read positive obs_depth values as vertical
# grid levels, with level 1 at the bottom — confirm that 1 is the intended
# value for sea-surface temperature.
np_depth = np.full(np_sst.shape, 1, dtype=np_sst.dtype)
np_depth.shape

#### obs_Xgrid and obs_Ygrid

In [None]:
# Indices of the valid (True) points of `mask`. np.where on a 2-D array
# returns (first-axis indices, second-axis indices), in that order.
# NOTE(review): if `mask` is laid out as (eta_rho, xi_rho), then x_idx holds
# eta (ROMS Y-direction) indices and y_idx holds xi (X-direction) indices —
# confirm the names are not swapped before filling obs_Xgrid / obs_Ygrid.
x_idx, y_idx = np.where(mask)

In [None]:
# Display the first-axis indices of the valid points.
x_idx