In [None]:
from pathlib import Path

import numpy as np
import xarray as xr
from siphon.catalog import TDSCatalog

from atm_forcing import CF_ROMS, generate_catalog_urls, get_ds

### Preprocess and download

In [None]:
# Target grid axes that are passed to get_ds below (presumably latitude and
# longitude in degrees, 0.02 spacing -- confirm against atm_forcing.get_ds).
# NOTE(review): with a non-integer step, np.arange may or may not emit a value
# close to `stop` because of floating-point rounding; check the array lengths
# if the exact extent of the grid matters.
LAT_NEW = np.arange(start=58.9, stop=60, step=0.02)
LON_NEW = np.arange(start=10.1, stop=11.1, step=0.02)

In [None]:
# Walk through the catalog URLs from generate_catalog_urls(), pass every entry
# through get_ds together with the LAT_NEW / LON_NEW axes, and write one NetCDF
# file per calendar day (named YYYYMMDD.nc). Days whose file already exists are
# skipped, so the cell can simply be re-run after an interruption.
FILES_PER_DAY = 4  # there should be 4 files per day.

output_dir = Path.home() / "FjordSim_data" / "NORA3"
# to_netcdf does not create missing directories; create the target up front so
# a fresh machine does not fail only after the first (slow) downloads.
output_dir.mkdir(parents=True, exist_ok=True)

regridder = None  # handed to get_ds and replaced by what it returns, i.e. carried across iterations
dss = []  # processed datasets of the day that is currently being assembled
timestamps = []  # YYYYMMDD stamp of every entry in dss
parameters = [x[0] for x in CF_ROMS]  # first element of each CF_ROMS entry = variable to keep
for date_and_time, catalog_url in generate_catalog_urls():
    timestamp = date_and_time.strftime("%Y%m%d")
    file_path = output_dir / f"{timestamp}.nc"
    if file_path.exists():
        print("File exists:", file_path)
        continue
    print(f"Processing: {date_and_time}.")
    catalog = TDSCatalog(catalog_url)
    # Only catalog datasets whose name contains "_fp" are used; they are read via OPeNDAP.
    urls = [
        dataset.access_urls["opendap"]
        for name, dataset in catalog.datasets.items()
        if "_fp" in name
    ]
    ds_remote = xr.open_mfdataset(urls, combine="by_coords", compat="no_conflicts", data_vars="all")
    regridder, ds_processed = get_ds(regridder, ds_remote[parameters], LAT_NEW, LON_NEW)
    dss.append(ds_processed)
    timestamps.append(timestamp)
    if len(dss) >= FILES_PER_DAY:
        # All collected pieces must belong to the same day, otherwise the file
        # named after the current timestamp would contain data of another day.
        assert len(set(timestamps)) <= 1, (
            f"Refusing to merge datasets from different days: {sorted(set(timestamps))}"
        )
        ds_day = xr.combine_by_coords(dss, coords=["time"], join="outer")
        print(f"Saving to {file_path}.")
        # Written uncompressed; to compress, pass
        # encoding={var: {"zlib": True, "complevel": 5} for var in ds_day.data_vars}.
        ds_day.to_netcdf(file_path)
        print("Saving done.")
        dss = []
        timestamps = []

if dss:
    # The loop ended in the middle of a day; nothing was written for it.
    print(
        f"Warning: {len(dss)} dataset(s) for {sorted(set(timestamps))} were not saved "
        f"(fewer than {FILES_PER_DAY} files collected)."
    )

### Checkup

In [None]:
# Glob that matches only the daily files (eight digits + ".nc") in the NORA3
# output directory; other NetCDF files stored there are not picked up.
daily_files_glob = Path.home().joinpath(
    "FjordSim_data",
    "NORA3",
    "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9].nc",
)
pattern = str(daily_files_glob)
# Open all matching files as a single dataset, merged by their coordinates.
ds = xr.open_mfdataset(pattern, combine="by_coords")

In [None]:
# Show the combined dataset as the cell output for a quick visual inspection.
ds

In [None]:
# Flag every pair of consecutive time stamps that is not exactly one hour
# apart, then print only the flagged entries (drop=True removes the rest).
expected_step = np.timedelta64(1, "h")
time_increments = ds.time.diff("time")
bad_steps = time_increments != expected_step
flagged_steps = bad_steps.where(bad_steps, drop=True)
print("Bad steps at positions:", flagged_steps)

In [None]:
# Write the 2 m air temperature variable on its own to a zlib-compressed
# NetCDF file (compression level 5) next to the daily files.
t2m_path = Path.home().joinpath("FjordSim_data", "NORA3", "air_temperature_2m.nc")
t2m_encoding = {"air_temperature_2m": {"zlib": True, "complevel": 5}}
t2m = ds["air_temperature_2m"]
t2m.to_netcdf(t2m_path, encoding=t2m_encoding)