In [1]:
import os
import xarray as xr
import dask
from dask.diagnostics import ProgressBar
import hydra
from omegaconf import DictConfig
import logging
import numpy as np

In [2]:
import hydra
from omegaconf import DictConfig
# Initialize Hydra against the "conf" config directory.
# hydra.initialize() raises ValueError when GlobalHydra is already initialized,
# which happens whenever this cell is re-run in the same kernel. Clear any
# previous global state first so the cell can be executed repeatedly.
from hydra.core.global_hydra import GlobalHydra

if GlobalHydra.instance().is_initialized():
    GlobalHydra.instance().clear()
hydra.initialize(config_path="conf", version_base=None)

# Compose the configuration named "config_uv_1" and show it as the cell output.
cfg = hydra.compose(config_name="config_uv_1")
cfg

{'grib_store_path': '/datasets/grib2_data', 'hdf5_store_path': '/datasets/hdf5_data_uvsp', 'dt': 1, 'start_train_year': 2015, 'end_train_year': 2015, 'test_years': [2015], 'out_of_sample_years': [2015], 'compute_mean_std': True, 'variables': ['10m_u_component_of_wind', '10m_v_component_of_wind', 'surface_pressure']}

In [3]:
	# Build the list of GRIB2 file paths under cfg.grib_store_path:
	# one "wnd10m.cdas1.<year><month>.grb2" file per (year, month) pair,
	# for months 8 and 9 of every year in all_years.
	all_years = [2015]
	grib_paths = [
		f"{cfg.grib_store_path}/{yr}/wnd10m.cdas1.{yr}{mon:02d}.grb2"
		for yr in all_years
		for mon in range(8, 10)
	]

In [5]:
# Open every GRIB2 path as its own xarray Dataset (no explicit engine is passed),
# then show the resulting list as the cell output.
grib_arrays = list(map(xr.open_dataset, grib_paths))
grib_arrays

[<xarray.Dataset> Size: 244MB
 Dimensions:            (time: 124, step: 6, latitude: 200, longitude: 205)
 Coordinates:
   * time               (time) datetime64[ns] 992B 2015-08-01 ... 2015-08-31T1...
   * step               (step) timedelta64[ns] 48B 01:00:00 02:00:00 ... 06:00:00
     heightAboveGround  float64 8B ...
   * latitude           (latitude) float64 2kB 41.81 41.6 41.4 ... 1.329 1.124
   * longitude          (longitude) float64 2kB 98.18 98.39 98.59 ... 139.7 139.9
     valid_time         (time, step) datetime64[ns] 6kB ...
 Data variables:
     u                  (time, step, latitude, longitude) float32 122MB ...
     v                  (time, step, latitude, longitude) float32 122MB ...
 Attributes:
     GRIB_edition:            2
     GRIB_centre:             60
     GRIB_centreDescription:  National Center for Atmospheric Research 
     GRIB_subCentre:          1
     Conventions:             CF-1.7
     institution:             National Center for Atmospheric Resear

In [8]:
	# Join the per-file datasets into a single dataset along the "time" dimension.
	cfsr_xarray_concat = xr.concat(grib_arrays, dim="time")

In [10]:
	# Concatenate every data variable of the dataset along a new "channel" dimension.
	channel_members = [cfsr_xarray_concat[var_name] for var_name in cfsr_xarray_concat.data_vars]
	cfsr_xarray_channel = xr.concat(channel_members, dim="channel")

	# Collapse (time, step) into one stacked dimension, then make "valid_time"
	# the dimension in place of the stacked "newtime".
	cfsr_xarray_stacked = cfsr_xarray_channel.stack(newtime=["time", "step"])
	cfsr_xarray_stacked = cfsr_xarray_stacked.swap_dims({"newtime": "valid_time"})

	# The stacked "newtime" coordinate is no longer needed once valid_time is the dimension.
	cfsr_xarray_drop = cfsr_xarray_stacked.drop_vars("newtime")

	# Fix the dimension order, cast to float32 and label the array "fields".
	cfsr_xarray = cfsr_xarray_drop.transpose(
		"valid_time", "channel", "latitude", "longitude"
	).astype("float32")
	cfsr_xarray.name = "fields"

In [11]:
# Show the assembled DataArray as this cell's output.
cfsr_xarray