Convert `.npy` files to `.nc` files so that they can be read by `xr.open_mfdataset`.

In [1]:
import numpy as np
import xarray as xr
from pathlib import Path

In [2]:
### Point to .npy inputs
dir_in = Path('/scratch/orybchuk/wakedynamics/ldm-3d/simulations/train/npy')
# Gather every .npy file sitting directly in the input directory, ordered by path
npy_files = sorted(dir_in.glob('*.npy'))

In [3]:
### Convert to .nc
dir_out = Path('/scratch/orybchuk/wakedynamics/ldm-3d/simulations/train/nc')
# Create the destination up front so the writes below cannot fail on a missing folder
dir_out.mkdir(parents=True, exist_ok=True)

## Prepare for coords
# Coordinates are the grid index times a uniform spacing of 15
# (units are not stated in this notebook; presumably meters -- TODO confirm)
grid_spacing = 15
x = np.arange(0, 128)*grid_spacing
y = np.arange(0, 128)*grid_spacing
z = np.arange(0, 64)*grid_spacing
# z = np.arange(0, 32)*15  # Trim the height so we're only looking below the capping inversion

# Every variable shares the same dimension order; the spatial shape is fixed by the coords above
dims = ('time', 'x', 'y', 'z')
grid_shape = (len(x), len(y), len(z))

for i, npy_file in enumerate(npy_files):
    if i%100==0: print(i, '...')
    # Open and reorganize data
    npy_arr = np.load(npy_file)
    # Fail early with a readable message: the first axis must provide the three
    # components read below (indices 0, 1, 2) and the remaining axes must match the grid
    if npy_arr.ndim != 4 or npy_arr.shape[0] < 3 or npy_arr.shape[1:] != grid_shape:
        raise ValueError(
            f'{npy_file}: expected an array of shape '
            f'(>=3, {grid_shape[0]}, {grid_shape[1]}, {grid_shape[2]}), got {npy_arr.shape}'
        )
    # Prepend a length-1 axis that becomes the 'time' dimension
    npy_arr = npy_arr[np.newaxis,:,:,:,:]

    # Create and populate dataset
    # The time value is the file stem with its first two characters dropped, parsed as an integer
    curr_time = int(npy_file.stem[2:])
    coords = {'time': [curr_time], 'x':x, 'y':y, 'z':z}
    ds = xr.Dataset(coords=coords)
    ds['u'] = (dims, npy_arr[:,0,:,:,:])
    ds['v'] = (dims, npy_arr[:,1,:,:,:])
    ds['w'] = (dims, npy_arr[:,2,:,:,:])

    # Save
    ds.to_netcdf(Path(dir_out, f'{curr_time}.nc'))

0 ...
100 ...
200 ...
300 ...
400 ...
500 ...
600 ...
700 ...
800 ...
900 ...
1000 ...
1100 ...
1200 ...
1300 ...
1400 ...
1500 ...
1600 ...
1700 ...
1800 ...
1900 ...
2000 ...
2100 ...
2200 ...
2300 ...
2400 ...
2500 ...
2600 ...
2700 ...
2800 ...
2900 ...
3000 ...
3100 ...
3200 ...
3300 ...
3400 ...
3500 ...
3600 ...
3700 ...
3800 ...
3900 ...
4000 ...
4100 ...
4200 ...
4300 ...
4400 ...
4500 ...
4600 ...
4700 ...
4800 ...
4900 ...
5000 ...


In [4]:
### Demo
# Collect the converted .nc files in sorted path order and open them together as one dataset
all_out_files = sorted(Path(dir_out).glob('*.nc'))
ds_all = xr.open_mfdataset(all_out_files)