This notebook loads either the full training dataset or the full testing dataset and then computes summary quantities from it: vertical profiles, fluxes, histograms, spectra, and continuity diagnostics. Because these datasets are large and take quite some time to load, we open and analyze each dataset separately and run the cells incrementally.

In [1]:
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
from pathlib import Path
import scipy.stats

# Load all training data

In [2]:
### Locate the trimmed training NetCDF files and open them as one dataset
dir_in_train = Path('/scratch/orybchuk/wakedynamics/ldm-3d/simulations/train/nc_trimmed')
# Sorted so the files are always handed to xarray in the same (path) order
train_files = sorted(dir_in_train.glob('*.nc'))

ds_train = xr.open_mfdataset(
    train_files,
    engine='netcdf4',
    parallel=True,
)

### Calculate vertical profiles

In [None]:
# u_train_mean = ds_train['u'].mean(('x', 'y', 'time')).compute()
# u_train_std = ds_train['u'].std(('x', 'y', 'time')).compute()

In [None]:
# v_train_mean = ds_train['v'].mean(('x', 'y', 'time')).compute()
# v_train_std = ds_train['v'].std(('x', 'y', 'time')).compute()

In [None]:
# w_train_mean = ds_train['w'].mean(('x', 'y', 'time')).compute()
# w_train_std = ds_train['w'].std(('x', 'y', 'time')).compute()

In [None]:
# u_train_mean.to_netcdf('u_train_mean.nc')
# u_train_std.to_netcdf('u_train_std.nc')
# v_train_mean.to_netcdf('v_train_mean.nc')
# v_train_std.to_netcdf('v_train_std.nc')
# w_train_mean.to_netcdf('w_train_mean.nc')
# w_train_std.to_netcdf('w_train_std.nc')

### Calculate fluxes

In [None]:
# ds_train["up"] = ds_train['u'] - ds_train['u'].mean(('x', 'y'))
# ds_train["vp"] = ds_train['v'] - ds_train['v'].mean(('x', 'y'))
# ds_train["wp"] = ds_train['w'] - ds_train['w'].mean(('x', 'y'))

# ds_train["upvp"] = ds_train['up'] * ds_train['vp']
# ds_train["upwp"] = ds_train['up'] * ds_train['wp']
# ds_train["vpwp"] = ds_train['vp'] * ds_train['wp']

# ds_train["upvp"].compute()
# ds_train["upwp"].compute()
# ds_train["vpwp"].compute()

# ds_train['upvp'].to_netcdf("upvp_train.nc")
# ds_train['upwp'].to_netcdf("upwp_train.nc")
# ds_train['vpwp'].to_netcdf("vpwp_train.nc")

In [None]:
# upvp_train_mean = ds_train['upvp'].mean(('x', 'y', 'time')).compute()
# upwp_train_mean = ds_train['upwp'].mean(('x', 'y', 'time')).compute()
# vpwp_train_mean = ds_train['vpwp'].mean(('x', 'y', 'time')).compute()

# upvp_train_mean.to_netcdf("upvp_train_profile.nc")
# upwp_train_mean.to_netcdf("upwp_train_profile.nc")
# vpwp_train_mean.to_netcdf("vpwp_train_profile.nc")

### Calculate histogram

In [None]:
### Calculate histogram, going sample by sample
# Builds one histogram-based PDF per velocity component (u, v, w) over the
# whole training set and saves each PDF evaluated at its own bin edges.
## Bins
umin = 6
umax = 12
ubins = np.linspace(umin, umax, 500)

vmin = -1.5
vmax = 3.5
vbins = np.linspace(vmin, vmax, 500)

wmin = -0.75
wmax = 0.75
wbins = np.linspace(wmin, wmax, 300)

## Accumulate bin counts one time sample at a time
# Histogram counts are additive, so summing the per-sample counts gives the
# same totals as histogramming the full field at once, without ever holding
# an entire velocity field (plus a flattened copy of it) in memory.
ucounts = np.zeros(len(ubins) - 1, dtype=np.intp)
vcounts = np.zeros(len(vbins) - 1, dtype=np.intp)
wcounts = np.zeros(len(wbins) - 1, dtype=np.intp)

for i in range(len(ds_train['time'])):
    sample = ds_train.isel(time=i)
    ucounts += np.histogram(sample['u'].values.ravel(), bins=ubins)[0]
    vcounts += np.histogram(sample['v'].values.ravel(), bins=vbins)[0]
    wcounts += np.histogram(sample['w'].values.ravel(), bins=wbins)[0]

## Scipy histograms
# Same (counts, bin_edges) tuple layout that np.histogram returns
uhist_np = (ucounts, ubins)
uhist = scipy.stats.rv_histogram(uhist_np)

vhist_np = (vcounts, vbins)
vhist = scipy.stats.rv_histogram(vhist_np)

whist_np = (wcounts, wbins)
whist = scipy.stats.rv_histogram(whist_np)

# The PDFs are sampled at the bin edges themselves, so each saved array has
# as many entries as its bin-edge array.
np.save('u_train_hist.npy', uhist.pdf(ubins))
np.save('v_train_hist.npy', vhist.pdf(vbins))
np.save('w_train_hist.npy', whist.pdf(wbins))

# ## Calculate histograms - OLD WAY
# all_hist_u = np.zeros((len(ds_train['time']), len(ubins)))
# all_hist_v = np.zeros((len(ds_train['time']), len(vbins)))
# all_hist_w = np.zeros((len(ds_train['time']), len(wbins)))
# for i in range(len(ds_train['time'])):
#     uhist_train_np = np.histogram(ds_train['u'].isel(time=i).values.flatten(), bins=ubins)
#     uhist_train = scipy.stats.rv_histogram(uhist_train_np)
#     all_hist_u[i,:] = uhist_train.pdf(ubins).copy()
    
#     vhist_train_np = np.histogram(ds_train['v'].isel(time=i).values.flatten(), bins=vbins)
#     vhist_train = scipy.stats.rv_histogram(vhist_train_np)
#     all_hist_v[i,:] = vhist_train.pdf(vbins).copy()
    
#     whist_train_np = np.histogram(ds_train['w'].isel(time=i).values.flatten(), bins=wbins)
#     whist_train = scipy.stats.rv_histogram(whist_train_np)
#     all_hist_w[i,:] = whist_train.pdf(wbins).copy()
    
# np.save('u_train_hist.npy', all_hist_u)
# np.save('v_train_hist.npy', all_hist_v)
# np.save('w_train_hist.npy', all_hist_w)

### Calculate spectra

In [None]:
# ### Calculate 1D spectra, sample by sample
# khub = 6
# tmp1 = ds_train['up'].isel(time=0, z=khub)
# tmp2 = np.fft.rfftn(tmp1.values, axes=(0,))
# all_spectra = np.zeros((len(ds_train['time']), len(tmp2)))    
    
# for i in range(len(ds_train['time'])):
#     up_xy_train = ds_train['up'].isel(time=i, z=khub)
#     up_k_train = np.fft.rfftn(up_xy_train.values, axes=(0,))
#     up_k_bar_train = np.mean(np.abs(up_k_train)**2, axis=1)
    
#     all_spectra[i,:] = up_k_bar_train.copy()
# np.save('spectra_train.npy', all_spectra)

### Calculate continuity

In [None]:
# vel = xr.open_dataset(train_files[0])

In [None]:
# ### Calculate histograms that assess continuity
# ## Helper variables
# # Simulation parameters
# dx, dy, dz = 15, 15, 15
# facx, facy, facz = 1/(4*dx), 1/(4*dy), 1/(4*dz)

# # Histogram parameters
# dmin = -0.02
# dmax = 0.02
# dbins = np.linspace(dmin, dmax, 500)
# all_dudx_hist = np.zeros((len(ds_train['time']), len(dbins)))
# all_dvdy_hist = np.zeros((len(ds_train['time']), len(dbins)))
# all_dwdz_hist = np.zeros((len(ds_train['time']), len(dbins)))
# all_div_hist = np.zeros((len(ds_train['time']), len(dbins)))

# ## Iterate over timesteps
# for t in range(len(ds_train['time'])):
#     if t % 100 == 0: print(t, '...')
# #     vel = ds_train.isel(time=t)
#     vel = xr.open_dataset(train_files[t])

#     # Calculate gradients and divergence
#     dudx = facx*(-vel['u'].roll(x=-1,y=-1,z=-1,roll_coords=False).values + vel['u'].roll(x=0,y=-1,z=-1,roll_coords=False).values \
#                  -vel['u'].roll(x=-1,y=0 ,z=-1,roll_coords=False).values + vel['u'].roll(x=0,y=0 ,z=-1,roll_coords=False).values \
#                  -vel['u'].roll(x=-1,y=-1,z=0 ,roll_coords=False).values + vel['u'].roll(x=0,y=-1,z=0 ,roll_coords=False).values \
#                  -vel['u'].roll(x=-1,y=0 ,z=0 ,roll_coords=False).values + vel['u'].roll(x=0,y=0 ,z=0 ,roll_coords=False).values)
#     dvdy = facy*(-vel['v'].roll(x=-1,y=-1,z=-1,roll_coords=False).values - vel['v'].roll(x=0,y=-1,z=-1,roll_coords=False).values \
#                  +vel['v'].roll(x=-1,y=0 ,z=-1,roll_coords=False).values + vel['v'].roll(x=0,y=0 ,z=-1,roll_coords=False).values \
#                  -vel['v'].roll(x=-1,y=-1,z=0 ,roll_coords=False).values - vel['v'].roll(x=0,y=-1,z=0 ,roll_coords=False).values \
#                  +vel['v'].roll(x=-1,y=0 ,z=0 ,roll_coords=False).values + vel['v'].roll(x=0,y=0 ,z=0 ,roll_coords=False).values)
#     dwdz = facz*(-vel['w'].roll(x=-1,y=-1,z=-1,roll_coords=False).values - vel['w'].roll(x=0,y=-1,z=-1,roll_coords=False).values \
#                  -vel['w'].roll(x=-1,y=0 ,z=-1,roll_coords=False).values - vel['w'].roll(x=0,y=0 ,z=-1,roll_coords=False).values \
#                  +vel['w'].roll(x=-1,y=-1,z=0 ,roll_coords=False).values + vel['w'].roll(x=0,y=-1,z=0 ,roll_coords=False).values \
#                  +vel['w'].roll(x=-1,y=0 ,z=0 ,roll_coords=False).values + vel['w'].roll(x=0,y=0 ,z=0 ,roll_coords=False).values)
#     div = dudx + dvdy + dwdz

#     # Calculate histograms
#     dudxhist_np = np.histogram(dudx.flatten(), bins=dbins)
#     dudxhist = scipy.stats.rv_histogram(dudxhist_np)

#     dvdyhist_np = np.histogram(dvdy.flatten(), bins=dbins)
#     dvdyhist = scipy.stats.rv_histogram(dvdyhist_np)

#     dwdzhist_np = np.histogram(dwdz.flatten(), bins=dbins)
#     dwdzhist = scipy.stats.rv_histogram(dwdzhist_np)

#     divhist_np = np.histogram(div.flatten(), bins=dbins)
#     divhist = scipy.stats.rv_histogram(divhist_np)

#     # Store histograms
#     all_dudx_hist[t,:] = dudxhist.pdf(dbins)
#     all_dvdy_hist[t,:] = dvdyhist.pdf(dbins)
#     all_dwdz_hist[t,:] = dwdzhist.pdf(dbins)
#     all_div_hist[t,:] = divhist.pdf(dbins)

# # Save histograms
# np.save('hist_dudx_train.npy', all_dudx_hist.mean(axis=0))
# np.save('hist_dvdy_train.npy', all_dvdy_hist.mean(axis=0))
# np.save('hist_dwdz_train.npy', all_dwdz_hist.mean(axis=0))
# np.save('hist_div_train.npy', all_div_hist.mean(axis=0))

# Load all testing data (81 hours)

In [3]:
### Locate the trimmed testing NetCDF files and open them as one dataset
dir_in_test = Path('/scratch/orybchuk/wakedynamics/ldm-3d/simulations/test/nc_trimmed')
# Sorted so the files are always handed to xarray in the same (path) order
test_files = sorted(dir_in_test.glob('*.nc'))

ds_test = xr.open_mfdataset(
    test_files,
    engine='netcdf4',
    parallel=True,
)

### Calculate vertical profiles

In [None]:
# u_test_mean = ds_test['u'].mean(('x', 'y', 'time')).compute()
# u_test_std = ds_test['u'].std(('x', 'y', 'time')).compute()

In [None]:
# v_test_mean = ds_test['v'].mean(('x', 'y', 'time')).compute()
# v_test_std = ds_test['v'].std(('x', 'y', 'time')).compute()

In [None]:
# w_test_mean = ds_test['w'].mean(('x', 'y', 'time')).compute()
# w_test_std = ds_test['w'].std(('x', 'y', 'time')).compute()

In [None]:
# u_test_mean.to_netcdf('u_test_mean.nc')
# u_test_std.to_netcdf('u_test_std.nc')
# v_test_mean.to_netcdf('v_test_mean.nc')
# v_test_std.to_netcdf('v_test_std.nc')
# w_test_mean.to_netcdf('w_test_mean.nc')
# w_test_std.to_netcdf('w_test_std.nc')

### Calculate fluxes

In [None]:
# ds_test["up"] = ds_test['u'] - ds_test['u'].mean(('x', 'y'))
# ds_test["vp"] = ds_test['v'] - ds_test['v'].mean(('x', 'y'))
# ds_test["wp"] = ds_test['w'] - ds_test['w'].mean(('x', 'y'))

# ds_test["upvp"] = ds_test['up'] * ds_test['vp']
# ds_test["upwp"] = ds_test['up'] * ds_test['wp']
# ds_test["vpwp"] = ds_test['vp'] * ds_test['wp']

# ds_test["upvp"].compute()
# ds_test["upwp"].compute()
# ds_test["vpwp"].compute()

# ds_test['upvp'].to_netcdf("upvp_test.nc")
# ds_test['upwp'].to_netcdf("upwp_test.nc")
# ds_test['vpwp'].to_netcdf("vpwp_test.nc")

In [None]:
# upvp_test_mean = ds_test['upvp'].mean(('x', 'y', 'time')).compute()
# upwp_test_mean = ds_test['upwp'].mean(('x', 'y', 'time')).compute()
# vpwp_test_mean = ds_test['vpwp'].mean(('x', 'y', 'time')).compute()

# upvp_test_mean.to_netcdf("upvp_test_profile.nc")
# upwp_test_mean.to_netcdf("upwp_test_profile.nc")
# vpwp_test_mean.to_netcdf("vpwp_test_profile.nc")

### Calculate histogram

In [None]:
# ### Calculate histogram, going sample by sample
# ## Bins
# umin = 6
# umax = 12
# ubins = np.linspace(umin, umax, 500)

# vmin = -1.5
# vmax = 3.5
# vbins = np.linspace(vmin, vmax, 500)

# wmin = -0.75
# wmax = 0.75
# wbins = np.linspace(wmin, wmax, 300)

# ## Scipy histograms
# uhist_np = np.histogram(ds_test['u'].values.flatten(), bins=ubins)
# uhist = scipy.stats.rv_histogram(uhist_np)

# vhist_np = np.histogram(ds_test['v'].values.flatten(), bins=vbins)
# vhist = scipy.stats.rv_histogram(vhist_np)

# whist_np = np.histogram(ds_test['w'].values.flatten(), bins=wbins)
# whist = scipy.stats.rv_histogram(whist_np)

# np.save('u_test_hist.npy', uhist.pdf(ubins))
# np.save('v_test_hist.npy', vhist.pdf(vbins))
# np.save('w_test_hist.npy', whist.pdf(wbins))

### Calculate spectra

In [None]:
# ### Calculate 1D spectra, sample by sample
# khub = 6
# tmp1 = ds_test['up'].isel(time=0, z=khub)
# tmp2 = np.fft.rfftn(tmp1.values, axes=(0,))
# all_spectra = np.zeros((len(ds_test['time']), len(tmp2)))    
    
# for i in range(len(ds_test['time'])):
#     up_xy_test = ds_test['up'].isel(time=i, z=khub)
#     up_k_test = np.fft.rfftn(up_xy_test.values, axes=(0,))
#     up_k_bar_test = np.mean(np.abs(up_k_test)**2, axis=1)
    
#     all_spectra[i,:] = up_k_bar_test.copy()
# np.save('spectra_test.npy', all_spectra)

### Calculate continuity

In [None]:
### Calculate histograms that assess continuity
# For every time step of the testing data: estimate du/dx, dv/dy and dw/dz
# and their sum (the divergence), turn each field into a histogram-based PDF
# on a shared set of bins, then save the time-mean of each PDF.
## Helper variables
# Simulation parameters
dx, dy, dz = 15, 15, 15
# Each gradient below sums four one-cell differences, hence 1/(4*spacing)
facx, facy, facz = 1/(4*dx), 1/(4*dy), 1/(4*dz)

# Histogram parameters
dmin = -0.02
dmax = 0.02
dbins = np.linspace(dmin, dmax, 500)
n_times = len(ds_test['time'])
all_dudx_hist = np.zeros((n_times, len(dbins)))
all_dvdy_hist = np.zeros((n_times, len(dbins)))
all_dwdz_hist = np.zeros((n_times, len(dbins)))
all_div_hist = np.zeros((n_times, len(dbins)))


def _cell_difference(field, dim, fac):
    """Return `fac` times the summed one-cell difference of `field` along `dim`.

    The eight combinations of shifts (-1 or 0) along x, y and z are visited
    with z outermost and x innermost; a term is subtracted when its shift
    along `dim` is -1 and added when it is 0. This is the same term order
    and sign pattern as writing the eight rolls out by hand, so the
    floating-point result is unchanged.

    NOTE(review): `roll` wraps around, so the last index along each axis is
    differenced against the first -- confirm that is intended in all three
    directions.
    NOTE(review): a shift of -1 brings the value from index i+1 to index i,
    so each difference is (value at i) - (value at i+1). If the coordinate
    increases with index this is the negative of a forward difference; it is
    kept as-is so results stay comparable with previously saved histograms
    -- confirm.
    """
    # Pull this time step into memory once instead of once per roll
    field = field.compute()
    total = None
    for sz in (-1, 0):
        for sy in (-1, 0):
            for sx in (-1, 0):
                shifts = {'x': sx, 'y': sy, 'z': sz}
                corner = field.roll(x=sx, y=sy, z=sz, roll_coords=False).values
                signed = -corner if shifts[dim] == -1 else corner
                total = signed if total is None else total + signed
    return fac * total


def _pdf_at_edges(values, edges):
    """Return the histogram-based PDF of `values`, evaluated at each entry of `edges`."""
    counts_and_edges = np.histogram(values.ravel(), bins=edges)
    return scipy.stats.rv_histogram(counts_and_edges).pdf(edges)


## Iterate over timesteps
for t in range(n_times):
    if t % 250==0: print(t, '...')
    vel = ds_test.isel(time=t)

    # Calculate gradients and divergence
    dudx = _cell_difference(vel['u'], 'x', facx)
    dvdy = _cell_difference(vel['v'], 'y', facy)
    dwdz = _cell_difference(vel['w'], 'z', facz)
    div = dudx + dvdy + dwdz

    # Calculate and store histograms
    all_dudx_hist[t,:] = _pdf_at_edges(dudx, dbins)
    all_dvdy_hist[t,:] = _pdf_at_edges(dvdy, dbins)
    all_dwdz_hist[t,:] = _pdf_at_edges(dwdz, dbins)
    all_div_hist[t,:] = _pdf_at_edges(div, dbins)

# Save histograms (mean over time steps of the per-step PDFs)
np.save('hist_dudx_test.npy', all_dudx_hist.mean(axis=0))
np.save('hist_dvdy_test.npy', all_dvdy_hist.mean(axis=0))
np.save('hist_dwdz_test.npy', all_dwdz_hist.mean(axis=0))
np.save('hist_div_test.npy', all_div_hist.mean(axis=0))