## spatial_PSD_data.ipynb

Reads the hourly LES output files and ancillary data files
* Calculates the power spectral density (PSD) as a function of wavelength (the inverse of wavenumber), hour and, for the 3-D fields, model level.
* For each variable, the output has dimensions of time (hour), bottom_top (i.e., model level, if 3D) and wavelength.
* Writes out NetCDF files `fr2_PSD_##.nc4` where `##` is the lower boundary condition code `00` or `01`
    * 00   = heterogeneous BCs from HydroBlocks (HET)
    * 01   = homogeneous (HOM; domain mean applied at all grid cells) 


In [None]:
import xarray as xr
import numpy as np
import pandas as pd

from scipy.ndimage import gaussian_filter
from scipy import fft 
from scipy import signal as sg
from scipy import interpolate


import matplotlib.pyplot as plt
import matplotlib.colors as colors

import sys, warnings, glob
warnings.filterwarnings("default", category=RuntimeWarning)

In [None]:
# ---- Input locations -------------------------------------------------------
ddir = "/Volumes/SSD_8TB/CLASP/LES_runs2/"          # Path to original LES output
pdir = "/Volumes/SSD_8TB/CLASP/LES_HCF/PBL_data/"   # Path to ancillary data produced by `ancillary_LES_data.ipynb`

# ---- File-name pieces ------------------------------------------------------
# `bcs` selects the lower-boundary-condition run to process:
#     "_01" -> homogeneous (HOM)
#     "_00" -> heterogeneous (HET)   <- currently selected
dpre = "fr2_"
ppre = "pbl2c"
suff = ".nc4"
bcs = "_00"

# One entry per case, sorted so that the two lists line up index by index.
d_list = sorted(glob.glob(ddir + "/" + dpre + "*" + bcs))
p_list = sorted(glob.glob(pdir + "/" + ppre + "_*" + bcs + ".nc4"))

# Which hours to process and save: 13, 14, ..., 23, then 00, 01, 02,
# written as two-digit strings because they are spliced into file names.
hours = [f"{h % 24:02d}" for h in range(13, 27)]

# Days in each month (January first; February fixed at 28 days)
dom = [31, 28, 31, 30, 31, 30,
       31, 31, 30, 31, 30, 31]


### Functions for assessing power spectra of 2-D fields

In [None]:
def power2DMean(field, N):
    """
    Returns a single (radially collapsed) power spectrum for a 2-D grid, or one
    spectrum per slice for a stack of 2-D grids.
    Based upon:
        https://dsp.stackexchange.com/questions/36902/calculate-1d-power-spectrum-from-2d-images

    Required inputs:
        field     = (2D or 3D array) 2-D grid(s) of data, assumed to have periodic boundaries
                    if 3D, first axis is assumed to be a dimension to iterate over
        N         = (integer) forwarded to powerCalc, where it sets the number of
                    angles sampled around each wavenumber ring; callers in this
                    file pass half the grid dimension

    Outputs:
        powerspec = (1D or 2D array) Power spectrum, one value per entry of k
                    (labelled [V**2/Hz] by the original author); 2D with the
                    iterated dimension first when field is 3D
        k         = (1D array) positive sample frequencies (cycles per grid cell)
                    of the shorter of the two grid axes

    Raises:
        ValueError if field has fewer than 2 or more than 3 dimensions
    """
    # Check for dimensional consistency
    ss = field.shape
    if len(ss) < 2:
        raise ValueError("Function power2DMean: input field is not an array")
    if len(ss) > 3:
        raise ValueError("Function power2DMean: input field has too many dimensions")

    # Sort out dimensions: the last two axes are (rows, columns) of the grid;
    # any iterative dimension is the first one.  kN runs along the rows and kE
    # along the columns, the same convention powerCalc uses for its spline.
    nN, nE = ss[-2:]

    # Get the sample frequencies of each axis, keep the positive ones of the
    # shorter axis as the radii at which the spectrum is evaluated
    kN = np.fft.fftfreq(nN)
    kE = np.fft.fftfreq(nE)
    k = kN if kN.size < kE.size else kE
    k = k[k > 0]

    # Radial wavenumber laid out like the spectrum powerCalc works on:
    # shape (rows, columns) and fftshift-ed, so the zero frequency sits in the
    # centre of the array rather than in its [0, 0] corner.
    k_rad = np.fft.fftshift(np.sqrt(kN[:, np.newaxis]**2 + kE[np.newaxis, :]**2))

    if len(ss) == 3:
        nT = ss[0]
        # One row per slice; the row length is whatever powerCalc returns,
        # i.e. one value per retained wavenumber.
        powerspec = np.zeros([nT, k.size])
        for t in range(nT):
            powerspec[t] = powerCalc(field[t], k, N, k_rad)
    else:
        powerspec = powerCalc(field, k, N, k_rad)
    return powerspec, k

def powerCalc(field2d, k, N, k_rad):
    """
    Returns a single power spectrum for one 2-D grid by sampling its 2-D power
    spectrum on rings of constant radial wavenumber.
    Based upon:
        https://dsp.stackexchange.com/questions/36902/calculate-1d-power-spectrum-from-2d-images

    Required inputs:
        field2d   = (2D array) 2-D grid of data, assumed to have periodic boundaries
        k         = (1D array) radial wavenumbers (Discrete Fourier Transform sample
                    frequencies) at which the spectrum is evaluated
        N         = (integer) number of equally spaced angles sampled on each ring
        k_rad     = radial k of the 2-D spectrum; kept for interface compatibility
                    only.  The zero-frequency bin is located from the spectrum's
                    own shape, so the layout and shape of k_rad no longer matter.

    Outputs:
        powerspec = (1D array, same length as k) ring-median power, scaled by
                    4*pi and divided by the number of grid points
                    (labelled [V**2/Hz] by the original author)
    """
    shift = np.fft.fftshift

    # 2-D spectrum of the anomaly field, zero frequency moved to the centre
    spec = shift(np.fft.fft2(field2d - field2d.mean()))
    pspec = np.abs(spec)**2          # Convert to power

    # Fix the middle: after fftshift the zero-frequency bin is at index n//2 on
    # each axis.  The mean was removed above, so this only clears round-off.
    n_rows, n_cols = pspec.shape
    pspec[n_rows // 2, n_cols // 2] = 0.

    # Interpolate the 2D grid power spectrum (rows <-> kN, columns <-> kE)
    kN = shift(np.fft.fftfreq(n_rows))
    kE = shift(np.fft.fftfreq(n_cols))
    p_i = interpolate.RectBivariateSpline(kN, kE, pspec)

    # Unit-circle sample directions; identical for every ring, so compute once
    theta = np.linspace(-np.pi, np.pi, N, False)
    sin_t = np.sin(theta)
    cos_t = np.cos(theta)

    # Construct a single spectrum spanning the 2 dimensions: for each radius
    # take the median of the interpolated power around the ring.
    power = np.empty(k.size)
    for i in range(k.size):
        ring = p_i.ev(cos_t * k[i], sin_t * k[i])
        power[i] = np.median(ring * 4 * np.pi)

    # The spline can undershoot below zero, hence the abs(); normalise by the
    # number of grid points.
    powerspec = np.abs(power) / pspec.size
    return powerspec

def lh_v(T):
    """
    Empirical cubic fit to the latent heat of vaporization, L_v(T).

    Required inputs:
        T = temperature (scalar or array).
            NOTE(review): the coefficients look like the commonly quoted fit that
            takes T in degrees Celsius; confirm the units passed at the call sites.

    Outputs:
        The cubic polynomial in T multiplied by 1000 (J kg-1 if the polynomial
        itself is in J g-1 -- presumably; verify).
    """
    c0, c1, c2, c3 = 2500.8, 2.36, 0.0016, 0.00006   # polynomial coefficients
    return 1000*(c0 - c1*T + c2*T**2 - c3*T**3)


## Calculation loop



In [None]:
vars_save = ["AVS_TKK1","AVS_QK1","AVS_LWP","AVS_CWP",
             "AVS_PBLZ","AVS_LCLZ","AVS_LFCZ",
             "AVS_LH","AVS_SH","AVS_EF","AVS_RNET","AVS_MEK1",
             "AVV_TH","AVV_QV","AVV_RHO",
             "AVV_THV","AVV_MSE"]   # This is the naming convention of the WRF LES output
vars_source = ["d","d","d","0",
               "p","p","p",
               "d","d","0","0","0",
               "d","d","d",
               "d","0"]             # d = from ddir files (LES output)
                                    # p = from pdir files (ancillary)
                                    # 0 = new derived variables created here
tot_hours = len(hours)              # Number of hourly time steps kept per case


def _hourly_files(case_dir, case, hour_list):
    """Return the hourly `diag_d01_*` file path for every entry of hour_list.

    case is the case start date as "YYYYMMDD".  Hours numerically smaller than
    the first listed hour have wrapped past midnight and are looked up under
    the following calendar day; real date arithmetic handles month ends, year
    ends and leap days.
    """
    first_day = pd.to_datetime(case, format="%Y%m%d")
    next_day = first_day + pd.Timedelta(days=1)
    paths = []
    for h in hour_list:
        day = next_day if int(h) < int(hour_list[0]) else first_day
        paths.append(f"{case_dir}/diag_d01_{day.strftime('%Y-%m-%d')}_{h}:00:00{suff}")
    return paths


def _derive_variable(name, d_data):
    """Build one of the derived ("0"-source) fields from the LES dataset and load it."""
    if name == "AVS_RNET":
        out = d_data['AVS_SWDNB']-d_data['AVS_SWUPB']+d_data['AVS_LWDNB']-d_data['AVS_LWUPB']
        out.attrs = dict(d_data['AVS_SWDNB'].attrs)
        out.attrs['description'] = 'net downward radiation at surface'
    elif name == "AVS_EF":
        raw = d_data['AVS_LH']/(d_data['AVS_SH']+d_data['AVS_LH'])
        # Both masks test the unclipped ratio: values that are not > 0 become 0,
        # then values that are not < 1 (including NaN) become 1.
        out = raw.where(raw>0,0).where(raw<1,1)
        out.attrs = dict(d_data['AVS_SWDNB'].attrs)
        out.attrs['description'] = 'evaporative fraction'
        out.attrs['units'] = "-"
    elif name == "AVS_CWP":
        out = d_data['AVS_LWP']+d_data['AVS_IWP']
        out.attrs = dict(d_data['AVS_LWP'].attrs)
        out.attrs['description'] = 'cloud (liquid+ice) water path'
    elif name == "AVS_MEK1":
        # NOTE(review): lh_v() is fed AVS_TKK1 directly -- confirm its units
        # match what the latent-heat fit expects.
        out = 1005*d_data['AVS_TKK1']+lh_v(d_data['AVS_TKK1'])*d_data['AVS_QK1']
        out.attrs = dict(d_data['AVS_TKK1'].attrs)
        out.attrs['units'] = 'J kg-1'
        out.attrs['description'] = 'moist enthalpy at k=1'
    elif name == "AVV_MSE":
        # NOTE(review): AVV_TH is used both as the temperature term and as the
        # argument of lh_v() -- confirm that is the intended field and unit.
        out = 1005*d_data['AVV_TH']+lh_v(d_data['AVV_TH'])*d_data['AVV_QV']+9.81*d_data['AVV_Z']
        out.attrs = dict(d_data['AVV_TH'].attrs)
        out.attrs['units'] = 'J kg-1'
        out.attrs['description'] = 'moist static energy'
    else:
        raise ValueError(f"No recipe for derived variable {name!r}")
    return out.load()


# Fail early with a clear message instead of a NameError/IndexError deep in the loop
if not d_list:
    raise FileNotFoundError(f"No LES case directories match {ddir}/{dpre}*{bcs}")
if len(p_list) < len(d_list):
    raise ValueError(f"Found {len(d_list)} LES cases but only {len(p_list)} ancillary files")

#########################################
for i, (v, source) in enumerate(zip(vars_save, vars_source)): # Loop through variables
    print(f"  \033[1m{v}\033[0m",end=": ")

    for c, case_d in enumerate(d_list):   #### Loop through cases (dates)
        case = case_d.split(dpre)[1][:8]  # "YYYYMMDD" taken from the directory name
        print(f"{case} ",end="")

        ##### Open files, extract variable (loaded into memory so the files can be closed)
        if source == "p":
            with xr.open_dataset(p_list[c]) as p_raw:
                var = p_raw.isel(Time=slice(0,tot_hours))[v].load()
        else:
            # Open multiple files to get original data for the case
            with xr.open_mfdataset(_hourly_files(case_d, case, hours),
                                   combine="nested",concat_dim="Time",decode_times=False) as d_raw:
                # Add time coord to LES output
                stamps = [pd.to_datetime(s.decode("utf-8"),format='%Y-%m-%d_%H:%M:%S')
                          for s in d_raw['Times'].values]
                d_data = d_raw.assign_coords({"Time": stamps}).isel(Time=slice(0,tot_hours))
                if source == "0":
                    var = _derive_variable(v, d_data)
                else:
                    var = d_data[v].load()

        ##### Calculate spatial power spectra, save to new xarray
        is_3d = v[:3] == "AVV"            # AVV_* fields carry a model-level axis after Time
        n_half = int(var.shape[2]/2)

        # Make an empty DataArray for power spectrum density
        if c == 0:
            # A zero field is enough to obtain the wavenumber axis k
            template = var[0,0] if is_3d else var[0]
            dummy, k = power2DMean(xr.zeros_like(template), N=n_half)
            wavelength = 1/k
            varname = f"PSD_{v.split('_')[1]}"
            if is_3d:
                psd_shape = [tot_hours*len(d_list), var.shape[1], k.size]
                psd_dims = ["time","bottom_top","wavelength"]
            else:
                psd_shape = [tot_hours*len(d_list), k.size]
                psd_dims = ["time","wavelength"]
            xpsd = xr.DataArray(np.zeros(psd_shape),
                dims=psd_dims,
                coords=dict(
                wavelength=wavelength),
                name=varname,
                attrs=dict(units=f"({var.units})**2/Hz",
                           description=f"power spectral density for {var.description}"))
            x_time = np.tile(var['Time'].values,len(d_list))

        for h in range(tot_hours): # Loop through hours
            print(f"\b{h+65:c}",end="")   # progress marker: A, B, C, ...
            spectrum, k = power2DMean(var[h], N=n_half)
            xpsd[c*tot_hours+h] = spectrum
        x_time[c*tot_hours:(c+1)*tot_hours] = var['Time'].values
        print(f"\b ",end="")
    xpsd = xpsd.assign_coords(dict(time=x_time))

    # Make a dataset from data arrays
    if i == 0:
        ds = xpsd.to_dataset()
    else:
        ds[varname] = xpsd

    print(" ")

# For deflated NetCDF4 output
deflate = dict(zlib=True, complevel=1)
encoding = {name: deflate for name in ds.data_vars}

ds.to_netcdf(f"{ddir}fr2_PSD{bcs}.nc4",engine="netcdf4",format="netCDF4",encoding=encoding)

print("***DONE***")