## statistics_PSD_data.ipynb

Reads the hourly PSD data from fr2_PSD_##.nc4
* Calculates a number of statistics as a function of hour and wavelength:
    * The exp(mean(log)) across cases of PSD for variables in each of the HET and HOM sets
    * exp(mean(log(HET/HOM))) across cases of PSD for variables
    * Log variance across cases of PSD for variables in each of the HET and HOM sets
    * Correlation (HOM vs HET) of log(PSD) for specific variables across cases
    * The relative entropy (HOM vs HET) across cases of PSD for variables
* Writes out fr2_stats2_PSD.nc4


In [None]:
import xarray as xr
import numpy as np
import pandas as pd

from scipy.stats import entropy, spearmanr

import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib as mpl

import sys, warnings, glob
warnings.filterwarnings("ignore", category=RuntimeWarning)

## The function below performs smoothing on power spectra in frequency space

In [None]:
##############################################################
### Function to smooth out ripples in data in frequency space
def freq_smooth2(freq,psd,winfac=1):
    """
    Smooth a power spectrum with a centred running mean whose width grows with frequency.

    The window at index n spans `half` points on either side of n, where
    half = rint(exp(sqrt(freq[n])*winfac)) - 1. Windows are truncated at both
    ends of the series, so every output point averages at least psd[n] itself.

    Required inputs:
    freq   (float 1D array) = frequencies (ascending) [Hz]
    psd    (float 1D or 2D array) = corresponding power spectral density [v**2/Hz]
                                    For 2D input, the first dimension whose length
                                    equals len(freq) is treated as the frequency axis.

    Optional inputs:
    winfac          (float) = scaling factor for window size as function of frequency

    Output:
    smooth (float 1D or 2D array) = Smoothed version of psd (a modified copy; psd is untouched)

    Raises:
    IndexError if freq is not 1D, if psd does not have a dimension matching
    len(freq), or if psd has more than 2 dimensions.
    """

    if freq.ndim != 1:
        raise IndexError("Frequencies must be in a 1D array")
    nfreq = len(freq)

    # Half-width of the (odd-length) window at each frequency.
    half_widths = np.rint(np.exp(np.sqrt(np.asarray(freq, dtype=float))*winfac)) - 1

    # Pre-compute [start, stop) of every window, clamped to the valid index range.
    # Clamping the start at 0 matters: a negative start would wrap around to the
    # end of the array and produce an empty slice (i.e. a NaN mean).
    windows = []
    for n in range(nfreq):
        half = int(half_widths[n])
        windows.append((max(n - half, 0), min(n + half + 1, nfreq)))

    smooth = psd.copy()

    if psd.ndim == 1:  # 1D case - easy:
        if freq.shape != psd.shape:
            raise IndexError("Required input series are not the same length")
        for n, (start, stop) in enumerate(windows):
            smooth[n] = psd[start:stop].mean()

    elif psd.ndim == 2:  # 2D case - smooth along whichever axis matches freq:
        if nfreq not in psd.shape:
            raise IndexError("No PSD array dimension matches frequency series length")
        idx = psd.shape.index(nfreq) # This is the matching dimension for frequencies
        for n, (start, stop) in enumerate(windows):
            if idx == 1:
                smooth[:,n] = psd[:,start:stop].mean(axis=idx)
            else:
                smooth[n,:] = psd[start:stop,:].mean(axis=idx)

    else:
        raise IndexError("Required PSD array cannot exceed 2D")

    return smooth

### Some large arrays and strings are set below to be used to annotate the dataset produced 

In [None]:
# Approximate pressures [Pa] corresponding to model levels (just a time average from one of the cases)
# 226 values, listed in the same order as z_levs below (pressure decreases as height increases).
# Attached further down as the 'p_levs' coordinate along the 'bottom_top' dimension of 3D variables,
# so the array length must equal the size of that dimension.
p_levs = np.array([97741.44 , 97398.18 , 97057.56 , 96719.125, 96382.766, 96048.21 ,
       95715.01 , 95382.93 , 95051.836, 94721.61 , 94392.29 , 94063.93 ,
       93736.61 , 93410.234, 93084.7  , 92760.07 , 92436.27 , 92113.336,
       91791.305, 91470.1  , 91149.69 , 90830.11 , 90511.375, 90193.49 ,
       89876.516, 89560.45 , 89245.33 , 88931.15 , 88617.95 , 88305.68 ,
       87994.25 , 87683.74 , 87374.19 , 87065.664, 86758.15 , 86451.63 ,
       86146.15 , 85841.67 , 85538.15 , 85235.555, 84933.87 , 84633.02 ,
       84332.984, 84033.75 , 83735.375, 83437.89 , 83141.336, 82845.664,
       82550.92 , 82257.12 , 81964.19 , 81672.19 , 81381.08 , 81090.89 ,
       80801.55 , 80513.   , 80225.305, 79938.43 , 79652.38 , 79367.18 ,
       79082.86 , 78799.44 , 78516.85 , 78235.08 , 77954.195, 77674.195,
       77395.02 , 77116.67 , 76839.12 , 76562.3  , 76286.25 , 76010.984,
       75736.52 , 75462.9  , 75190.05 , 74917.96 , 74646.664, 74376.14 ,
       74106.49 , 73837.71 , 73569.74 , 73302.67 , 73036.484, 72771.305,
       72507.17 , 72244.055, 71981.96 , 71720.85 , 71460.63 , 71201.58 ,
       70943.81 , 70687.1  , 70431.24 , 70176.24 , 69922.11 , 69669.07 ,
       69417.2  , 69166.414, 68916.56 , 68667.766, 68419.945, 68172.92 ,
       67926.734, 67681.36 , 67436.77 , 67192.9  , 66949.695, 66707.21 ,
       66465.414, 66224.28 , 65983.88 , 65744.23 , 65505.37 , 65267.23 ,
       65029.78 , 64793.008, 64556.918, 64321.508, 64086.785, 63852.684,
       63619.258, 63386.53 , 63154.508, 62923.2  , 62692.56 , 62462.605,
       62233.395, 62004.88 , 61777.07 , 61549.95 , 61323.496, 61097.684,
       60872.566, 60648.1  , 60424.316, 60201.258, 59978.914, 59757.258,
       59536.215, 59315.805, 59095.945, 58876.695, 58658.   , 58439.887,
       58222.355, 58005.477, 57789.383, 57574.125, 57359.594, 57145.78 ,
       56932.754, 56720.55 , 56509.07 , 56298.285, 56088.215, 55878.82 ,
       55670.117, 55462.133, 55254.883, 55048.38 , 54842.582, 54637.402,
       54432.78 , 54228.758, 54025.383, 53822.62 , 53620.414, 53413.402,
       53195.96 , 52967.26 , 52727.133, 52475.32 , 52210.676, 51933.094,
       51642.125, 51336.582, 51016.254, 50680.44 , 50328.12 , 49958.906,
       49571.812, 49166.34 , 48741.938, 48297.59 , 47833.152, 47347.863,
       46840.688, 46310.96 , 45757.793, 45180.402, 44578.145, 43949.965,
       43294.848, 42612.16 , 41901.027, 41161.133, 40391.66 , 39591.812,
       38761.625, 37902.418, 37015.66 , 36099.28 , 35151.277, 34172.605,
       33164.355, 32126.564, 31060.488, 29966.203, 28843.734, 27694.73 ,
       26527.67 , 25374.379, 24258.316, 23178.521, 22135.934, 21129.643,
       20157.207, 19218.822, 18314.855, 17444.225, 16605.197, 15797.964,
       15021.669, 14274.116, 13556.278, 13073.933])
# Approximate heights [m AGL] corresponding to model levels (just a time average from one of the cases)
# 226 values in ascending order (about 16 m up to about 14.7 km).
# Attached further down as the 'z_levs' coordinate along the 'bottom_top' dimension of 3D variables,
# so the array length must equal the size of that dimension.
z_levs = np.array([   15.697305,    47.003498,    78.152885,   109.18822 ,
         140.12819 ,   170.99742 ,   201.83646 ,   232.66306 ,
         263.48798 ,   294.31656 ,   325.14484 ,   355.9643  ,
         386.77017 ,   417.56982 ,   448.3696  ,   479.1682  ,
         509.96713 ,   540.7652  ,   571.5618  ,   602.35944 ,
         633.1591  ,   663.9615  ,   694.76434 ,   725.56433 ,
         756.3584  ,   787.1455  ,   817.9245  ,   848.6918  ,
         879.4479  ,   910.1955  ,   940.9394  ,   971.6786  ,
        1002.4047  ,  1033.1143  ,  1063.8088  ,  1094.4874  ,
        1125.1477  ,  1155.7919  ,  1186.4244  ,  1217.0476  ,
        1247.6613  ,  1278.2714  ,  1308.8835  ,  1339.496   ,
        1370.1034  ,  1400.703   ,  1431.293   ,  1461.8727  ,
        1492.4412  ,  1522.9967  ,  1553.543   ,  1584.0798  ,
        1614.6058  ,  1645.1223  ,  1675.6332  ,  1706.143   ,
        1736.652   ,  1767.1573  ,  1797.6582  ,  1828.1549  ,
        1858.6423  ,  1889.1199  ,  1919.5938  ,  1950.0632  ,
        1980.5247  ,  2010.9766  ,  2041.4221  ,  2071.8645  ,
        2102.3044  ,  2132.7468  ,  2163.1902  ,  2193.6335  ,
        2224.0728  ,  2254.5073  ,  2284.9392  ,  2315.3723  ,
        2345.806   ,  2376.2366  ,  2406.659   ,  2437.0747  ,
        2467.486   ,  2497.892   ,  2528.2888  ,  2558.6653  ,
        2589.0173  ,  2619.3489  ,  2649.6565  ,  2679.9517  ,
        2710.24    ,  2740.493   ,  2770.6968  ,  2800.8716  ,
        2831.0376  ,  2861.2036  ,  2891.366   ,  2921.5012  ,
        2951.5933  ,  2981.662   ,  3011.7202  ,  3041.7522  ,
        3071.766   ,  3101.7798  ,  3131.7915  ,  3161.7935  ,
        3191.7917  ,  3221.795   ,  3251.802   ,  3281.812   ,
        3311.8276  ,  3341.8484  ,  3371.869   ,  3401.884   ,
        3431.8918  ,  3461.895   ,  3491.8982  ,  3521.9026  ,
        3551.9094  ,  3581.9119  ,  3611.916   ,  3641.9243  ,
        3671.9336  ,  3701.9375  ,  3731.9363  ,  3761.933   ,
        3791.9285  ,  3821.9216  ,  3851.9084  ,  3881.8914  ,
        3911.8716  ,  3941.8499  ,  3971.8286  ,  4001.8088  ,
        4031.7869  ,  4061.764   ,  4091.7395  ,  4121.7046  ,
        4151.6597  ,  4181.609   ,  4211.556   ,  4241.5034  ,
        4271.4565  ,  4301.4126  ,  4331.3726  ,  4361.3364  ,
        4391.305   ,  4421.273   ,  4451.221   ,  4481.1436  ,
        4511.056   ,  4540.9644  ,  4570.859   ,  4600.734   ,
        4630.601   ,  4660.466   ,  4690.3296  ,  4720.19    ,
        4750.0503  ,  4779.906   ,  4809.75    ,  4839.5845  ,
        4869.413   ,  4899.247   ,  4929.0938  ,  4958.9478  ,
        4988.8003  ,  5018.6523  ,  5048.5146  ,  5079.1836  ,
        5111.4976  ,  5145.5986  ,  5181.529   ,  5219.3506  ,
        5259.2583  ,  5301.2915  ,  5345.5483  ,  5392.237   ,
        5441.4194  ,  5493.2476  ,  5547.9185  ,  5605.5356  ,
        5666.307   ,  5730.3726  ,  5797.89    ,  5869.096   ,
        5944.101   ,  6023.112   ,  6106.4023  ,  6194.1836  ,
        6286.7466  ,  6384.3604  ,  6487.284   ,  6595.8438  ,
        6710.4106  ,  6831.275   ,  6958.7915  ,  7093.227   ,
        7234.981   ,  7384.4844  ,  7542.1406  ,  7708.2354  ,
        7882.8984  ,  8066.8276  ,  8260.93    ,  8465.5625  ,
        8681.12    ,  8908.353   ,  9147.682   ,  9399.853   ,
        9665.779   ,  9946.119   , 10239.759   , 10539.291   ,
       10838.875   , 11138.76    , 11438.572   , 11738.272   ,
       12038.477   , 12339.073   , 12639.703   , 12940.302   ,
       13241.187   , 13542.021   , 13842.673   , 14143.909   ,
       14445.741   , 14724.665   ])
# Long description strings for the dataset:
#   d_string - what the data are and how they are organized; stored as the
#              'description' attribute of the output Dataset.
#   s_string - how each statistic is defined.
#              NOTE(review): s_string is not referenced by the code visible in this
#              notebook that builds the Dataset - confirm whether it should also be
#              attached as an attribute.
# The text (including spelling) is emitted verbatim at runtime, so edit with care.
d_string = """
Data are from LES simulations with heterogeneous (HET) and homogeneous (HOM) surface states and fluxes 
   • Lower boundary prescribed from HydroBlocks offline simulations
   • PSD for variables here are calulated hourly in horizontal space (directionless) across LES domain
     • 92 case days, each with 14 hours
     • 259 wavenumbers (domain 520x520)
   • For 3D variables, vertical coordinates are approximate, to align with model levels - makes plots easier to interpret
     
All statistics are calcualted each hour as a function of wavelength: 
   • No data at t=0 (hour 12UTC) as the ICs are the same
   • Data for 14 hours (13 to 03UTC) are saved
   • Across wavelengths; results can be plotted in spectral space
   • 3D fields also have a vertical coordinate to their statistics
"""
s_string = """
Means are calculated as the exponent of the mean of the logs of power spectral density (PSD) across all 92 cases in HET and HOM.
   • That is: exp(mean(log(PSD)))
   • These are used to normalize the case values to produce a sort of anomaly
     • Normalized values (0,1) have less power than "mean"
     • Normalized values >1 have more power than "mean"
     
Similar to the log-mean, a "log-variance" is calculated across all 92 cases for HET and HOM:
   • Equals the mean of the normalized PSDs for each case 
   • No variance from case to case would result in a value of 1
   • More case-to-case variance leads to values ever greater than 1
   
An exp-mean-log-ratio between HET and HOM across cases:
   • That is: exp(mean(log(ratio))) applied to non-normalized PSDs
   • Works like RMS difference, but in log space
   • HET is in the numerator, so expect values >1 mainly - i.e., mismatches driven by surface heterogeneity leads to bigger numbers

Pearson's correlation between HET and HOM across cases (applied to the log of PSDs):
   • High correlations suggest the area-mean surface ICs or synoptic atmospheric situation controls PSD
   • Low correlation suggests the surfae heterogeneity affects the power spectrum

Relative entropy (RE) is calculated between HET (baseline, q) and HOM (p) cases.
   • PSDs are normalized by exp(mean(log(PSD))) to put all wavenumbers on a more equal footing
   • 8 bins for PSD, each spans a power of 10 
     • count number of cases in each bin
     • 1 is added to each bin to avoid div0 when any q bins are empty
     • Thus, reads like 100 (92 + 8) count across all bins
     • Then converted to a probability distribution (Σ=1.0)
   • RE is calculated as function of wavelength, (and level for 3D fields)
"""

In [None]:
#################################
# Locate and open the two PSD input files

# Path to the output from `spatial_PSD_data.ipynb`
ddir = "/Volumes/SSD_8TB/CLASP/LES_runs2/"
# File-name suffixes: first entry is the HET run, second is the HOM run
cases = ['_00','_01']

het_path = f"{ddir}fr2_PSD{cases[0]}.nc4"
hom_path = f"{ddir}fr2_PSD{cases[1]}.nc4"
het, hom = xr.open_dataset(het_path), xr.open_dataset(hom_path)

# Bare expression so the notebook displays a summary of the HET dataset
het

## Calculate statistics between HOM and HET fields and write to file
Includes:
* Variance of power, and log(power), across cases for each config
* EMLR (exponent of the mean log ratio HET/HOM, an application of log power averaging) between configs
* Pearson correlation of log(PSD) between configs
* Relative entropy 

In [None]:
######################################################
# The statistics are assembled as xarrays dimensioned by:
#   • wavelength
#   • [level]
#   • hour
#
# Relative entropy (RE) compares HOM (p) against HET (baseline, q)
#   • Input is power spectral density in horizontal space (directionless) across LES domain
#     • 92 case days, each with 14 hours
#     • 259 wavenumbers (domain 520x520)
#   • Each hour's PSD is normalized by exp(mean(log(PSDs))) averaged across all cases
#     so that all wavenumbers are on a more equal footing
#   • Power is sorted into 8 bins, each spanning a factor of 10
#     • the number of cases falling in each bin is counted
#     • 1 is added to every bin to avoid div0 when q bins are empty
#     • so the counts read like 100 across all bins
#     • counts are then converted to a probability distribution (Σ=1.0)
#   • RE is calculated as function of wavelength, (and level for 3D fields)
#     • and smoothed across frequency space to remove noise from the small sample

# Flags the first variable processed, which is when the output Dataset gets created
first = True

# Factor for smoothing spectra (handed to freq_smooth2 as winfac)
winfac = 4.5

# Bin edges for a normalized distribution
bins = [
    1e-20,
    1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3,
    1e20,
]

# One label per bin, i.e. per consecutive pair of edges above
bin_names = [
    "<1e-3",
    "1e-3:1e-2",
    "1e-2:1e-1",
    "1e-1:1e0",
    "1e0:1e1",
    "1e1:1e2",
    "1e2:1e3",
    ">1e3",
]
###>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
def _annotated(da, name, hour_labels, description, units=None):
    """Return `da` renamed to `name`, with string hour labels and annotation attributes.

    `units` is only written when given; otherwise any units already on `da` are kept.
    """
    attrs = {'description': description}
    if units is not None:
        attrs = {'units': units, 'description': description}
    out = da.rename(name).assign_coords(hour=hour_labels).assign_attrs(attrs)
    out['wavelength'].attrs = {"units": "grid cells"}
    out['hour'].attrs = {"time reference": "UTC"}
    return out


def _add_vertical_coords(da):
    """Return `da` with the approximate pressure/height coordinates on 'bottom_top'."""
    out = da.assign_coords({"p_levs":('bottom_top',p_levs), "z_levs":('bottom_top',z_levs)})
    out['p_levs'].attrs = {"units": "Pa (approx.)"}
    out['z_levs'].attrs = {"units": "m AGL (approx.)"}
    return out


def _case_counts(norm, edges):
    """Histogram `norm` along its first axis (cases) for every remaining index.

    Returns integer counts of shape norm.shape[1:] + (len(edges)-1,), with 1 added
    to every bin so that no bin is empty (avoids +inf in the relative entropy).
    """
    values = np.asarray(norm)
    columns = values.reshape(values.shape[0], -1).T  # one row per (level,) wavelength
    counts = np.stack([np.histogram(col, bins=edges)[0] for col in columns]) + 1
    return counts.reshape(values.shape[1:] + (len(edges) - 1,))


for v in het.data_vars:  # Loop thru variables
    print(f"\033[1m{v}\033[0m",end=": ")

    sam_p,sam_q = hom[v],het[v] # HOM is "p"; HET is the baseline "q"

    # exp of mean of logs for each hour of day, across cases.
    # The first three hour groups are moved to the end to put hours "in proper order"
    # (presumably the post-midnight UTC hours of each case - TODO confirm against the input files).
    p_lmean = np.exp(np.log(sam_p).groupby('time.hour').mean())
    p_lmean = xr.concat([p_lmean[3:],p_lmean[:3]],dim='hour')
    q_lmean = np.exp(np.log(sam_q).groupby('time.hour').mean()) # also used for normalization
    q_lmean = xr.concat([q_lmean[3:],q_lmean[:3]],dim='hour')
    hours = list(q_lmean.hour.values)

    hour_labels = [f"{h:02}" for h in hours]  # output files index hours by 2-digit strings
    base = q_lmean.name
    what = q_lmean.attrs['description']
    freq = 1/q_lmean.wavelength               # frequency axis handed to the smoother

    ####### Make some empty data arrays for stats
    # (a fresh zeros_like for each, so no two statistics share a data buffer):
    # Relative entropy
    rex = _annotated(xr.zeros_like(q_lmean), f"re_{base}", hour_labels,
                     f"relative entropy (HOM vs HET) of {what}", units="-")
    # exp-mean-log of the ratio HET/HOM
    emlr = _annotated(xr.zeros_like(q_lmean), f"emlr_{base}", hour_labels,
                      f"exp(mean(log(HET/HOM))) of {what}", units="-")
    # Correlation (Pearson, applied to the log of PSD)
    corr = _annotated(xr.zeros_like(q_lmean), f"corr_{base}", hour_labels,
                      f"correlation (HOM vs HET) of {what}", units="-")
    # Logs of variances across cases
    lv_hom = _annotated(xr.zeros_like(q_lmean), f"lv_hom_{base}", hour_labels,
                        f"Log variance across HOM cases of {what}", units="-")
    lv_het = _annotated(xr.zeros_like(q_lmean), f"lv_het_{base}", hour_labels,
                        f"Log variance across HET cases of {what}", units="-")

    ########################################
    # Means (exp(mean(log(X)))) across cases - already computed above, keep their units
    eml_hom = _annotated(p_lmean, f"eml_hom_{base}", hour_labels,
                         f"exp-mean-log across HOM cases of {what}")
    eml_het = _annotated(q_lmean, f"eml_het_{base}", hour_labels,
                         f"exp-mean-log across HET cases of {what}")

    # Order here is the order in which variables are added to the output Dataset
    stats = [eml_het,eml_hom,lv_het,lv_hom,emlr,corr,rex]
    if len(sam_p.shape) == 3:  # Add plottable (if approximate) vertical coordinates
        stats = [_add_vertical_coords(da) for da in stats]
    eml_het,eml_hom,lv_het,lv_hom,emlr,corr,rex = stats

    # Loop through hours to calculate stats across cases - reassemble into xarrays
    for h in hours:
        print(h,end=" ")
        key = dict(hour=f"{h:02}")
        q_ref = q_lmean.sel(hour=h)
        p_ref = p_lmean.sel(hour=h)

        # Extract data just for this hour
        q_ens = sam_q.where(sam_q.time['time.hour']==h).dropna(dim='time')
        p_ens = sam_p.where(sam_p.time['time.hour']==h).dropna(dim='time')

        # For relative entropy both ensembles are normalized by the HET (baseline) log mean:
        #   Puts all wavenumbers on a more equal footing
        #   Also appears to ameliorate somewhat the result's dependency on which is p vs q
        q_norm = q_ens/q_ref
        p_norm = p_ens/q_ref

        # The log-variances are straightforward (each config normalized by its own log mean)
        lv_het.loc[key] = freq_smooth2(freq,q_norm.var(dim='time'),winfac=winfac)
        lv_hom.loc[key] = freq_smooth2(freq,(p_ens/p_ref).var(dim='time'),winfac=winfac)

        # EMLR
        emlr_spec = np.exp(np.log(q_ens/p_ens).mean(dim='time'))
        emlr.loc[key] = freq_smooth2(freq,emlr_spec,winfac=winfac)

        # Correlation (Pearsons applied to the log of PSD)
        corr_spec = xr.corr(np.log(q_ens),np.log(p_ens),dim='time')
        corr.loc[key] = freq_smooth2(freq,corr_spec,winfac=winfac)

        # Calculate the histograms across bins
        #   1 is added in each bin to ensure no +inf; any such value would be arbitrary
        #   Each ensemble is binned using its own shape
        q_his1 = _case_counts(q_norm, bins)
        p_his1 = _case_counts(p_norm, bins)
        # Convert to a probability distribution (sum = 1)
        p_pd1 = p_his1/p_his1.sum(axis=-1,keepdims=True)
        q_pd1 = q_his1/q_his1.sum(axis=-1,keepdims=True)
        # Calculate relative entropy
        re_spec = entropy(p_pd1,qk=q_pd1,axis=-1)
        rex.loc[key] = freq_smooth2(freq,re_spec,winfac=winfac)

    if first: # Merge the DataArrays into one Dataset
        ds = xr.merge(stats).assign_attrs({'description':d_string})
        first = False
    else:     # Add this variable's statistics to the existing Dataset
        for da in stats:
            ds[da.name] = da

    print(" ")
    
# Write every data variable with light zlib compression (deflated NetCDF4)
deflate = dict(zlib=True, complevel=1)
encoding = dict.fromkeys(ds.data_vars, deflate)

out_path = f"{ddir}fr2_stats2_PSD.nc4"
ds.to_netcdf(out_path,engine="netcdf4",format="netCDF4",encoding=encoding)

print("***DONE***")