In [1]:
import sys
import os
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
from matplotlib import rcParams, cycler
import numpy as np
import xarray as xr
import geopandas as gpd
import rioxarray
from eofs.xarray import Eof
from xeofs.xarray import EOF
import gc
import collections
import statsmodels.api as sm
import scipy
from sklearn.metrics import mean_squared_error
from math import sqrt
from itertools import product
import pyproj

In [193]:
# Define project repo path
# NOTE(review): absolute, user-specific path. Paths below are joined to it by plain
# string concatenation (e.g. inDirName + iceShelvesShape_filepath), so the trailing
# '/' on each folder string is required.
inDirName = '/Users/smurugan9/research/aislens/aislens_emulation/'

# DATA FILE PATHS

# Data containing regridded flux and SSH for 150 years
regriddedFluxSSH_filepath = 'data/interim/RegriddedFluxSSH.nc'

# File contains all defined ice shelves
iceShelvesShape_filepath = 'data/interim/iceShelves.geojson'

# Folder for output figures
figures_folderpath = 'reports/figures/' 

# Sub-folders used when reading/writing intermediate results
interim_data_folder = 'data/interim/'
rmse_results_folder = 'rmse_grid_220911/'
flux_dedrafted_data_path = 'dedrafted_flux_IMBIE/'
randomized_realizations_path = 'randomized_realizations/'
flux_dedrafted_iceshelves_data_path = 'iceshelves_dedrafted_flux/'
reconstructions_neofs_path = 'reconstructions_neofs/'

In [3]:
# Read geoJSON region feature file as GeoDataFrame
iceshelvesmask = gpd.read_file(inDirName + iceShelvesShape_filepath)
# Convert to south polar stereographic projection (EPSG:3031).
# The EPSG code is passed directly: the {'init': 'epsg:XXXX'} dict form is deprecated
# in pyproj/GeoPandas and emits a FutureWarning.
icems = iceshelvesmask.to_crs(epsg=3031)
# Matching cartopy projection object
crs = ccrs.SouthPolarStereo()

In [4]:
# Open the cleaned flux dataset and keep only the land-ice freshwater flux variable
flux_clean = xr.open_dataset(
    inDirName + interim_data_folder + 'flux_clean'
).timeMonthly_avg_landIceFreshwaterFlux

In [5]:
%%time
# Remove every time step, row and column that is entirely NaN, then drop the
# "month" variable/coordinate.
flux_clean = (
    flux_clean
    .dropna('time', how='all')
    .dropna('y', how='all')
    .dropna('x', how='all')
    .drop("month")
)

In [6]:
%%time
# Build the EOF model on the cleaned flux field and solve it.
model = EOF(flux_clean)
model.solve()
# Keep the model outputs used by later cells: the EOFs, the PCs (first axis is
# used as the time length below) and the number of modes.
xeofs_eofs = model.eofs()
xeofs_pcs = model.pcs()
xeofs_n_modes = model.n_modes

In [129]:
# Display the EOFs returned by model.eofs() for inspection
xeofs_eofs

In [7]:
%%time
##############################
# FOURIER PHASE RANDOMIZATION
##############################
# Build surrogate PC time series: each PC's real-FFT coefficients are multiplied by
# random unit-modulus phase factors and transformed back to the time domain.
# NOTE(review): no random seed is set, so the realizations differ between runs.

# Define number of random Fourier realizations
n_realizations = 1
t_length = xeofs_pcs.shape[0]

# Output array: (realization, time, mode)
new_fl = np.empty((n_realizations,xeofs_pcs.shape[0],xeofs_pcs.shape[1]))

# Time limits for plotting
t1 = 0
tf = int(t_length/2)

# rfft of a real series of length N returns N//2 + 1 coefficients
n_freqs = t_length//2 + 1

for i in range(n_realizations):
    for m in range(xeofs_n_modes):
        fl = xeofs_pcs[:,m] # fluxpcs[:,i] when using PCA outputs
        fl_fourier = np.fft.rfft(fl)
        random_phases = np.exp(np.random.uniform(0,2*np.pi,n_freqs)*1.0j)
        fl_fourier_new = fl_fourier*random_phases
        # Pass n explicitly: irfft otherwise returns 2*(n_freqs-1) samples, which is
        # one short of t_length when t_length is odd and breaks the assignment.
        new_fl[i,:,m] = np.fft.irfft(fl_fourier_new, n=t_length)
        # Report the actual realization index (the old message printed a literal "i")
        print('calculated ifft for realization {}, mode: {}'.format(i, m))

In [198]:
# Persist the phase-randomized PCs to disk (np.save appends the ".npy" extension)
np.save("new_fl_phase_randomized", new_fl)

In [8]:
# Reconstruction helper, meant to be called repeatedly (one call per choice of
# mode count) when building ensembles from the phase-randomized PCs.
def generate_data(mode,mode_skip):
    """Reconstruct the flux field from the first phase-randomized realization.

    Parameters
    ----------
    mode : int
        Stop value of the mode slice handed to the model.
    mode_skip : int
        Step of the mode slice handed to the model.

    Returns
    -------
    Whatever ``model.reconstruct_randomized_X`` returns for ``new_fl[0]`` and
    ``slice(1, mode, mode_skip)``.

    Notes
    -----
    Reads the notebook-level ``model`` and ``new_fl``. Only realization 0 is used.
    """
    selected_modes = slice(1, mode, mode_skip)
    return model.reconstruct_randomized_X(new_fl[0], selected_modes)

In [9]:
def dropna(total_data):
    """Return ``total_data`` without the time steps whose values are all NaN.

    Parameters
    ----------
    total_data : object exposing ``dropna(dim, how=...)`` (e.g. an xarray object)

    Returns
    -------
    The result of ``total_data.dropna('time', how='all')``.
    """
    # Return the filtered data; the previous version returned the undefined name
    # `clipped_data`, which raised NameError (or silently picked up a stale global).
    return total_data.dropna('time',how='all')

In [10]:
def time_series(clipped_data):
    """Collapse the spatial dimensions by summing over ``'y'`` and ``'x'``.

    Returns the result of ``clipped_data.sum(['y', 'x'])``.
    """
    spatial_dims = ['y','x']
    return clipped_data.sum(spatial_dims)

In [11]:
def psd_calc(time_series):
    """Compute the power spectral density of a time series with ``plt.psd``.

    Parameters
    ----------
    time_series : array-like exposing ``isnull()`` (e.g. an xarray DataArray)

    Returns
    -------
    The value returned by ``plt.psd(time_series)`` when the series holds data.
    If every value is null there is nothing to analyse and the input is returned
    unchanged.
    """
    # The original test was inverted: it ran plt.psd only on all-null input and
    # handed valid data back untouched.
    if bool(time_series.isnull().all()):
        return time_series
    return plt.psd(time_series)

In [126]:
# Only the power values are returned: the RMSE calculation does not need the
# frequency axis.
def psd_calc_grid(data,y,x):
    """Welch power spectrum of ``data[:, y, x]``.

    Calls ``scipy.signal.welch`` with default settings and returns its second
    output (the power estimates); the frequency array is discarded.
    """
    return scipy.signal.welch(data[:,y,x])[1]

In [13]:
# Time-mean flux at every grid point. Used below to select grid points and as the
# coords/dims/attrs template for 2-D result arrays.
flux_clean_mean = flux_clean.mean('time')

In [195]:
# Step used when slicing modes in generate_data()
mode_skip = 1
# Mode values to evaluate: 71 .. xeofs_n_modes inclusive.
# NOTE(review): the lower bound 71 is a magic number — document where it comes from.
xeofs_modes = list(range(71,xeofs_n_modes+1))
# Index pairs (one row per grid point) where the time-mean flux is non-zero.
# NaN counts as non-zero, so NaN-valued grid points are included as well.
yxcoords = np.argwhere(np.array(flux_clean_mean))
# Same selection expressed as a tuple of index arrays (one array per dimension)
yxcoordsna = np.nonzero(np.array(flux_clean_mean))

In [None]:
%%time
# Welch PSD of the original flux at one grid point (used to time a single call).
# NOTE(review): `yx` is not assigned by any earlier cell in file order; it only exists
# as the leftover loop variable of later cells, so this fails on a fresh kernel.
f, Px = scipy.signal.welch(flux_clean[:,yx[0],yx[1]])
#plt.semilogy(f, Px)

In [None]:
%%time
# Welch PSD of the original flux at every selected grid point.
# Layout: (y, x, 2, 129) with [..., 0, :] = frequencies and [..., 1, :] = power.
# 129 is the number of Welch frequencies for scipy's default nperseg=256
# (assumes the time axis has at least 256 samples — TODO confirm).
orig_grid_psd = np.zeros((flux_clean.shape[1],flux_clean.shape[2],2,129))
for yx in yxcoords:
    print('calc. psd for [{}]'.format(yx))
    # Call welch directly: psd_calc_grid returns only Px, so unpacking its result
    # into (f, Px) raises "too many values to unpack".
    f, Px = scipy.signal.welch(flux_clean[:,yx[0],yx[1]])
    orig_grid_psd[yx[0],yx[1],0,:] = f
    orig_grid_psd[yx[0],yx[1],1,:] = Px
del f, Px
gc.collect()

In [None]:
# Cache the original-data PSD grid on disk (written as "orig_grid_psd.npy")
np.save("orig_grid_psd",orig_grid_psd)

In [16]:
# Reload the cached original-data PSD grid instead of recomputing it
orig_grid_psd = np.load("orig_grid_psd.npy")

### Reconstructed Data with all EOFs

In [19]:
%%time
# Reconstruction with all EOFs
# NOTE(review): generate_data() hands slice(1, mode, mode_skip) to the model; whether
# that slice really covers every mode depends on reconstruct_randomized_X — confirm.
flux_reconstr = generate_data(xeofs_n_modes,mode_skip)

In [20]:
%%time
# PSD at every grid point for reconstructed data.
# Layout matches orig_grid_psd: (y, x, 2, 129) with [..., 0, :] = frequencies and
# [..., 1, :] = power.
rec_grid_psd = np.zeros((flux_clean.shape[1],flux_clean.shape[2],2,129))
for yx in yxcoords:
    # Call welch directly: psd_calc_grid returns only Px, so unpacking its result
    # into (f, Px) raises "too many values to unpack".
    f, Px = scipy.signal.welch(flux_reconstr[:,yx[0],yx[1]])
    rec_grid_psd[yx[0],yx[1],0,:] = f
    rec_grid_psd[yx[0],yx[1],1,:] = Px
del f, Px
gc.collect()

In [21]:
# Cache the all-EOF reconstruction PSD grid on disk ("rec_ALLEOFS_grid_psd.npy")
np.save("rec_ALLEOFS_grid_psd",rec_grid_psd)

In [22]:
# Reload the cached all-EOF reconstruction PSD grid
rec_ALLEOFS_grid_psd = np.load("rec_ALLEOFS_grid_psd.npy")

In [25]:
# Sanity check: shape of the original-data PSD grid
orig_grid_psd.shape

### Calculate RMSE of PSDs across spatial domain

In [34]:
# Inspect log10 of the original PSD at one grid point, with NaNs replaced first.
# NOTE(review): `yx` is whatever the most recently executed `for yx in yxcoords`
# loop left behind, so the point shown depends on execution history.
np.log10(np.nan_to_num(orig_grid_psd[yx[0],yx[1],1,:]))

In [39]:
%%time
# RMSE between the reconstructed and original PSD (linear power units) at every
# selected grid point. NaN/Inf entries are replaced by np.nan_to_num before comparing.
# The grid starts at zero rather than np.empty so that grid points the loop never
# visits hold a defined value instead of uninitialised memory.
rmse_grid = np.zeros((flux_clean.shape[1],flux_clean.shape[2]))
for yx in yxcoords:
    print('calculating rmse: [{},{}]'.format(yx[0],yx[1]))
    rmse_grid[yx[0],yx[1]] = sqrt(mean_squared_error(np.nan_to_num(rec_ALLEOFS_grid_psd[yx[0],yx[1],1,:]),
                                                     np.nan_to_num(orig_grid_psd[yx[0],yx[1],1,:])))
gc.collect()

In [71]:
# Reconstructed PSD at the grid point held in `yx`, with NaN entries removed.
# NOTE(review): relies on `yx` left over from a previously executed loop.
new_data_rec = rec_ALLEOFS_grid_psd[yx[0],yx[1],1,:][~np.isnan(rec_ALLEOFS_grid_psd[yx[0],yx[1],1,:])]

In [73]:
# Display the reconstructed PSD values at the grid point held in `yx`
rec_ALLEOFS_grid_psd[yx[0],yx[1],1,:]

In [74]:
def remove_nans(data):
    """Return the entries of ``data`` that are not NaN.

    Selection is done with a boolean mask, so the result keeps the original order
    of the remaining values. Infinite values are kept.
    """
    not_nan = ~np.isnan(data)
    return data[not_nan]

In [82]:
%%time
# RMSE between the reconstructed and original PSD, both expressed as 10*log10(power),
# at every selected grid point. NaN entries are removed from each PSD first.
# RMSE is computed as sqrt(MSE): the `squared=False` keyword of mean_squared_error is
# deprecated in recent scikit-learn releases, while sqrt(MSE) works in every version.
# The grid starts at zero rather than np.empty so that points skipped by the `if`
# below hold a defined value (a later cell masks non-positive values).
# NOTE(review): NaNs are removed from the two PSDs independently; if they differ in
# count the lengths no longer match and mean_squared_error raises.
rmse_grid = np.zeros((flux_clean.shape[1],flux_clean.shape[2]))
for yx in yxcoords:
    print('calculating rmse: [{},{}]'.format(yx[0],yx[1]))
    rec_Px = remove_nans(rec_ALLEOFS_grid_psd[yx[0],yx[1],1,:])
    if rec_Px.shape[0]>0:
        orig_Px = remove_nans(orig_grid_psd[yx[0],yx[1],1,:])
        rmse_grid[yx[0],yx[1]] = sqrt(mean_squared_error(10*np.log10(rec_Px),
                                                         10*np.log10(orig_Px)))
gc.collect()

In [85]:
# Wrap the RMSE grid in a DataArray, borrowing coords, dims and attrs from the
# time-mean flux field.
# NOTE(review): rebinds `rmse_grid` from a NumPy array to a DataArray.
rmse_grid = xr.DataArray(rmse_grid,
                         coords=flux_clean_mean.coords,
                         dims = flux_clean_mean.dims,
                         attrs=flux_clean_mean.attrs)

In [122]:
# Write the all-EOF RMSE grid to a NetCDF file in the current working directory
rmse_grid.to_netcdf("rmse_grid_ALLEOFS.nc")

In [102]:
# Replace 0 values with NaN.
# NOTE: This step assumes there is no RMSE value that is equal to zero
# (`where(rmse_grid>0)` keeps strictly positive entries only, so zero, negative and
# NaN entries all become NaN).
rmse_grid = rmse_grid.where(rmse_grid>0)

In [123]:
# Map of the all-EOF RMSE grid on a south polar stereographic axis
plt.figure(figsize=(20,8))
ax = plt.axes(projection=ccrs.SouthPolarStereo())
ax.coastlines()
ax.gridlines()

# Colour limits: NaN-aware max/min of the grid
colorbarmax = np.nanmax([rmse_grid])
colorbarmin = np.nanmin([rmse_grid])


rmse_grid.plot(cmap="YlGn",vmax = colorbarmax, vmin = colorbarmin, cbar_kwargs={"label": "RMSE"})
plt.title("RMSE when reconstructed data includes all EOFs")
# Saved to the current working directory
plt.savefig("rmse_grid_ALLEOFS.png")

In [153]:
%%time
# Container for the per-mode RMSE grids, shaped like the EOF array.
# It starts at zero rather than np.empty so entries that are never filled hold a
# defined value; later cells mask entries that are not strictly positive.
rmse_grid_comparisons = np.zeros((xeofs_eofs.shape[0],xeofs_eofs.shape[1],xeofs_eofs.shape[2]))

In [166]:
%%time
# For each mode value: rebuild the flux field, then at every selected grid point store
# the RMSE between the reconstructed and original PSD in rmse_grid_comparisons.
# NOTE(review): rmse_calc is defined in a later cell in file order, so this fails on a
# fresh top-to-bottom run.
# NOTE(review): xeofs_modes runs up to xeofs_n_modes inclusive and `mode` is used as a
# 0-based index on the last axis — check that the largest value is in bounds.
for mode in xeofs_modes:
    print("Generating reconstructed data for mode: {}".format(mode*mode_skip))
    flux_reconstr = generate_data(mode,mode_skip)
    print("Calculating PSD across grid")
    for yx in yxcoords:
        # Skip grid points whose reconstructed series is entirely NaN
        if remove_nans(flux_reconstr[:,yx[0],yx[1]]).shape[0]>0:
            print('calculating rmse: [{},{}]'.format(yx[0],yx[1]))
            #if remove_nans(rec_Px).shape[0]>0:
            rmse_grid_comparisons[yx[0],yx[1],mode] = rmse_calc(remove_nans(psd_calc_grid(flux_reconstr,yx[0],yx[1])),
                                                                remove_nans(orig_grid_psd[yx[0],yx[1],1,:]))

In [None]:
%%time
# For each mode value: rebuild the flux field, compute the PSD RMSE at every selected
# grid point, then write that mode's RMSE grid to its own NetCDF file.
# NOTE(review): rmse_calc is defined in a later cell in file order.
# NOTE(review): `mode` is used as a 0-based index on the last axis while xeofs_modes
# runs up to xeofs_n_modes inclusive — check that the largest value is in bounds.
for mode in xeofs_modes:
    print("Generating reconstructed data for mode: {}".format(mode*mode_skip))
    flux_reconstr = generate_data(mode,mode_skip)
    print("PSD across grid")
    for yx in yxcoords:
        # Skip grid points whose reconstructed series is entirely NaN
        if remove_nans(flux_reconstr[:,yx[0],yx[1]]).shape[0]>0:
            print('calculating rmse: [{},{}]'.format(yx[0],yx[1]))
            rmse_grid_comparisons[yx[0],yx[1],mode] = rmse_calc(remove_nans(psd_calc_grid(flux_reconstr,yx[0],yx[1])),
                                                                remove_nans(orig_grid_psd[yx[0],yx[1],1,:]))
    print("Saving RMSE grid value file for mode: {}".format(mode*mode_skip))
    # Read back the slice that was just written (index `mode`). The previous index
    # `mode*mode_skip` pointed at a different slice whenever mode_skip != 1.
    xr.DataArray(rmse_grid_comparisons[:,:,mode],
                 coords=flux_clean_mean.coords,
                 dims = flux_clean_mean.dims,
                 attrs=flux_clean_mean.attrs).to_netcdf(inDirName+
                                                        interim_data_folder+
                                                        rmse_results_folder+
                                                        "rmse_grid_comparisons_EOF_{}.nc".format(mode*mode_skip))

In [150]:
def rmse_calc(rec_data,orig_data):
    """Root-mean-square error between two power spectra compared in decibels.

    Both inputs are converted with ``10*log10`` before the error is computed.

    Parameters
    ----------
    rec_data : array-like
        Power values of the reconstructed data.
    orig_data : array-like
        Power values of the original data; must have the same length.

    Returns
    -------
    float
        ``sqrt(MSE)`` of the two dB-scaled spectra.
    """
    # sqrt(MSE) instead of mean_squared_error(..., squared=False): the `squared`
    # keyword is deprecated in recent scikit-learn releases.
    return np.sqrt(mean_squared_error(10*np.log10(rec_data),10*np.log10(orig_data)))

In [167]:
# Convert Numpy array to xarray, borrowing coords, dims and attrs from the EOF array
# (the NumPy array was allocated with the EOF array's shape).
# NOTE(review): rebinds `rmse_grid_comparisons` from a NumPy array to a DataArray.
rmse_grid_comparisons = xr.DataArray(rmse_grid_comparisons,
                                     coords=xeofs_eofs.coords,
                                     dims = xeofs_eofs.dims,
                                     attrs=xeofs_eofs.attrs)

In [169]:
# Write the full per-mode RMSE array to NetCDF in the current working directory
rmse_grid_comparisons.to_netcdf("rmse_grid_comparisons.nc")

In [184]:
%%time
# Save the RMSE grid of a single mode to its own NetCDF file.
# NOTE(review): `mode` is the leftover loop variable from a previously executed
# `for mode in xeofs_modes` loop, so the slice written depends on execution history.
xr.DataArray(rmse_grid_comparisons[:,:,mode],
             coords=flux_clean_mean.coords,
             dims = flux_clean_mean.dims,
             attrs=flux_clean_mean.attrs).to_netcdf("rmse_grid_comparisons_EOF-mode-{}.nc".format(mode))

In [191]:
# Quick-look plot of the RMSE grid at index 10 of the last axis, masking entries that
# are not strictly positive
rmse_grid_comparisons[:,:,10].where(rmse_grid_comparisons[:,:,10]>0).plot()

In [70]:
%%time
# RMSE between the reconstructed and original PSD in dB at every selected grid point.
# NaN/Inf values are replaced with np.nan_to_num both before and after the log10.
# RMSE is computed as sqrt(MSE): the `squared=False` keyword of mean_squared_error is
# deprecated in recent scikit-learn releases, while sqrt(MSE) works in every version.
# The grid starts at zero rather than np.empty so that grid points the loop never
# visits hold a defined value instead of uninitialised memory.
rmse_grid = np.zeros((flux_clean.shape[1],flux_clean.shape[2]))
for yx in yxcoords:
    print('calculating rmse: [{},{}]'.format(yx[0],yx[1]))
    rmse_grid[yx[0],yx[1]] = sqrt(mean_squared_error(
        10*np.nan_to_num(np.log10(np.nan_to_num(rec_ALLEOFS_grid_psd[yx[0],yx[1],1,:]))),
        10*np.nan_to_num(np.log10(np.nan_to_num(orig_grid_psd[yx[0],yx[1],1,:])))))
gc.collect()

In [46]:
# Wrap the RMSE grid in a DataArray, borrowing coords, dims and attrs from the
# time-mean flux field.
# NOTE(review): rebinds `rmse_grid` from a NumPy array to a DataArray.
rmse_grid = xr.DataArray(rmse_grid,
                         coords=flux_clean_mean.coords,
                         dims = flux_clean_mean.dims,
                         attrs=flux_clean_mean.attrs)

In [52]:
# Plot the RMSE grid with colour limits set to its NaN-aware max/min
colorbarmax = np.nanmax([rmse_grid])
colorbarmin = np.nanmin([rmse_grid])
rmse_grid.plot(vmax = colorbarmax, vmin = colorbarmin)

In [None]:
# Zero-filled 5-D container for PSDs; a later cell indexes the first axis by mode.
# NOTE(review): the first axis is sized by flux_clean.shape[0] rather than by the
# number of modes — confirm this is intended.
rec_grid_psd = np.zeros((flux_clean.shape[0],flux_clean.shape[1],flux_clean.shape[2],2,129))

In [None]:
# Write the PSD container to disk ("rec_grid_psd.npy")
np.save("rec_grid_psd",rec_grid_psd)

In [None]:
%%time
# PSD of the reconstruction at every selected grid point, for every mode value.
# Layout: (mode index, y, x, 2, 129) with [..., 0, :] = frequencies, [..., 1, :] = power.
# The array is zero-initialised (not np.empty) so entries the loops never reach are
# not saved to disk as uninitialised memory.
# NOTE(review): the first axis is sized by flux_clean.shape[0] but indexed by `mode` —
# confirm every value in xeofs_modes is in bounds.
rec_grid_psd = np.zeros((flux_clean.shape[0],flux_clean.shape[1],flux_clean.shape[2],2,129))
for mode in xeofs_modes:
    print("Generating reconstructed data for mode: {}".format(mode*mode_skip))
    flux_reconstr = generate_data(mode,mode_skip)
    print("Calculating PSD across grid")
    for yx in yxcoords:
        # Call welch directly: psd_calc_grid returns only Px, so unpacking its result
        # into (f, Px) raises "too many values to unpack".
        f, Px = scipy.signal.welch(flux_reconstr[:,yx[0],yx[1]])
        rec_grid_psd[mode,yx[0],yx[1],0,:] = f
        rec_grid_psd[mode,yx[0],yx[1],1,:] = Px
    # Collect once per mode; a full collection per grid point dominated the runtime.
    gc.collect()
    # Checkpoint after every mode so a partial run is not lost
    np.save("rec_grid_psd",rec_grid_psd)

In [None]:
def grid_func(mode,y,x):
    """Reconstruct the flux for ``mode`` and score its PSD at grid point ``(y, x)``.

    Parameters
    ----------
    mode : int
        Mode value passed to ``generate_data`` together with the notebook-level
        ``mode_skip``.
    y, x : int
        Grid indices of the point to analyse.

    Returns
    -------
    rec_f : ndarray
        Welch frequencies of the reconstructed series.
    rec_Px : ndarray
        Welch power of the reconstructed series.
    rmse : float
        RMSE between the reconstructed and original PSD, both in ``10*log10`` units.

    Notes
    -----
    The reconstruction is regenerated on every call, so calling this once per grid
    point repeats the same reconstruction many times.
    """
    print("Generating reconstructed data for mode: {}".format(mode*mode_skip))
    # generate_data requires both arguments; the old single-argument call raised TypeError
    flux_reconstr = generate_data(mode,mode_skip)
    print("Calculating PSD & RMSE for {},{}".format(y,x))
    # welch is called directly because both f and Px are needed here; psd_calc_grid
    # takes (data, y, x) and returns Px only.
    rec_f, rec_Px = scipy.signal.welch(flux_reconstr[:,y,x])
    rmse = sqrt(mean_squared_error(10*np.log10(rec_Px),10*np.log10(orig_grid_psd[y,x,1,:])))
    return rec_f, rec_Px, rmse

In [None]:
# Evaluate grid_func for every (mode, grid point) combination.
# yxcoordsna holds *paired* index arrays, so the pairs are built with zip; feeding the
# two arrays to product() separately would combine every y index with every x index.
_grid_results = [grid_func(mode, y, x)
                 for mode, (y, x) in product(xeofs_modes, zip(yxcoordsna[0], yxcoordsna[1]))]
# Transpose the list of (rec_f, rec_Px, rmse) triples into three parallel sequences;
# unpacking the list itself only works when it has exactly three entries.
rec_f_results, rec_Px_results, rmse_results = (zip(*_grid_results) if _grid_results
                                               else ((), (), ()))

In [None]:
%%time
# Run basinplot_func_clean for every mode count 1..n_reconstr on the first basin and
# collect the RMSE values and reconstruction results.
# NOTE(review): `basins`, `orignp` and `basinplot_func_clean` are not defined anywhere
# in the visible part of the notebook.
basin = basins[0]
n_reconstr = xeofs_n_modes
rmse_results = []
rec_results = np.empty((n_reconstr,orignp.shape[0],orignp.shape[1]))
for mode in range(1,n_reconstr+1):
    rmse, recc = basinplot_func_clean(basin, mode)
    rmse_results.append(rmse)
    # Store at mode-1: the array has n_reconstr rows, so indexing with `mode` ran past
    # the end on the last iteration and left row 0 uninitialised. This also keeps
    # rec_results[k] aligned with rmse_results[k].
    rec_results[mode-1,:,:] = recc
    del recc
    gc.collect()

In [None]:
# Colormap used to colour the PSD curves below
cmap = plt.cm.coolwarm
# Set the default colour cycle; np.linspace(0, 0.1, 1) yields the single value 0, so
# the cycle contains one colour from the colormap.
rcParams['axes.prop_cycle'] = cycler(color=cmap(np.linspace(0, 0.1, 1)))

In [None]:
# Overlay the entries of rec_results from index 10 onward (thin lines, coloured by
# index) on top of the original curve (thick black line). The y-values are shown as
# 10*log10.
# NOTE(review): `orig` is not defined in the visible part of the notebook.
plt.figure(figsize=(25,8))
for i in range(10, n_reconstr):
    plt.plot(rec_results[i,1,:], 10*np.log10(rec_results[i,0,:]), linewidth=0.3, color = cmap(i*10/n_reconstr))
plt.plot(orig[1], 10*np.log10(orig[0]), label='Original', linewidth=3, color='k')
plt.xlabel('Frequency')
plt.ylabel('PSD')
plt.title('Thwaites: PSD Comparison of Reconstructed Data')
plt.legend()

In [None]:
# Overlay the entries of rec114 from index 2 onward, the original curve (black) and
# the rec114 entry with the smallest value in rmse114 (blue). The y-values are shown
# as 10*log10.
# NOTE(review): `rec114`, `rmse114` and `orig` are not defined in the visible part of
# the notebook.
plt.figure(figsize=(25,8))
for i in range(2, n_reconstr):
    plt.plot(rec114[i,1,:], 10*np.log10(rec114[i,0,:]), linewidth=0.75, color = cmap(i*15/n_reconstr))
# NOTE(review): `i` here is the value left over from the loop above (n_reconstr-1 when
# the loop ran), so this redraws the last entry.
plt.plot(rec114[i,1,:], 10*np.log10(rec114[n_reconstr-1,0,:]), linewidth=0.75, color = cmap(i*15/n_reconstr))
plt.plot(orig[1], 10*np.log10(orig[0]), label='Original', linewidth=2.5, color='k')
plt.plot(rec114[np.argmin(rmse114),1,:], 10*np.log10(rec114[np.argmin(rmse114),0,:]), label='min RMSE PSD', linewidth=2.5, color='b')
plt.xlabel('Frequency')
plt.ylabel('PSD')
plt.title('Thwaites: PSD Comparison of Reconstructed Data')
plt.legend()