Rotated EOFs
=====

Author: [Ray Bell](https://github.com/raybellwaves)

In [None]:
%matplotlib inline
from IPython.display import Image
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import holoviews as hv
import geoviews as gv
import geoviews.feature as gf
hv.notebook_extension()

In [None]:
# Load the monthly 500 hPa geopotential height field.
# See pre_proc/README for notes on creating this file.
da = xr.open_dataarray('NCEP_z500_monthly_1950-2000_NH.nc')
print(da)

# Build overlapping seasonal means (JFM, FMA, MAM, ...) with a 3-month
# rolling window. center=True labels each mean with the middle month of its
# window; the incomplete windows at either end of the record come out as NaN
# and are removed by dropna.
three_month_window = da.rolling(time=3, center=True)
sm = three_month_window.mean().dropna('time')

In [None]:
# Choose a season of interest by index (0 = JFM, ..., 11 = DJF).
# The index is an offset into `sm`, so it relies on the first seasonal mean
# in `sm` being JFM.
season_dim = ['JFM', 'FMA', 'MAM', 'AMJ', 'MJJ', 'JJA', 'JAS', 'ASO', 'SON', 'OND', 'NDJ', 'DJF']
season = 0

# Take every 12th seasonal mean starting at the chosen offset.
# The stop is left open (None): a stop of -1 would always exclude the final
# time step and silently drop the last year of any season that lands on it.
da_sea = sm.isel(time=slice(season, None, 12))
# Make sure you've grabbed all seasons e.g. JFM should give you YYYY-02-DDYHH...
print(da_sea.coords['time'])

In [None]:
# Standardized anomalies: subtract the time mean at every grid point, then
# divide by the standard deviation (over time) of those anomalies.
# For plain (un-standardized) anomalies use instead:
#da_sea_anom = da_sea - da_sea.mean(dim=('time'))
da_sea_centered = da_sea - da_sea.mean(dim='time')
da_sea_anom = da_sea_centered / da_sea_centered.std(dim='time')
print(da_sea_anom)

In [None]:
# Weight the data by sqrt(cos(lat)).
data = da_sea_anom.values
lat_radians = np.deg2rad(da_sea_anom.coords['lat'].values)
# Clip cos(lat) to [0, 1] before the square root so rounding at the pole
# cannot produce a negative argument.
cos_lat = np.cos(lat_radians).clip(0., 1.)
# Column vector (lat, 1) so it broadcasts along longitude.
wgts = np.sqrt(cos_lat)[:, np.newaxis]
# Expand the weights to the full 2-D spatial grid of `data` (all but the
# leading time axis).
weights = np.broadcast_to(wgts, data.shape[1:])
data_weighted = data * weights

In [None]:
# Collapse the spatial axes so the data matrix is (time, space).
records = da_sea_anom.coords['time'].size  # number of time samples
originalshape = data_weighted.shape[1:]    # spatial shape, needed to un-flatten later
channels = np.prod(originalshape)          # number of grid points
data_weighted_flat = data_weighted.reshape(records, channels)
print(data_weighted_flat.shape)

In [None]:
# Compute the singular value decomposition (https://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.svd.html)
# A  : left singular vectors, used below to build the principal components
# Lh : singular values, squared below to obtain the eigenvalues
# E  : right singular vectors; each row is one EOF over the flattened grid
A, Lh, E = np.linalg.svd(data_weighted_flat, full_matrices=False)
for svd_factor in (A, Lh, E):
    print(svd_factor.shape)

In [None]:
# Eigenvalues are the squared singular values, normalized by N-1.
L = Lh ** 2 / float(records - 1)
print(L.max())

# SVD returns unit-length left singular vectors; multiplying each column by
# its singular value removes that implicit scaling (compared with an
# eigen-decomposition) and gives the principal component time-series.
# The PCs may be re-scaled later if required.
P = A * Lh[np.newaxis, :]
print(P.shape)

In [None]:
# Number of leading EOFs retained for the rotation.
neofs = 10

# Rows of E are the EOFs; keep the leading `neofs` of them.
eofs = E[:neofs, :].copy()
print(np.min(eofs), np.max(eofs))

# Keep the un-scaled EOFs on the original (lat, lon) grid for plotting.
eofs2d = eofs.reshape((neofs,) + originalshape)
# Put the EOFs in an xr.DataArray for ease of plotting.
eofs2d_da = xr.DataArray(eofs2d,
                         coords=[range(eofs2d.shape[0]),
                                 da_sea_anom.coords['lat'],
                                 da_sea_anom.coords['lon']],
                         dims=['mode', 'lat', 'lon'], name='eofs')

# Scale each EOF by dividing by the square root of its eigenvalue.
# This rebinds `eofs` to a new array, so `eofs2d` above stays un-scaled.
# NOTE(review): some rotation workflows multiply by sqrt(eigenvalue) instead
# of dividing - confirm this is the intended scaling.
eofs = eofs / np.sqrt(L[:neofs])[:, np.newaxis]
print(np.shape(eofs))
print(np.min(eofs), np.max(eofs))

# P is (records, modes): each PC time-series is a COLUMN, so the leading
# `neofs` PCs are the first `neofs` columns (slicing rows would instead
# select the first `neofs` time steps of every mode).
pcs = P[:, :neofs].copy()

In [None]:
# Percentage of the total variance explained by each retained mode,
# followed by the cumulative percentage captured by all `neofs` modes.
total_variance = L.sum()
sub_varexpl = L[:neofs] / total_variance * 100
print(sub_varexpl)
print(sub_varexpl.sum())

Have a look at all modes of variability using [geoviews](https://github.com/ioam/geoviews). Just move the slider.

In [None]:
%%opts Image [projection=ccrs.Orthographic(0, 90) colorbar=False fig_size=200] (cmap='RdBu_r') Overlay [xaxis=None yaxis=None]
# Wrap the un-scaled EOF maps in a geoviews Dataset keyed by mode, lon and lat.
dataset = gv.Dataset(eofs2d_da, kdims=['mode', 'lon', 'lat'])
# Convert to lon/lat images and overlay coastlines. The image is the last
# expression so it is rendered as the cell output; the 'mode' dimension is
# not consumed by the image and is presumably what drives the slider.
dataset.to(gv.Image, ['lon', 'lat']) * gf.coastline()

In [None]:
# Show the reference figure produced with NCL for the selected season.
season_name = season_dim[season]
print('top 4 eofs computed in NCL for season ' + season_name)
# Can't get NCL to work on the NCEP data
Image(filename='NCL/' + season_name + '_eof.png', width=1000, height=1000)

In [None]:
# Earlier attempt at the rotation, based on a MATLAB function.
# Kept commented out for reference only: it is very slow (a few minutes)
# and it does not work.

#tol = 1e-10
#it_max = 1000
#p, nc = np.shape(eofs)

# Normalize
#scale = np.expand_dims(np.sqrt(np.diag(np.dot(eofs, eofs.T))), axis=1)
#scale2 = np.repeat(scale, nc, axis=1)

#eofs2 = eofs / scale

#TT = np.eye(nc)
#print(TT)
#print(np.shape(TT))
#d = 0;

#for i in range(it_max):
#    z = np.dot(eofs2,TT)
#    zdiag = np.diag(np.squeeze(np.dot(np.ones((1,p)), (z ** 2)))) / p
#    ztmp = np.dot(z,zdiag)
#    ztmp2 = (z ** 3) - ztmp
#    B = np.dot(eofs2.T,ztmp2)

#    U, S, V = np.linalg.svd(B)
#    TT = np.dot(U, V)

#    d2 = d
#    d = np.sum(S)
#    if d < (d2 * (1 + tol)): break

#reofs = np.dot(eofs2,TT)

#reofs = reofs * scale
#print(np.max(reofs), np.min(reofs))

In [None]:
# Rotate the EOFs (varimax).
# This is based on https://github.com/ajdawson/eofs/blob/experimental-rotation/lib/eofs/experimental/rotation/kernels.py
# and https://github.com/bmcmenamin/fa_kit/blob/master/fa_kit/rotation.py
# Not quite like the NCL output. i.e. there are spatial differences

itermax = 1000
gamma = 1.0 # For varimax
tol = 1e-10 # Relative tolerance of the convergence test

# Apply Kaiser normalization: `eofs` is (neofs, space), so summing over
# axis 0 gives one scale factor per grid point.
scale = np.sqrt((eofs ** 2).sum(axis=0))
# In some cases such as NCEP data the scaling gives 0.
# Workaround: divide by 1 at those points instead. The EOF values there are
# all zero, so they stay zero and no 0/0 is produced.
scale[scale == 0] = 1.0

# Normalize into a NEW array rather than in place. Mutating `eofs` would make
# this cell non-repeatable: on a second run `scale` would be all ones and the
# de-normalization below would no longer restore the original amplitudes.
eofs_norm = eofs / scale
print(np.min(eofs_norm), np.max(eofs_norm))

# Define the initial values of the rotation matrix and the convergence monitor.
rot_mat = np.eye(neofs, dtype=eofs_norm.dtype)
delta = 0.

# Iteratively compute the rotation matrix.
for i in range(itermax):
    # Rotated loadings, shape (space, neofs).
    z = np.dot(eofs_norm.T, rot_mat)
    # axis=0 averages over space, giving one value per mode.
    zdiag = np.diag(np.mean(z ** 2, axis=0) * gamma)

    b = np.dot(eofs_norm, z ** 3 - np.dot(z, zdiag))

    svd_u, svd_s, svd_v = np.linalg.svd(b)

    rot_mat = np.dot(svd_u, svd_v)

    # Stop once the criterion no longer increases by more than the relative
    # tolerance. Without a tolerance the test only fires on floating-point
    # noise, so the loop would normally run all `itermax` iterations.
    delta_new = np.sum(svd_s)
    if delta_new < delta * (1. + tol): break
    delta = delta_new

# Apply the rotation to the input EOFs.
reofs = np.dot(eofs_norm.T, rot_mat).T

# de-normalize (undo the Kaiser normalization)
reofs *= scale
print(np.min(reofs), np.max(reofs))

# Show the variances of the modes (in descending order).
# NOTE: only the printed percentages are sorted; the rows of `reofs` keep the
# order produced by the rotation, so the i-th printed value does not
# necessarily belong to mode i.
divisor = (reofs ** 2).sum(axis=0).sum()
reof_varexpl = ((reofs ** 2).sum(axis=1) / divisor) *100
reof_varexpl = np.flip(np.sort(reof_varexpl), axis=0)
print('reof var explained:')
print(reof_varexpl)

# Fix the sign of each mode: if the largest-magnitude value of a mode is
# negative, flip that mode so its dominant extremum is positive.
# (This does not reorder the modes.)
abs_min = np.abs(np.min(reofs, axis=1))
abs_max = np.abs(np.max(reofs, axis=1))
to_flip = abs_max < abs_min
reofs[to_flip, :] *= -1

# Normalize each rotated EOF to unit length (divide by its Euclidean norm).
LR = np.sqrt(np.sum(reofs**2, axis=1, keepdims=True))
reofs = reofs / LR
print(np.min(reofs), np.max(reofs))

# Put to xr.DataArray for plotting.
# Multiply by minus 1 to match the color of the NCL plots. The negation
# creates a new array, so whether `reofs` itself changes no longer depends on
# reshape returning a view or a copy; `reofs` is left untouched.
reofs2d = -reofs.reshape((neofs,) + originalshape)
print(np.min(reofs2d), np.max(reofs2d))
reofs2d_da = xr.DataArray(reofs2d,
                          coords=[range(reofs2d.shape[0]),
                                  da_sea_anom.coords['lat'],
                                  da_sea_anom.coords['lon']],
                          dims=['mode', 'lat', 'lon'], name='eofs')

Plot the reofs

In [None]:
%%opts Image [projection=ccrs.Orthographic(0, 90) colorbar=False fig_size=200] (cmap='RdBu_r') Overlay [xaxis=None yaxis=None]
# Wrap the rotated EOF maps in a geoviews Dataset keyed by mode, lon and lat.
dataset = gv.Dataset(reofs2d_da, kdims=['mode', 'lon', 'lat'])
# Convert to lon/lat images and overlay coastlines; being the last
# expression, the overlay is rendered as the cell output.
dataset.to(gv.Image, ['lon', 'lat']) * gf.coastline()

In [None]:
# Show the rotated-EOF reference figure produced with NCL for the selected season.
season_name = season_dim[season]
print('top 4 reofs computed in NCL for season ' + season_name)
Image(filename='NCL/' + season_name + '_rot_eof.png', width=1000, height=1000)

Plot the location of minimum and maximum for reof1 and reof2

In [None]:
def _extrema_lat_lon(reof):
    """Return (latmax, lonmax, latmin, lonmin) for one rotated EOF map.

    Each element is the array of coordinate values at which `reof` equals
    its maximum (respectively minimum); it holds more than one value if the
    extremum is attained at several grid points.
    """
    at_max = reof.where(reof == reof.max(), drop=True)
    at_min = reof.where(reof == reof.min(), drop=True)
    return (at_max.coords['lat'].values, at_max.coords['lon'].values,
            at_min.coords['lat'].values, at_min.coords['lon'].values)


def _plot_extrema(latmax, lonmax, latmin, lonmin, max_color, min_color):
    """Mark the maximum and minimum locations with crosses on a polar view."""
    ax = plt.axes(projection=ccrs.Orthographic(-90, 90))
    ax.coastlines()
    ax.set_global()
    # The coordinates are plain lon/lat, hence the PlateCarree transform.
    ax.plot(lonmax, latmax, color=max_color, marker='x', transform=ccrs.PlateCarree())
    ax.plot(lonmin, latmin, color=min_color, marker='x', transform=ccrs.PlateCarree())
    plt.show()


# First rotated mode: maximum in blue, minimum in red.
reof1 = reofs2d_da.isel(mode=0)
latmax, lonmax, latmin, lonmin = _extrema_lat_lon(reof1)
print(latmax, lonmax)
print(latmin, lonmin)
print('season number is (see third cell):', season)
_plot_extrema(latmax, lonmax, latmin, lonmin, max_color='blue', min_color='red')

# Second rotated mode: maximum in red, minimum in blue.
# NOTE(review): the colours are swapped relative to the first mode, exactly as
# in the original cell - confirm this is intentional.
reof2 = reofs2d_da.isel(mode=1)
latmax, lonmax, latmin, lonmin = _extrema_lat_lon(reof2)
print(latmax, lonmax)
print(latmin, lonmin)
_plot_extrema(latmax, lonmax, latmin, lonmin, max_color='red', min_color='blue')

For comparison, here is the CPC NAO definition (correlation maps) for season = 11 (DJF), season = 2 (MAM), season = 5 (JJA) and season = 8 (SON):
![title](CPC/nao_correlation_map.gif)