# Calculating EOFs & PCs using ingrid's 'svd' command

## Importing ingrid DataCatalog calculations into notebooks using xarray and pandas

In [None]:
import pandas as pd
import xarray as xr
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

## First a digression about ingrid and time grids

In [None]:
# An ingrid OPeNDAP ('dods') URL can be handed straight to xr.open_dataset().
# decode_times=False keeps T as the raw numbers served by ingrid instead of
# asking xarray to turn them into datetimes.
URL = (
    'https://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NCDC/.ERSST/.version4/.anom'
    '/X/180/VALUE/Y/0/VALUE/dods'
)
ds = xr.open_dataset(URL, decode_times=False)
ds

### N.B. time grid trouble from ingrid to xarray
1. It usually works to convert the time grid into 'days since' units using 'streamgridunitconvert' in ingrid.
2. Another method is to use 'decode_times=False' and then use pandas to replace the dataset's time grid.

In [None]:
# Method 1: let ingrid re-express the T grid before xarray ever sees it.
baseURL = (
    'https://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NCDC/.ERSST/.version4/.anom'
    '/X/180/VALUE/Y/0/VALUE'
)
# URL-encoded form of the ingrid command
#   T (days since 1854-01-01) streamgridunitconvert
Tconvert = '/T/(days%20since%201854-01-01)streamgridunitconvert'
URL = f'{baseURL}{Tconvert}/dods'
# No decode_times override here: xarray decodes the converted grid itself.
ds = xr.open_dataset(URL)
ds

In [None]:
# Method 2: read T undecoded, then overwrite it with a pandas-built grid.
ds = xr.open_dataset(
    'https://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NCDC/.ERSST/.version4/.anom/X/180/VALUE/Y/0/VALUE/dods',
    decode_times=False,
)
# One month-start stamp per step along the leading axis of `anom`, beginning
# January 1854, each pushed 15 days forward.
# NOTE(review): assumes the leading axis of `anom` is T -- confirm.
ds['T'] = pd.date_range(
    '1/1/1854',
    periods=ds.anom.shape[0],
    freq='MS',
).shift(15, freq='D')
ds

In [None]:
# Ingrid recipe: ERSST v4 anomalies restricted to Jan 1958 - Dec 2014,
# Y -40..40 and X 120..290, passed through 'yearly-anomalies' and a 12-step
# box average along T, with T re-expressed as 'days since 1960-01-01'.
# Raw strings keep ingrid's '\ ' (escaped space) verbatim without relying on
# Python's deprecated handling of invalid escape sequences.
ingridsnippet = r"""
 SOURCES .NOAA .NCDC .ERSST .version4 .anom zlev removeGRID
  T (Jan\ 1958) (Dec\ 2014) RANGE
  Y -40 40 RANGE
  X 120 290 RANGE
  yearly-anomalies
  T 12 boxAverage
  T (days\ since\ 1960-01-01) streamgridunitconvert
"""
# Collapse all whitespace to single spaces, URL-encode the escaped spaces,
# then turn the remaining token separators into URL path separators.
snip = ' '.join(ingridsnippet.split()).replace(r'\ ','%20').replace(' ','/')
nc_url = 'https://iridl.ldeo.columbia.edu/'+snip+'/dods'
print(nc_url)

ds_anom = xr.open_dataset(nc_url)
ds_anom

### get the EOFs (spatial patterns)

In [None]:
# Same ingrid recipe as for the anomalies, followed by ingrid's 'svd' over
# [Y X] against [T] with the {Y cosd sqrt} factor, keeping ev 1..4 of the
# spatial structures (.Ss).
# NOTE(review): {Y cosd sqrt} looks like a sqrt(cos(latitude)) weighting --
# confirm against the ingrid svd documentation.
# Raw strings keep ingrid's '\ ' (escaped space) verbatim without relying on
# Python's deprecated handling of invalid escape sequences.
ingridsnippet = r"""
 SOURCES .NOAA .NCDC .ERSST .version4 .anom zlev removeGRID
  T (Jan\ 1958) (Dec\ 2014) RANGE
  Y -40 40 RANGE
  X 120 290 RANGE
  yearly-anomalies
  T 12 boxAverage
  {Y cosd sqrt}[Y X][T]svd
  ev 1 4 RANGE .Ss
"""
# Collapse whitespace, URL-encode the escaped spaces, then use '/' between
# the remaining tokens to form the URL path.
snip = ' '.join(ingridsnippet.split()).replace(r'\ ','%20').replace(' ','/')
nc_url = 'https://iridl.ldeo.columbia.edu/'+snip+'/dods'

# save the EOFs in the existing dataset
ds_anom['Ss'] = xr.open_dataset(nc_url).Ss
ds_anom

### get the Principal Components (PCs)

In [None]:
# Same ingrid recipe and 'svd' call as for the EOFs, but keeping ev 1..4 of
# the time series (.Ts), i.e. the principal components.
# Raw strings keep ingrid's '\ ' (escaped space) verbatim without relying on
# Python's deprecated handling of invalid escape sequences.
ingridsnippet = r"""
 SOURCES .NOAA .NCDC .ERSST .version4 .anom zlev removeGRID
  T (Jan\ 1958) (Dec\ 2014) RANGE
  Y -40 40 RANGE
  X 120 290 RANGE
  yearly-anomalies
  T 12 boxAverage
  {Y cosd sqrt}[Y X][T]svd
  ev 1 4 RANGE .Ts
"""
# Collapse whitespace, URL-encode the escaped spaces, then use '/' between
# the remaining tokens to form the URL path.
snip = ' '.join(ingridsnippet.split()).replace(r'\ ','%20').replace(' ','/')
nc_url = 'https://iridl.ldeo.columbia.edu/'+snip+'/dods'
# T is left undecoded here; the grid is rebuilt with pandas just below.
ds2 = xr.open_dataset(nc_url,decode_times=False)

# Replace the yearly time grid: one stamp per year on 1 July, starting with
# July 1958, as many as there are steps along the second axis of Ts.
# NOTE(review): 'AS-JUL' is a deprecated alias in recent pandas ('YS-JUL' is
# the newer spelling) -- confirm against the installed pandas version.
ds_anom['T'] = pd.date_range('1/1/1958', periods=ds2.Ts.shape[1], freq='AS-JUL')
# Now save the PCs. Hand xarray the bare array: a DataArray inside a
# (dims, data) tuple is ambiguous to xarray and is rejected by newer releases.
ds_anom['Ts'] = (['ev','T'],ds2.Ts.values)
ds_anom

### plots 

In [None]:
# Two stacked panels: the first two entries of Ss, one per row.
plt.figure(figsize=(8, 10))
plt.subplot(2, 1, 1)
ds_anom['Ss'][0].plot()
plt.subplot(2, 1, 2)
ds_anom['Ss'][1].plot()

In [None]:
# Each PC gets its own figure (passing figsize makes xarray create one),
# titled right after it is drawn.
ds_anom['Ts'][0].plot(figsize=(10, 5))
plt.title('PC1', fontsize=16)
ds_anom['Ts'][1].plot(figsize=(10, 5))
plt.title('PC2', fontsize=16)

In [None]:
# Write anomalies, EOFs and PCs to one netCDF file, storing the T
# coordinate on disk as float32.
ds_anom.to_netcdf(
    'ingrid_EOF.nc',
    encoding={'T': {'dtype': 'float32'}},
)