In [22]:
import numpy as np
import xarray as xr
xr.set_options(keep_attrs=True)
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature
import matplotlib.path as mpath
import pyxpcm
from pyxpcm.models import pcm


def pcm_fit_and_post(time_i=42, K=4, maxvar=0.99):
    """Fit a PCM's PCA on one time slice of the salinity/temperature files.

    Opens the NetCDF files named by the module-level ``salt`` and ``theta``
    variables (both must be defined before this function is called), selects
    a single index along ``time``, merges the two datasets and keeps values
    only where the ``Depth`` coordinate is greater than 2000.  A ``pcm``
    model is then created and its ``add_pca_to_xarray`` method is run with
    ``action='fit'`` on the merged dataset.

    Parameters
    ----------
    time_i : int, optional
        Positional index along ``time`` passed to ``isel`` for both files.
    K : int, optional
        Forwarded to ``pcm`` as ``K``.
    maxvar : float, optional
        Forwarded to ``pcm`` as ``maxvar``.

    Returns
    -------
    xarray.Dataset
        The dataset after ``add_pca_to_xarray`` has run on it, with the
        ``SALT`` and ``THETA`` variables dropped and the original coordinate
        attributes re-applied.
    """
    max_depth = 2000

    # Vertical axis handed to the PCM for both features: 0, -10, ..., -1990.
    z = np.arange(0., -max_depth, -10.)
    features_pcm = {'THETA': z, 'SALT': z}
    # PCM feature name -> variable name in the dataset (defined once, reused below).
    features = {'THETA': 'THETA', 'SALT': 'SALT'}

    # `salt` and `theta` are file paths defined at notebook level.
    salt_nc = xr.open_dataset(salt).isel(time=time_i)
    theta_nc = xr.open_dataset(theta).isel(time=time_i)
    big_nc = xr.merge([salt_nc, theta_nc])

    # Keep values only where Depth > max_depth, then discard the auxiliary
    # coordinates/variables that are not needed afterwards.
    deep_enough = big_nc.coords['Depth'] > max_depth
    ds = big_nc.where(deep_enough).drop(['iter', 'Depth',
                                         'rA', 'drF', 'hFacC'])

    # Remember the coordinate attributes so they can be re-applied after the
    # PCM call below.
    attr_d = {coord: ds.coords[coord].attrs for coord in ds.coords}

    m = pcm(K=K, features=features_pcm,
            maxvar=maxvar, separate_pca=False,
            timeit=True, timeit_verb=1)

    m.add_pca_to_xarray(ds, dim='Z',
                        features=features,
                        action='fit',
                        mask=None, inplace=True)

    for coord, attrs in attr_d.items():
        ds.coords[coord].attrs = attrs

    # Remove the '_pyXpcm_cleanable' marker attribute from PCA_VALUES;
    # tolerate it being absent instead of raising KeyError.
    ds.PCA_VALUES.attrs.pop('_pyXpcm_cleanable', None)

    return ds.drop(['SALT', 'THETA'])

In [12]:
# Base directory for the input NetCDF files (machine-specific absolute path).
main_dir = '/Users/simon/bsose_monthly/'
# Full paths of the salinity and temperature files, built from the base directory.
salt, theta = (
    main_dir + file_name
    for file_name in ('bsose_i106_2008to2012_monthly_Salt.nc',
                      'bsose_i106_2008to2012_monthly_Theta.nc')
)

# Enable IPython's autoreload extension so edits to imported modules are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
# pcm_fit_and_post() returns a single dataset, so bind exactly one name
# (unpacking it into three names would iterate over the dataset instead).
ds = pcm_fit_and_post()

{'THETA': 'THETA', 'SALT': 'SALT'}
  ?.1-preprocess.1-mask: 31 ms
amalgamated option triggered
  ?.1-preprocess.4-xarray: 1 ms
  ?.1-preprocess: 757 ms
{'THETA': 'THETA', 'SALT': 'SALT'}
  fit.1-preprocess.1-mask: 11 ms
amalgamated option triggered
  fit.1-preprocess.4-xarray: 1 ms
  fit.1-preprocess: 407 ms
  fit: 407 ms
  add_pca.xarray: 205 ms


In [23]:
# Run the fit with the function's default arguments and keep the returned
# dataset. The recorded run below spent roughly 638 s in pre-processing.
ds = pcm_fit_and_post()

{'THETA': 'THETA', 'SALT': 'SALT'}
  fit.1-preprocess.1-mask: 512 ms
amalgamated option triggered
  fit.1-preprocess.4-xarray: 148 ms
  fit.1-preprocess: 638282 ms
  fit: 638324 ms
  add_pca.xarray: 16957 ms


In [46]:
# Rebuild, one feature at a time, the per-feature matrices from `ds` using
# the PCM instance `m` (both must already exist in the kernel).
dim = 'Z'
features = {'THETA': 'THETA', 'SALT': 'SALT'}
# Keys are used as PCM feature names, values as variable names in `ds`.
features_dict = ds.pyxpcm.feature_dict(m, features=features)

Xlist = []
for feature_name, feature_in_ds in features_dict.items():
    # Ravel the variable; `z` is returned alongside and fed to the interpolator.
    X, z, sampling_dims = m.ravel(ds[feature_in_ds], dim=dim,
                                  feature_name=feature_name)
    # Apply the PCM's interpolator registered for this feature.
    X = m._interpoler[feature_name].transform(X, z)
    Xlist.append(X)

In [19]:
# Show the list of per-feature arrays; the recorded output has two entries,
# each with dimensions (sampling: 10437, Z: 200).
Xlist

[<xarray.DataArray (sampling: 10437, Z: 200)>
 dask.array<_interpnd, shape=(10437, 200), dtype=float64, chunksize=(10437, 200), chunktype=numpy.ndarray>
 Coordinates:
     time      datetime64[ns] 2011-08-01T15:12:00
   * sampling  (sampling) MultiIndex
   - YC        (sampling) float64 -76.35 -76.35 -75.53 ... -29.72 -29.72 -29.72
   - XC        (sampling) float64 203.3 204.8 191.3 192.8 ... 356.9 358.4 359.9
   * Z         (Z) float64 0.0 -10.0 -20.0 ... -1.97e+03 -1.98e+03 -1.99e+03,
 <xarray.DataArray (sampling: 10437, Z: 200)>
 dask.array<_interpnd, shape=(10437, 200), dtype=float64, chunksize=(10437, 200), chunktype=numpy.ndarray>
 Coordinates:
     time      datetime64[ns] 2011-08-01T15:12:00
   * sampling  (sampling) MultiIndex
   - YC        (sampling) float64 -76.35 -76.35 -75.53 ... -29.72 -29.72 -29.72
   - XC        (sampling) float64 203.3 204.8 191.3 192.8 ... 356.9 358.4 359.9
   * Z         (Z) float64 0.0 -10.0 -20.0 ... -1.97e+03 -1.98e+03 -1.99e+03]

In [25]:
# Shape of the first feature matrix once converted to a NumPy array.
Xlist[0].values.shape

(10437, 200)

In [24]:
# Shape of the second feature matrix once converted to a NumPy array.
Xlist[1].values.shape

(10437, 200)

In [26]:
# Join every feature matrix in Xlist column-wise (same rows, columns of all
# features side by side) instead of hard-coding the first two entries.
X = np.concatenate([feature_matrix.values for feature_matrix in Xlist], axis=1)

In [28]:
# Check the combined matrix shape; the recorded output is (10437, 400).
X.shape

(10437, 400)

In [None]:
# NOTE(review): this cell was never executed (no execution count). Neither
# `self` nor `preprocessing_that` is defined anywhere in the visible notebook;
# it looks like a method call pasted from library code - confirm the intended
# receiver before running.
X, sampling_dims = preprocessing_that(self, ds, dim='Z', features={'THETA': 'THETA', 'SALT': 'SALT'}) #, action='?')


In [45]:
# Show the feature mapping; the recorded output is {'THETA': 'THETA', 'SALT': 'SALT'}.
features_dict

{'THETA': 'THETA', 'SALT': 'SALT'}

In [60]:
# Inspect `ds` (no output was saved for this cell).
ds

In [61]:
# Inspect `X` (no output was saved for this cell).
X

In [71]:
# Inspect `X` again (no output was saved for this cell).
X

In [76]:
# Inspect `ds` again (no output was saved for this cell).
ds

In [5]:
# NOTE(review): the saved output of this cell is the string 'PCA_VALUES', i.e.
# X held a variable name rather than an array when it ran - verify how X was
# bound before relying on it.
X

'PCA_VALUES'

In [8]:
# Inspect `ds` (no output was saved for this cell).
ds

In [14]:
# Inspect `ds` (no output was saved for this cell).
ds

In [15]:
# NOTE(review): the result of drop() is only displayed, never assigned.
# Assuming drop() returns a new object (as the `ds = ds.drop(...)` usage in
# pcm_fit_and_post suggests), `ds` itself is left unchanged by this cell.
ds.drop(['SALT', 'THETA'])

In [16]:
# Inspect `ds` (no output was saved for this cell).
ds

In [17]:
# Inspect `ds` (no output was saved for this cell).
ds

In [20]:
# Inspect `ds` one last time before it is written to disk (no output saved).
ds

In [25]:
# Write `ds` to 'trial-full-pc.nc'; the path is relative, so the file lands in
# the kernel's current working directory.
ds.to_netcdf('trial-full-pc.nc')