In [1]:
# ! /usr/local/bin/python3
# %load_ext autoreload
# %autoreload 2
import os
import numpy as np
import xarray as xr
xr.set_options(keep_attrs=True)
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature
import matplotlib.path as mpath
import time
from functools import wraps
import pyxpcm
from pyxpcm.models import pcm

In [5]:
def train_on_interpolated_year(time_i=42, K=5, maxvar=3, min_depth=300,
                               max_depth=2000, separate_pca=False,
                               main_dir='/Users/simon/bsose_monthly/',
                               fname=None):
    """Fit a pyxpcm ``pcm`` model on 12 time steps of regridded THETA/SALT data.

    The Salt and Theta files in ``main_dir`` are sliced to the 12 time steps
    starting at index ``time_i``, merged, masked with the ``Depth`` coordinate,
    and linearly interpolated onto a regular 240 x 60 (XC x YC) grid. The
    regridded dataset is cached on disk as netCDF so later calls skip the
    interpolation. The model is then fitted on that dataset, and PCA values,
    the i-metric and the predicted labels are added to it in place.

    Parameters
    ----------
    time_i : int
        Index of the first time step used; steps ``time_i`` .. ``time_i + 11``
        are selected.
    K : int
        Number of classes, forwarded to ``pcm``.
    maxvar : int or float
        Forwarded unchanged to ``pcm(maxvar=...)``.
    min_depth, max_depth : number
        The vertical feature axis runs from ``-min_depth`` down to (but not
        including) ``-max_depth`` in steps of 10. ``max_depth`` is also the
        threshold for the ``Depth`` mask: only points whose ``Depth``
        coordinate is greater than ``max_depth`` keep their values.
    separate_pca : bool
        Forwarded unchanged to ``pcm(separate_pca=...)``.
    main_dir : str
        Directory holding the two input netCDF files. Defaults to the
        location that used to be hard-coded.
    fname : str or None
        Path of the interpolation cache. When ``None`` (default) the name is
        derived from ``time_i`` and ``max_depth``, the only arguments that
        change the cached data, so a call with different values can no longer
        pick up a stale cache. Pass ``fname='interp.nc'`` to reuse a cache
        written under the previous fixed name.

    Returns
    -------
    tuple
        ``(m, ds)``: the fitted ``pcm`` instance and the dataset carrying the
        variables added by the model, with ``THETA`` and ``SALT`` dropped.
    """
    salt = os.path.join(main_dir, 'bsose_i106_2008to2012_monthly_Salt.nc')
    theta = os.path.join(main_dir, 'bsose_i106_2008to2012_monthly_Theta.nc')

    # Vertical axis the profiles are interpolated onto by the model
    # (negative values, stepping downwards by 10).
    z = np.arange(-min_depth, -max_depth, -10.)
    features_pcm = {'THETA': z, 'SALT': z}
    features = {'THETA': 'THETA', 'SALT': 'SALT'}

    if fname is None:
        # Key the cache on every argument that shapes the cached dataset.
        fname = 'interp_t{}_d{}.nc'.format(time_i, max_depth)

    if not os.path.isfile(fname):
        time_window = slice(time_i, time_i + 12)
        salt_nc = xr.open_dataset(salt).isel(time=time_window)
        theta_nc = xr.open_dataset(theta).isel(time=time_window)
        big_nc = xr.merge([salt_nc, theta_nc])

        # Points where the `Depth` coordinate does not exceed max_depth are
        # blanked out by `where`; the listed auxiliary variables are then
        # discarded before regridding.
        # NOTE(review): `Depth` is presumably the sea-floor depth - confirm.
        both_nc = big_nc.where(big_nc.coords['Depth'] >
                               max_depth).drop(['iter', 'Depth',
                                                'rA', 'drF', 'hFacC'])

        # Regular target grid spanning the full XC / YC extent of the data.
        lons_new = np.linspace(both_nc.XC.min(), both_nc.XC.max(), 60*4)
        lats_new = np.linspace(both_nc.YC.min(), both_nc.YC.max(), 60)

        ds = both_nc.interp(coords={'YC': lats_new, 'XC': lons_new})
        ds.to_netcdf(fname)
    else:
        ds = xr.open_dataset(fname)

    m = pcm(K=K, features=features_pcm,
            separate_pca=separate_pca,
            maxvar=maxvar,
            timeit=True, timeit_verb=1)

    m.fit(ds, features=features, dim='Z')

    # Each of the following calls writes its result into `ds` in place.
    m.add_pca_to_xarray(ds, features=features,
                        dim='Z', inplace=True)
    m.find_i_metric(ds, inplace=True)
    m.predict(ds, features=features, dim='Z', inplace=True)

    # Remove the `_pyXpcm_cleanable` marker attribute from the variables the
    # model added; `pop` with a default tolerates a marker that is already
    # absent instead of raising KeyError.
    for name in ('PCA_VALUES', 'IMETRIC', 'A_B', 'PCM_LABELS'):
        ds[name].attrs.pop('_pyXpcm_cleanable', None)

    # The raw profiles are not returned, only the derived variables.
    ds = ds.drop(['THETA', 'SALT'])

    return m, ds

In [6]:
# Train with every default argument; `m` is the fitted model and `ds` the
# dataset returned by the function. The timing lines below are printed by the
# model because it is built with timeit=True, timeit_verb=1.
m, ds = train_on_interpolated_year()

  fit.1-preprocess.1-mask: 65 ms
features_dict {'THETA': 'THETA', 'SALT': 'SALT'}
  fit.1-preprocess.2-feature_THETA.1-ravel: 258 ms
  fit.1-preprocess.2-feature_THETA.2-interp: 4 ms
  fit.1-preprocess.2-feature_THETA.3-scale_fit: 1936 ms
  fit.1-preprocess.2-feature_THETA.4-scale_transform: 1054 ms
  fit.1-preprocess.2-feature_THETA.total: 3255 ms
  fit.1-preprocess.2-feature_SALT.1-ravel: 255 ms
  fit.1-preprocess.2-feature_SALT.2-interp: 4 ms
  fit.1-preprocess.2-feature_SALT.3-scale_fit: 1372 ms
  fit.1-preprocess.2-feature_SALT.4-scale_transform: 1023 ms
  fit.1-preprocess.2-feature_SALT.total: 2656 ms
  fit.1-preprocess.2-feature_all.5-join: 182 ms
Fitting PCA
  fit.1-preprocess.2-feature_all.6-reduce_fit: 4940 ms
  fit.1-preprocess.2-feature_all.7-reduce_transform: 488 ms
  fit.1-preprocess.4-xarray: 24 ms
  fit.1-preprocess: 11652 ms
  fit.fit: 6778 ms
  fit.reorder: 0 ms
  fit.score: 49 ms
  fit: 18480 ms
  fit.1-preprocess.1-mask: 94 ms
features_dict {'THETA': 'THETA', 'SALT'

In [12]:
# Explained-variance ratio of each component kept by the joint ('all') reducer.
# NOTE: `_reducer` is a private attribute of the model, so this lookup may
# break with other pyxpcm versions.
m._reducer['all'].explained_variance_ratio_

array([0.74913433, 0.16114421, 0.07013779])