# Custom classes compatible with `xarray` 

# 0. Processing Class 

Rolling (moving-window) mean. Plain `xarray` lets you center the window; `bottleneck` does not.

In [1]:
%%writefile processing.py 

import numpy as np
import xarray as xr
import scipy.signal

class processing():
    """Chainable processing steps applied to an xarray object.

    Every step replaces the wrapped data with its result and returns
    ``self`` so that calls can be chained; read the result from ``values``.
    """

    def __init__(self, data):
        # Work on a copy so the caller's object is never modified.
        self._data = data.copy()

    @property
    def values(self):
        """Return the data in its current (processed) state."""
        return self._data

    def rolling_mean(self, window=12):
        """Replace the data with its centered rolling mean along ``time``.

        Parameters
        ----------
        window : int
            Number of samples in the rolling window.
        """
        self._data = self._data.rolling(time=window, center=True).mean()
        return self

    def detrend_ufunc(self, X, axis=0):
        """Remove a least-squares linear trend from every series along ``axis``.

        Each 1-D series along ``axis`` is fitted independently against its
        sample index. NaN samples are excluded from the fit and stay NaN in
        the output; a series with fewer than two valid samples is returned
        as all NaN.

        Parameters
        ----------
        X : array-like
            Input values (at least 1-D).
        axis : int
            Axis along which the trend is fitted and removed.

        Returns
        -------
        numpy.ndarray
            Float array with the same shape as ``X``.
        """
        data = np.asarray(X, dtype=float)
        # View the input as (n_samples, n_series) so that every series is
        # handled independently instead of flattening them into one vector.
        moved = np.moveaxis(data, axis, 0)
        flat = moved.reshape(moved.shape[0], -1)

        valid = ~np.isnan(flat)
        count = valid.sum(axis=0)
        t = np.arange(flat.shape[0], dtype=float)[:, np.newaxis]

        # count == 0 or a degenerate fit yields NaN on purpose.
        with np.errstate(invalid='ignore', divide='ignore'):
            t_mean = np.where(valid, t, 0.0).sum(axis=0) / count
            y_mean = np.where(valid, flat, 0.0).sum(axis=0) / count
            t_dev = np.where(valid, t - t_mean, 0.0)
            y_dev = np.where(valid, flat - y_mean, 0.0)
            slope = (t_dev * y_dev).sum(axis=0) / (t_dev ** 2).sum(axis=0)
            residual = flat - (y_mean + slope * (t - t_mean))

        return np.moveaxis(residual.reshape(moved.shape), 0, axis)

    def detrend(self, axis=0):
        """Replace the data with its linearly detrended version.

        Parameters
        ----------
        axis : int
            Positional axis of the underlying array along which the trend is
            removed; forwarded to ``detrend_ufunc``.
        """
        self._data = xr.apply_ufunc(
            self.detrend_ufunc, self._data, kwargs={'axis': axis}
        )
        return self

    def long_term_mean(self, dim='time'):
        """Replace the data with its mean along ``dim`` (default ``time``)."""
        self._data = self._data.mean(dim)
        return self

    # NOTE: the list defaults below are only read, never mutated.
    def global_avg(self, dim=['lat','lon']):
        """Replace the data with its mean over ``dim`` (default ``lat``/``lon``).

        Same computation as ``global_mean``.
        """
        self._data = self._data.mean(dim)
        return self

    def global_mean(self, dim=['lat','lon']):
        """Replace the data with its mean over ``dim`` (default ``lat``/``lon``)."""
        self._data = self._data.mean(dim)
        return self

    def global_median(self, dim=['lat','lon']):
        """Replace the data with its median over ``dim`` (default ``lat``/``lon``)."""
        self._data = self._data.median(dim)
        return self

    def zonal_mean(self, dim='lon'):
        """Replace the data with its mean along ``dim`` (default ``lon``)."""
        self._data = self._data.mean(dim)
        return self

    def zonal_median(self, dim='lon'):
        """Replace the data with its median along ``dim`` (default ``lon``)."""
        self._data = self._data.median(dim)
        return self

    def ensemble_mean(self, dim='ensemble'):
        """Replace the data with its mean along ``dim`` (default ``ensemble``)."""
        self._data = self._data.mean(dim)
        return self

    def annual_mean(self, dim='time', nyears=35):
        """Replace the data with the mean of each of ``nyears`` bins along ``dim``.

        The bins come from ``groupby_bins(dim, nyears)``.
        NOTE(review): presumably each bin is meant to hold one year of
        samples - confirm against the time axis of the input.
        """
        self._data = self._data.groupby_bins(dim, nyears).mean(dim=dim)
        return self

    def remove_mean(self, dim='time'):
        """Replace the data with its anomaly relative to the mean along ``dim``.

        Can be combined with ``.groupby_bins().apply()`` to remove an annual
        mean.
        """
        self._data = self._data - self._data.mean(dim)
        return self

    def annual_mean_repeating(self, dim='time', nyears=35, axis=0):
        """Replace the data with binned means, each repeated 12 times.

        The bin means (see ``annual_mean``) are repeated 12 times along
        ``axis`` and wrapped in a new ``DataArray`` with dimensions
        ``time``, ``lat``, ``lon``. The result is built from raw values, so
        the original coordinates are not carried over.
        NOTE(review): assumes 3-D monthly data ordered (time, lat, lon) - confirm.
        """
        binned = self._data.groupby_bins(dim, nyears).mean(dim=dim)
        repeated = np.repeat(binned.values, 12, axis=axis)
        self._data = xr.DataArray(repeated, dims=['time','lat','lon'])
        return self

Overwriting processing.py


In [None]:
import numpy as np
import xarray as xr
import scipy.signal
import scipy.stats as stats

class processing():
    """Chainable processing steps applied to an xarray object.

    Every step replaces the wrapped data with its result and returns
    ``self`` so that calls can be chained; read the result from ``values``.
    """

    def __init__(self, data):
        # Work on a copy so the caller's object is never modified.
        self._data = data.copy()

    @property
    def values(self):
        """Return the data in its current (processed) state."""
        return self._data

    def rolling_mean(self, window=12):
        """Replace the data with its centered rolling mean along ``time``.

        Parameters
        ----------
        window : int
            Number of samples in the rolling window.
        """
        self._data = self._data.rolling(time=window, center=True).mean()
        return self

    def _linear_fit(self, y):
        """Fit ``y[:, i, j]`` against its sample index for every ``(i, j)``.

        Returns ``(slope, intercept)``, two arrays of shape
        ``y.shape[1:]``. Columns containing any NaN are skipped and keep
        NaN coefficients.
        """
        shape = np.shape(y)
        if len(shape) != 3:
            raise ValueError("expected a 3D array, got %d dimension(s)" % len(shape))
        ndim0, ndim1, ndim2 = shape

        slope = np.full((ndim1, ndim2), np.nan)
        intercept = np.full((ndim1, ndim2), np.nan)
        x = np.arange(ndim0)

        for dim1 in range(ndim1):
            for dim2 in range(ndim2):
                series = y[:, dim1, dim2]
                # Only fit columns without gaps.
                if np.isnan(series).any():
                    continue
                reg = stats.linregress(x, series)
                slope[dim1, dim2] = reg.slope
                intercept[dim1, dim2] = reg.intercept

        return slope, intercept

    def detrend_ufunc(self, y):
        """Remove the linear trend along axis 0 of a 3-D array.

        This only works with 3D matrices. Columns ``y[:, i, j]`` that
        contain any NaN are returned as all NaN.

        Returns
        -------
        numpy.ndarray
            Array with the same shape as ``y`` holding ``y`` minus the fitted
            line of each column.
        """
        y = np.asarray(y)
        slope, intercept = self._linear_fit(y)
        x = np.arange(y.shape[0])[:, np.newaxis, np.newaxis]
        # NaN coefficients propagate, so skipped columns come out as NaN.
        return y - (intercept + slope * x)

    def detrend(self):
        """Replace the data with its linearly detrended version (along axis 0)."""
        self._data = xr.apply_ufunc(self.detrend_ufunc, self._data)
        return self

    def get_slope_ufunc(self, y):
        """Return the linear-regression slope along axis 0 of a 3-D array.

        This only works with 3D matrices. The slope is expressed per sample
        step. Columns ``y[:, i, j]`` that contain any NaN get a NaN slope.

        Returns
        -------
        numpy.ndarray
            Array of shape ``y.shape[1:]``.
        """
        slope, _ = self._linear_fit(np.asarray(y))
        return slope

    def get_slope(self):
        """Replace the data with the slope of its linear trend.

        The trend is fitted along the first dimension of the data, which is
        dropped from the result.
        NOTE(review): assumes the wrapped object is a 3-D DataArray whose
        first dimension is the regression axis - confirm.
        """
        fit_dim = self._data.dims[0]
        # apply_ufunc moves core dimensions to the last axis, while the
        # slope routine expects the regression axis first.
        self._data = xr.apply_ufunc(
            lambda arr: self.get_slope_ufunc(np.moveaxis(arr, -1, 0)),
            self._data,
            input_core_dims=[[fit_dim]],
        )
        return self

    def long_term_mean(self, dim='time'):
        """Replace the data with its mean along ``dim`` (default ``time``)."""
        self._data = self._data.mean(dim)
        return self

    # NOTE: the list defaults below are only read, never mutated.
    def global_avg(self, dim=['lat','lon']):
        """Replace the data with its mean over ``dim`` (default ``lat``/``lon``).

        Same computation as ``global_mean``.
        """
        self._data = self._data.mean(dim)
        return self

    def global_mean(self, dim=['lat','lon']):
        """Replace the data with its mean over ``dim`` (default ``lat``/``lon``)."""
        self._data = self._data.mean(dim)
        return self

    def global_median(self, dim=['lat','lon']):
        """Replace the data with its median over ``dim`` (default ``lat``/``lon``)."""
        self._data = self._data.median(dim)
        return self

    def zonal_mean(self, dim='lon'):
        """Replace the data with its mean along ``dim`` (default ``lon``)."""
        self._data = self._data.mean(dim)
        return self

    def zonal_median(self, dim='lon'):
        """Replace the data with its median along ``dim`` (default ``lon``)."""
        self._data = self._data.median(dim)
        return self

    def ensemble_mean(self, dim='ensemble'):
        """Replace the data with its mean along ``dim`` (default ``ensemble``)."""
        self._data = self._data.mean(dim)
        return self

    def annual_mean(self, dim='time', nyears=35):
        """Replace the data with the mean of each of ``nyears`` bins along ``dim``.

        The bins come from ``groupby_bins(dim, nyears)``.
        NOTE(review): presumably each bin is meant to hold one year of
        samples - confirm against the time axis of the input.
        """
        self._data = self._data.groupby_bins(dim, nyears).mean(dim=dim)
        return self

    def remove_mean(self, dim='time'):
        """Replace the data with its anomaly relative to the mean along ``dim``.

        Can be combined with ``.groupby_bins().apply()`` to remove an annual
        mean.
        """
        self._data = self._data - self._data.mean(dim)
        return self

    def annual_mean_repeating(self, dim='time', nyears=35, axis=0):
        """Replace the data with binned means, each repeated 12 times.

        The bin means (see ``annual_mean``) are repeated 12 times along
        ``axis`` and wrapped in a new ``DataArray`` with dimensions
        ``time``, ``lat``, ``lon``. The result is built from raw values, so
        the original coordinates are not carried over.
        NOTE(review): assumes 3-D monthly data ordered (time, lat, lon) - confirm.
        """
        binned = self._data.groupby_bins(dim, nyears).mean(dim=dim)
        repeated = np.repeat(binned.values, 12, axis=axis)
        self._data = xr.DataArray(repeated, dims=['time','lat','lon'])
        return self

In [1]:
import numpy as np
import xarray as xr



In [11]:
%%writefile skill_metrics.py 

import numpy as np
import xarray as xr

class skill_metrics():
    """Namespace of skill metrics computed on xarray objects.

    All functions are static: call them as ``skill_metrics.rmse(m, r)`` (or
    on an instance). Every reduction is taken along ``dim``. NumPy ufuncs
    are applied directly to the xarray objects, so the deprecated
    ``xr.ufuncs`` namespace is not needed.
    """

    @staticmethod
    def std(x, dim='time'):
        """Standard deviation of ``x`` along ``dim``."""
        return x.std(dim=dim)

    @staticmethod
    def covariance(x, y, dim='time'):
        """Mean product of the anomalies of ``x`` and ``y`` along ``dim``."""
        x_anom = x - x.mean(dim=dim)
        y_anom = y - y.mean(dim=dim)
        return (x_anom * y_anom).mean(dim=dim)

    @staticmethod
    def correlation(x, y, dim='time'):
        """Covariance of ``x`` and ``y`` divided by the product of their
        standard deviations along ``dim``."""
        x_anom = x - x.mean(dim=dim)
        y_anom = y - y.mean(dim=dim)
        cov = (x_anom * y_anom).mean(dim=dim)
        return cov / (x.std(dim=dim) * y.std(dim=dim))

    @staticmethod
    def avg_abs_error(obs, prd, dim='time'):
        """Mean of ``|prd - obs|`` along ``dim``."""
        return np.abs(prd - obs).mean(dim=dim)

    @staticmethod
    def avg_error(obs, prd, dim='time'):
        """Mean of ``prd - obs`` (bias) along ``dim``."""
        return (prd - obs).mean(dim=dim)

    @staticmethod
    def std_star(obs, prd, dim='time'):
        """Ratio of the standard deviation of ``prd`` to that of ``obs``."""
        return prd.std(dim=dim) / obs.std(dim=dim)

    @staticmethod
    def rmse(m, r, dim='time'):
        """Root-mean-square difference between ``m`` and ``r`` along ``dim``."""
        return np.sqrt(np.square(m - r).mean(dim=dim))

    @staticmethod
    def urmse(m, r, dim='time'):
        """Root-mean-square difference between the anomalies of ``m`` and
        ``r`` (each relative to its own mean along ``dim``)."""
        m_anom = m - m.mean(dim=dim)
        r_anom = r - r.mean(dim=dim)
        return np.sqrt(np.square(m_anom - r_anom).mean(dim=dim))

    @staticmethod
    def ri(m, r, dim='time'):
        """``exp(sqrt(mean(log(m / r) ** 2)))`` along ``dim``.

        NOTE(review): presumably the "reliability index"; requires
        ``m / r > 0`` for the logarithm to be defined.
        """
        return np.exp(np.sqrt(np.square(np.log(m / r)).mean(dim=dim)))

    @staticmethod
    def nse(m, r, dim='time'):
        """``(var(m) - mean((r - m) ** 2)) / var(m)`` along ``dim``.

        Here ``var(m)`` is the mean squared anomaly of ``m``.
        NOTE(review): looks like a Nash-Sutcliffe style efficiency that is
        normalised by the variance of ``m`` - confirm that ``m`` is the
        series meant to provide the reference variance.
        """
        m_variance = np.square(m - m.mean(dim=dim)).mean(dim=dim)
        mean_sq_diff = np.square(r - m).mean(dim=dim)
        return (m_variance - mean_sq_diff) / m_variance

Writing skill_metrics.py


# 2. Takahashi

In [12]:
%%writefile takahashi_2002.py 

import numpy as np
import xarray as xr

class takahashi_2002():
    """Split pCO2 variability into a thermal and a non-thermal part.

    NOTE(review): the formulas appear to follow the temperature
    normalisation of Takahashi et al. (2002); 0.0423 is presumably the
    sensitivity d(ln pCO2)/dT in 1/degC, which would require ``sst`` in
    degrees Celsius - confirm.

    Parameters
    ----------
    pco2, sst :
        xarray objects holding pCO2 and sea-surface temperature. They are
        stored by reference, not copied.
    """

    # Exponential temperature coefficient used by every formula below.
    TEMP_SENSITIVITY = 0.0423

    def __init__(self, pco2, sst):
        self.pco2 = pco2
        self.sst = sst

    def pco2_T(self, dim='time'):
        """Mean pCO2 scaled by ``exp(k * (sst - mean(sst)))``.

        Means are taken along ``dim``; the result is transposed to
        ``('time', 'lat', 'lon')``.
        """
        sst_anomaly = self.sst - self.sst.mean(dim=dim)
        thermal = self.pco2.mean(dim=dim) * np.exp(self.TEMP_SENSITIVITY * sst_anomaly)
        return thermal.transpose('time','lat','lon')

    def pco2_nonT(self, dim='time'):
        """pCO2 scaled by ``exp(k * (mean(sst) - sst))`` (mean along ``dim``)."""
        return self.pco2 * np.exp(self.TEMP_SENSITIVITY * (self.sst.mean(dim=dim) - self.sst))

    def Dpco2_temp(self, dim='time'):
        """Range (max minus min along ``dim``) of ``pco2_T``."""
        thermal = self.pco2_T(dim=dim)  # computed once, used for both extrema
        return thermal.max(dim=dim) - thermal.min(dim=dim)

    def Dpco2_bio(self, dim='time'):
        """Range (max minus min along ``dim``) of ``pco2_nonT``."""
        non_thermal = self.pco2_nonT(dim=dim)  # computed once, used for both extrema
        return non_thermal.max(dim=dim) - non_thermal.min(dim=dim)

    def T_minus_B(self, dim='time'):
        """Difference ``Dpco2_temp - Dpco2_bio``."""
        return self.Dpco2_temp(dim=dim) - self.Dpco2_bio(dim=dim)

Writing takahashi_2002.py
