In [None]:
%pylab inline

In [None]:
import scipy.stats as ss
import xarray as xr
from gnl.plots import loghist

In [None]:
# Load the precipitation field from a NetCDF file as an xarray DataArray.
# The path is relative, so it resolves against the kernel's working directory.
Prec = xr.open_dataarray("../2017-08-12/wd/A64/2d/Prec.nc")

# Normalizing precipitation

A standard way to normalize positive non-Gaussian distributions is to use the so-called Box-Cox transform, but there are many fancier ways to do this.

In [None]:
# Keep only the values above .005; boolean-mask indexing returns them as a
# flat 1-D array.
y = Prec.values[Prec.values>.005]

In [None]:
# Histogram of the thresholded values via the project helper gnl.plots.loghist
# (presumably plotted on a log scale -- confirm against gnl).
loghist(y)
# Clamp the y-axis to [-10, 0]; `ylim` is not imported explicitly and is
# expected to come from the %pylab namespace.
ylim([-10,0])

In [None]:
# With no `lmbda` argument, scipy's boxcox fits lambda by maximum likelihood
# and returns (transformed data, lambda); the fitted lambda is discarded here.
yt,_ = ss.boxcox(y)

In [None]:
# Scatter 1000 values drawn at random (with replacement) from the transformed
# data. The draw is unseeded, so the figure differs from run to run.
plt.plot(np.random.choice(yt, 1000),'.')

The Box-Cox transformation does a pretty good job for the strictly positive precipitation values.

In [None]:
# Rebind `y` to every value flattened to 1-D, with no thresholding applied.
# This array is what `negloglikelihood` captures as its default `y` argument
# when that function is defined.
y = Prec.values.ravel()

In [None]:
import autograd.numpy as np
from autograd import grad

In [None]:
def f(y, theta):
    """Scaled inverse hyperbolic sine transform ``arcsinh(theta*y)/theta``.

    When ``|theta| < 1e-5`` the input is returned unchanged, which is the
    limit of the transform as ``theta`` goes to zero.

    NOTE(review): a second ``def f`` follows in the same cell and rebinds the
    name, so this version is shadowed once the cell has run.
    """
    is_negligible = abs(theta) < 1e-5
    return y if is_negligible else np.arcsinh(theta * y) / theta

def f(y, theta):
    """Box-Cox power transform of ``y`` with exponent ``theta``.

    Returns ``(y**theta - 1)/theta`` for a non-zero exponent and ``log(y)``
    when ``theta`` is exactly zero.
    """
    if theta != 0:
        return (y ** theta - 1) / theta
    return np.log(y)
    


def negloglikelihood(theta, y=y):
    """Per-sample negative profile log-likelihood of a Box-Cox exponent.

    The sample is transformed with the module-level ``f`` and modelled as
    Gaussian after transformation. With the mean and variance profiled out,
    the quantity returned is (up to an additive constant)

        .5 * log(mean((ft - mean(ft))**2)) - (theta - 1) * mean(log(y))

    Parameters
    ----------
    theta : float
        Transform exponent being scored.
    y : 1-D array of strictly positive values
        Defaults to the module-level ``y`` as it was bound when this function
        was defined; pass the sample explicitly to avoid relying on that.
        Zeros give ``log(0) = -inf``, so callers should threshold first.

    Returns
    -------
    float
        Value to minimize over ``theta``.
    """
    ft = f(y, theta)
    resid = ft - ft.mean()

    # The log-Jacobian of the Box-Cox transform is (theta - 1) * log(y)
    # (d/dy of (y**theta - 1)/theta is y**(theta - 1)); it enters the
    # log-likelihood with a plus sign.
    loglik = - .5 * np.log(resid.dot(resid) / ft.size) \
             + (theta - 1) * np.mean(np.log(y))

    return -loglik


class BoxCox(object):
    """Box-Cox power transform whose exponent is chosen by grid search.

    After `fit`, the selected exponent is stored in ``optim_`` and `transform`
    applies ``(y**theta - 1)/theta`` (``log(y)`` when the exponent is zero).
    """

    def fit(self, y, thetas=None, threshold=.01):
        """Select the exponent with the smallest negative log-likelihood.

        Parameters
        ----------
        y : 1-D array
            Sample to fit. Only entries strictly greater than ``threshold``
            are used, because the transform needs positive values.
        thetas : sequence of float, optional
            Candidate exponents. Defaults to 50 evenly spaced points on
            [-1, 0].
        threshold : float, optional
            Lower cut-off applied to ``y`` before fitting (default .01).

        Raises
        ------
        ValueError
            If no entry of ``y`` exceeds ``threshold``.
        """
        if thetas is None:
            # Same grid as np.r_[-1:0:50j], spelled with linspace.
            thetas = np.linspace(-1, 0, 50)

        sample = y[y > threshold]
        if sample.size == 0:
            raise ValueError("no values above threshold to fit on")

        # Score each candidate with this class's own likelihood rather than a
        # module-level function.
        costs = [self._neglog(sample, theta) for theta in thetas]
        self.optim_ = thetas[np.argmin(costs)]

    def transform(self, y):
        """Apply the transform with the fitted exponent ``optim_``."""
        return self._f(y, self.optim_)

    def _f(self, y, theta):
        """Box-Cox transform of ``y`` for a given exponent ``theta``."""
        if theta == 0:
            return np.log(y)
        else:
            return (y**theta - 1)/theta

    def _neglog(self, y, theta):
        """Per-sample negative profile log-likelihood of ``theta``.

        Assumes the transformed sample is Gaussian; mean and variance are
        profiled out. ``y`` must be 1-D and strictly positive.
        """
        ft = self._f(y, theta)
        resid = ft - ft.mean()
        # The log-Jacobian of the transform is (theta - 1) * log(y) and enters
        # the log-likelihood with a plus sign.
        loglik = - .5 * np.log(resid.dot(resid) / ft.size) \
                 + (theta - 1) * np.mean(np.log(y))

        return -loglik



class Asinh(object):
    """Scaled inverse hyperbolic sine transform with a grid-search fit.

    After `fit`, the selected scale is stored in ``optim_`` and `transform`
    applies ``arcsinh(theta*y)/theta`` (identity when ``|theta| < 1e-5``).
    """

    def fit(self, y, thetas=None, threshold=.01):
        """Select the scale with the smallest negative log-likelihood.

        Parameters
        ----------
        y : 1-D array
            Sample to fit. Only entries strictly greater than ``threshold``
            are used.
        thetas : sequence of float, optional
            Candidate scales. Defaults to 100 evenly spaced points on
            [.001, 20].
        threshold : float, optional
            Lower cut-off applied to ``y`` before fitting (default .01).

        Raises
        ------
        ValueError
            If no entry of ``y`` exceeds ``threshold``.
        """
        if thetas is None:
            thetas = np.linspace(.001, 20.0, 100)

        sample = y[y > threshold]
        if sample.size == 0:
            raise ValueError("no values above threshold to fit on")

        # Score candidates with the asinh likelihood defined on this class,
        # not with a module-level likelihood written for another transform.
        costs = [self._neglog(sample, theta) for theta in thetas]
        self.optim_ = thetas[np.argmin(costs)]

    def transform(self, y):
        """Apply the transform with the fitted scale ``optim_``."""
        return self._f(y, self.optim_)

    def _f(self, y, theta):
        """Return ``arcsinh(theta*y)/theta``, or ``y`` for negligible theta."""
        if abs(theta) < 1e-5:
            return y
        else:
            return np.arcsinh(theta*y)/theta

    def _neglog(self, y, theta):
        """Per-sample negative profile log-likelihood of ``theta``.

        Assumes the transformed sample is Gaussian; mean and variance are
        profiled out. ``y`` must be 1-D.
        """
        ft = self._f(y, theta)
        resid = ft - ft.mean()
        # The log-Jacobian of arcsinh(theta*y)/theta is
        # -.5 * log(1 + (theta*y)**2).
        loglik = - .5 * np.log(resid.dot(resid) / ft.size) \
                 - .5 * np.mean(np.log(1+(theta*y)**2))

        return -loglik

In [None]:
# Fit the Box-Cox exponent on the values above .001 (fit applies its own
# > .01 cut internally), transform that same subset, and scatter an unseeded
# random draw of 1000 transformed values.
mod = BoxCox()

mod.fit(y[y>.001])
yt = mod.transform(y[y>.001])
plt.plot(np.random.choice(yt, 1000), '.')

In [None]:
# Histogram of the transformed values currently bound to `yt`.
loghist(yt)

This uses the inverse hyperbolic sine (asinh) transform.

In [None]:
# Same procedure with the Asinh transform: fit on the values above .001,
# transform that subset, and scatter an unseeded random draw of 1000 values.
# Note that `mod` and `yt` are rebound here.
mod = Asinh()

mod.fit(y[y>.001])
yt = mod.transform(y[y>.001])
plt.plot(np.random.choice(yt, 1000), '.')

It seems that the Box-Cox transform performs best for the nonzero precipitation values, but the asinh transform will probably work well for Q1 and Q2.