In [1]:
from osgeo import gdal
import matplotlib.pyplot as plt
import numpy as np
import urllib
%matplotlib qt

In [3]:
# The kernel of the HANTS algorithm. Implemented by Mattijn.
def makediag3d(M):
    """Expand each row of a 2-D array into its own diagonal matrix.

    M is indexed as (row, column); the result has shape
    (M.shape[0], M.shape[1], M.shape[1]) and is zero everywhere except
    result[r, c, c] == M[r, c].
    """
    row_count, width = M.shape[0], M.shape[1]
    stacked = np.zeros((row_count, width, width))
    # Address the main diagonal of every matrix in the stack at once.
    on_diagonal = np.arange(width)
    stacked[:, on_diagonal, on_diagonal] = M
    return stacked

def get_starter_matrix(base_period_len, sample_count, frequencies_considered_count):
    """Build the harmonic design matrix used by HANTS.

    The result has min(2 * frequencies_considered_count + 1, sample_count)
    rows and sample_count columns. Row 0 is all ones; for every harmonic
    h = 1..frequencies_considered_count, row 2*h - 1 holds the cosine and
    row 2*h the sine of 2*pi*h*t / base_period_len for t = 0..sample_count-1.
    """
    row_count = min(2 * frequencies_considered_count + 1,
                    sample_count)  # 2*f+1 coefficients, capped by the number of samples
    basis = np.zeros(shape=(row_count, sample_count))
    basis[0, :] = 1

    # One period of cosine / sine, tabulated once and looked up below.
    phase = 2 * np.pi * np.arange(base_period_len) / base_period_len
    cos_table = np.cos(phase)
    sin_table = np.sin(phase)

    harmonics = np.arange(1, frequencies_considered_count + 1)
    sample_positions = np.arange(sample_count)
    # lookup[h, t] = (harmonic * t) wrapped into the base period,
    # e.g. 000, 123, 246, ... per column for three harmonics.
    lookup = np.mod(np.outer(harmonics, sample_positions), base_period_len)
    basis[2 * harmonics - 1, :] = cos_table.take(lookup)
    basis[2 * harmonics, :] = sin_table.take(lookup)

    return basis

def HANTS(sample_count, inputs,
          frequencies_considered_count=3,
          outliers_to_reject='Lo',
          low=0., high=255,
          fit_error_tolerance=5,
          dod = 5,
          delta=0.1):
    """
    Harmonic ANalysis of Time Series (HANTS) applied to a stack of series.

    Every row of `inputs` is fitted with a mean plus
    `frequencies_considered_count` harmonics by (regularised) least squares.
    After each fit, the sample with the largest one-sided deviation is
    rejected and the fit is repeated, until the deviation is within
    tolerance or too few samples would remain.

    The base period is fixed to `sample_count` and the samples are assumed
    to be equally spaced (time index 0 .. sample_count - 1).

    Parameters
    ----------
    sample_count : int
        Number of samples per series; must equal inputs.shape[1].
    inputs : 2-D array, shape (n_series, sample_count)
        Sample values (e.g. NDVI values), one series per row.
    frequencies_considered_count : int
        Number of frequencies to consider above the zero frequency.
    outliers_to_reject : str
        'Hi' rejects samples above the fitted curve, 'Lo' rejects samples
        below it; any other value (e.g. 'None') disables iterative rejection.
    low, high : number
        Valid range. Samples with value <= low or > high are rejected
        right away.
    fit_error_tolerance : number
        Samples deviating more than this from the fitted curve (in the
        direction selected by `outliers_to_reject`) are rejected.
    dod : number
        Degree of overdeterminedness: rejection for a series stops once the
        number of rejected samples reaches
        sample_count - (number of coefficients) - dod. This is a safety
        measure that keeps the fit overdetermined.
    delta : number
        Small positive number (e.g. 0.1) added to the diagonal of the
        normal matrix (except the mean term) to suppress high amplitudes.

    Returns
    -------
    2-D array, shape (n_series, sample_count)
        The reconstructed (fitted) series.
    """
    # The base period spans the whole series.
    base_period_len = sample_count

    # Sign applied to (fit - data) so that the deviations to be rejected
    # come out positive; 0 disables rejection.
    if outliers_to_reject == 'Hi':
        sHiLo = -1
    elif outliers_to_reject == 'Lo':
        sHiLo = 1
    else:
        sHiLo = 0

    nr = min(2 * frequencies_considered_count + 1,
             sample_count)  # number of 2*+1 frequencies, or number of input images

    # Returned unchanged only if sample_count == 0 (the loop never runs).
    outputs = np.zeros(shape=(inputs.shape[0], sample_count))

    mat = get_starter_matrix(base_period_len, sample_count, frequencies_considered_count)

    # Repeat the design matrix for every series -> (n_series, nr, sample_count).
    mat = np.tile(mat[None].T, (1, inputs.shape[0])).T

    # p is the per-sample weight: 1 = use, 0 = rejected.
    p = np.ones_like(inputs)
    p[(low >= inputs) | (inputs > high)] = 0
    nout = np.sum(p == 0, axis=-1)  # count the outliers for each timeseries

    # Per-series "finished" flag; all series start unfinished.
    ready = np.zeros((inputs.shape[0]), dtype=bool)

    # Maximum number of samples that may be rejected per series.
    noutmax = sample_count - nr - dod

    # Regularisation term: delta on the diagonal, but not for [0, 0] (the mean).
    Adelta = np.tile(np.diag(np.ones(nr))[None].T, (1, inputs.shape[0])).T * delta
    Adelta[:, 0, 0] -= delta

    for _ in range(sample_count):
        if ready.all():
            break

        # Right-hand side of the normal equations: mat . (p * inputs)
        za = np.einsum('ijk,ik->ij', mat, p * inputs)

        # Normal matrix mat . diag(p) . mat^T, computed without building the
        # (n_series, sample_count, sample_count) diagonal array.
        A = np.einsum('ajk,ak,aik->aji', mat, p, mat)
        A += Adelta

        # Solve one system per series. The right-hand side is passed as an
        # explicit stack of column vectors: NumPy >= 2.0 interprets a 2-D `b`
        # as a single (M, K) matrix rather than a stack of vectors.
        zr = np.linalg.solve(A, za[..., None])[..., 0]

        # Reconstructed series: outputs[a, k] = sum_j mat[a, j, k] * zr[a, j]
        outputs = np.einsum('ajk,aj->ak', mat, zr)

        # Signed deviation of the still-valid samples, and their ranking.
        err = p * (sHiLo * (outputs - inputs))
        rankVec = np.argsort(err, axis=1)

        # A series is finished when its worst deviation is within tolerance
        # or when no more samples may be rejected. `>=` (not `==`) so the
        # guard also triggers for a non-integer `dod` and for series that
        # already start with too many invalid samples.
        maxerr = np.max(err, axis=-1)
        ready = (maxerr <= fit_error_tolerance) | (nout >= noutmax)

        if not ready.all():
            # Index of the worst-fitting sample in every series.
            j = rankVec.take(sample_count - 1, axis=-1)

            # Reject that sample, and count it, only for unfinished series.
            active = ~ready
            p[active, j[active]] = 0
            nout += active

    return outputs

In [4]:
import pandas as pd

In [5]:
# Read the input time series from CSV into a DataFrame.
# NOTE(review): hard-coded absolute path on a mapped drive — this cell only
# runs on a machine where that location exists.
file_in = r'P:\Pr\3556.10\Werkmap\PR3556.10.03_WAQUA\tmp\CSV\U10D338Lp300S060Fm320K00_set1.csv'
df = pd.read_csv(file_in)

In [6]:
# Wrap the ZWL column as a single-row 2-D array (1, n_samples), the layout
# HANTS expects. `.values` replaces `Series.as_matrix()`, which was removed
# in pandas 1.0; np.array() copies, so later edits do not touch `df`.
array_org = np.array([df.ZWL.values])

In [7]:
# fig = plt.figure(figsize=(15,3))

# ax2 = fig.add_subplot(111)
# img2 = ax2.imshow(np.ma.masked_equal(array_org1, 0), cmap='viridis',interpolation = 'nearest')
# ax2.set_xlabel('Long')
# ax2.set_ylabel('ansi')
# ax2.set_title('ORIGINAL array shape: '+str(array_org1.shape))

# plt.colorbar(img2, orientation='horizontal', ax=ax2)
# plt.tight_layout()
# #plt.savefig(r'D:\tmp\HANTS_OUT//slice_ansi_long.png', dpi=200)
# plt.show()

In [8]:
import plotly
plotly.__version__

'2.0.0'

In [9]:
# Quick look at the raw series (row 0 of array_org).
plt.plot(array_org[0])

[<matplotlib.lines.Line2D at 0xb62e9e8>]

In [11]:
# np.nan_to_num returns a new array with NaN replaced by 0 (and infinities by
# large finite numbers); array_org itself keeps its NaNs.
array_nan_to_num = np.nan_to_num(array_org)

In [12]:
# Scratch arithmetic check (evaluates to 0.05).
5/100.

0.05

In [20]:
# Fit the series with HANTS. The input is scaled by 10 before fitting and the
# fitted curve is scaled back afterwards.
# `dod` is a number of samples (degree of overdeterminedness), so it is passed
# as an integer: a fractional value makes HANTS' rejection limit non-integer,
# which its equality-based stop test can then never reach.
array_hants = HANTS(array_nan_to_num.shape[1], array_nan_to_num*10.,
                    frequencies_considered_count = 20,
                    outliers_to_reject = 'Lo',
                    low = -10000/100., 
                    high = 10000/100.,
                    fit_error_tolerance = 500/100.,
                    dod = 5,
                    delta = 0.1)
array_hants /= 10

In [21]:
# fig = plt.figure(figsize=(15,3))

# ax2 = fig.add_subplot(111)
# img2 = ax2.imshow(np.ma.masked_equal(array_hants, 0), cmap='viridis',interpolation = 'nearest')
# ax2.set_xlabel('Long')
# ax2.set_ylabel('ansi')
# ax2.set_title('ORIGINAL array shape: '+str(array_hants.shape))

# plt.colorbar(img2, orientation='horizontal', ax=ax2)
# plt.tight_layout()
# #plt.savefig(r'D:\tmp\HANTS_OUT//slice_ansi_long.png', dpi=200)
# plt.show()

In [22]:
# Residual of the raw series against the HANTS fit; any NaN in array_org
# propagates into array_dif.
array_dif = array_org - array_hants

In [23]:
# Overlay the raw series, the HANTS fit and their difference. Each curve is
# labelled so the three lines can be told apart in the figure.
fig = plt.figure(figsize=(15,3))
plt.plot(array_org[0], label='original')
plt.plot(array_hants[0], label='HANTS fit')
plt.plot(array_dif[0], label='original - HANTS fit')
plt.legend()
plt.grid()
plt.show()

In [32]:
# Boolean mask of samples lying more than 0.05 below the HANTS fit.
# Comparisons against NaN evaluate to False, so NaN residuals are never
# flagged (older NumPy versions emit an "invalid value" warning here).
test = array_dif < -0.05
test


invalid value encountered in less



array([[False, False, False, ..., False, False, False]], dtype=bool)

In [36]:
def nan_helper(y):
    """Locate NaNs in an array and provide a mask-to-index converter.

    Input:
        - y, 1d numpy array with possible NaNs
    Output:
        - nans, boolean mask that is True where y is NaN
        - index, a function taking a boolean mask and returning the
          integer positions at which that mask is True
    Example:
        >>> # linear interpolation of NaNs
        >>> nans, x= nan_helper(y)
        >>> y[nans]= np.interp(x(nans), x(~nans), y[~nans])
    """

    def index(logical_indices):
        # nonzero() returns one index array per dimension; keep the first.
        return logical_indices.nonzero()[0]

    nan_mask = np.isnan(y)
    return nan_mask, index


In [30]:
# Plot the HANTS values at the flagged samples. `test` is 2-D with shape
# (1, n_samples) while array_hants[0] is 1-D, so the mask's first row is used;
# indexing the 1-D row with the 2-D mask raises an IndexError.
plt.plot(array_hants[0][test[0]])

[<matplotlib.lines.Line2D at 0xb7ebac8>]

In [33]:
# Blank out the flagged samples.
# NOTE(review): this mutates array_org in place, so cells that read array_org
# give different results depending on whether this cell has already run.
array_org[test]=np.nan

In [35]:
# Plot row 0 of array_org again; once the masking cell has run, the samples
# set to NaN show up as gaps in the line.
plt.plot(array_org[0])

[<matplotlib.lines.Line2D at 0xb73f2b0>]