# shp

> Spatially Homogeneous Pixels Identification

In [None]:
#| default_exp shp

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
from scipy import stats
import numpy as np
import itertools

In [None]:
#| export
try:
    import cupy as cp
except ImportError:
    # cupy could not be imported: expose `cp` as None instead of failing here.
    # Only ImportError is caught so that KeyboardInterrupt/SystemExit and
    # unrelated errors are no longer silently swallowed.
    cp = None
import math

## Kolmogorov-Smirnov (KS) two-sample test

In [None]:
#| export
# It looks like cupy does not support pointer-to-pointer arguments (float** rmli_stack),
# so the stack is passed as one flat raw array and indexed manually.
# This code is modified from https://dev.thep.lu.se/yat under the GPLv3 license.
#
# Inputs : rmli_stack  flat view of the (nlines, width, nimages) stack; the
#                      values of every pixel must already be sorted ascending
#          nlines, width, nimages, az_half_win, r_half_win  scalar sizes
# Outputs: dist, p     flat views of (nlines, width, az_win, r_win) arrays with
#                      the KS statistic and p value; -1 where the secondary
#                      pixel lies outside the image
_ks_test_kernel = cp.ElementwiseKernel(
    'raw T rmli_stack, int32 nlines, int32 width, int32 nimages, int32 az_half_win, int32 r_half_win',
    'raw T dist, raw T p',
    '''
    // One work item per (reference pixel, window position) pair; `i` is the
    // element index supplied by ElementwiseKernel and is also the flat
    // position in `dist` and `p`.
    int az_win = 2*az_half_win+1;
    int r_win = 2*r_half_win+1;
    int win = az_win*r_win;

    // Reference pixel P
    int ref_idx = i/win;
    int ref_az = ref_idx/width;
    int ref_r = ref_idx - ref_az*width;

    // Position inside the search window and the secondary pixel it selects.
    // `i%win` is used instead of `i - ref_idx*win` because the latter product
    // is evaluated in 32-bit int and can overflow for large outputs.
    int win_idx = i%win;
    int win_az = win_idx/r_win;
    int win_r = win_idx - win_az*r_win;
    int sec_az = ref_az + win_az - az_half_win;
    int sec_r = ref_r + win_r - r_half_win;

    // Skip work items that do not map to a pixel of the image
    // (either coordinate out of range, hence `||`).
    if (ref_r >= width || ref_az >= nlines) {
        return;
    }
    // Secondary pixel outside the image: flag with -1 and do no test.
    if (sec_az < 0 || sec_az >= nlines || sec_r < 0 || sec_r >= width) {
        dist[i] = -1.0;
        p[i] = -1.0;
        return;
    }

    // 64-bit offsets of the two time series: pixel index times nimages can
    // exceed the range of a 32-bit int for large stacks.
    long long ref_base = (long long)ref_idx*nimages;
    long long sec_base = ((long long)sec_az*width + sec_r)*nimages;

    // Compute the maximum difference between the cumulative distributions
    // by merging the two sorted series.
    int j1 = 0, j2 = 0;
    T f1, f2, d, dmax = 0.0, en = nimages;

    while (j1 < nimages && j2 < nimages) {
        f1 = rmli_stack[ref_base + j1];
        f2 = rmli_stack[sec_base + j2];
        // Same as `f1 <= f2` / `f1 >= f2` for ordinary numbers, but written
        // with negated comparisons so that a NaN sample advances both cursors
        // instead of leaving the loop stuck forever.
        if (!(f1 > f2)) j1++;
        if (!(f1 < f2)) j2++;
        d = fabs((j2-j1)/en);
        if (d > dmax) dmax = d;
    }
    // Effective sample size for two samples of equal length nimages
    en=sqrt(en/2);
    p[i] = ks_p((en+0.12+0.11/en)*dmax);
    dist[i] = dmax;
    ''',
    name = 'ks_test_kernel',no_return=True,
    preamble = '''
    // Asymptotic KS p value: 2 * sum_{i>=1} (-1)^(i-1) * exp(-2 * i^2 * x^2)
    __device__ T ks_p(T x)
    {
        T x2 = -2.0*x*x;
        int sign = 1;
        T p = 0.0,p2 = 0.0;

        for (int i = 1; i <= 100; i++) {
            p += sign*2*exp(x2*i*i);
            // Converged: the last term no longer changes the sum
            if (p==p2) return p;
            sign = -sign;
            p2 = p;
        }
        // No convergence within 100 terms only happens for very small x
        // (e.g. x == 0 when a pixel is compared with itself), where the
        // p value tends to 1. The unconverged partial sum is 0 for x == 0,
        // which would wrongly reject identical samples.
        return 1.0;
    }
    ''',)

In [None]:
#| export
def ks_test(rmli:cp.ndarray, # the rmli stack, dtype: cupy.floating
            az_half_win:int, # SHP identification half search window size in azimuth direction
            r_half_win:int, # SHP identification half search window size in range direction
            block_size:int=128, # the CUDA block size, it only affects the calculation speed
           ) -> tuple[cp.ndarray,cp.ndarray] : # the KS test statistics `dist` and p value `p`
    '''
    SHP identification based on Two-Sample Kolmogorov-Smirnov Test.

    `rmli` must be a 3-D floating point array ordered (azimuth, range, image).
    Both returned arrays have shape
    (nlines, width, 2*az_half_win+1, 2*r_half_win+1) and the dtype of `rmli`.
    Entries whose secondary pixel falls outside the image are set to -1.

    Raises `ValueError` if `rmli` is not 3-D or a half window size is
    negative, and `TypeError` if `rmli` is not of a floating point dtype.
    '''
    # The kernel indexes the stack as a flat (azimuth, range, image) array, so
    # any other rank would silently compare the wrong values.
    if rmli.ndim != 3:
        raise ValueError(f'rmli must be 3 dimensional (azimuth, range, image), got {rmli.ndim} dimensions')
    # With an integer dtype the kernel arithmetic would be done in integers
    # and the statistic would be truncated.
    if rmli.dtype.kind != 'f':
        raise TypeError(f'rmli must have a floating point dtype, got {rmli.dtype}')
    if az_half_win < 0 or r_half_win < 0:
        raise ValueError('az_half_win and r_half_win must be non-negative')

    # The kernel merges two series and needs each of them sorted along the image axis.
    sorted_rmli = cp.sort(rmli,axis=-1) # In fact, this step is most time consuming, consider move it out
    az_win = 2*az_half_win+1
    r_win = 2*r_half_win+1
    nlines, width, nimages = rmli.shape
    dist = cp.empty((nlines,width,az_win,r_win),dtype=rmli.dtype)
    p = cp.empty((nlines,width,az_win,r_win),dtype=rmli.dtype)

    # One kernel work item per (pixel, window position) pair.
    _ks_test_kernel(sorted_rmli,cp.int32(nlines),cp.int32(width),cp.int32(nimages),
                    cp.int32(az_half_win),cp.int32(r_half_win),dist,p,
                    size=width*nlines*r_win*az_win,block_size=block_size)
    return dist,p

The `ks_test` function applies the two-sample Kolmogorov-Smirnov test to a stack of rmli images to identify SHP candidates for further processing. This method was originally published in [@ferrettiNewAlgorithmProcessing2011]. The function is designed to run on the GPU for high speed.

The `rmli` is a three-dimensional cupy `ndarray`. The `dtype` should be `float`. From outermost to innermost, the three dimensions are azimuth, range and image. For each pixel P, a search window centered at P is defined by `az_half_win` and `r_half_win`. All pixels in this search window are compared with P by the KS test. They are referred to here as secondary pixels. The total number of secondary pixels (including P) is (2\*`az_half_win`+1)\*(2\*`r_half_win`+1).

The returns are the KS test statistic, which is the maximum absolute difference between the empirical cumulative distribution functions of the two samples, and the p value. Both of them are four-dimensional cupy ndarrays. From outermost to innermost, the dimensions are azimuth, range, secondary pixel relative azimuth, and secondary pixel relative range. For P near the edge of the image, where part of the search window falls outside the image, the result for the out-of-image secondary pixels is `-1`.

Here is a simple example. First simulate the rmli time series of two pixels, one drawn from a uniform distribution and the other from a normal distribution:

In [None]:
# Number of images, i.e. the length of each simulated pixel time series.
sample_size = 20
# No seed is given, so the drawn values differ from run to run.
rng = np.random.default_rng()
# Pixel 1 follows a uniform distribution, pixel 2 a normal distribution.
sample1 = stats.uniform.rvs(size=sample_size, random_state=rng).astype(cp.float32)
sample2 = stats.norm.rvs(size=sample_size, random_state=rng).astype(cp.float32)

Convert the data to a cupy ndarray and make sure the `dtype` is `cp.float32` (the samples do not need to be sorted beforehand; `ks_test` sorts them along the image axis itself):

In [None]:
# Move both samples to the GPU and arrange them as (azimuth=1, range=2, image=sample_size).
rmli_stack = cp.stack((cp.asarray(sample1), cp.asarray(sample2))).reshape(1,2,sample_size)
# The samples were already cast to float32 above; this keeps the dtype explicit.
rmli_stack = rmli_stack.astype(cp.float32)
rmli_stack.shape

(1, 2, 20)

The shape of `rmli_stack` shows it contains 20 images. Each image has 1 pixel in the azimuth dimension and 2 pixels in the range dimension. Set `az_half_win` and `r_half_win` to 1 and apply the `ks_test` function:

In [None]:
# Half window size 1 in both directions gives a 3x3 search window around every pixel.
dist,p = ks_test(rmli_stack,1,1)
print(dist.shape)
print(dist)

(1, 2, 3, 3)
[[[[-1.  -1.  -1. ]
   [-1.   0.   0.5]
   [-1.  -1.  -1. ]]

  [[-1.  -1.  -1. ]
   [ 0.5  0.  -1. ]
   [-1.  -1.  -1. ]]]]


`dist` is the KS test statistic. Its shape shows that for each pixel P in this `1*2` image, a `3*3` search window is defined and every pixel in this search window is tested against P. The value `0` in `dist` is the KS test result of pixel P against itself. The value `-1` means the secondary pixel is outside the image and no KS test is applied.

In [None]:
# `p` was returned by the same `ks_test` call as `dist`.
print(p.shape)
print(p)

(1, 2, 3, 3)
[[[[-1.         -1.         -1.        ]
   [-1.          0.          0.00816168]
   [-1.         -1.         -1.        ]]

  [[-1.         -1.         -1.        ]
   [ 0.00816168  0.         -1.        ]
   [-1.         -1.         -1.        ]]]]


`p` is the KS test p value, with the same shape as `dist`.

In [None]:
# Reference result from SciPy on the same two samples, using its asymptotic p value method.
print(stats.ks_2samp(sample1, sample2,method='asymp'))

KstestResult(statistic=0.5, pvalue=0.00777741, statistic_location=0.053860884, statistic_sign=-1)


Comparing the results of `ks_test` and `ks_2samp` from `scipy`, the statistics are the same, which supports the correctness of `ks_test`. The p values differ because the two implementations use different approximation methods, but their orders of magnitude are consistent.

In [None]:
#| hide
# test
nimages = 5
nlines = 5
width = 5
# Half windows as large as the image, so every pixel sees both in-image and
# out-of-image secondary pixels.
az_half_win = 5
r_half_win = 5

rng = np.random.default_rng()
# The first pixel is drawn from a uniform distribution, all others from a normal one.
sample_list = []
for i in range(nlines*width):
    distribution = stats.uniform if i == 0 else stats.norm
    sample_list.append(np.sort(distribution.rvs(size=nimages, random_state=rng)).astype(np.float32))

sample_stack = np.stack(sample_list).reshape(nlines,width,nimages)
cp_sample_stack = cp.asarray(sample_stack)
dist,p = ks_test(cp_sample_stack,az_half_win,r_half_win)
# The calculated p value is not tested because the p value calculation
# methods in scipy and Numerical Recipes are different and their results
# can differ by up to a factor of 10.
assert dist.shape == (nlines,width,az_half_win*2+1,r_half_win*2+1)
# Copy the result to the host once instead of reading it element by element.
dist_host = cp.asnumpy(dist)
# Check every position of the full (2*half_win+1) search window.
for az,r,win_az,win_r in itertools.product(range(nlines),range(width),
                                           range(2*az_half_win+1),range(2*r_half_win+1)):
    sec_az = az + win_az - az_half_win
    sec_r = r + win_r - r_half_win
    if sec_az<0 or sec_az>=nlines or sec_r<0 or sec_r>=width:
        # secondary pixel outside the image: flagged with -1
        assert abs(dist_host[az,r,win_az,win_r]-(-1)) < 1.0e-7
    else:
        # only the statistic is compared; `p` from ks_test is left untouched
        scipy_dist = stats.ks_2samp(sample_stack[az,r,:],sample_stack[sec_az,sec_r,:],method='asymp').statistic
        assert abs(dist_host[az,r,win_az,win_r]-scipy_dist) < 1.0e-7

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()