In [208]:
import math

import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as st
from IPython.display import display
from sklearn.neighbors import KernelDensity
# %matplotlib ipympl
%matplotlib qt5
# %matplotlib widget
# matplotlib.use("nbagg")  # interactive !
# matplotlib.use('Qt5Agg')

In [209]:
def getExtremePoints(data, typeOfInflexion=None, maxPoints=None):
    """
    This method returns the indeces where there is a change in the trend of the input series.
    typeOfInflexion = None returns all inflexion points, max only maximum values and min
    only min,
    """
    a = np.diff(data)
    asign = np.sign(a)
    signchange = ((np.roll(asign, 1) - asign) != 0).astype(int)
    idx = np.where(signchange == 1)[0]

    if typeOfInflexion == 'max' and data[idx[0]] < data[idx[1]]:
        idx = idx[1:][::2]

    elif typeOfInflexion == 'min' and data[idx[0]] > data[idx[1]]:
        idx = idx[1:][::2]
    elif typeOfInflexion is not None:
        idx = idx[::2]

    # sort ids by min value
    if 0 in idx:
        idx = np.delete(idx, 0)
    if (len(data) - 1) in idx:
        idx = np.delete(idx, len(data) - 1)
    idx = idx[np.argsort(data[idx])]
    # If we have maxpoints we want to make sure the timeseries has a cutpoint
    # in each segment, not all on a small interval
    if maxPoints is not None:
        idx = idx[:maxPoints]
        if len(idx) < maxPoints:
            return (np.arange(maxPoints) + 1) * (len(data) // (maxPoints + 1))

    return idx


from scipy import ndimage as ndi


def local_maxima_3D(data, order=1):
    """Detects local maxima in a 3D array

    Parameters
    ---------
    data : 3d ndarray
    order : int
        How many points on each side to use for the comparison

    Returns
    -------
    coords : ndarray
        coordinates of the local maxima
    values : ndarray
        values of the local maxima
    """
    size = 1 + 2 * order
    footprint = np.ones((size, size, size))
    footprint[order, order, order] = 0

    filtered = ndi.maximum_filter(data, footprint=footprint)
    mask_local_maxima = data > filtered
    coords = np.asarray(np.where(mask_local_maxima)).T
    values = data[mask_local_maxima]

    return coords, values

In [210]:
# i = 767
# i = 877
# i = 987 # abnormal?
# i = 452
# i = 356 # good shape
# i = 333 # good
# i = 803 # good not enough
# i = 111 # looks good, using this
# i = 85
# i = 86 # strange
# i = 91  # edge density
i = 106  # should use this
# i = 95 # classic
# i = 99 # classic
trj = np.load(f'../data/SHL_msk3_features/clean_trj_segs.npy', allow_pickle=True)[i]
fs = np.load(f'../data/SHL_msk3_features/clean_multi_feature_segs.npy', allow_pickle=True)[i]
fs_msk = np.load(f'../data/SHL_msk3_features/fs_seg_masks.npy', allow_pickle=True)[i]

d = fs[2]
v = fs[3]
a = fs[4]
jk = fs[5]
hc = fs[7]
hcr = fs[8]

x = trj[0]
y = trj[1]
d_ = np.vstack([x, y]).T

minx, miny = min(x), min(y)

n = len(x)



In [211]:
# plt.figure(figsize=(15,15))
plt.figure()
plt.title('trj')
# plt.scatter(x, y)
plt.plot(x, y, color='red', marker='o', markerfacecolor='white',
         markeredgecolor='#1f77b4', markeredgewidth=1.5)
plt.xlabel("Latitude")
plt.ylabel("Longitude")
plt.ticklabel_format(style='plain', axis='x', useOffset=False)
plt.show()

#### scale!

In [212]:
from sklearn.preprocessing import MinMaxScaler

target_scale = 100 # 0~100
grid_size = 100

# transform trj to same scale as the grid of kde
scaler = MinMaxScaler(feature_range=(0, 1))
x_s = scaler.fit_transform(x.reshape(-1, 1)).squeeze() * target_scale
y_s = scaler.fit_transform(y.reshape(-1, 1)).squeeze() * target_scale
plt.figure()
plt.title('trj scaled')

plt.plot(x_s, y_s, color='red', marker='o', markerfacecolor='white',
         markeredgecolor='#1f77b4', markeredgewidth=1.5)
plt.xlabel("Latitude")
plt.ylabel("Longitude")
plt.show()

#### Create meshgrid

In [213]:
deltaX = (max(x) - min(x)) / 10
deltaY = (max(y) - min(y)) / 10

xmin = min(x) - deltaX
xmax = max(x) + deltaX

ymin = min(y) - deltaY
ymax = max(y) + deltaY

# Create meshgrid
x_mg, y_mg = np.mgrid[xmin:xmax:complex(0, grid_size ), ymin:ymax:complex(0, grid_size )]


#### Boundary correction using mirroring, then do kde
https://kdepy.readthedocs.io/en/latest/examples.html#boundary-correction-using-mirroring

In [214]:
data = np.vstack([x_s, y_s]).T  # use scaled data!!!.

# n_bdry_ext = round(n * .1)
# begin_ext = data[1:n_bdry_ext][::-1] # note: remove 1st repeat element
# end_ext = data[-n_bdry_ext:-1][::-1] #
# data_ext = np.vstack([begin_ext, data, end_ext])

# x_s_mir = np.concatenate([(2 * x_s.min() - x_s)[:-1], x_s, (2 * x_s.max() - x_s)[1:]])
# y_s_mir = np.concatenate([(2 * y_s.min() - y_s)[:-1], y_s, (2 * y_s.max() - y_s)[1:]])
x_s_mir = np.concatenate([(2 * x_s.min() - x_s), x_s, (2 * x_s.max() - x_s)])
y_s_mir = np.concatenate([(2 * y_s.min() - y_s), y_s, (2 * y_s.max() - y_s)])
data_mir = np.vstack([x_s_mir, y_s_mir]).T  # use scaled data!!!.
data_mir.shape  # n points and each point dim

(384, 2)

mirrored, hence the grid_size to do kde is 3 times

In [215]:
grid_size_mir = grid_size * 3

In [216]:
plt.figure()
plt.title('trj scaled mirrored')

plt.plot(x_s_mir, y_s_mir, color='red', marker='o', markerfacecolor='white',
         markeredgecolor='#1f77b4', markeredgewidth=1.5)
plt.xlabel("Latitude")
plt.ylabel("Longitude")
plt.show()

## FFTKDE

#### fix grid error:
because i found that if we do not manually generate grid using np.linspace, the peak values in kde detected is lagged!!

https://github.com/tommyod/KDEpy/issues/15

In [217]:
from KDEpy import FFTKDE
# Create 2D grid
kde_grid_x = np.linspace(data_mir.min()-1, data_mir.max()+1, grid_size_mir)  # "-1, +1" is used to ensure range
kde_grid_y = np.linspace(data_mir.min()-1, data_mir.max()+1, grid_size_mir)
kde_grid = np.stack(np.meshgrid(kde_grid_x, kde_grid_y), -1).reshape(-1, 2)
kde_grid[:, [0, 1]] = kde_grid[:, [1, 0]] # Swap indices

In [218]:
fit = FFTKDE().fit(data_mir)
z_kde = fit.evaluate(kde_grid)
z_kde_grid = z_kde.reshape(grid_size_mir, grid_size_mir).T


In [219]:
# xff_mir, yff_mir = np.unique(xy_mir[:, 0]), np.unique(xy_mir[:, 1])
# zffr_mir = z_kde_mir.reshape(grid_size_mir, grid_size_mir).T


In [220]:

plt.figure()
N = 8  # Number of contours
plt.title('contours of mirrored kde')
plt.contour(kde_grid_x, kde_grid_y, z_kde_grid, N, linewidths=0.8, colors="k")
plt.contourf(kde_grid_x, kde_grid_y, z_kde_grid, N, cmap="PuBu")
plt.plot(data[:, 0], data[:, 1], "ok", ms=2)
# plt.gca().invert_yaxis()
# plt.yticks([])
# plt.xticks([])
plt.show()

### MIDDLE PART (NON MIRROR)

#### take out middle part (non mirror)

In [221]:
kde_grid_x_mid = kde_grid_x[grid_size:2 * grid_size]
kde_grid_y_mid = kde_grid_y[grid_size:2 * grid_size]
z_kde_grid_mid = z_kde_grid[grid_size:2 * grid_size, grid_size:2 * grid_size]

#### 3d kde of middle part (non mirror)

In [222]:
fig = plt.figure()
ax = plt.axes(projection='3d')
# surf = ax.plot_surface(xff, yff, zffr, rstride=1, cstride=1, cmap='coolwarm', edgecolor='none')
surf = ax.plot_surface(x_mg, y_mg, z_kde_grid_mid, rstride=1, cstride=1, cmap='coolwarm', edgecolor='none')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('PDF')
ax.set_title(' 3d kde of middle part (non mirror)')
# ax.set_ylim3d( y_mg.max(), y_mg.min(),)
ax.set_xlim3d(x_mg.max(), x_mg.min(), )  # to make the axis values order same as 2d plot

fig.colorbar(surf, shrink=0.5, aspect=5)  # add color bar indicating the PDF
ax.view_init(60, 35)
# plt.show()

#### peak_local_max of middle part

In [223]:

from skimage.feature import peak_local_max

pk_coords_mid = peak_local_max(z_kde_grid_mid, exclude_border=False, threshold_rel=0.3,
                               min_distance=round(grid_size*.05))  # coordinate!!  note the ration .3 and min_dsitance 2

#### show middle part kde image

In [224]:
plt.figure()
plt.title('z_kde_grid_mid')
plt.imshow(z_kde_grid_mid)
plt.plot(pk_coords_mid[:, 1], pk_coords_mid[:, 0], 'r.')  # note columns' order
plt.gca().invert_yaxis()
plt.show()

### ENTIRE PART

#### do  peaklocalmax for entire mirrored result

In [225]:
from skimage.feature import peak_local_max
# coordinate!!  note the ratio and distance
pk_coords_mir = peak_local_max(z_kde_grid, exclude_border=False, threshold_rel=0.1,
                               min_distance=2)
# pk_coords_mir = peak_local_max(z_kde_grid, exclude_border=False,)
pk_coords_mir

array([[100, 100],
       [298, 297],
       [ 52,   4],
       [148, 196],
       [250, 202],
       [  8,  72],
       [192, 128],
       [206, 270],
       [ 73,  14],
       [271, 212],
       [127, 186],
       [297, 291],
       [101, 107],
       [ 99,  93],
       [ 71,  10],
       [129, 190],
       [269, 208],
       [ 81,  26],
       [119, 174],
       [279, 224],
       [288, 262],
       [110, 136],
       [ 90,  64],
       [ 78,  23],
       [122, 177],
       [276, 221],
       [292, 268],
       [106, 130],
       [ 94,  70],
       [245, 212],
       [153, 186],
       [ 47,  14],
       [ 60,   1],
       [140, 199],
       [258, 199],
       [293, 271],
       [105, 127],
       [ 95,  73],
       [ 20,  60],
       [180, 140],
       [ 67,   3],
       [218, 258],
       [133, 197],
       [265, 201],
       [210, 266],
       [188, 132],
       [ 12,  68],
       [287, 256],
       [111, 142],
       [ 89,  58],
       [244, 221],
       [154, 177],
       [288,

In [226]:
def find_peak_repeatedly(data, min_peaks = 3*4, max_peaks = 3*9,threshold_rel=0, min_distance=2):
    """
    To avoid the situation that a very high peak occurred lead to the rest peaks cannot be identified,
    hence we repeatedly find peaks by making already identified peaks equal to zero until the `min_peaks` is met.
    Besides, the `max_peaks` is also required, if peaks more than `max_peaks` is detected, we only need the former highest
    `max_peaks`.

    Returns: indices of peaks

    """
    data_cp = np.copy(data)

    results = None
    n_pks = 0
    while n_pks <= min_peaks:
        pks =  peak_local_max(data_cp, exclude_border=False, threshold_rel=threshold_rel, min_distance=min_distance)
        print(n_pks, pks)
        if results is None:
            results = pks
        else:
            results = np.concatenate([results, pks])
        n_pks += len(pks)
        # results += pks
        for p in pks:
            data_cp[p[0], p[1]] = 0

    if n_pks > max_peaks:
        print([coord for coord in results])
        pk_vals = np.array([[*coord, data[coord[0], coord[1]]] for coord in results])
        # sort by peak val
        pk_vals = pk_vals[pk_vals[:, 2].argsort()][::-1]
        results = pk_vals[:max_peaks, [0, 1]].astype(int)
    return np.array(results)
print()
pk_coords_mir = find_peak_repeatedly(z_kde_grid, threshold_rel=0.1, min_distance=4)
print()


0 [[100 100]
 [298 297]
 [ 52   4]
 [148 196]
 [250 202]
 [  8  72]
 [192 128]
 [206 270]
 [ 73  14]
 [271 212]
 [127 186]
 [ 81  26]
 [119 174]
 [279 224]
 [288 262]
 [110 136]
 [ 90  64]
 [292 268]
 [106 130]
 [ 94  70]
 [245 212]
 [153 186]
 [ 47  14]
 [ 60   1]
 [140 199]
 [258 199]
 [ 20  60]
 [180 140]
 [ 67   3]
 [218 258]
 [133 197]
 [265 201]
 [244 221]
 [154 177]
 [288 249]
 [ 46  23]
 [110 149]
 [ 90  51]]
[array([100, 100]), array([298, 297]), array([52,  4]), array([148, 196]), array([250, 202]), array([ 8, 72]), array([192, 128]), array([206, 270]), array([73, 14]), array([271, 212]), array([127, 186]), array([81, 26]), array([119, 174]), array([279, 224]), array([288, 262]), array([110, 136]), array([90, 64]), array([292, 268]), array([106, 130]), array([94, 70]), array([245, 212]), array([153, 186]), array([47, 14]), array([60,  1]), array([140, 199]), array([258, 199]), array([20, 60]), array([180, 140]), array([67,  3]), array([218, 258]), array([133, 197]), array([2

In [227]:
z_kde_grid[[0,0]]

array([[2.22009910e-16, 2.22061952e-16, 2.22009910e-16, 2.22027258e-16,
        2.21979553e-16, 2.22037558e-16, 2.22046502e-16, 2.22063850e-16,
        2.22025360e-16, 2.22053821e-16, 2.22040268e-16, 2.22055447e-16,
        2.22053279e-16, 2.22044605e-16, 2.22031594e-16, 2.22044605e-16,
        2.22026173e-16, 2.21996900e-16, 2.22068457e-16, 2.22086889e-16,
        2.22040268e-16, 2.22046231e-16, 2.22063850e-16, 2.22033221e-16,
        2.22008826e-16, 2.22023463e-16, 2.22036473e-16, 2.22043250e-16,
        2.22078486e-16, 2.22055854e-16, 2.22045215e-16, 2.22019262e-16,
        2.22025708e-16, 2.22040408e-16, 2.22039168e-16, 2.22032318e-16,
        2.22067278e-16, 2.22027535e-16, 2.22031083e-16, 2.22040670e-16,
        2.22047078e-16, 2.22071100e-16, 2.22046502e-16, 2.22040268e-16,
        2.22018584e-16, 2.22035931e-16, 2.22035931e-16, 2.22072794e-16,
        2.22066289e-16, 2.22053279e-16, 2.22018313e-16, 2.22009436e-16,
        2.22039878e-16, 2.22024971e-16, 2.22008977e-16, 2.220148

#### histogram of kde 2d

In [228]:
plt.figure()
plt.hist(z_kde_grid.reshape(-1), bins=100)
plt.show()

#### show entire kde image

In [229]:
plt.figure()
plt.title('z_kde_grid')
plt.imshow(z_kde_grid)
plt.plot(pk_coords_mir[:, 1], pk_coords_mir[:, 0], 'r.')  # note columns' order
plt.gca().invert_yaxis()
plt.show()

### get peak index

In [230]:
pk_coords_mir_correct = np.vstack(
    [pk_coords_mir[:, 1], pk_coords_mir[:, 0]]).T  # make columns order correct to calculate distance later
pk_coords_mir_correct

array([[100, 100],
       [297, 298],
       [  4,  52],
       [196, 148],
       [202, 250],
       [ 72,   8],
       [128, 192],
       [270, 206],
       [ 14,  73],
       [212, 271],
       [186, 127],
       [ 26,  81],
       [174, 119],
       [224, 279],
       [262, 288],
       [136, 110],
       [ 64,  90],
       [268, 292],
       [130, 106],
       [ 70,  94],
       [212, 245],
       [186, 153],
       [ 14,  47],
       [  1,  60],
       [199, 140],
       [199, 258],
       [ 60,  20]])

find the close one to pk_coord in data

In [231]:
pk_coords_unmir_correct = []
for pk in pk_coords_mir_correct:
    x, y = pk[0], pk[1]
    if x in range(grid_size, 2 * grid_size) and y in range(grid_size, 2 * grid_size):
        pk_coords_unmir_correct.append(pk)
print(pk_coords_unmir_correct)
pk_coords_unmir_correct = np.array(pk_coords_unmir_correct) - grid_size
pk_coords_unmir_correct

[array([100, 100]), array([196, 148]), array([128, 192]), array([186, 127]), array([174, 119]), array([136, 110]), array([130, 106]), array([186, 153]), array([199, 140])]


array([[ 0,  0],
       [96, 48],
       [28, 92],
       [86, 27],
       [74, 19],
       [36, 10],
       [30,  6],
       [86, 53],
       [99, 40]])

In [232]:
from scipy.spatial import distance

pk_point_idx = []
print('pk_coord, min_dist_point, min_dist, min_dist_point_idx')

for pk_coord in pk_coords_unmir_correct:
    min_dist = math.inf
    min_dist_point_idx = -1  # idx
    for i, point in enumerate(data):
        # if i not in range(grid_size, 2*grid_size):
        #     continue
        dist = distance.euclidean(point, pk_coord)
        if dist < min_dist:
            min_dist = dist
            min_dist_point = point
            min_dist_point_idx = i
    print(pk_coord, min_dist_point, min_dist, min_dist_point_idx)
    pk_point_idx.append(min_dist_point_idx)
print(pk_point_idx)

pk_coord, min_dist_point, min_dist, min_dist_point_idx
[0 0] [0. 0.] 0.0 0
[96 48] [96.40872355 48.70072033] 0.8112113868554266 80
[28 92] [28.35095076 92.47029287] 0.5868064572290991 121
[86 27] [85.56235498 26.91277286] 0.44625300014265945 60
[74 19] [74.60063581 19.04992882] 0.6027074480308069 51
[36 10] [36.39979347 10.1001491 ] 0.41214640723998386 29
[30  6] [30.32308851  5.81893732] 0.3703645190332504 25
[86 53] [85.52988086 53.73529361] 0.8727363234704144 88
[99 40] [99.98362606 39.68849734] 1.03177232429288 73
[0, 80, 121, 60, 51, 29, 25, 88, 73]


In [233]:
plt.figure()
# plt.scatter(x, y)
plt.plot(x_s, y_s, color='red', marker='o', markerfacecolor='white',
         markeredgecolor='#1f77b4', markeredgewidth=1.5)
plt.xlabel("Latitude")
plt.ylabel("Longitude")
plt.ticklabel_format(style='plain', axis='x', useOffset=False)
plt.scatter(x_s[pk_point_idx], y_s[pk_point_idx], c='red', zorder=100)
plt.show()