# How to extract contour components

https://towardsdatascience.com/simple-example-of-2d-density-plots-in-python-83b83b934f67

In [1]:
import math

import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as st
from IPython.display import display
from sklearn.neighbors import KernelDensity
# %matplotlib ipympl
%matplotlib qt5
# %matplotlib widget
# matplotlib.use("nbagg")  # interactive !
# matplotlib.use('Qt5Agg')


def getKernelDensityEstimation(values, x, bandwidth=0.2, kernel='gaussian'):
    model = KernelDensity(kernel=kernel, bandwidth=bandwidth)
    model.fit(values[:, np.newaxis])
    log_density = model.score_samples(x[:, np.newaxis])
    return np.exp(log_density)


def getExtremePoints(data, typeOfInflexion=None, maxPoints=None):
    """
    This method returns the indeces where there is a change in the trend of the input series.
    typeOfInflexion = None returns all inflexion points, max only maximum values and min
    only min,
    """
    a = np.diff(data)
    asign = np.sign(a)
    signchange = ((np.roll(asign, 1) - asign) != 0).astype(int)
    idx = np.where(signchange == 1)[0]

    if typeOfInflexion == 'max' and data[idx[0]] < data[idx[1]]:
        idx = idx[1:][::2]

    elif typeOfInflexion == 'min' and data[idx[0]] > data[idx[1]]:
        idx = idx[1:][::2]
    elif typeOfInflexion is not None:
        idx = idx[::2]

    # sort ids by min value
    if 0 in idx:
        idx = np.delete(idx, 0)
    if (len(data) - 1) in idx:
        idx = np.delete(idx, len(data) - 1)
    idx = idx[np.argsort(data[idx])]
    # If we have maxpoints we want to make sure the timeseries has a cutpoint
    # in each segment, not all on a small interval
    if maxPoints is not None:
        idx = idx[:maxPoints]
        if len(idx) < maxPoints:
            return (np.arange(maxPoints) + 1) * (len(data) // (maxPoints + 1))

    return idx

# plt.ion()
# plt.show()

In [2]:
!pwd

/home/lsc/pyprojs/tmi_mvts_transformer/data_test


In [3]:
# i = 767
# i = 877
# i = 987 # abnormal?
# i = 452
# i = 356 # good shape
# i = 333 # good
# i = 803 # good not enough
i = 111 # looks good, using this
# i = 85
# i = 86 # strange
# i = 91 # edge density
# i = 103
trj = np.load(f'../data/SHL_msk3_features/clean_trj_segs.npy', allow_pickle=True)[i]
fs = np.load(f'../data/SHL_msk3_features/clean_multi_feature_segs.npy', allow_pickle=True)[i]
fs_msk = np.load(f'../data/SHL_msk3_features/fs_seg_masks.npy', allow_pickle=True)[i]

d = fs[2]
v = fs[3]
a = fs[4]
jk = fs[5]
hc = fs[7]
hcr = fs[8]

x = trj[0]
y = trj[1]


minx, miny = min(x), min(y)

n = len(x)



In [4]:
# plt.figure(figsize=(15,15))
plt.figure()
# plt.scatter(x, y)
plt.plot(x, y, color='red', marker='o', markerfacecolor='white',
         markeredgecolor='#1f77b4', markeredgewidth=1.5)
plt.xlabel("Latitude")
plt.ylabel("Longitude")
plt.ticklabel_format(style='plain', axis='x', useOffset=False)
plt.show()

In [5]:
# plt.figure()
# plt.plot(v)
# vep = getExtremePoints(v)
# plt.scatter(vep, [v[i] for i in vep])
# plt.show()

In [6]:
# plt.figure()
# plt.plot(a)

In [7]:
# plt.figure()
# plt.plot(jk)

In [8]:
# plt.figure()
# plt.plot(hc)

In [9]:
# plt.figure()
# plt.plot(hcr)

## For the KDE representation, choose the boundaries:

In [10]:
deltaX = (max(x) - min(x)) / 10
deltaY = (max(y) - min(y)) / 10

xmin = min(x) - deltaX
xmax = max(x) + deltaX

ymin = min(y) - deltaY
ymax = max(y) + deltaY

print(xmin, xmax, ymin, ymax)

51.17698147112625 51.1784096848067 -0.43061690609143183 -0.42956194899425026


In [11]:
grid_size = 50
# Create meshgrid
xx, yy = np.mgrid[xmin:xmax:complex(0, grid_size), ymin:ymax:complex(0, grid_size)]

## Fit a gaussian kernel

In [12]:
from scipy.stats import gaussian_kde
class GaussianKde(gaussian_kde):
    """
    https://stackoverflow.com/questions/63812970/scipy-gaussian-kde-matrix-is-not-positive-definite
    Drop-in replacement for gaussian_kde that adds the class attribute EPSILON
    to the covmat eigenvalues, to prevent exceptions due to numerical error.
    """

    EPSILON = 1e-10  # adjust this at will

    def _compute_covariance(self):
        """Computes the covariance matrix for each Gaussian kernel using
        covariance_factor().
        """
        self.factor = self.covariance_factor()
        # Cache covariance and inverse covariance of the data
        if not hasattr(self, '_data_inv_cov'):
            self._data_covariance = np.atleast_2d(np.cov(self.dataset, rowvar=1,
                                                         bias=False,
                                                         aweights=self.weights))
            # we're going the easy way here
            self._data_covariance += self.EPSILON * np.eye(
                len(self._data_covariance))
            self._data_inv_cov = np.linalg.inv(self._data_covariance)

        self.covariance = self._data_covariance * self.factor ** 2
        self.inv_cov = self._data_inv_cov / self.factor ** 2
        L = np.linalg.cholesky(self.covariance * 2 * np.pi)
        self._norm_factor = 2 * np.log(np.diag(L)).sum()  # needed for scipy 1.5.2
        self.log_det = 2 * np.log(np.diag(L)).sum()  # changed var name on 1.6.2





In [13]:
positions = np.vstack([xx.ravel(), yy.ravel()])
values = np.vstack([x, y])
display(values.shape)
kernel = gaussian_kde(values, bw_method='silverman')
# kernel = GaussianKde(values)

kp = kernel(positions).T
f = np.reshape(kernel(positions).T, xx.shape)
kvt = kernel(values).T


(2, 75)

In [14]:
from sklearn.neighbors import KernelDensity
kde = KernelDensity(kernel='gaussian', bandwidth=0.5).fit(values.T)
log_density = kde.score_samples(values.T)
density = np.exp(log_density)

In [15]:
fig = plt.figure()
ax = fig.gca()

cfset = ax.contourf(xx, yy, f, cmap='coolwarm')
ax.imshow(np.rot90(f), cmap='coolwarm', extent=[xmin, xmax, ymin, ymax])
cset = ax.contour(xx, yy, f, colors='k')
ax.clabel(cset, inline=1, fontsize=10)
ax.set_xlabel('Latitude')
ax.set_ylabel('Longitude')
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
plt.show()

## Render KDE in 3d

In [16]:
from mpl_toolkits.mplot3d import axes3d
fig = plt.figure()
ax = plt.axes(projection='3d')
surf = ax.plot_surface(xx, yy, f, rstride=1, cstride=1, cmap='coolwarm', edgecolor='none')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('PDF')
ax.set_title('Surface plot of Gaussian 2D KDE')
fig.colorbar(surf, shrink=0.5, aspect=5)  # add color bar indicating the PDF
ax.view_init(60, 35)
plt.show()

In [17]:
# fig = plt.figure()
# ax = plt.axes(projection='3d')
# w = ax.plot_wireframe(xx, yy, f)
# ax.set_xlabel('x')
# ax.set_ylabel('y')
# ax.set_zlabel('PDF')
# ax.set_title('Wireframe plot of Gaussian 2D KDE');
# plt.show()

## How to extract plot lines?

In [18]:
# plt.figure()
#
# for j in range(len(cset.allsegs)):
#     for ii, seg in enumerate(cset.allsegs[j]):
#         plt.plot(seg[:, 0], seg[:, 1], '.-', label=f'Cluster{j}, level{ii}')
#
# plt.legend()


## Render data with histogram 2d

Use histogram 2d to get the areas with the highest surface

In [35]:
plt.figure()
h = plt.hist2d(x, y, bins=64)
plt.colorbar(h[3])
plt.xlabel('x')
plt.ylabel('y')
plt.title('Frequency histogram')
plt.show()

In [20]:
plt.figure()
plt.imshow(f)
plt.show()

In [21]:
from skimage.feature import peak_local_max

peaks = peak_local_max(f, min_distance=1)
peaks


array([[19, 43],
       [37, 14],
       [36, 16]])

## using MinMaxScaler!!

In [22]:
from sklearn.preprocessing import MinMaxScaler
# transform trj to same scale as the grid of kde
scaler = MinMaxScaler(feature_range=(-1, 1))
x_s = scaler.fit_transform(x.reshape(-1, 1)).squeeze()*grid_size
y_s = scaler.fit_transform(y.reshape(-1, 1)).squeeze()*grid_size
plt.figure()
plt.plot(x_s, y_s, color='red', marker='o', markerfacecolor='white',
         markeredgecolor='#1f77b4', markeredgewidth=1.5)
plt.xlabel("Latitude")
plt.ylabel("Longitude")
plt.show()

In [23]:
import numpy
import math
from scipy.spatial import distance
trj_s = np.array([x_s, y_s]).T
peak_point_idx = []
for pk in peaks:
    min_dist = math.inf
    min_dist_point = -1 # idx
    for i, point in enumerate(trj_s):
        dist=distance.euclidean(point, pk)
        if dist <min_dist:
            min_dist = dist
            min_dist_point = i
    peak_point_idx.append(min_dist_point)
peak_point_idx




[47, 51, 51]

In [24]:
ep_kvt = getExtremePoints(kvt)


In [25]:
plt.figure()
# plt.scatter(x, y)
plt.plot(x_s, y_s, color='red', marker='o', markerfacecolor='white',
         markeredgecolor='#1f77b4', markeredgewidth=1.5)
plt.xlabel("Latitude")
plt.ylabel("Longitude")
plt.ticklabel_format(style='plain', axis='x', useOffset=False)
plt.scatter(x_s[peak_point_idx], y_s[peak_point_idx], c='red', zorder=100)
plt.scatter(x_s[ep_kvt], y_s[ep_kvt], c='blue', zorder=200)
plt.show()

## I finally use KDEpy
data should be scaled !!!!

In [26]:
from KDEpy import FFTKDE
data = np.array([x_s, y_s]).T  # use scaled data!!!
grid_points = grid_size # Grid points in each dimension
N = 8  # Number of contours
xy, z_kde = FFTKDE().fit(data)((grid_points, grid_points))
xy.shape, z_kde.shape


((2500, 2), (2500,))

In [27]:
xff, yff = np.unique(xy[:, 0]), np.unique(xy[:, 1])
zffr = z_kde.reshape(grid_points, grid_points).T
plt.figure()
plt.contour(xff, yff, zffr, N, linewidths=0.8, colors="k")
plt.contourf(xff, yff, zffr, N, cmap="PuBu")
plt.plot(data[:, 0], data[:, 1], "ok", ms=2)
# plt.yticks([])
# plt.xticks([])
plt.show()

In [28]:
fig = plt.figure()
ax = plt.axes(projection='3d')
# surf = ax.plot_surface(xff, yff, zffr, rstride=1, cstride=1, cmap='coolwarm', edgecolor='none')
surf = ax.plot_surface(xx, yy, zffr, rstride=1, cstride=1, cmap='coolwarm', edgecolor='none')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('PDF')
ax.set_title(' 2D pyKDE')
fig.colorbar(surf, shrink=0.5, aspect=5)  # add color bar indicating the PDF
ax.view_init(60, 35)
# plt.show()

In [29]:
pk_zffr = peak_local_max(zffr, threshold_rel=.3, min_distance=2) # coordinate!!  note the ration .3 and min_dsitance 2

plt.figure()
plt.imshow(zffr)
plt.plot(pk_zffr[:, 1], pk_zffr[:, 0], 'r.')
plt.show()

In [30]:
from scipy.spatial import distance
trj_s = np.array([x_s, y_s]).T
peak_point_idx = []
for pk in peaks:
    min_dist = math.inf
    min_dist_point = -1 # idx
    for i, point in enumerate(trj_s):
        dist=distance.euclidean(point, pk)
        if dist <min_dist:
            min_dist = dist
            min_dist_point = i
    peak_point_idx.append(min_dist_point)
peak_point_idx
np.unique(peak_point_idx)

array([47, 51])

In [31]:
plt.figure()
# plt.scatter(x, y)
plt.plot(x_s, y_s, color='red', marker='o', markerfacecolor='white',
         markeredgecolor='#1f77b4', markeredgewidth=1.5)
plt.xlabel("Latitude")
plt.ylabel("Longitude")
plt.ticklabel_format(style='plain', axis='x', useOffset=False)
plt.scatter(x_s[peak_point_idx], y_s[peak_point_idx], c='red', zorder=100)
# plt.scatter(x_s[ep_kvt], y_s[ep_kvt], c='blue', zorder=200)
plt.show()