In [75]:
# import mpld3
# mpld3.enable_notebook()
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import DBSCAN
from matplotlib import pyplot as plt
from mpl_toolkits import mplot3d
import seaborn as sns
from kneed import KneeLocator
%matplotlib inline

In [76]:
path = "3Ddata/contrails1/0.2.csv"

# path = "data/contrails1/0.2.csv"
df = pd.read_csv(path)

icedf = df[df.iceLabel == 1]

clusterdf = icedf[['Points:0', 'Points:1', 'Points:2']]

data = clusterdf[["Points:0","Points:1","Points:2"]].to_numpy()

%matplotlib notebook
fig = plt.figure(figsize=(10,10))
ax = plt.axes(projection='3d')
ax.scatter3D(data[:,0], data[:,1], data[:,2], s=2.5)
# ax.view_init(azim=200)
plt.show()

<IPython.core.display.Javascript object>

In [77]:
minimumPoints = icedf.origId.value_counts().max()

In [78]:
neigh = NearestNeighbors(n_neighbors=3)
nbrs = neigh.fit(data)
distances, indices = nbrs.kneighbors(data)

In [79]:
distances = np.sort(distances, axis=0)
distances = distances[:,1]
print(len(distances))
%matplotlib notebook
plt.plot(distances)

53428


<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x1551d624908>]

In [80]:
kneedle = KneeLocator(range(1,len(distances)+1),  #x values
                      distances, # y values
                      S=1.0, #parameter suggested from paper
                      curve="concave", #parameter from figure
                      direction="increasing", #parameter from figure
                      online=True
                     ) 

kneedle.plot_knee()
print(kneedle.knee_y)
epsilon = kneedle.knee_y

<IPython.core.display.Javascript object>

0.126912965856111


In [83]:
m = DBSCAN(eps=epsilon, min_samples=minimumPoints)
m.fit(data)

DBSCAN(eps=0.126912965856111, min_samples=38)

In [84]:
clusters = m.labels_
print("number of cluster found: {}".format(len(set(m.labels_))))
print('cluster for each point: ', m.labels_)

# for c in clusters:
#     print(c)

number of cluster found: 6
cluster for each point:  [0 0 0 ... 4 4 4]


In [85]:
# colors = ['royalblue', 'maroon', 'forestgreen', 'mediumorchid', 'tan', 'deeppink', 'olive', 'goldenrod', 'lightcyan', 'navy']
colors = ['red', 'yellow', 'orange', 'green', 'blue', 'black']
vectorizer = np.vectorize(lambda x: colors[x % len(colors)])

In [86]:
fig = plt.figure(figsize=(15,15))
ax = plt.axes(projection='3d')
ax.scatter3D(data[:,0], data[:,1], data[:,2], c=vectorizer(clusters), s=10)
# ax.view_init(0)
plt.show()

<IPython.core.display.Javascript object>