In [None]:
import open3d as o3d
import numpy as np
import matplotlib.pyplot as plt
from digiforest_analysis.utils.timing import Timer

# Shared timer for the whole notebook: each clustering cell wraps its fit in
# `with timer("<name>")` and then prints the timer to report the measurements.
timer = Timer()

In [None]:
# Visualize segmentation results
def visualize(cloud, labels, method):
    """Show `cloud` in an Open3D window, optionally coloured by cluster label.

    Args:
        cloud: Open3D geometry to draw. When `labels` is given, its `colors`
            attribute is overwritten in place.
        labels: Per-point integer cluster ids (array-like), or None to draw
            the cloud with its current colours. Negative ids are painted black.
        method: Text used as the viewer window title.
    """
    if labels is not None:
        labels = np.asarray(labels)
        # Derive the normalisation constant from the labels that were passed in
        # rather than from a notebook-level `max_label`, so the function works
        # on a fresh kernel and never picks up a stale value from another cell.
        max_label = labels.max() if labels.size > 0 else 0
        cmap = plt.get_cmap("prism")
        # cmap = plt.get_cmap("nipy_spectral")
        # Scale labels to at most 1.0; guard against division by zero when
        # there is a single cluster (or none).
        colors = cmap(labels / (max_label if max_label > 0 else 1))
        colors[labels < 0] = 0
        # The colormap returns RGBA; only the RGB channels are stored.
        cloud.colors = o3d.utility.Vector3dVector(colors[:, :3])
    o3d.visualization.draw_geometries(
        [cloud],
        # Optional fixed camera, kept for reference:
        # zoom=0.5,
        # front=[0.79, 0.02, 0.60],
        # lookat=[2.61, 2.04, 1.53],
        # up=[-0.60, -0.012, 0.79],
        window_name=method,
    )

In [None]:
# load data
import os

# Default location of the test cloud. Set the FOREST_CLOUD_PCD environment
# variable to point at another file without editing the notebook.
pcd_file = os.environ.get(
    "FOREST_CLOUD_PCD", "/home/matias/vilens_slam_data/test_data/forest_cloud.pcd"
)
# Fail fast with a clear message instead of carrying on with whatever the
# reader returns for a path that does not exist.
if not os.path.isfile(pcd_file):
    raise FileNotFoundError(f"Point cloud file not found: {pcd_file}")

# Read with the tensor API; converted to a legacy cloud only for display here.
pcd = o3d.t.io.read_point_cloud(pcd_file)
print(pcd)
visualize(pcd.to_legacy(), None, "original_cloud")

In [None]:
# Keep only the points whose normal z-component lies in [-0.5, 0.5]
# (presumably this retains near-vertical surfaces such as trunks -- confirm).
normal_z = pcd.point.normals[:, 2]
mask = (normal_z >= -0.5) & (normal_z <= 0.5)

# From here on `pcd` is a legacy point cloud, no longer a tensor one.
pcd = pcd.select_by_mask(mask).to_legacy()
visualize(pcd, None, "filtered_by_normal")

In [None]:
# Thin the cloud with a 0.1 voxel grid, then show it from a fixed viewpoint.
pcd = pcd.voxel_down_sample(voxel_size=0.1)

view = {
    "zoom": 0.5,
    "front": [0.79, 0.02, 0.60],
    "lookat": [2.61, 2.04, 1.53],
    "up": [-0.60, -0.012, 0.79],
}
o3d.visualization.draw_geometries([pcd], **view)

In [None]:
# # plt.scatter(np.asarray(pcd.points)[:,0], np.asarray(pcd.points)[:,1], alpha=0.1, s=1)
# plt.hist2d(
#     np.asarray(pcd.points)[:, 0], np.asarray(pcd.points)[:, 1], bins=200, alpha=1
# )

In [None]:
# DBSCAN using the clustering method built into the Open3D point cloud
eps = 0.8
min_cluster_size = 20

with timer("dbscan_open3d"):
    # The conversion to a NumPy array is part of the timed section.
    raw_labels = pcd.cluster_dbscan(
        min_points=min_cluster_size, eps=eps, print_progress=False
    )
    labels = np.array(raw_labels)

# Cluster count assumes ids run from 0 to max_label; negative ids are not counted.
max_label = labels.max()
print("num_clusters = %d" % (max_label + 1))
print(timer)
visualize(pcd, labels, "dbscan_open3d")

In [None]:
# DBSCAN using scikit-learn, run on the full point coordinates
from sklearn.cluster import DBSCAN

eps = 0.3
min_cluster_size = 20
X = np.asarray(pcd.points)

with timer("dbscan_sklearn"):
    # Only the fit is timed.
    db = DBSCAN(min_samples=min_cluster_size, eps=eps).fit(X)

labels = db.labels_
# Cluster count assumes ids run from 0 to max_label; negative ids are not counted.
max_label = labels.max()
print("num_clusters = %d" % (max_label + 1))

visualize(pcd, labels, "dbscan_sklearn")
print(timer)

In [None]:
# HDBSCAN (scikit-learn), run on the full point coordinates
from sklearn.cluster import HDBSCAN

X = np.asarray(pcd.points)
min_cluster_size = 20

# Timer key fixed from "hbscan" so the timing report matches the algorithm
# name and the window title used below.
with timer("hdbscan"):
    hdb = HDBSCAN(min_cluster_size=min_cluster_size).fit(X)
labels = hdb.labels_
# Cluster count assumes ids run from 0 to max_label; negative ids are not counted.
max_label = labels.max()

print("num_clusters = %d" % (max_label + 1))
visualize(pcd, labels, "hdbscan")
print(timer)

In [None]:
# # OPTICS (O(n^2) - not suitable for large point clouds)
# from sklearn.cluster import OPTICS, cluster_optics_dbscan
# X = np.asarray(pcd.points)[:,:2]
# optics = OPTICS(min_samples=50, xi=0.05, max_eps=1.0, algorithm="kd_tree")
# optics.fit(X)
# labels = cluster_optics_dbscan(reachability=optics.reachability_,
#                                core_distances=optics.core_distances_,
#                                ordering=optics.ordering_,
#                                eps=0.5)
# max_label = labels.max()

# print("num_clusters = %d" % (max_label + 1))
# visualize(pcd, labels, "optics")
# print(timer)

In [None]:
# KMeans on the full point coordinates
from sklearn.cluster import KMeans

num_clusters = 350
seed = 0  # fixed seed so centroid initialisation, and thus the result, is reproducible

X = np.asarray(pcd.points)

with timer("kmeans"):
    labels = KMeans(
        n_clusters=num_clusters, n_init="auto", random_state=seed
    ).fit_predict(X)
max_label = labels.max()

print("num_clusters = %d" % (max_label + 1))
visualize(pcd, labels, "kmeans")
print(timer)

In [None]:
# Bisecting KMeans on the first two coordinate columns only
from sklearn.cluster import BisectingKMeans

num_clusters = 250
seed = 0  # fixed seed so the clustering is reproducible across runs

# Cluster on the first two columns; the full cloud is still what gets drawn.
X = np.asarray(pcd.points)[:, :2]
with timer("bisecting_kmeans"):
    labels = BisectingKMeans(n_clusters=num_clusters, random_state=seed).fit_predict(X)
max_label = labels.max()

print("num_clusters = %d" % (max_label + 1))
visualize(pcd, labels, "bisecting_kmeans")
print(timer)

In [None]:
# MiniBatch KMeans on the first two coordinate columns only
from sklearn.cluster import MiniBatchKMeans

num_clusters = 250
seed = 0  # fixed seed so batch sampling and initialisation are reproducible

# Cluster on the first two columns; the full cloud is still what gets drawn.
X = np.asarray(pcd.points)[:, :2]
with timer("minibatch_kmeans"):
    labels = MiniBatchKMeans(n_clusters=num_clusters, random_state=seed).fit_predict(X)
max_label = labels.max()

print("num_clusters = %d" % (max_label + 1))
visualize(pcd, labels, "minibatch_kmeans")
print(timer)

In [None]:
# # MeanShift
# from sklearn.cluster import MeanShift

# X = np.asarray(pcd.points)[:,:2]
# with timer("mean_shift"):
#     labels = MeanShift().fit_predict(X)
# max_label = labels.max()

# print("num_clusters = %d" % (max_label + 1))
# visualize(pcd, labels, "mean_shift")
# print(timer)

In [None]:
# Gaussian mixture model on the first two coordinate columns only
from sklearn.mixture import GaussianMixture

seed = 0  # fixed seed so the mixture initialisation is reproducible

# Cluster on the first two columns; the full cloud is still what gets drawn.
X = np.asarray(pcd.points)[:, :2]
with timer("gmm"):
    labels = GaussianMixture(n_components=100, random_state=seed).fit_predict(X)
max_label = labels.max()

print("num_clusters = %d" % (max_label + 1))
visualize(pcd, labels, "gmm")
print(timer)

In [None]:
# # AgglomerativeClustering
# from sklearn.cluster import AgglomerativeClustering

# X = np.asarray(pcd.points)[:,:2]
# with timer("agglomerative"):
#     labels = AgglomerativeClustering(n_clusters=100).fit_predict(X)
# max_label = labels.max()

# print("num_clusters = %d" % (max_label + 1))
# visualize(pcd, labels, "agglomerative")
# print(timer)

In [None]:
# # SpectralClustering
# from sklearn.cluster import SpectralClustering

# X = np.asarray(pcd.points)[:,:2]
# with timer("spectral"):
#     labels = SpectralClustering(n_clusters=100).fit_predict(X)
# max_label = labels.max()

# print("num_clusters = %d" % (max_label + 1))
# visualize(pcd, labels, "spectral")
# print(timer)

In [None]:
# BIRCH on the first two coordinate columns only
from sklearn.cluster import Birch

# Cluster on the first two columns; the full cloud is still what gets drawn.
X = np.asarray(pcd.points)[:, :2]

with timer("birch"):
    # n_clusters=None is passed through unchanged from the original cell.
    labels = Birch(n_clusters=None).fit_predict(X)

max_label = labels.max()
print("num_clusters = %d" % (max_label + 1))

visualize(pcd, labels, "birch")
print(timer)

In [None]:
# # Euclidean clustering
# from sklearn.neighbors import KDTree
# X = np.asarray(pcd.points)
# ec = EuclideanClustering(X, distance_threshold = 1.0, min_cluster_size=100, max_cluster_size=10000)
# visualize(pcd, labels, "euclidean")