In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import open3d as o3d
import numpy as np
import cupoch as cph
import argparse
from sklearn.metrics import silhouette_score
from torchclustermetrics import silhouette
from cupoch import geometry

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [2]:
columns = ['point_id', 'frame', 'x', 'y', 'z']


def get_file_path(name):
    file_path = '../Results/MapPoints_' + str(name) + '.csv'
    return file_path


def get_dataframe(name):
    dataframe = pd.read_csv(get_file_path(name), names=columns)
    return dataframe


def get_number_of_unique_points(dataframe):
    unique_points = dataframe['point_id'].nunique()
    return unique_points


def read_in_ply(input_file):
    pointcloud = cph.io.read_point_cloud("../Visualize/" + input_file)
    return pointcloud

def data_mining(name):
    df = get_dataframe(name)
    unique_points = get_number_of_unique_points(df)

In [3]:
filename = 'original'
df = get_dataframe(filename)
pointcloud = read_in_ply('..\Results\original\\1\original.ply')
pointcloud.dimension

<bound method PyCapsule.dimension of geometry::PointCloud with 1687273 points.>

In [4]:

def get_silhouette(pointcloud, labels):
    original_data = np.array(pointcloud.points.cpu())
    silhouette_avg = silhouette_score(original_data, labels)
    return silhouette_avg

def cluster_old(pointcloud, eps=0.1, min_points=10):
    labels = np.array(pointcloud.cluster_dbscan(eps=eps, min_points=min_points).cpu())
    max_label = labels.max()
    print("Point cloud has %d clusters" % (max_label + 1))

    # Count points in each cluster
    unique_labels, counts = np.unique(labels, return_counts=True)

    # Prepare data for export
    cluster_data = {'ClusterLabel': [], 'NumPoints': []}

    for label, count in zip(unique_labels, counts):
        cluster_data['ClusterLabel'].append(label)
        cluster_data['NumPoints'].append(count)

        print(f"Cluster {label} has {count} points")

    # Create a DataFrame for export
    cluster_df = pd.DataFrame(cluster_data)
    return labels, cluster_df


def cluster(pointcloud, eps, min_points):
    labels = np.array(pointcloud.cluster_dbscan(eps=eps, min_points=min_points).cpu())
    max_label = labels.max()
    print("Point cloud has %d clusters" % (max_label + 1))

    # Count points in each cluster
    unique_labels, counts = np.unique(labels, return_counts=True)

    # Prepare data for export
    cluster_data = {'ClusterLabel': [], 'NumPoints': []}

    for label, count in zip(unique_labels, counts):
        cluster_data['ClusterLabel'].append(label)
        cluster_data['NumPoints'].append(count)

        print(f"Cluster {label} has {count} points")

    # Create a DataFrame for export
    cluster_df = pd.DataFrame(cluster_data)

    # Calculate Silhouette Score using the original data
    #silhouette_avg = get_silhouette(pointcloud, labels)
    #print(f"Silhouette Score: {silhouette_avg}")

    # Add Silhouette Score to the DataFrame
    #cluster_df['SilhouetteScore'] = silhouette_avg

    return labels, cluster_df

In [5]:
def visualize(pointcloud, labels):
    cmap = plt.get_cmap("viridis")
    max_label = labels.max()
    colors = cmap(labels / (max_label if max_label > 0 else 1))
    # colors[labels < 0] = 0
    pointcloud.colors = cph.utility.Vector3fVector(colors[:, :3])
    cph.visualization.draw_geometries([pointcloud])

In [6]:
def save_results(df_data, df_cluster, output_csv_file):
    df_data.to_csv(output_csv_file + '.csv', index=False)

# Main

In [7]:
labels, clusterdf = cluster(pointcloud, eps=0.1, min_points=10)

Point cloud has 217 clusters
Cluster -1 has 57877 points
Cluster 1 has 49 points
Cluster 2 has 29 points
Cluster 3 has 14 points
Cluster 4 has 242 points
Cluster 5 has 15 points
Cluster 6 has 13 points
Cluster 7 has 112 points
Cluster 8 has 29 points
Cluster 9 has 23 points
Cluster 10 has 45 points
Cluster 11 has 25 points
Cluster 12 has 17 points
Cluster 13 has 19 points
Cluster 14 has 12 points
Cluster 15 has 15 points
Cluster 16 has 13 points
Cluster 17 has 26 points
Cluster 18 has 13 points
Cluster 19 has 28 points
Cluster 20 has 20 points
Cluster 21 has 16 points
Cluster 22 has 20 points
Cluster 23 has 116 points
Cluster 24 has 14 points
Cluster 25 has 15 points
Cluster 26 has 18 points
Cluster 27 has 1 points
Cluster 28 has 78 points
Cluster 29 has 16 points
Cluster 30 has 58 points
Cluster 31 has 2 points
Cluster 32 has 3 points
Cluster 33 has 15 points
Cluster 34 has 18 points
Cluster 35 has 18 points
Cluster 36 has 42 points
Cluster 37 has 1 points
Cluster 38 has 12 points
Clu

In [8]:
print(clusterdf)

     ClusterLabel  NumPoints
0              -1      57877
1               1         49
2               2         29
3               3         14
4               4        242
..            ...        ...
212           212         11
213           213         11
214           214         11
215           215         11
216           216         11

[217 rows x 2 columns]


In [9]:
print(df)

         point_id  frame         x          y         z
0               0      4  18.16210 -14.758000  32.79170
1               1      4  32.73420  -0.329008  50.00960
2               2      4  20.40610 -10.003400  40.01140
3               3      4  20.55920 -13.848400  16.91910
4               4      4  23.13690 -13.108900  22.85620
...           ...    ...       ...        ...       ...
1687268    200019   2424 -13.06340   4.567680   5.40135
1687269    200020   2424  -8.95971   4.597340   2.83931
1687270    200021   2424 -10.62660  11.524500   7.74365
1687271    200022   2424  -8.41464   1.494700   5.59172
1687272    200023   2424  -9.97235   2.479570   4.52479

[1687273 rows x 5 columns]


In [10]:
# Assuming your DataFrame is named 'df'
duplicated_rows = df[df.duplicated()]

# Number of repeated observations
num_repeated_observations = len(duplicated_rows)

print(f"Number of repeated observations: {num_repeated_observations}")

Number of repeated observations: 12871


In [11]:
len(labels)

1687273

In [12]:
df['ClusterLabel'] = labels

In [13]:
print(df)

         point_id  frame         x          y         z  ClusterLabel
0               0      4  18.16210 -14.758000  32.79170            -1
1               1      4  32.73420  -0.329008  50.00960            -1
2               2      4  20.40610 -10.003400  40.01140            -1
3               3      4  20.55920 -13.848400  16.91910            -1
4               4      4  23.13690 -13.108900  22.85620            -1
...           ...    ...       ...        ...       ...           ...
1687268    200019   2424 -13.06340   4.567680   5.40135            -1
1687269    200020   2424  -8.95971   4.597340   2.83931            -1
1687270    200021   2424 -10.62660  11.524500   7.74365            -1
1687271    200022   2424  -8.41464   1.494700   5.59172            -1
1687272    200023   2424  -9.97235   2.479570   4.52479            -1

[1687273 rows x 6 columns]


In [14]:
visualize(pointcloud, labels)