# Exploration of Point Cloud Data

This notebook explores and visualizes the point cloud data and labels from the training dataset.

In [None]:
import h5py as h5
import numpy as np
import plotly.graph_objects as go

# Read the full point cloud array from its HDF5 file into memory
with h5.File('../data/train_xyze_1e4.h5', 'r') as f:
    pts = f['data'][:]

# Read the full label array from its own HDF5 file
with h5.File('../data/train_label_1e4.h5', 'r') as f:
    labels = f['labels'][:]

# Index of the entry to plot; edit this to look at a different one
entry = 0
print('Visualizing entry', entry)

# Columns 0, 1 and 2 of the selected entry are plotted as x, y and z;
# marker colors are driven by that entry's labels.
cloud = pts[entry]
marker_style = dict(size=2, color=labels[entry], colorscale='Viridis')
d = go.Scatter3d(
    x=cloud[:, 0],
    y=cloud[:, 1],
    z=cloud[:, 2],
    mode='markers',
    marker=marker_style,
)

# Wrap the trace in a figure, label the axes, and render it
fig = go.Figure(data=[d])
fig.update_layout(
    title='3D Scatter Plot of Point Cloud',
    width=1200,
    height=800,
    scene=dict(xaxis_title='X', yaxis_title='Y', zaxis_title='Z'),
)
fig.show()

## Summary Statistics

Let's inspect the shapes of the point cloud and label arrays and list the unique label values.

In [None]:
# Report the dimensions of the loaded point cloud and label arrays
print('Point cloud data shape:', pts.shape)
print('Labels shape:', labels.shape)

# Collect the distinct label values (np.unique returns a flat 1-D array),
# then report the values themselves and how many there are.
unique_labels = np.unique(labels)
print('Unique labels:', unique_labels)
print('Number of unique labels:', unique_labels.size)