# FiftyOne + Crops3D

This notebook demonstrates how to:
1. Install and set up FiftyOne
2. Load the crops3d dataset from Hugging Face
3. Compute geometric embeddings from 3D point clouds
4. Visualize embeddings and perform similarity search

**Dataset**: crops3d - 1,180 3D point cloud scans of agricultural crops

## 1. Installation and Setup

Install FiftyOne and required libraries for 3D point cloud processing.

In [None]:
# Install FiftyOne and dependencies
!pip install fiftyone

# Install Hugging Face Hub for dataset download
!pip install huggingface_hub

# Install 3D processing libraries
!pip install open3d trimesh

# Install visualization dependencies
!pip install umap-learn scikit-learn

## 2. Download and Load the crops3d Dataset

The crops3d dataset contains PLY files (point clouds) of various agricultural crops.

In [None]:
import json
import os
from huggingface_hub import snapshot_download, login
import fiftyone as fo

# Hugging Face access token (optional). Leave as None for anonymous access;
# authenticating helps avoid rate limits.
HF_TOKEN = None  # Replace with your token or set as environment variable

# Only authenticate when a token was actually provided.
if HF_TOKEN:
    login(token=HF_TOKEN)

# Announce the download up front: the snapshot is large.
print(
    "Downloading crops3d dataset from Hugging Face...",
    "This may take a while as the dataset contains ~2.3GB of 3D data...\n",
    sep="\n",
)

# Fetch the full dataset repository snapshot into a local folder.
download_options = dict(
    repo_id="Voxel51/crops3d",
    repo_type="dataset",
    local_dir="./crops3d_data",
)
snapshot_download(**download_options)

print("Download completed!")

In [None]:
# Load the downloaded snapshot into FiftyOne.
# `overwrite=True` makes this cell safe to re-run: without it, FiftyOne raises
# an error when a dataset named "crops3d" already exists from a previous run.
# NOTE: this replaces any existing FiftyOne dataset with the same name.
dataset = fo.Dataset.from_dir(
    dataset_dir="./crops3d_data",
    dataset_type=fo.types.FiftyOneDataset,
    name="crops3d",
    overwrite=True,
)

# Quick sanity summary of what was loaded.
print(f"Dataset loaded: {dataset.name}")
print(f"Number of samples: {len(dataset)}")
print(f"Media type: {dataset.media_type}")

In [None]:
# Fix PLY paths to be absolute (required for FiftyOne 3D visualization)
def update_dataset_ply_paths(dataset):
    """Rewrite PLY references inside each sample's .fo3d scene file as absolute paths.

    For every sample, the JSON scene at ``sample.filepath`` is loaded, the
    ``plyPath`` of every ``PlyMesh`` node (at any nesting depth under
    ``children``) is joined onto the absolute directory of the scene file, and
    the file is written back in place. Files in which nothing changed are not
    rewritten.

    ``plyPath`` values that are already absolute are left as they are
    (``os.path.join`` returns an absolute second argument unchanged), so
    calling this function repeatedly is safe.

    Args:
        dataset: Iterable of samples, each exposing a ``filepath`` attribute
            that points to a JSON ``.fo3d`` scene file.
    """
    def _absolutize(nodes, base_dir):
        """Make ``plyPath`` absolute on PlyMesh nodes, recursing into children.

        Returns True if at least one path was modified.
        """
        changed = False
        for node in nodes or []:
            if not isinstance(node, dict):
                continue
            if node.get('_type') == 'PlyMesh' and 'plyPath' in node:
                absolute_path = os.path.join(base_dir, node['plyPath'])
                if absolute_path != node['plyPath']:
                    node['plyPath'] = absolute_path
                    changed = True
            # Container nodes may hold further meshes in their own `children`.
            if _absolutize(node.get('children'), base_dir):
                changed = True
        return changed

    for sample in dataset:
        fo3d_filepath = sample.filepath
        # abspath() guarantees absolute results even if filepath is relative.
        fo3d_directory = os.path.abspath(os.path.dirname(fo3d_filepath))

        with open(fo3d_filepath, 'r', encoding='utf-8') as f:
            fo3d_data = json.load(f)

        # Only touch the file on disk when a path was actually rewritten.
        if _absolutize(fo3d_data.get('children'), fo3d_directory):
            with open(fo3d_filepath, 'w', encoding='utf-8') as f:
                json.dump(fo3d_data, f, indent=2)

# Apply the path fix to every sample; this rewrites the .fo3d scene files on disk.
print("Updating PLY paths...")
update_dataset_ply_paths(dataset)
print("Paths updated!")

## 3. Computing Embeddings from 3D Point Clouds

We'll extract geometric and color features from the PLY files to create meaningful embeddings.

In [None]:
# NOTE: This cell only defines the feature extractor. Applying it to every sample (in the next cell) might take a while.

import numpy as np
import open3d as o3d
from pathlib import Path
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

# Number of values contributed by each feature group; their sum is the
# embedding dimension.
_N_SPATIAL_FEATURES = 17
_N_COLOR_FEATURES = 11
_N_SHAPE_FEATURES = 2
_N_FEATURES = _N_SPATIAL_FEATURES + _N_COLOR_FEATURES + _N_SHAPE_FEATURES  # 30


def _spatial_features(pcd, points):
    """Return the 17 spatial descriptors (height, spread, centroid, volume, density, radius)."""
    if len(points) == 0:
        return [0] * _N_SPATIAL_FEATURES

    x_coords = points[:, 0]
    y_coords = points[:, 1]
    z_coords = points[:, 2]
    centroid = np.mean(points, axis=0)

    feats = [
        # Height statistics (Z-axis)
        np.max(z_coords),             # Max height (plant height)
        np.min(z_coords),             # Min height
        np.mean(z_coords),            # Mean height
        np.std(z_coords),             # Height variation
        np.percentile(z_coords, 90),  # 90th percentile of height
        # Horizontal spread (canopy width)
        np.ptp(x_coords),             # X range (width)
        np.ptp(y_coords),             # Y range (depth)
        np.std(x_coords),             # X spread
        np.std(y_coords),             # Y spread
    ]
    feats.extend(centroid)            # Centroid (x, y, z)

    # Convex-hull volume on a log scale. A failure here (Open3D is expected to
    # raise RuntimeError for degenerate or non-watertight hulls; verify against
    # the installed version) only zeroes this single feature instead of
    # discarding the whole feature vector.
    log_hull_volume = 0
    if len(points) > 10:
        try:
            hull, _ = pcd.compute_convex_hull()
            log_hull_volume = np.log1p(hull.get_volume())
        except RuntimeError:
            log_hull_volume = 0
    feats.append(log_hull_volume)

    # Point count and density within the axis-aligned bounding box
    feats.append(len(points))
    bbox_volume = np.ptp(x_coords) * np.ptp(y_coords) * np.ptp(z_coords)
    feats.append(len(points) / bbox_volume if bbox_volume > 0 else 0)

    # Distance-from-centroid statistics
    distances = np.linalg.norm(points - centroid, axis=1)
    feats.extend([
        np.mean(distances),  # Mean radius
        np.std(distances),   # Radius variation
    ])
    return feats


def _color_features(colors):
    """Return the 11 color descriptors (RGB stats, greenness, NGRDI, entropy, dominant channel)."""
    if colors is None or len(colors) == 0:
        return [0] * _N_COLOR_FEATURES

    r, g, b = colors[:, 0], colors[:, 1], colors[:, 2]

    # Per-channel mean and standard deviation
    feats = [
        np.mean(r), np.mean(g), np.mean(b),
        np.std(r), np.std(g), np.std(b),
    ]

    # Greenness index: green relative to the average of red and blue
    greenness = g - 0.5 * (r + b)
    feats.extend([np.mean(greenness), np.std(greenness)])

    # Normalized Green-Red Difference Index; the epsilon avoids division by zero
    with np.errstate(divide='ignore', invalid='ignore'):
        ngrd = np.nan_to_num((g - r) / (g + r + 1e-10))
    feats.append(np.mean(ngrd))

    # Color entropy over an 8x8x8 RGB histogram (diversity of colors)
    hist, _ = np.histogramdd(colors, bins=8)
    hist = hist.flatten() + 1e-10  # avoid log(0) for empty bins
    hist = hist / hist.sum()
    feats.append(-np.sum(hist * np.log(hist)))

    # Dominant color channel: 0=red, 1=green, 2=blue
    feats.append(np.argmax([np.mean(r), np.mean(g), np.mean(b)]))
    return feats


def _shape_features(pcd, n_points):
    """Return the 2 normal-based shape descriptors (z-normal spread, mean |z-normal|)."""
    if n_points <= 100:
        return [0] * _N_SHAPE_FEATURES

    pcd.estimate_normals()
    normals = np.asarray(pcd.normals)
    if len(normals) == 0:
        return [0] * _N_SHAPE_FEATURES

    z_normals = normals[:, 2]
    return [
        np.std(z_normals),           # Normal variation (surface roughness)
        np.mean(np.abs(z_normals)),  # Planarity (mean absolute z-component)
    ]


def compute_point_cloud_features(ply_path):
    """
    Compute a fixed-length feature vector for one PLY point cloud.

    Features extracted (in this order):
    - Spatial: height, width, centroid, volume, density, radius (17 features)
    - Color: RGB statistics, vegetation indices, entropy (11 features)
    - Shape: surface-normal statistics (2 features)

    Feature groups that cannot be computed (no points, no colors, too few
    points for normals) are filled with zeros, so the layout is always the same.
    Non-finite values (NaN/inf) are replaced by 0 so downstream scaling and
    dimensionality reduction do not fail on them.

    Args:
        ply_path: Path (str or path-like) of the PLY file to read.

    Returns:
        numpy float32 array of 30 features. If any unexpected error occurs the
        error is printed and an all-zero vector is returned.
    """
    try:
        # Load point cloud
        pcd = o3d.io.read_point_cloud(str(ply_path))
        points = np.asarray(pcd.points)
        colors = np.asarray(pcd.colors) if pcd.has_colors() else None

        features = (
            _spatial_features(pcd, points)
            + _color_features(colors)
            + _shape_features(pcd, len(points))
        )

        feature_vector = np.array(features, dtype=np.float32)
        # Guard against NaN/inf (e.g. corrupt coordinates or float32 overflow).
        return np.nan_to_num(feature_vector, nan=0.0, posinf=0.0, neginf=0.0)

    except Exception as e:
        # Best-effort: one bad file must not abort processing of the dataset.
        print(f"Error processing {ply_path}: {e}")
        return np.zeros(_N_FEATURES, dtype=np.float32)

In [None]:
# Build one feature vector per sample, keeping sample ids in the same order.
print("Computing geometric features for each point cloud...")
print("This may take a while for 1,180 samples...\n")

embeddings = []
sample_ids = []

# Wrap the sample iterator in a progress bar sized to the dataset.
progress = tqdm(dataset.iter_samples(), total=len(dataset), desc="Processing")
for sample in progress:
    scene_path = sample.filepath

    # The point cloud is expected next to the scene file with a .ply suffix;
    # fall back to a plain extension substitution if that file is missing.
    ply_path = Path(scene_path).with_suffix('.ply')
    if not ply_path.exists():
        ply_path = Path(scene_path.replace('.fo3d', '.ply'))

    # Extract the features and record them together with the sample id.
    features = compute_point_cloud_features(ply_path)
    embeddings.append(features)
    sample_ids.append(sample.id)

# Stack the per-sample vectors into a single 2D array.
embeddings = np.array(embeddings)
print(f"\nComputed {embeddings.shape[0]} embeddings of dimension {embeddings.shape[1]}")

## 4. Add Embeddings to FiftyOne for Visualization

In [None]:
import fiftyone.brain as fob
from sklearn.preprocessing import StandardScaler

# Standardize every feature (zero mean, unit variance) so that features with
# large raw values, such as the point count, do not dominate distances.
scaler = StandardScaler()
embeddings_normalized = scaler.fit_transform(embeddings)

# Add a 2D UMAP visualization, stored under the brain key "geometric_embeddings"
print("Computing UMAP visualization...")
fob.compute_visualization(
    dataset,
    embeddings=embeddings_normalized,
    brain_key="geometric_embeddings",
    method="umap",
    num_dims=2,
    verbose=False,
    seed=42  # fixed seed for a reproducible layout
)

# Add a similarity index, stored under the brain key "geometric_similarity"
print("Building similarity index...")
fob.compute_similarity(
    dataset,
    embeddings=embeddings_normalized,
    brain_key="geometric_similarity",
    metric="euclidean"
)

# The status message previously contained a mis-encoded (mojibake) emoji.
print("✅ Embeddings added to dataset!")

## 5. Launch FiftyOne App for Visualization

In [None]:
# Launch the FiftyOne App on port 5151 and keep the session handle
session = fo.launch_app(dataset, port=5151)

# Point the user at the brain runs created earlier (the emoji and bullet
# characters were previously mis-encoded).
print("🚀 FiftyOne App launched!")
print("\n📊 Available in the Embeddings panel:")
print("  • geometric_embeddings - UMAP visualization")
print("  • geometric_similarity - For finding similar crops")