In [29]:
# Step 1: Prepare Your Data
import os
import pandas as pd

# List STL files in your dataset folder
dataset_folder = r'D:\research\small_big_flat_free\dataset- check'
stl_files = [f for f in os.listdir(dataset_folder) if f.lower().endswith('.stl')]

# Read manual labels from CSV
labels_df = pd.read_csv(r'D:\research\small_big_flat_free\label.csv')
print(labels_df.head())

     Model ID       Model Type
0   41106.stl       FLAT MODEL
1  137749.stl  FREE-FORM MODEL
2   32770.stl  FREE-FORM MODEL
3   34783.stl  FREE-FORM MODEL
4   34784.stl  FREE-FORM MODEL


In [30]:
# Check unique label values in the updated label CSV
print('Unique label values:', labels_df.iloc[:,1].unique())

Unique label values: ['FLAT MODEL' 'FREE-FORM MODEL' nan]


In [31]:
# Step 2: Convert STL Files to Point Clouds
import trimesh
import numpy as np
import os

def stl_to_pointcloud(stl_path, n_points=1024):
    mesh = trimesh.load(stl_path)
    points, _ = trimesh.sample.sample_surface(mesh, n_points)
    return points

# Create the pointclouds directory if it doesn't exist
os.makedirs('pointclouds', exist_ok=True)

for stl_file in stl_files:
    stl_path = os.path.join(dataset_folder, stl_file)
    points = stl_to_pointcloud(stl_path)
    np.save(f"pointclouds/{os.path.splitext(stl_file)[0]}.npy", points)

In [33]:
# Step 3: Preprocess Point Clouds (Canonicalization)
from sklearn.decomposition import PCA
import os

def canonicalize_pointcloud(points):
    # Center
    centered = points - np.mean(points, axis=0)
    # PCA alignment
    pca = PCA(n_components=3)
    aligned = pca.fit_transform(centered)
    # Sort by coordinates for consistency
    sorted_points = aligned[np.lexsort((aligned[:,2], aligned[:,1], aligned[:,0]))]
    return sorted_points

import glob
os.makedirs('canonical', exist_ok=True)
for npy_file in glob.glob('pointclouds/*.npy'):
    points = np.load(npy_file)
    canonical = canonicalize_pointcloud(points)
    np.save(f"canonical/{os.path.basename(npy_file)}", canonical)

In [35]:
# Step 4: Match Point Clouds with Labels
canonical_folder = 'canonical'
matched = []
print('Label CSV columns:', labels_df.columns)  # Debug: print columns to find correct name
# Update 'model' to the actual column name if needed, e.g., 'filename' or similar
for npy_file in glob.glob(f'{canonical_folder}/*.npy'):
    model_name = os.path.splitext(os.path.basename(npy_file))[0] + '.stl'  # Add .stl to match CSV
    # Change 'model' to the correct column name below
    label_row = labels_df[labels_df.iloc[:,0] == model_name]  # Use first column for matching
    if not label_row.empty:
        label = label_row.iloc[0,1]  # Use second column for label
        matched.append((npy_file, label))
print(matched[:5])

Label CSV columns: Index(['Model ID', 'Model Type'], dtype='object')
[('canonical\\103742.npy', 'FLAT MODEL'), ('canonical\\103815.npy', 'FLAT MODEL'), ('canonical\\103817.npy', 'FLAT MODEL'), ('canonical\\103821.npy', 'FLAT MODEL'), ('canonical\\103824.npy', 'FREE-FORM MODEL')]


In [36]:
# Check label distribution in matched dataset
from collections import Counter
labels = [label for _, label in matched]
label_counts = Counter(labels)
print('Label distribution:', label_counts)

Label distribution: Counter({'FREE-FORM MODEL': 124, 'FLAT MODEL': 121})


In [37]:
# Step 5: Build the CanonNet Model (MLP)
!pip install torch
import torch
import torch.nn as nn

class CanonNet(nn.Module):
    def __init__(self, input_dim, hidden_dim=128, num_classes=2):
        super().__init__()
        self.mlp = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, num_classes)
        )
    def forward(self, x):
        return self.mlp(x)

# Example: input_dim = 1024*3 if using 1024 points, each with 3 coordinates
model = CanonNet(input_dim=1024*3)



In [39]:
# Step 6: Train the Model
from torch.utils.data import Dataset, DataLoader

class PointCloudDataset(Dataset):
    def __init__(self, matched):
        self.data = matched
    def __len__(self):
        return len(self.data)
    def __getitem__(self, idx):
        npy_file, label = self.data[idx]
        points = np.load(npy_file).flatten()
        label_idx = 0 if label == 'flat' else 1
        return torch.tensor(points, dtype=torch.float32), torch.tensor(label_idx)

dataset = PointCloudDataset(matched)

if len(dataset) == 0:
    print('No matched data found. Please check your labels and point cloud files.')
else:
    train_loader = DataLoader(dataset, batch_size=16, shuffle=True)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()
    for epoch in range(10):
        for x, y in train_loader:
            out = model(x)
            loss = criterion(out, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

Epoch 1, Loss: 0.0000
Epoch 2, Loss: 0.0000
Epoch 3, Loss: 0.0000
Epoch 4, Loss: 0.0000
Epoch 5, Loss: 0.0000
Epoch 6, Loss: 0.0000
Epoch 4, Loss: 0.0000
Epoch 5, Loss: 0.0000
Epoch 6, Loss: 0.0000
Epoch 7, Loss: 0.0000
Epoch 8, Loss: 0.0000
Epoch 9, Loss: 0.0000
Epoch 7, Loss: 0.0000
Epoch 8, Loss: 0.0000
Epoch 9, Loss: 0.0000
Epoch 10, Loss: 0.0000
Epoch 10, Loss: 0.0000


In [40]:
# Step 7: Evaluate the Model
from sklearn.metrics import accuracy_score, confusion_matrix

# Split matched data for validation (simple split)
val_data = matched[:len(matched)//5]
val_dataset = PointCloudDataset(val_data)
val_loader = DataLoader(val_dataset, batch_size=16)

all_preds = []
all_labels = []
model.eval()
with torch.no_grad():
    for x, y in val_loader:
        out = model(x)
        preds = torch.argmax(out, dim=1)
        all_preds.extend(preds.numpy())
        all_labels.extend(y.numpy())

print("Accuracy:", accuracy_score(all_labels, all_preds))
print("Confusion Matrix:\n", confusion_matrix(all_labels, all_preds))

Accuracy: 1.0
Confusion Matrix:
 [[49]]




In [45]:
# Step 8: Use the Model for New Data
def classify_new_stl(stl_path, model):
    points = stl_to_pointcloud(stl_path)
    canonical = canonicalize_pointcloud(points)
    x = torch.tensor(canonical.flatten(), dtype=torch.float32).unsqueeze(0)
    out = model(x)
    pred = torch.argmax(out, dim=1).item()
    return 'flat' if pred == 0 else 'free form'

# Example usage: classify one STL file from raw_meshes folder
single_stl = r'D:\research\small_big_flat_free\raw_meshes\52139.stl'  # Change to your STL filename
result = classify_new_stl(single_stl, model)
print(f"{single_stl}: {result}")

D:\research\small_big_flat_free\raw_meshes\52139.stl: free form
