In [34]:
# Cell 1: Imports & Config
import os
import time
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import (
    accuracy_score, balanced_accuracy_score,
    f1_score, precision_score, confusion_matrix, classification_report
)
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from pathlib import Path

import sys
sys.path.append("../Utils")
from models  import DGCNN  #  DGCNN & PointNet implementation
import configs
# Paths

# Normalise every location to pathlib.Path. Later cells join with the `/`
# operator (MODEL_DIR / "best_model.pth"), which only works on Path objects,
# so this keeps the notebook working whether `configs` stores plain strings
# or Path instances. Path(Path(...)) is a no-op, so nothing changes if they
# already are paths.
TRAIN_DIR = Path(configs.TRAIN_DIR)
TEST_DIR = Path(configs.TEST_DIR)
MODEL_DIR = Path(configs.MODEL_DIR)


# Config class
class CFG:
    """Static namespace holding the hyper-parameters shared by the cells below."""

    # --- runtime -----------------------------------------------------------
    # Use the GPU when this torch build can see one, otherwise fall back to CPU.
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

    # --- data --------------------------------------------------------------
    num_points = 1024  # points kept per cloud (passed to PointCloudDataset)
    batch_size = 16
    # Every entry of TRAIN_DIR is counted here, files as well as directories.
    num_classes = len(os.listdir(TRAIN_DIR))

    # --- model -------------------------------------------------------------
    # NOTE(review): presumably read by DGCNN through the `args` object it
    # receives; confirm against Utils/models.py.
    k = 20
    emb_dims = 1024
    dropout = 0.5

    # --- optimisation ------------------------------------------------------
    epochs = 20
    lr = 1e-3


In [None]:
# Verify and setup environment
import random

# 1. Seed every random number generator the notebook relies on
for seed_rng in (torch.manual_seed, torch.cuda.manual_seed, np.random.seed, random.seed):
    seed_rng(42)

# 2. Verify paths: the two data directories must already exist, whereas the
#    model directory is always created on demand.
for path_name, path in (
    ("TRAIN_DIR", TRAIN_DIR),
    ("TEST_DIR", TEST_DIR),
    ("MODEL_DIR", MODEL_DIR),
):
    path = Path(path)
    if path_name == "MODEL_DIR":
        os.makedirs(path, exist_ok=True)
    elif not path.exists():
        raise ValueError(f"{path_name} does not exist: {path}")
    print(f"‚úì {path_name}: {path}")

# 3. Report the compute device (and free cached GPU memory when there is one)
if not torch.cuda.is_available():
    print("‚ö†Ô∏è Using CPU - training might be slow")
else:
    torch.cuda.empty_cache()
    print(f"‚úì Using GPU: {torch.cuda.get_device_name()}")
    print(f"‚úì GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

In [32]:
# Cell 2: Dataset + Utils
def load_point_file_safe(file_path):
    """Load a numeric point-cloud text file, trying several delimiters.

    The delimiters are tried in order: any whitespace (``None``), a single
    space, a comma, and a tab. The first one that parses wins.

    Args:
        file_path: Path of the text file to read (anything ``np.loadtxt`` accepts).

    Returns:
        A 2-D ``numpy`` array with one row per line of the file. A file holding
        a single line yields shape ``(1, C)``; a file holding a single column
        yields shape ``(N, 1)``.

    Raises:
        ValueError: If no delimiter parses the file (the last underlying error
            is attached as ``__cause__``), or if the file contains no data.
    """
    last_error = None
    for delim in [None, " ", ",", "\t"]:
        try:
            # ndmin=2 keeps the (rows, columns) layout even for one-line or
            # one-column files, so callers can always index pc.shape[1].
            pc = np.loadtxt(file_path, delimiter=delim, ndmin=2)
        except Exception as err:
            # Remember why this attempt failed, then try the next delimiter.
            last_error = err
            continue
        if pc.size == 0:
            # An empty file parses "successfully" into an empty array; reject
            # it here instead of letting NaNs appear further down the pipeline.
            raise ValueError(f"No points found in {file_path}")
        return pc
    raise ValueError(f"Could not read {file_path}") from last_error

def normalize_unit(pc):
    """Centre a point cloud on its centroid and scale it to unit radius.

    Args:
        pc: Array of shape ``(N, D)``, one point per row.

    Returns:
        A new array of the same shape whose centroid is the origin and whose
        farthest point lies at distance 1. When every point coincides (which
        includes the single-point case) the radius is 0, so the centred cloud
        (all zeros) is returned unscaled instead of dividing by zero.
    """
    centred = pc - np.mean(pc, axis=0)
    radius = np.max(np.sqrt(np.sum(centred ** 2, axis=1)))
    if radius == 0:
        # Degenerate cloud: dividing would produce NaNs that poison the batch.
        return centred
    return centred / radius

def farthest_point_sampling(pc, n_points):
    """Pick ``n_points`` rows of ``pc`` by iterative farthest-point selection.

    Clouds that hold ``n_points`` rows or fewer are not sampled; they are
    returned with all-zero rows appended until they reach ``n_points`` rows.
    Otherwise the walk starts from a random row (drawn from NumPy's global
    RNG) and repeatedly adds the row farthest from everything chosen so far.
    """
    total, _ = pc.shape
    missing = n_points - total
    if missing >= 0:
        # Too few points to sample from: zero-pad at the end instead.
        return np.pad(pc, ((0, missing), (0, 0)))

    chosen = np.zeros((n_points,))
    # Squared distance from each row to its nearest already-chosen row.
    nearest_sq = np.full(total, 1e10)
    current = np.random.randint(0, total)
    for slot in range(n_points):
        chosen[slot] = current
        offsets = pc - pc[current]
        nearest_sq = np.minimum(nearest_sq, np.sum(offsets ** 2, axis=1))
        current = np.argmax(nearest_sq)
    return pc[chosen.astype(np.int32)]

class PointCloudDataset(Dataset):
    """Folder-per-class point-cloud dataset.

    ``root_dir`` is expected to contain one sub-directory per class. Class ids
    are assigned by the sorted position of each sub-directory among the
    visible sub-directories only, so stray files (``.DS_Store``, README, ...)
    or hidden folders cannot shift the ids or make them disagree between two
    roots that hold the same class folders.
    """

    def __init__(self, root_dir, num_points=1024):
        """Index every sample file under ``root_dir``.

        Args:
            root_dir: Directory holding one sub-directory per class.
            num_points: Number of points each returned cloud is sampled/padded to.
        """
        self.root_dir = Path(root_dir)
        self.num_points = num_points
        self.files, self.labels = [], []
        class_dirs = sorted(
            d for d in self.root_dir.iterdir()
            if d.is_dir() and not d.name.startswith(".")
        )
        for label, class_dir in enumerate(class_dirs):
            # Sorted so that sample order (and therefore any seeded split)
            # does not depend on the file system's listing order.
            for f in sorted(class_dir.glob("*.*")):
                # "*.*" also matches hidden files and sub-directories with a
                # dot in their name; neither is a point cloud.
                if f.is_file() and not f.name.startswith("."):
                    self.files.append(f)
                    self.labels.append(label)

    def __len__(self):
        """Return the number of indexed sample files."""
        return len(self.files)

    def __getitem__(self, idx):
        """Return ``(points, label)`` for sample ``idx``.

        ``points`` is a float32 tensor of shape ``[3, num_points]``. If the
        file cannot be loaded or processed, a warning is issued and an all-zero
        cloud is returned with the sample's real label.

        Raises:
            IndexError: If ``idx`` is out of range.
        """
        # Looked up outside the try block so that an out-of-range index raises
        # IndexError instead of being converted into a fake sample.
        label = int(self.labels[idx])
        try:
            pc = load_point_file_safe(str(self.files[idx]))

            # Keep only XYZ (first 3 columns); fewer than 3 cannot form [3, N].
            if pc.shape[1] < 3:
                raise ValueError(f"expected at least 3 columns, got {pc.shape[1]}")
            pc = pc[:, :3]

            pc = normalize_unit(pc)
            pc = farthest_point_sampling(pc, self.num_points)
            return torch.from_numpy(pc.T.astype(np.float32)), label  # [3, N]
        except Exception as e:
            warnings.warn(f"‚ö†Ô∏è Dataset error idx={idx}: {e}")
            # Best-effort fallback: placeholder cloud, but with the true label.
            return torch.zeros((3, self.num_points), dtype=torch.float32), label


In [None]:
# Cell 3: Dataloaders
from torch.utils.data import random_split

# One dataset object per on-disk split
train_dataset = PointCloudDataset(TRAIN_DIR, num_points=CFG.num_points)
test_dataset = PointCloudDataset(TEST_DIR, num_points=CFG.num_points)

# Hold out 10% of the training files for validation, using a seeded generator
val_size = int(0.1 * len(train_dataset))
train_size = len(train_dataset) - val_size
split_rng = torch.Generator().manual_seed(42)
train_subset, val_subset = random_split(train_dataset, [train_size, val_size], generator=split_rng)

# Settings shared by all three loaders: at most 4 workers, and pinned host
# memory only when a GPU is available.
num_workers = min(4, os.cpu_count() or 1)
loader_kwargs = dict(
    batch_size=CFG.batch_size,
    num_workers=num_workers,
    pin_memory=torch.cuda.is_available(),
)

# Only the training loader reshuffles; evaluation order stays fixed.
train_loader = DataLoader(train_subset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_subset, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

print("Dataset sizes:")
print(f"Training:   {len(train_subset):5d} samples")
print(f"Validation: {len(val_subset):5d} samples")
print(f"Test:       {len(test_dataset):5d} samples")
print(f"Using {num_workers} workers for data loading")

Train samples: 557, Test samples: 134


In [35]:
# Cell 4: Model setup
# The config class itself is handed to DGCNN as its `args` object.
args = CFG

# Build the network with one output per class, then move it to the target device.
model = DGCNN(args, output_channels=CFG.num_classes)
model = model.to(CFG.device)

# Adam over all model parameters, trained against a cross-entropy objective.
optimizer = optim.Adam(params=model.parameters(), lr=CFG.lr)
criterion = nn.CrossEntropyLoss()


In [None]:
# Verify setup
print(f"Device being used: {CFG.device}")
print(f"Model directory: {MODEL_DIR}")
print(f"Training samples per batch: {CFG.batch_size}")
print(f"Number of training batches: {len(train_loader)}")
print(f"Number of test batches: {len(test_loader)}")
# Report the subset that train_loader actually iterates over; train_dataset
# still includes the samples held out for validation.
print(f"Total training samples: {len(train_subset)}")
print(f"Total validation samples: {len(val_subset)}")
print(f"Total test samples: {len(test_dataset)}")
print(f"Number of classes: {CFG.num_classes}")

# Smoke-test a single forward pass. It runs in eval mode under no_grad so the
# check builds no autograd graph and, if the model contains dropout or
# batch-norm layers, neither consumes random numbers nor updates running
# statistics before the real training starts. The previous mode is restored.
was_training = model.training
model.eval()
try:
    with torch.no_grad():
        for points, labels in train_loader:
            points = points.to(CFG.device)
            try:
                output = model(points)
                print(f"Forward pass successful. Output shape: {output.shape}")
            except Exception as e:
                print(f"Forward pass failed: {e}")
            break  # Only test one batch
finally:
    model.train(was_training)

In [36]:
# Cell 5: Training & Evaluation functions
def train_one_epoch(model, loader, optimizer, criterion, device):
    """Run one optimisation pass over ``loader``.

    Puts the model in training mode, takes one optimiser step per batch and
    returns ``(mean loss per sample, accuracy)`` accumulated over the pass.
    """
    model.train()
    loss_sum = 0
    hits = 0
    seen = 0
    for points, labels in loader:
        points = points.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        logits = model(points)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()

        # The batch loss is scaled by the batch size before summing, so the
        # final division yields a per-sample average.
        loss_sum += batch_loss.item() * points.size(0)
        hits += (logits.argmax(1) == labels).sum().item()
        seen += labels.size(0)
    return loss_sum / seen, hits / seen

def evaluate(model, loader, criterion, device):
    """Score ``model`` on ``loader`` without computing gradients.

    Switches the model to eval mode and returns a 4-tuple:
    ``(mean loss per sample, accuracy, true labels, predicted labels)``,
    the last two as NumPy arrays in loader order.
    """
    model.eval()
    loss_sum, hits, seen = 0, 0, 0
    all_labels, all_preds = [], []
    with torch.no_grad():
        for points, labels in loader:
            points = points.to(device)
            labels = labels.to(device)

            logits = model(points)
            predicted = logits.argmax(1)

            loss_sum += criterion(logits, labels).item() * points.size(0)
            hits += predicted.eq(labels).sum().item()
            seen += labels.size(0)

            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(predicted.cpu().numpy())

    mean_loss = loss_sum / seen
    accuracy = hits / seen
    return mean_loss, accuracy, np.array(all_labels), np.array(all_preds)


In [38]:
# Cell 6: Training Loop
# Initialize tracking variables
best_val_acc = 0
best_epoch = None  # 0-based epoch of the checkpoint written during THIS run
train_losses, val_losses = [], []
train_accs, val_accs = [], []
start_time = time.time()
patience = 5  # early stopping patience
patience_counter = 0

# Path(...) makes the join work even when MODEL_DIR is a plain string, and the
# mkdir lets this cell run even if the directory was never created.
best_model_path = Path(MODEL_DIR) / "best_model.pth"
best_model_path.parent.mkdir(parents=True, exist_ok=True)

try:
    for epoch in range(CFG.epochs):
        # Training phase
        tr_loss, tr_acc = train_one_epoch(model, train_loader, optimizer, criterion, CFG.device)

        # Validation phase (val_labels / val_preds always hold the most recent epoch)
        val_loss, val_acc, val_labels, val_preds = evaluate(model, val_loader, criterion, CFG.device)

        # Store metrics
        train_losses.append(tr_loss)
        train_accs.append(tr_acc)
        val_losses.append(val_loss)
        val_accs.append(val_acc)

        # Save best model
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_epoch = epoch
            patience_counter = 0
            checkpoint = {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_acc': val_acc,
                'val_loss': val_loss
            }
            torch.save(checkpoint, str(best_model_path))
            print(f"‚úì Saved best model with validation accuracy: {val_acc:.4f}")
        else:
            patience_counter += 1

        # Print progress
        print(f"Epoch {epoch+1}/{CFG.epochs} | "
              f"Train Loss: {tr_loss:.4f}, Train Acc: {tr_acc:.4f} | "
              f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

        # Early stopping
        if patience_counter >= patience:
            print(f"‚ö†Ô∏è Early stopping triggered after {patience} epochs without improvement")
            break

    # Training completion
    exec_time = time.time() - start_time
    print(f"\n‚úÖ Training completed in {exec_time:.2f} seconds")
    print(f"Best validation accuracy: {best_val_acc:.4f}")

    # Final test evaluation. Restore the best-validation weights first:
    # otherwise the test score would describe whatever the last epoch left
    # behind (after early stopping, `patience` epochs past the best one).
    # Only a checkpoint written in this run is loaded, never a stale file
    # from a previous session. Note that `model` keeps these weights afterwards.
    if best_epoch is not None:
        best_checkpoint = torch.load(str(best_model_path), map_location=CFG.device)
        model.load_state_dict(best_checkpoint['model_state_dict'])
        print(f"Restored best checkpoint (epoch {best_epoch + 1}) for test evaluation")
    test_loss, test_acc, test_labels, test_preds = evaluate(model, test_loader, criterion, CFG.device)
    print(f"Final test accuracy: {test_acc:.4f}")

except Exception as e:
    # Print a short summary, then re-raise so the full traceback is still shown.
    print(f"‚ùå Error during training: {str(e)}")
    raise

‚ùå Error during training: Torch not compiled with CUDA enabled


AssertionError: Torch not compiled with CUDA enabled

In [None]:
# Cell 7: Metrics
# val_labels / val_preds come from the validation pass of the last epoch that
# the training loop executed.
overall_acc = accuracy_score(val_labels, val_preds)
balanced_acc = balanced_accuracy_score(val_labels, val_preds)
# zero_division=0 keeps the score that the default would produce for classes
# that were never predicted, without emitting an undefined-metric warning.
f1 = f1_score(val_labels, val_preds, average="weighted", zero_division=0)
# Passing the full label list pins entry i of the result to class id i even
# when some class is absent from the (small) validation split; without it the
# array only covers classes that happen to appear, so positions shift.
prec_per_class = precision_score(
    val_labels,
    val_preds,
    labels=np.arange(CFG.num_classes),
    average=None,
    zero_division=0,
)

num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print("üìä Metrics:")
print(f"Overall Accuracy: {overall_acc:.4f}")
print(f"Balanced Accuracy: {balanced_acc:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"Precision per class: {prec_per_class}")
print(f"Execution Time (s): {exec_time:.2f}")
print(f"Trainable Parameters: {num_params}")


In [None]:
# Cell 8: Confusion Matrix
# Fix the label set so the matrix is always num_classes x num_classes and
# row/column i is class id i, even if a class is missing from the validation
# split or was never predicted.
cm = confusion_matrix(val_labels, val_preds, labels=np.arange(CFG.num_classes))

# Explicit figure/axes objects instead of the implicit pyplot state.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
plt.show()


In [None]:
# Cell 9: Training Curves
plt.figure(figsize=(12,5))

# One panel per metric; each panel overlays the training and validation series
# recorded per epoch by the training loop.
curve_panels = (
    ("Loss Curve", (("Train Loss", train_losses), ("Val Loss", val_losses))),
    ("Accuracy Curve", (("Train Acc", train_accs), ("Val Acc", val_accs))),
)
for panel_index, (panel_title, panel_series) in enumerate(curve_panels, start=1):
    plt.subplot(1, 2, panel_index)
    for series_label, series_values in panel_series:
        plt.plot(series_values, label=series_label)
    plt.legend()
    plt.title(panel_title)

plt.show()
