**Load the HDF5 File**

In [44]:
import os
import numpy as np
import torch
from torch.utils.data import random_split
from torch_geometric.loader import DataLoader
from pc_dataset import H5PCDataset
import torch_geometric.transforms as T
from torch.utils.data import Dataset,Subset
from torch_geometric.data import Data
from torchmetrics import JaccardIndex

In [45]:
# Ask PyTorch's CUDA caching allocator to release cached memory it is not
# currently using, so a re-run of the notebook starts with more free GPU memory.
torch.cuda.empty_cache()

In [54]:
# File path to the hdf5 dataset
# Alternative kept for reference: builds the path from the parent of the
# current working directory (note it points at 'sim_pc_dataset.h5', not the
# '_moved' file used below).
#script_dir = os.path.dirname(os.getcwd())
#hdf5_file_path= os.path.join(script_dir, '.', 'docs', 'sim_pc_dataset.h5')
# Relative path: it is resolved against the kernel's current working directory.
hdf5_file_path= "../docs/sim_pc_dataset_moved.h5"

# Take out the colors: keep only the last 3 feature columns of data.x
class SelectLast3Features:
    """Callable transform that trims ``data.x`` to its last three columns.

    Samples whose ``x`` attribute is ``None`` are passed through untouched.
    The sample is modified in place and the same object is returned.
    """

    def __call__(self, data):
        features = data.x
        # Nothing to trim when the sample carries no feature matrix.
        if features is None:
            return data
        # Slice along the feature (column) axis; rows are left as they are.
        data.x = features[:, -3:]
        return data

# transform and pre_transform
# Random augmentation: positional jitter plus a random rotation of up to
# 15 degrees about each of the three axes. It is applied on the fly to
# training samples only (see AugmentedSubset).
transform = T.Compose([
    T.RandomJitter(0.01),
    T.RandomRotate(15, axis=0),
    T.RandomRotate(15, axis=1),
    T.RandomRotate(15, axis=2)
    ])

# Deterministic preprocessing handed to the dataset.
pre_transform =  T.Compose([
    #T.NormalizeScale(),
    SelectLast3Features()
    ])

# Create the dataset
full_dataset = H5PCDataset(hdf5_file_path, pre_transform = pre_transform)

# Define split sizes (e.g., 80% training and 20% validation)
total_size = len(full_dataset)
train_size = int(0.8 * total_size)
test_size = total_size - train_size

# Randomly split the dataset. The generator is seeded so the same samples land
# in train/test on every run; otherwise the reported test IoU is not
# reproducible and a reloaded checkpoint could be evaluated on samples it was
# trained on.
SPLIT_SEED = 42
train_subset, test_subset = random_split(
    full_dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

In [55]:
#Wrap train_subset in AugmentedSubset
class AugmentedSubset(Subset):
    """A ``Subset`` that applies ``transform`` to every sample it returns.

    Args:
        subset: An existing ``Subset`` (e.g. one half of ``random_split``);
            its ``dataset`` and ``indices`` are reused.
        transform: Callable applied to each sample after it is fetched.
    """

    def __init__(self, subset, transform):
        super().__init__(subset.dataset, subset.indices)
        self.transform = transform

    def __getitem__(self, idx):
        data = super().__getitem__(idx)
        return self.transform(data)

    def __getitems__(self, indices):
        """Batched fetch; routes every index through ``__getitem__``.

        Recent PyTorch versions give ``Subset`` a ``__getitems__`` method that
        the DataLoader prefers over ``__getitem__`` when building a batch.
        Without this override the inherited version would read straight from
        the underlying dataset, so batches would silently skip ``transform``.
        """
        return [self[i] for i in indices]

# Only the training split is wrapped with the random augmentation `transform`;
# the test split is used exactly as random_split returned it.
train_dataset = AugmentedSubset(train_subset, transform)
test_dataset = test_subset 

In [56]:
# Sanity check: show the first sample of each split, followed by the number of
# classes reported by the dataset that the split wraps.
for split in (train_dataset, test_dataset):
    print(split[0])
    print(split.dataset.num_classes)

Data(x=[4096, 3], y=[4096], pos=[4096, 3])
13
Data(x=[4096, 3], y=[4096], pos=[4096, 3])
13


In [57]:
# Loader settings shared by both splits.
batch_size = 32
num_workers = 0

# Training batches are drawn in a shuffled order.
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
)

# Evaluation batches keep the subset's order.
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
)

**Training with Simulated Point Cloud**

In [58]:
import torch
import torch.nn.functional as F
import torch_geometric.transforms as T
from torch_geometric.typing import WITH_TORCH_CLUSTER

from pyg_pointnet2 import PyGPointNet2NoColor

# Fail loudly when the optional 'torch-cluster' extension is missing.
# An exception is raised instead of calling quit(): inside a notebook kernel
# quit() does not surface the message as an error and may shut the kernel down,
# whereas an exception stops execution and shows the reason in the cell output.
if not WITH_TORCH_CLUSTER:
    raise ImportError("This example requires 'torch-cluster'")

In [59]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Take the class count from the dataset instead of hard-coding it, so the model
# head always agrees with the value the evaluation code reads from the dataset
# (the inspection cell reports 13 for this dataset).
NUM_CLASSES = full_dataset.num_classes
LEARNING_RATE = 0.001

model = PyGPointNet2NoColor(num_classes=NUM_CLASSES).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

def train():
    """Run one pass over ``train_loader``, updating ``model`` with ``optimizer``.

    Every 10th batch the running loss (divided by 10) and the running
    per-node accuracy are printed, after which the running counters reset.
    Batches after the last multiple of 10 are trained on but not reported.
    Returns nothing.
    """
    model.train()

    running_loss = 0
    hits = 0
    seen = 0
    for step, batch in enumerate(train_loader, start=1):
        batch = batch.to(device)

        optimizer.zero_grad()
        output = model(batch)
        # nll_loss expects log-probabilities; presumably the model ends in a
        # log-softmax — confirm against PyGPointNet2NoColor.
        loss = F.nll_loss(output, batch.y)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        predicted = output.argmax(dim=1)
        hits += predicted.eq(batch.y).sum().item()
        seen += batch.num_nodes

        if step % 10 != 0:
            continue

        print(f'[{step}/{len(train_loader)}] Loss: {running_loss / 10:.4f} '
              f'Train Acc: {hits / seen:.4f}')
        running_loss = hits = seen = 0

In [60]:

@torch.no_grad()
def test(loader):
    """Evaluate ``model`` on ``loader`` and return the multiclass Jaccard index.

    Args:
        loader: Data loader whose ``dataset`` is either a dataset exposing
            ``num_classes`` or any chain of wrappers (e.g. ``Subset``) that
            reach such a dataset through their ``.dataset`` attribute.

    Returns:
        float: The Jaccard index accumulated over every batch in ``loader``.
    """
    model.eval()

    # Walk through wrapper layers until an object with `num_classes` is found.
    # This keeps the single-Subset case working and also accepts a loader built
    # directly on the full dataset or on a nested subset.
    base_dataset = loader.dataset
    while not hasattr(base_dataset, 'num_classes') and hasattr(base_dataset, 'dataset'):
        base_dataset = base_dataset.dataset

    jaccard = JaccardIndex(num_classes=base_dataset.num_classes, task="multiclass").to(device)

    for data in loader:
        data = data.to(device)
        outs = model(data)
        # Predicted class per node = index of the largest score.
        preds = outs.argmax(dim=-1)
        jaccard.update(preds, data.y)

    return jaccard.compute().item()

In [61]:
import time
# Train for 100 epochs; after each one, evaluate on the test loader and report
# the IoU together with the wall-clock time of the train + evaluate step.
for epoch in range(1, 101):
    tic = time.perf_counter()

    train()
    iou = test(test_loader)

    elapsed = time.perf_counter() - tic
    print(f'Epoch: {epoch:02d}, Test IoU: {iou:.4f}, Time: {elapsed:.2f}s')

[10/13] Loss: 2.2054 Train Acc: 0.3502
Epoch: 01, Test IoU: 0.0400, Time: 3.74s
[10/13] Loss: 1.2729 Train Acc: 0.5875
Epoch: 02, Test IoU: 0.0610, Time: 3.36s
[10/13] Loss: 1.1296 Train Acc: 0.6604
Epoch: 03, Test IoU: 0.1421, Time: 3.26s
[10/13] Loss: 0.9521 Train Acc: 0.7275
Epoch: 04, Test IoU: 0.2038, Time: 3.25s
[10/13] Loss: 0.9057 Train Acc: 0.7386
Epoch: 05, Test IoU: 0.1910, Time: 3.26s
[10/13] Loss: 0.9306 Train Acc: 0.7328
Epoch: 06, Test IoU: 0.1804, Time: 3.26s
[10/13] Loss: 0.8381 Train Acc: 0.7513
Epoch: 07, Test IoU: 0.2087, Time: 3.26s
[10/13] Loss: 0.7937 Train Acc: 0.7731
Epoch: 08, Test IoU: 0.1956, Time: 3.27s
[10/13] Loss: 0.8541 Train Acc: 0.7437
Epoch: 09, Test IoU: 0.2025, Time: 3.29s
[10/13] Loss: 0.7821 Train Acc: 0.7637
Epoch: 10, Test IoU: 0.2228, Time: 3.28s
[10/13] Loss: 0.7749 Train Acc: 0.7740
Epoch: 11, Test IoU: 0.2087, Time: 3.29s
[10/13] Loss: 0.7040 Train Acc: 0.7789
Epoch: 12, Test IoU: 0.2217, Time: 3.30s
[10/13] Loss: 0.7176 Train Acc: 0.7716
E

In [62]:
sim_checkpoint_path= 'checkpoints/pointnet2_smartlab_sim_transform_seg_x3_100_checkpoint.pth'

# torch.save does not create missing parent directories, so make sure the
# target folder exists before writing (a fresh checkout has no 'checkpoints/').
checkpoint_dir = os.path.dirname(sim_checkpoint_path)
if checkpoint_dir:
    os.makedirs(checkpoint_dir, exist_ok=True)

# Save model, optimizer state, and any other info needed
torch.save({
    # Hard-coded to the number of epochs the training cell is written to run.
    'epoch': 100,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    #'loss': loss,
    #'test_accuracy': test_acc
}, sim_checkpoint_path)

print("Checkpoint saved successfully!")

Checkpoint saved successfully!
