# Assignment 2


Setting up the code environment

In [None]:
# Mount Google Drive at /content/drive so that the files stored there
# (the dataset archive and the extra packages used by the cells below)
# can be read through ordinary filesystem paths.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
# Add pre-installed pytorch3d to sys.path
import sys

# Drive folder that holds the pre-installed pytorch3d packages.
pytorch3d_packages_path = "/content/drive/My Drive/GoogleColab/pytorch3d_packages"

# Notebook cells get re-run; only add the entry when it is not already there so
# sys.path does not accumulate duplicates of the same folder.
if pytorch3d_packages_path not in sys.path:
    sys.path.append(pytorch3d_packages_path)

The classifier

In [None]:
import os, zipfile, shutil
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split

# Unzipping the dataset
# The archive is extracted into `extract_path` once; later runs reuse the folder.
# The category folders do not end up directly in `extract_path` but one level
# deeper, in a folder named after the archive. The previous workaround was to run
# this cell, hit an error, and then edit the variable by hand to
# "/content/ShapeNetCore/ShapeNetCore/ShapeNetCore". That step is now automatic.

zip_path = "/content/drive/My Drive/GoogleColab/ShapeNetCore.zip"
extract_path = "/content/ShapeNetCore/ShapeNetCore"

if not os.path.exists(extract_path):
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)
    print("Extraction complete.")

# Point extract_path at the wrapper folder when the archive produced one.
# If extract_path was already edited to the nested folder, nothing matches here
# and the value is left alone.
archive_name = os.path.splitext(os.path.basename(zip_path))[0]
nested_path = os.path.join(extract_path, archive_name)
if os.path.isdir(nested_path):
    extract_path = nested_path


# Check to see that the ShapeNetCore models are found
print(os.listdir(extract_path))


# Variables
# Category id (the folder name inside the dataset) paired with a readable name.
# The position in this table is the class index used for the labels.
_category_table = (
    ("03642806", "Laptop"),
    ("03211117", "Monitor"),
    ("03046257", "Clock"),
    ("02992529", "Cellphone"),
    ("02808440", "Bathtub"),
)
categories = [category_id for category_id, _ in _category_table]
category_names = [readable_name for _, readable_name in _category_table]

# Training hyperparameters.
batch_size, lr, epochs = 16, 0.001, 5

# Train on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print("Using device:", device)

# Reading the binvox files:
def read_binvox(file_path):
    """Read a run-length encoded .binvox file into a float32 voxel grid.

    The header must start with '#binvox 1' and contain a 'dim' line; header
    lines other than 'dim' are skipped. Everything after the 'data' line is
    read as (value, count) byte pairs and expanded.

    Args:
        file_path: Path of the .binvox file.

    Returns:
        numpy float32 array reshaped to the sizes given on the 'dim' line.

    Raises:
        IOError: If the magic line is wrong, the header ends before a 'data'
            line, no 'dim' line was found, or the voxel payload is malformed
            or does not match the declared dimensions.

    NOTE(review): the binvox format description says the voxel stream has the
    y coordinate running fastest, then z, then x, so the returned axes may be
    (x, z, y) rather than (x, y, z). Confirm before relying on axis meaning.
    """
    with open(file_path, 'rb') as f:
        # errors='replace' so that an arbitrary binary file is reported as
        # "not a binvox file" instead of failing while decoding.
        if f.readline().decode(errors='replace').strip() != '#binvox 1':
            raise IOError('Not a binvox file')

        dims = []
        while True:
            raw_line = f.readline()
            if not raw_line:
                # readline() returns b'' forever at EOF; without this check a
                # truncated file would loop endlessly.
                raise IOError('Truncated binvox header: no data section found')
            line = raw_line.decode(errors='replace').strip()
            if line.startswith('dim'):
                dims = list(map(int, line.split()[1:]))
            elif line == 'data':
                break

        raw_data = np.frombuffer(f.read(), dtype=np.uint8)

    if not dims:
        raise IOError('Invalid binvox header: missing dim line')
    if raw_data.size % 2 != 0:
        raise IOError('Invalid binvox data: incomplete (value, count) pair')

    # Payload is a sequence of (value, count) byte pairs.
    values, counts = raw_data[::2], raw_data[1::2]
    data = np.repeat(values, counts).astype(np.float32)

    expected_size = int(np.prod(dims))
    if data.size != expected_size:
        raise IOError(
            f'Invalid binvox data: decoded {data.size} voxels, '
            f'expected {expected_size} for dims {dims}'
        )
    return data.reshape(dims)

# Dataset for the voxel files
class VoxelDataset(Dataset):
    """Voxel grids read from .binvox files, labelled by category folder.

    Expects the layout
    <base_path>/<category>/<model_id>/models/model_normalized.surface.binvox.
    A sample's label is the position of its category in `categories`.

    Args:
        base_path: Directory containing one sub-folder per category.
        categories: Category folder names; missing folders are skipped.
        voxel_size: Target edge length; grids whose first dimension differs
            are subsampled (see `_downsample`).
        cache: Keep every loaded tensor in `self.cache_data` (keyed by file
            path) so that later accesses skip reading and decoding the file.

    Raises:
        RuntimeError: If no matching .binvox file is found.
    """

    def __init__(self, base_path, categories, voxel_size=32, cache=True):
        self.samples, self.labels = [], []
        self.category2idx = {cat: i for i, cat in enumerate(categories)}
        self.voxel_size = voxel_size
        self.cache = cache
        self.cache_data = {}

        for cat in categories:
            cat_path = os.path.join(base_path, cat)
            if not os.path.isdir(cat_path):
                continue
            # os.listdir returns entries in arbitrary order; sort so that the
            # sample order (and any seeded split built on it) is reproducible.
            for model_id in sorted(os.listdir(cat_path)):
                f = os.path.join(cat_path, model_id, "models", "model_normalized.surface.binvox")
                if os.path.exists(f):
                    self.samples.append(f)
                    self.labels.append(self.category2idx[cat])

        if not self.samples:
            # Show what is actually in base_path to help diagnose a wrong path,
            # but never let this diagnostic mask the real error below.
            if os.path.isdir(base_path):
                print(os.listdir(base_path))
            raise RuntimeError("No .binvox files found!")

        print(f"Loaded {len(self.samples)} samples from {len(categories)} categories")

    def __len__(self):
        """Return the number of samples found."""
        return len(self.samples)

    def _downsample(self, vox):
        """Subsample `vox` along its three axes towards `self.voxel_size`.

        Keeps every `factor`-th voxel, where factor is the integer ratio of
        the first dimension to `voxel_size`; the voxels in between are
        dropped. A grid whose first dimension already equals `voxel_size` is
        returned unchanged.

        Raises:
            ValueError: If the grid is smaller than `voxel_size`.
        """
        if vox.shape[0] == self.voxel_size:
            return vox
        factor = vox.shape[0] // self.voxel_size
        if factor < 1:
            raise ValueError(
                f"Voxel grid of size {vox.shape[0]} is smaller than the "
                f"requested voxel_size {self.voxel_size}"
            )
        return vox[::factor, ::factor, ::factor]

    def __getitem__(self, idx):
        """Return (voxels, label): a float32 tensor with a leading channel
        axis and the class index as a long tensor."""
        f, label = self.samples[idx], self.labels[idx]

        # cache in memory (optional, speeds up training a lot)
        if self.cache and f in self.cache_data:
            vox = self.cache_data[f]
        else:
            # downsample for speed
            vox = self._downsample(read_binvox(f))
            vox = torch.tensor(vox, dtype=torch.float32).unsqueeze(0)  # (1, D, H, W)
            if self.cache:
                self.cache_data[f] = vox

        return vox, torch.tensor(label, dtype=torch.long)


# 3D CNN
class VoxelCNN(nn.Module):
    """Small 3D convolutional classifier for single-channel voxel grids.

    Three identical stages (3x3x3 convolution with padding 1, batch norm,
    ReLU, 2x max pooling) widen the channels 1 -> 32 -> 64 -> 128. The result
    is average-pooled to one value per channel and mapped to `num_classes`
    scores by a linear layer.
    """

    def __init__(self, num_classes):
        super().__init__()

        channel_widths = (1, 32, 64, 128)
        stage_layers = []
        for in_channels, out_channels in zip(channel_widths[:-1], channel_widths[1:]):
            stage_layers.extend([
                nn.Conv3d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm3d(out_channels),
                nn.ReLU(),
                nn.MaxPool3d(kernel_size=2),
            ])
        self.features = nn.Sequential(*stage_layers)

        self.classifier = nn.Sequential(
            nn.AdaptiveAvgPool3d(output_size=1),  # -> (B, 128, 1, 1, 1)
            nn.Flatten(),
            nn.Linear(in_features=channel_widths[-1], out_features=num_classes),
        )

    def forward(self, x):
        """Return the class scores for a batch of voxel grids."""
        feature_maps = self.features(x)
        scores = self.classifier(feature_maps)
        return scores

# Getting the dataset
dataset = VoxelDataset(extract_path, categories, voxel_size=32, cache=True)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
# Seeded generator so the 80/20 train/validation split is the same on every run.
split_generator = torch.Generator().manual_seed(42)
train_set, val_set = random_split(dataset, [train_size, val_size], generator=split_generator)
# num_workers=0 on purpose: the dataset caches loaded voxels in memory. With
# worker processes each worker fills its own copy of that cache, which never
# reaches this process and is thrown away when the workers exit after every
# epoch. Loading in the main process lets every epoch after the first one
# read from the cache.
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0)
val_loader = DataLoader(val_set, batch_size=batch_size, num_workers=0)

# Training the model
model = VoxelCNN(len(categories)).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
num_classes = len(categories)

for epoch in range(epochs):
    # ---- training pass ----
    model.train()
    total_loss = 0
    for voxels, labels in train_loader:
        voxels, labels = voxels.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(voxels)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # ---- validation pass ----
    model.eval()
    class_correct = torch.zeros(num_classes, dtype=torch.long)
    class_total = torch.zeros(num_classes, dtype=torch.long)
    with torch.no_grad():
        for voxels, labels in val_loader:
            voxels, labels = voxels.to(device), labels.to(device)
            outputs = model(voxels)
            predicted = outputs.argmax(dim=1)

            # Count per class for the whole batch at once instead of calling
            # .item() on every sample.
            labels_cpu = labels.cpu()
            hit_mask = (predicted == labels).cpu()
            class_total += torch.bincount(labels_cpu, minlength=num_classes)
            class_correct += torch.bincount(labels_cpu[hit_mask], minlength=num_classes)

    # Plain Python lists/ints from here on, as the reporting below expects.
    class_total = class_total.tolist()
    class_correct = class_correct.tolist()
    total = sum(class_total)
    correct = sum(class_correct)

    # Guard the divisions so a tiny dataset (empty train or validation part)
    # reports zeros instead of raising ZeroDivisionError.
    val_acc = correct / total if total else 0.0
    mean_loss = total_loss / max(len(train_loader), 1)
    print(f"Epoch {epoch+1}/{epochs}, Loss: {mean_loss:.4f}, Val Acc: {val_acc:.4f}")
    for name, n_correct, n_total in zip(category_names, class_correct, class_total):
        if n_total > 0:
            acc = n_correct / n_total
            print(f"Category {name}: {acc:.4f}")
    print("\n")


['03211117', '03046257', '02808440', '02992529', '03642806']
Loaded 3887 samples from 5 categories
Epoch 1/5, Loss: 0.6544, Val Acc: 0.8972
Category Laptop: 0.9770
Category Monitor: 0.8873
Category Clock: 0.7769
Category Cellphone: 0.9390
Category Bathtub: 0.9185


Epoch 2/5, Loss: 0.3785, Val Acc: 0.7584
Category Laptop: 0.8736
Category Monitor: 0.5587
Category Clock: 0.4077
Category Cellphone: 0.9634
Category Bathtub: 1.0000


Epoch 3/5, Loss: 0.3022, Val Acc: 0.9062
Category Laptop: 0.9885
Category Monitor: 0.9014
Category Clock: 0.8846
Category Cellphone: 0.9512
Category Bathtub: 0.8478


Epoch 4/5, Loss: 0.2671, Val Acc: 0.9267
Category Laptop: 1.0000
Category Monitor: 0.8826
Category Clock: 0.8538
Category Cellphone: 0.9451
Category Bathtub: 0.9783


Epoch 5/5, Loss: 0.2379, Val Acc: 0.9049
Category Laptop: 0.9770
Category Monitor: 0.8826
Category Clock: 0.8692
Category Cellphone: 0.9512
Category Bathtub: 0.8804


