# This is a demo of using Gradual Magnitude Pruning to sparsify Pointcloud object detection algorithm (pointnet++)

## Define the network and run GMP using sparseML

In [1]:
# Imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import os
import argparse
import importlib
import shutil
from models.pointnet2_utils import PointNetSetAbstraction
from tqdm import tqdm
from collections import OrderedDict
from typing import Dict, Callable
import pandas as pd
from data_utils.ModelNetDataLoader import ModelNetDataLoader
import provider


import sparseml
from sparseml.pytorch.optim import ScheduledModifierManager
from sparseml.pytorch.utils import TensorBoardLogger, ModuleExporter, get_prunable_layers, tensor_sparsity

In [2]:
# Define PointNet++ Model
class get_model(nn.Module):
    def __init__(self,num_class,normal_channel=True):
        super(get_model, self).__init__()
        in_channel = 6 if normal_channel else 3
        self.normal_channel = normal_channel
        self.sa1 = PointNetSetAbstraction(npoint=512, radius=0.2, nsample=32, in_channel=in_channel, mlp=[64, 64, 128], group_all=False)
        self.sa2 = PointNetSetAbstraction(npoint=128, radius=0.4, nsample=64, in_channel=128 + 3, mlp=[128, 128, 256], group_all=False)
        self.sa3 = PointNetSetAbstraction(npoint=None, radius=None, nsample=None, in_channel=256 + 3, mlp=[256, 512, 1024], group_all=True)
        self.fc1 = nn.Linear(1024, 512)
        self.bn1 = nn.BatchNorm1d(512)
        self.drop1 = nn.Dropout(0.4)
        self.fc2 = nn.Linear(512, 256)
        self.bn2 = nn.BatchNorm1d(256)
        self.drop2 = nn.Dropout(0.4)
        self.fc3 = nn.Linear(256, num_class)

    def forward(self, xyz):
        B, _, _ = xyz.shape
        if self.normal_channel:
            norm = xyz[:, 3:, :]
            xyz = xyz[:, :3, :]
        else:
            norm = None
        l1_xyz, l1_points = self.sa1(xyz, norm)
        l2_xyz, l2_points = self.sa2(l1_xyz, l1_points)
        l3_xyz, l3_points = self.sa3(l2_xyz, l2_points)
        x = l3_points.view(B, 1024)
        x = self.drop1(F.relu(self.bn1(self.fc1(x))))
        x = self.drop2(F.relu(self.bn2(self.fc2(x))))
        x = self.fc3(x)
        x = F.log_softmax(x, -1)


        return x, l3_points

class get_loss(nn.Module):
    def __init__(self):
        super(get_loss, self).__init__()

    def forward(self, pred, target, trans_feat):
        total_loss = F.nll_loss(pred, target)

        return total_loss

In [3]:
# Instantiate model and load pretrained model
model = get_model(10, False)
model = model.cuda()
checkpoint = torch.load('checkpoints/best_model.pth')
model.load_state_dict(checkpoint['model_state_dict'])

# Save the dense model for future inference
dense_model = model

In [4]:
# Print the prunable layers using sparseml
for (name, layer) in get_prunable_layers(model):
    print(f"{name}")

sa1.mlp_convs.0
sa1.mlp_convs.1
sa1.mlp_convs.2
sa2.mlp_convs.0
sa2.mlp_convs.1
sa2.mlp_convs.2
sa3.mlp_convs.0
sa3.mlp_convs.1
sa3.mlp_convs.2
fc1
fc2
fc3


In [5]:
pruning_recipe_path = "pointnet_recipy.yaml"
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
criterion = get_loss()

In [6]:
#load the data
data_path = 'data/modelnet40_normal_resampled/'
args = argparse.Namespace(batch_size=24, decay_rate=0.0001, epoch=200, gpu='0', learning_rate=0.001, log_dir='pointnet2_cls_ssg', model='pointnet2_cls_ssg', num_category=10, num_point=1024, optimizer='Adam', process_data=False, use_cpu=False, use_normals=False, use_uniform_sample=False)
train_dataset = ModelNetDataLoader(root=data_path, args=args, split='train', process_data=args.process_data)
trainDataLoader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=10, drop_last=True)
test_dataset = ModelNetDataLoader(root=data_path, args=args, split='test', process_data=args.process_data)
testDataLoader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False, num_workers=10)


The size of train data is 3991
The size of test data is 908


In [7]:
# create ScheduledModifierManager and Optimizer wrapper
manager = ScheduledModifierManager.from_yaml(pruning_recipe_path)
logger = TensorBoardLogger(log_path="./tensorboard_outputs")
optimizer = manager.modify(model, optimizer, loggers=[logger], steps_per_epoch=len(trainDataLoader))

2023-05-08 20:48:08 sparseml.pytorch.utils.logger INFO     Logging all SparseML modifier-level logs to sparse_logs/08-05-2023_20.48.08.log


In [8]:
# Write wrapper for running one epoch for test or train
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

def log_string(str):
        logger.info(str)
        print(str)

def run_model_one_epoch(model, data_loader, criterion, device, train=False, optimizer=None):
    if train:
        model.train()
    else:
        model.eval()

    running_loss = 0.0
    total_correct = 0
    total_predictions = 0

    # loop through batches
    mean_correct = []
    for batch_id, (points, target) in tqdm(enumerate(data_loader, 0), total=len(data_loader), smoothing=0.9):
        if train:
            optimizer.zero_grad()
        
        points = points.data.numpy()
        # print("Before processing",points.shape)
        points = provider.random_point_dropout(points)
        points[:, :, 0:3] = provider.random_scale_point_cloud(points[:, :, 0:3])
        points[:, :, 0:3] = provider.shift_point_cloud(points[:, :, 0:3])
        points = torch.Tensor(points)
        points = points.transpose(2, 1)

        # print("After processing",points.shape)
        if not args.use_cpu:
            points, target = points.cuda(), target.cuda()

        pred, trans_feat = model(points)
        loss = criterion(pred, target.long(), trans_feat)
        pred_choice = pred.data.max(1)[1]

        correct = pred_choice.eq(target.long().data).cpu().sum()
        mean_correct.append(correct.item() / float(points.size()[0]))
        if train:
            loss.backward()
            optimizer.step()
        
        # global_step += 1

    instance_acc = np.mean(mean_correct)
    # print('Instance Accuracy: %f' % instance_acc)

    return instance_acc

cuda


In [9]:
# run GMP algorithm
epoch = 0
for epoch in range(manager.max_epochs):
    # run training loop
    epoch_name = f"{epoch + 1}/{manager.max_epochs}"
    print(f"Running Training Epoch {epoch_name}")
    train_acc = run_model_one_epoch(model, trainDataLoader, criterion, device, train=True, optimizer=optimizer)
    # print(f"Training Epoch: {epoch_name}\nTraining Loss: {train_loss}\nTop 1 Acc: {train_acc}\n")

    # run validation loop
    print(f"Running Validation Epoch {epoch_name}")
    val_acc = run_model_one_epoch(model, testDataLoader, criterion, device, train=False, optimizer=optimizer)
    # print(f"Validation Epoch: {epoch_name}\nVal Loss: {val_loss}\nTop 1 Acc: {val_acc}\n")
    
    # logger.log_scalar("Metrics/Loss (Train)", train_loss, epoch)
    print(f"Training Epoch: {epoch_name}\nTraining Acc: {train_acc}\n")
    logger.log_scalar("Metrics/Accuracy (Train)", train_acc, epoch)
    # logger.log_scalar("Metrics/Loss (Validation)", val_loss, epoch)
    print(f"Validation Epoch: {epoch_name}\nValidation Acc: {val_acc}\n")
    logger.log_scalar("Metrics/Accuracy (Validation)", val_acc, epoch)

    
manager.finalize(model)

Running Training Epoch 1/15


100%|██████████| 166/166 [00:38<00:00,  4.27it/s]

Running Validation Epoch 1/15



100%|██████████| 38/38 [00:08<00:00,  4.33it/s]

Training Epoch: 1/15
Training Acc: 0.9656124497991967

Validation Epoch: 1/15
Validation Acc: 0.9232456140350876

Running Training Epoch 2/15



100%|██████████| 166/166 [00:36<00:00,  4.58it/s]

Running Validation Epoch 2/15



100%|██████████| 38/38 [00:09<00:00,  3.86it/s]

Training Epoch: 2/15
Training Acc: 0.9678714859437753

Validation Epoch: 2/15
Validation Acc: 0.9320175438596491

Running Training Epoch 3/15



100%|██████████| 166/166 [00:35<00:00,  4.66it/s]

Running Validation Epoch 3/15



100%|██████████| 38/38 [00:09<00:00,  4.13it/s]

Training Epoch: 3/15
Training Acc: 0.968624497991968

Validation Epoch: 3/15
Validation Acc: 0.9375

Running Training Epoch 4/15



100%|██████████| 166/166 [00:35<00:00,  4.62it/s]

Running Validation Epoch 4/15



100%|██████████| 38/38 [00:09<00:00,  4.02it/s]

Training Epoch: 4/15
Training Acc: 0.9683734939759037

Validation Epoch: 4/15
Validation Acc: 0.9364035087719299

Running Training Epoch 5/15



100%|██████████| 166/166 [00:38<00:00,  4.30it/s]

Running Validation Epoch 5/15



100%|██████████| 38/38 [00:09<00:00,  4.05it/s]

Training Epoch: 5/15
Training Acc: 0.96285140562249

Validation Epoch: 5/15
Validation Acc: 0.9298245614035089

Running Training Epoch 6/15



100%|██████████| 166/166 [00:35<00:00,  4.71it/s]

Running Validation Epoch 6/15



100%|██████████| 38/38 [00:09<00:00,  3.99it/s]

Training Epoch: 6/15
Training Acc: 0.963855421686747

Validation Epoch: 6/15
Validation Acc: 0.9396929824561404

Running Training Epoch 7/15



100%|██████████| 166/166 [00:37<00:00,  4.39it/s]

Running Validation Epoch 7/15



100%|██████████| 38/38 [00:08<00:00,  4.25it/s]

Training Epoch: 7/15
Training Acc: 0.958835341365462

Validation Epoch: 7/15
Validation Acc: 0.9320175438596493

Running Training Epoch 8/15



100%|██████████| 166/166 [00:38<00:00,  4.26it/s]

Running Validation Epoch 8/15



100%|██████████| 38/38 [00:09<00:00,  4.06it/s]

Training Epoch: 8/15
Training Acc: 0.962098393574297

Validation Epoch: 8/15
Validation Acc: 0.9188596491228072

Running Training Epoch 9/15



100%|██████████| 166/166 [00:35<00:00,  4.64it/s]

Running Validation Epoch 9/15



100%|██████████| 38/38 [00:09<00:00,  4.15it/s]

Training Epoch: 9/15
Training Acc: 0.9585843373493974

Validation Epoch: 9/15
Validation Acc: 0.930921052631579

Running Training Epoch 10/15



100%|██████████| 166/166 [00:37<00:00,  4.38it/s]

Running Validation Epoch 10/15



100%|██████████| 38/38 [00:08<00:00,  4.30it/s]

Training Epoch: 10/15
Training Acc: 0.9515562248995982

Validation Epoch: 10/15
Validation Acc: 0.9287280701754387

Running Training Epoch 11/15



100%|██████████| 166/166 [00:35<00:00,  4.68it/s]

Running Validation Epoch 11/15



100%|██████████| 38/38 [00:09<00:00,  4.19it/s]

Training Epoch: 11/15
Training Acc: 0.9585843373493976

Validation Epoch: 11/15
Validation Acc: 0.9111842105263158

Running Training Epoch 12/15



100%|██████████| 166/166 [00:35<00:00,  4.64it/s]

Running Validation Epoch 12/15



100%|██████████| 38/38 [00:09<00:00,  4.09it/s]

Training Epoch: 12/15
Training Acc: 0.9613453815261043

Validation Epoch: 12/15
Validation Acc: 0.9122807017543857

Running Training Epoch 13/15



100%|██████████| 166/166 [00:35<00:00,  4.67it/s]

Running Validation Epoch 13/15



100%|██████████| 38/38 [00:09<00:00,  4.09it/s]

Training Epoch: 13/15
Training Acc: 0.9578313253012049

Validation Epoch: 13/15
Validation Acc: 0.9232456140350876

Running Training Epoch 14/15



100%|██████████| 166/166 [00:35<00:00,  4.66it/s]

Running Validation Epoch 14/15



100%|██████████| 38/38 [00:09<00:00,  4.19it/s]

Training Epoch: 14/15
Training Acc: 0.9560742971887549

Validation Epoch: 14/15
Validation Acc: 0.9155701754385964

Running Training Epoch 15/15



100%|██████████| 166/166 [00:39<00:00,  4.18it/s]

Running Validation Epoch 15/15



100%|██████████| 38/38 [00:08<00:00,  4.38it/s]

Training Epoch: 15/15
Training Acc: 0.9595883534136548

Validation Epoch: 15/15
Validation Acc: 0.9287280701754387






## Calculate different performance and sparsity metrics


In [10]:
# Test code for running inference
def test(model, loader, num_class=40, vote_num=1):
    mean_correct = []
    classifier = model.eval()
    class_acc = np.zeros((num_class, 3))

    for j, (points, target) in tqdm(enumerate(loader), total=len(loader)):
        if not args.use_cpu:
            points, target = points.cuda(), target.cuda()

        points = points.transpose(2, 1)
        vote_pool = torch.zeros(target.size()[0], num_class).cuda()

        for _ in range(vote_num):
            pred, _ = classifier(points)
            vote_pool += pred
        pred = vote_pool / vote_num
        pred_choice = pred.data.max(1)[1]

        for cat in np.unique(target.cpu()):
            classacc = pred_choice[target == cat].eq(target[target == cat].long().data).cpu().sum()
            class_acc[cat, 0] += classacc.item() / float(points[target == cat].size()[0])
            class_acc[cat, 1] += 1
        correct = pred_choice.eq(target.long().data).cpu().sum()
        mean_correct.append(correct.item() / float(points.size()[0]))

    class_acc[:, 2] = class_acc[:, 0] / class_acc[:, 1]
    class_acc = np.mean(class_acc[:, 2])
    instance_acc = np.mean(mean_correct)
    return instance_acc, class_acc

In [11]:
# Dense Model Accuracy

dense_instance_acc, dense_class_acc = test(dense_model,testDataLoader,num_class=10)
print("Dense Instance Test Accuracy",dense_instance_acc)
print("Dense Class Test Accuracy",dense_class_acc)

# Sparse Model Accuracy
sparse_model = model
sparse_instance_acc, sparse_class_acc = test(sparse_model,testDataLoader,num_class=10)
print("Sparse Instance Test Accuracy",sparse_instance_acc)
print("Sparse Class Test Accuracy",sparse_class_acc)

100%|██████████| 38/38 [00:09<00:00,  4.21it/s]

Dense Instance Test Accuracy 0.9451754385964913
Dense Class Test Accuracy 0.9473511904761904



100%|██████████| 38/38 [00:09<00:00,  4.13it/s]

Sparse Instance Test Accuracy 0.9473684210526315
Sparse Class Test Accuracy 0.9502678571428571





In [12]:
# Save sparse model

save_dir = "checkpoints/"
exporter = ModuleExporter(sparse_model, output_dir=save_dir)
exporter.export_pytorch(name="sparse_best_model.pth")

In [13]:
# Get weight sparsity

wsp = []
names = []
for (name, layer) in get_prunable_layers(model):
    names.append(name)
    wsp.append(tensor_sparsity(layer.weight).item())
    print(f"{name}.weight: {tensor_sparsity(layer.weight).item():.4f}")

sa1.mlp_convs.0.weight: 0.2135
sa1.mlp_convs.1.weight: 0.4832
sa1.mlp_convs.2.weight: 0.5154
sa2.mlp_convs.0.weight: 0.6508
sa2.mlp_convs.1.weight: 0.6293
sa2.mlp_convs.2.weight: 0.6190
sa3.mlp_convs.0.weight: 0.8001
sa3.mlp_convs.1.weight: 0.8503
sa3.mlp_convs.2.weight: 0.9526
fc1.weight: 0.9407
fc2.weight: 0.8120
fc3.weight: 0.3227


In [14]:
# Get input and activation sparsity

insp = []
osp = []

def remove_all_forward_hooks(model: torch.nn.Module) -> None:
    for name, child in model._modules.items():
        if child is not None:
            if hasattr(child, "_forward_hooks"):
                child._forward_hooks: Dict[int, Callable] = OrderedDict()
            remove_all_forward_hooks(child)
remove_all_forward_hooks(model)

def sparsity_hook(layer_name, layer):
    def hook(_, input, output):
        # input_sparsity = (input[0] == 0).sum().item() / input[0].numel()
        # print(input[0].shape)
        input_sparsity = tensor_sparsity(input[0]).item()
        insp.append(input_sparsity)
        # print
        # output_sparsity = ((torch.abs(output) > 0.1) == 0).sum().item() / output.numel()
        output_sparsity = (torch.relu(output) == 0).sum().item() / output.numel()
        osp.append(output_sparsity)
        print(f"{layer_name} ({layer.__class__.__name__}) - Input sparsity: {input_sparsity:.4f}, Output sparsity: {output_sparsity:.4f}")
    return hook

def register_hooks(module, module_name):
    if isinstance(module, (nn.Conv2d, nn.ReLU, nn.Linear)):
        hook = sparsity_hook(module_name, module)
        module.register_forward_hook(hook)
    # else:
    #     print(f"Skipping {module_name} ({module.__class__.__name__})")


# Register hooks for each module (including submodules)
for name, layer in model.named_modules():
    register_hooks(layer, name)

data = torch.randn(24, 1024, 3)

data = data.transpose(2, 1)
data = data.to(device)


# run model on random data
with torch.no_grad():
    model.eval()
    output = model(data)
    # print(output.shape)
    # print(f"Output sparsity: {tensor_sparsity(output).item():.4f}")

# print(insp)
# print(osp)


sa1.mlp_convs.0 (Conv2d) - Input sparsity: 0.8501, Output sparsity: 0.4730
sa1.mlp_convs.1 (Conv2d) - Input sparsity: 0.4588, Output sparsity: 0.8510
sa1.mlp_convs.2 (Conv2d) - Input sparsity: 0.1592, Output sparsity: 0.7684
sa2.mlp_convs.0 (Conv2d) - Input sparsity: 0.5714, Output sparsity: 0.2998
sa2.mlp_convs.1 (Conv2d) - Input sparsity: 0.4123, Output sparsity: 0.5830
sa2.mlp_convs.2 (Conv2d) - Input sparsity: 0.5363, Output sparsity: 0.8186
sa3.mlp_convs.0 (Conv2d) - Input sparsity: 0.5933, Output sparsity: 0.4405
sa3.mlp_convs.1 (Conv2d) - Input sparsity: 0.6598, Output sparsity: 0.7056
sa3.mlp_convs.2 (Conv2d) - Input sparsity: 0.6446, Output sparsity: 0.4845
fc1 (Linear) - Input sparsity: 0.0682, Output sparsity: 0.5190
fc2 (Linear) - Input sparsity: 0.5338, Output sparsity: 0.4832
fc3 (Linear) - Input sparsity: 0.4870, Output sparsity: 0.8500


In [15]:
# Make a dataframe and store the sparsities
df = pd.DataFrame(list(zip(names, wsp, insp, osp)), columns =['Layer', 'Weight Sparsity', 'Input Sparsity', 'Output Sparsity'])

# compute the densities for each layer which is the inverse of the sparsity
df['Weight Density'] = 1 - df['Weight Sparsity']
df['Input Density'] = 1 - df['Input Sparsity']
df['Output Density'] = 1 - df['Output Sparsity']

# put layer number as column
df['Layer Number'] = df.index + 1

df.to_csv("Layer-wise-sparsities.csv",index=False)
reordered_columns = ['Layer',  'Layer Number', 'Input Density', 'Weight Density', 'Output Density', 'Weight Sparsity', 'Input Sparsity', 'Output Sparsity']

df2 = df[reordered_columns]
print(df2.shape)
df2.to_csv("Layer-wise-sparsities-2.csv",index=False)

(12, 8)
