In [35]:
import torch
from torchvision.models import resnet18, AlexNet
import torch_pruning as tp
from torchstat import stat
from torchinfo import summary
import os


def save_checkpoint(state, filepath, name):
    torch.save(state, os.path.join(filepath, name+'checkpoint.pth'))

model = torch.hub.load('pytorch/vision:v0.10.0', 'alexnet', pretrained=True)
model.eval()


Using cache found in C:\Users\35679/.cache\torch\hub\pytorch_vision_v0.10.0


AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [36]:
model

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [37]:
# Importance criteria
example_inputs = torch.randn(1, 3, 224, 224)
imp = tp.importance.TaylorImportance()

ignored_layers = []
for m in model.modules():
    if isinstance(m, torch.nn.Linear) and m.out_features == 1000:
        ignored_layers.append(m) # DO NOT prune the final classifier!

iterative_steps = 5 # progressive pruning
current_step = 1


# 0.015625 -> 1 0.03125 -> 2 0.0625 -> 4 0.125 -> 8 0.25 -> 16 0.5 -> 32
pruner = tp.pruner.MagnitudePruner(
    model,
    example_inputs,
    round_to=None,
    unwrapped_parameters=None,
    importance=imp,
    iterative_steps=iterative_steps,
    ch_sparsity = 0.015625, # remove 50% channels, ResNet18 = {64, 128, 256, 512} => ResNet18_Half = {32, 64, 128, 256}
    ignored_layers=ignored_layers,
)

base_macs, base_nparams = tp.utils.count_ops_and_params(model, example_inputs)


for i in range(iterative_steps):
    if isinstance(imp, tp.importance.TaylorImportance):
        # Taylor expansion requires gradients for importance estimation
        loss = model(example_inputs).sum() # a dummy loss for TaylorImportance
        loss.backward() # before pruner.step()
    pruner.step()
    macs, nparams = tp.utils.count_ops_and_params(model, example_inputs)
    print("Pruning step:", current_step, "multiply–accumulate (macs):", macs, "number of parameters", nparams)
    current_step += 1

classifier.4 [0, 1, 2, 3828, 4, 3830, 6, 7, 8, 9, 10, 11, 3831]
classifier.1 [0, 1, 2, 2828, 2829, 5, 2830, 2831, 8, 2833, 2834, 2836, 12]
features.10 [0]
features.8 [26]
features.6 [48, 83]
features.3 [68]
features.0 [36]
Pruning step: 1 multiply–accumulate (macs): 706115361.0 number of parameters 60688408
classifier.4 [510, 3814, 3815, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
classifier.1 [416, 2471, 2470, 2469, 2487, 2466, 2486, 7, 2465, 9, 2485, 11, 12]
features.10 [62]
features.8 [251]
features.6 [15]
features.3 [102]
Pruning step: 2 multiply–accumulate (macs): 701967322.0 number of parameters 60286501
classifier.4 [508, 3801, 3802, 3, 4, 5, 6, 7, 8, 9, 10, 11, 3803]
classifier.1 [508, 2795, 2797, 2798, 2800, 2801, 2806, 2811, 2813, 2814, 2815, 2817, 12]
features.10 [62]
features.8 [249]
features.6 [42]
Pruning step: 3 multiply–accumulate (macs): 699558025.0 number of parameters 59890918
classifier.4 [507, 3789, 3790, 3791, 4, 5, 6, 7, 8, 9, 3793, 11, 12]
classifier.1 [2778, 2781, 2782, 2

In [44]:

for idx, param in enumerate(model.features[0].parameters()):
    print(idx)

0
1


In [None]:
for idx, param in enumerate(model.conv.parameters()):
    if idx in feature_map_indices_to_freeze:
        param.requires_grad = False


In [40]:
pruner.get_history()

AttributeError: 'MagnitudePruner' object has no attribute 'get_history'

In [None]:
    # finetune your model here
    # finetune(model)
    # ...
# save the pruned state_dict, which includes both pruned parameters and modified attributes
state_dict = tp.state_dict(model) # the pruned model, e.g., a resnet-18-half
torch.save(state_dict, 'pruned.pth')

# create a new model, e.g. resnet18
new_model = resnet18().eval()

# load the pruned state_dict into the unpruned model.
loaded_state_dict = torch.load('pruned.pth', map_location='cpu')
tp.load_state_dict(new_model, state_dict=loaded_state_dict)
# print(new_model) # This will be a pruned model.

summary(new_model, (3, 224, 224))

In [31]:
for name, param in model.named_parameters():
    if param.requires_grad:
        print (name, list(param.data.shape))

conv1.weight [32, 3, 7, 7]
bn1.weight [32]
bn1.bias [32]
layer1.0.conv1.weight [32, 32, 3, 3]
layer1.0.bn1.weight [32]
layer1.0.bn1.bias [32]
layer1.0.conv2.weight [32, 32, 3, 3]
layer1.0.bn2.weight [32]
layer1.0.bn2.bias [32]
layer1.1.conv1.weight [32, 32, 3, 3]
layer1.1.bn1.weight [32]
layer1.1.bn1.bias [32]
layer1.1.conv2.weight [32, 32, 3, 3]
layer1.1.bn2.weight [32]
layer1.1.bn2.bias [32]
layer2.0.conv1.weight [64, 32, 3, 3]
layer2.0.bn1.weight [64]
layer2.0.bn1.bias [64]
layer2.0.conv2.weight [64, 64, 3, 3]
layer2.0.bn2.weight [64]
layer2.0.bn2.bias [64]
layer2.0.downsample.0.weight [64, 32, 1, 1]
layer2.0.downsample.1.weight [64]
layer2.0.downsample.1.bias [64]
layer2.1.conv1.weight [64, 64, 3, 3]
layer2.1.bn1.weight [64]
layer2.1.bn1.bias [64]
layer2.1.conv2.weight [64, 64, 3, 3]
layer2.1.bn2.weight [64]
layer2.1.bn2.bias [64]
layer3.0.conv1.weight [128, 64, 3, 3]
layer3.0.bn1.weight [128]
layer3.0.bn1.bias [128]
layer3.0.conv2.weight [128, 128, 3, 3]
layer3.0.bn2.weight [128]


In [38]:
# from torchsummary import summary

# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model = model.cpu()

# summary(model, (3, 224, 224))

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [40]:
# model.zero_grad() # We don't want to store gradient information
# torch.save(model, 'model.pth') # without .state_dict
# model = torch.load('model.pth') # load the pruned model

# save the pruned state_dict, which includes both pruned parameters and modified attributes
state_dict = tp.state_dict(model) # the pruned model, e.g., a resnet-18-half
torch.save(state_dict, 'pruned.pth')

# create a new model, e.g. resnet18
new_model = resnet18().eval()

# load the pruned state_dict into the unpruned model.
loaded_state_dict = torch.load('pruned.pth', map_location='cpu')
tp.load_state_dict(new_model, state_dict=loaded_state_dict)
print(new_model) # This will be a pruned model.

AttributeError: Can't pickle local object 'summary.<locals>.register_hook.<locals>.hook'

In [21]:
from torchstat import stat
import torchvision.models as models

# model = models.resnet50()
stat(model, input_size=example_inputs)

AssertionError: 

In [4]:
import torch
from torchvision.models import resnet18
import torch_pruning as tp

model = resnet18(pretrained=True).eval()

# 1. build dependency graph for resnet18
DG = tp.DependencyGraph().build_dependency(model, example_inputs=torch.randn(1,3,224,224))

# # 2. Specify the to-be-pruned channels. Here we prune those channels indexed by [2, 6, 9].
# group = DG.get_pruning_group( model.conv1, tp.prune_conv_out_channels, idxs=[1] )

# # 3. prune all grouped layers that are coupled with model.conv1 (included).
# if DG.check_pruning_group(group): # avoid full pruning, i.e., channels=0.
#     group.prune()


# prune of channel 1 happens from start of conv.weights 6 groups for each channel
channels_to_prune = [x for x in range(48)]
tp.prune_conv_out_channels( model.conv1, idxs = channels_to_prune )
tp.prune_batchnorm_out_channels( model.bn1, idxs= channels_to_prune )
tp.prune_conv_in_channels(model.layer1[0].conv1, idxs= channels_to_prune )

print("Shape:", model.conv1.weight.shape)

for name, module in model.named_modules():
    if isinstance(module, torch.nn.Conv2d):
        tp.prune_conv_out_channels( module, idxs = channels_to_prune )
                


# stat(model, (3, 224, 224))
# macs, nparams = tp.utils.count_ops_and_params(model, example_inputs)
# print("Pruning step:", current_step, "multiply–accumulate (macs):", macs, "number of parameters", nparams)
# tp.prune_conv_out_channels( model.conv1, idxs=[0] )

# tp.prune_batchnorm_out_channels( model.bn1, idxs=[1] )

# tp.prune_conv_in_channels( model.layer1[0].conv1, idxs=[1] )

# model
# model.conv1.weight.shape
# 1 , 3 , 7, 7
model





[MAdd]: AdaptiveAvgPool2d is not supported!
[Flops]: AdaptiveAvgPool2d is not supported!
[Memory]: AdaptiveAvgPool2d is not supported!
Shape: torch.Size([16, 3, 7, 7])


ResNet(
  (conv1): Conv2d(3, -32, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
 

In [32]:
# 2. Select some channels to prune. Here we prune the channels indexed by [2, 6, 9].
model = resnet18(pretrained=True).eval()
pruning_idxs = pruning_idxs=[2, 6, 9]
group = DG.get_pruning_group(model.conv1, tp.prune_conv_out_channels, idxs=pruning_idxs )
group.prune()


ValueError: Module Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) is not in the dependency graph.

In [31]:
DG.pruning_history()

for name, bl, ids in DG.pruning_history():
    print(name, ids)

conv1 [2, 6, 9]


In [10]:
model

ResNet(
  (conv1): Conv2d(3, 61, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(61, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(61, 61, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(61, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(61, 61, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(61, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(61, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [12]:
prune_amounts = [x / 64 for x in range(65)]
prune_amounts

[0.0,
 0.015625,
 0.03125,
 0.046875,
 0.0625,
 0.078125,
 0.09375,
 0.109375,
 0.125,
 0.140625,
 0.15625,
 0.171875,
 0.1875,
 0.203125,
 0.21875,
 0.234375,
 0.25,
 0.265625,
 0.28125,
 0.296875,
 0.3125,
 0.328125,
 0.34375,
 0.359375,
 0.375,
 0.390625,
 0.40625,
 0.421875,
 0.4375,
 0.453125,
 0.46875,
 0.484375,
 0.5,
 0.515625,
 0.53125,
 0.546875,
 0.5625,
 0.578125,
 0.59375,
 0.609375,
 0.625,
 0.640625,
 0.65625,
 0.671875,
 0.6875,
 0.703125,
 0.71875,
 0.734375,
 0.75,
 0.765625,
 0.78125,
 0.796875,
 0.8125,
 0.828125,
 0.84375,
 0.859375,
 0.875,
 0.890625,
 0.90625,
 0.921875,
 0.9375,
 0.953125,
 0.96875,
 0.984375,
 1.0]

In [22]:
model = resnet18(pretrained=True).eval()
example_inputs = torch.randn(1,3,224,224)
pruned_layer = model.conv1

# 1. build dependency graph for resnet18
DG = tp.DependencyGraph().build_dependency(model, example_inputs=example_inputs)

# 2. Select some channels to prune. Here we prune the channels indexed by [2, 6, 9].
pruning_idxs = pruning_idxs=[2, 6, 9]
group = DG.get_pruning_group(model.conv1, tp.prune_conv_out_channels, idxs=pruning_idxs )

group.prune()

group.items

group = DG.get_pruning_group(model.conv1, tp.prune_conv_out_channels, idxs=pruning_idxs)

conv1 [2, 6, 9]




[GroupItem(dep=prune_out_channels on conv1 (Conv2d(3, 61, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)) => prune_out_channels on conv1 (Conv2d(3, 61, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)), idxs=[2, 6, 9]),
 GroupItem(dep=prune_out_channels on conv1 (Conv2d(3, 61, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)) => prune_out_channels on bn1 (BatchNorm2d(61, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)), idxs=[2, 6, 9]),
 GroupItem(dep=prune_out_channels on bn1 (BatchNorm2d(61, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)) => prune_out_channels on _ElementWiseOp_20(ReluBackward0), idxs=[2, 6, 9]),
 GroupItem(dep=prune_out_channels on _ElementWiseOp_20(ReluBackward0) => prune_out_channels on _ElementWiseOp_19(MaxPool2DWithIndicesBackward0), idxs=[2, 6, 9]),
 GroupItem(dep=prune_out_channels on _ElementWiseOp_19(MaxPool2DWithIndicesBackward0) => prune_out_channels on _ElementWiseOp_18(AddBackwa

In [None]:
import torch
import torch.nn as nn

# Assuming you have a model with the pruned convolutional layer and dependent layers

# Get the pruned convolutional layer
pruned_layer = model.pruned_layer

# Get the number of filters in the pruned layer
num_filters_pruned = pruned_layer.out_channels

# Define the channel positions to add
channel_positions_to_add = [0, 2, 5, 8]  # Adjust this list as needed

# Calculate the number of filters to be added
new_filters = len(channel_positions_to_add)

# Create a new convolutional layer with the desired number of filters
new_conv_layer = nn.Conv2d(in_channels=num_filters_pruned + new_filters,
                           out_channels=num_filters_pruned,
                           kernel_size=pruned_layer.kernel_size,
                           stride=pruned_layer.stride,
                           padding=pruned_layer.padding,
                           bias=pruned_layer.bias is not None)

# Copy the weights from the pruned layer to the new layer
new_conv_layer.weight.data[:, :num_filters_pruned, :, :] = pruned_layer.weight.data.clone()

if pruned_layer.bias is not None:
    new_conv_layer.bias.data = pruned_layer.bias.data.clone()