# Target Dropout: Column wise target dropout based on the different PE-group sizes

The PE-wise threshold-based group lasso pruning method (Yang, AAAI 2020) treats the PE group as the basic pruning unit and performs structured pruning at that granularity to introduce sparsity into the model.

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import seaborn as sns;sns.set()
torch.manual_seed(0)

<torch._C.Generator at 0x1109bd6d0>

In this report, I reshape the 4-D weight tensor into a 2-D matrix based on different PE-group sizes, with the PE-group size varying from 16 down to 2. Following the Target Dropout paper, we first load the ResNet32 model:

In [2]:
# Load the saved checkpoint onto the CPU and take its parameter dictionary.
check_point = torch.load('./decay0.0002_fp_fflf_resnet32/model_best.pth.tar', map_location='cpu')
param = check_point['state_dict']

layers = param.items()
conv_layers = {}

# Only 4-D entries are collected; they are treated as convolution weights.
for k, v in layers:
    if v.dim() != 4:
        continue
    print(f"Layer: {k}, shape: {list(v.size())}")
    conv_layers[k] = v

Layer: conv_1_3x3.weight, shape: [16, 3, 3, 3]
Layer: stage_1.0.conv_a.weight, shape: [16, 16, 3, 3]
Layer: stage_1.0.conv_b.weight, shape: [16, 16, 3, 3]
Layer: stage_1.1.conv_a.weight, shape: [16, 16, 3, 3]
Layer: stage_1.1.conv_b.weight, shape: [16, 16, 3, 3]
Layer: stage_1.2.conv_a.weight, shape: [16, 16, 3, 3]
Layer: stage_1.2.conv_b.weight, shape: [16, 16, 3, 3]
Layer: stage_1.3.conv_a.weight, shape: [16, 16, 3, 3]
Layer: stage_1.3.conv_b.weight, shape: [16, 16, 3, 3]
Layer: stage_1.4.conv_a.weight, shape: [16, 16, 3, 3]
Layer: stage_1.4.conv_b.weight, shape: [16, 16, 3, 3]
Layer: stage_2.0.conv_a.weight, shape: [32, 16, 3, 3]
Layer: stage_2.0.conv_b.weight, shape: [32, 32, 3, 3]
Layer: stage_2.1.conv_a.weight, shape: [32, 32, 3, 3]
Layer: stage_2.1.conv_b.weight, shape: [32, 32, 3, 3]
Layer: stage_2.2.conv_a.weight, shape: [32, 32, 3, 3]
Layer: stage_2.2.conv_b.weight, shape: [32, 32, 3, 3]
Layer: stage_2.3.conv_a.weight, shape: [32, 32, 3, 3]
Layer: stage_2.3.conv_b.weight, sha

Using a convolutional layer from the second stage (`stage_2.1.conv_a`) as the example, we reshape its 4-D weight tensor into a 2-D matrix for each of the different PE-group sizes.

In [3]:
# Pick one stage-2 convolution weight from the collected layers as the running example.
w_l = conv_layers['stage_2.1.conv_a.weight']
print(f"example layer: {list(w_l.size())}")

example layer: [32, 32, 3, 3]


In [4]:
# Group sizes to sweep over.
grp_size = [2, 4, 8, 16]
print(f"sweep group size: {grp_size}")

sweep group size: [2, 4, 8, 16]


In [5]:
def reshape_2_2D(input, g):
    """Reshape a 4-D weight tensor into a 2-D matrix with one row per group.

    Dimensions 0 and 1 are flattened together (row-major) and split into
    groups of ``g`` consecutive slices; each row of the result holds the
    ``g * input.size(2) * input.size(3)`` values of one group.

    Args:
        input: Tensor indexed on dimensions 0..3 (e.g. a conv weight).
        g: Group size; must be positive and divide
            ``input.size(0) * input.size(1)``.

    Returns:
        A 2-D tensor of shape
        ``(input.size(0) * input.size(1) // g, g * input.size(2) * input.size(3))``.

    Raises:
        ValueError: If ``g`` is not positive or does not evenly divide the
            number of (dim 0, dim 1) slices.
    """
    w_i = input
    num_slices = w_i.size(0) * w_i.size(1)
    if g <= 0 or num_slices % g != 0:
        raise ValueError(
            f"group size {g} must be positive and divide "
            f"size(0) * size(1) = {num_slices}"
        )
    num_group = num_slices // g

    # reshape (rather than view) also accepts non-contiguous tensors; for
    # contiguous input it returns the same view as before.
    reshape_layer = w_i.reshape(num_group, g * w_i.size(2) * w_i.size(3))  # 4-D tensor -> 2-D matrix
    return reshape_layer

# Print the 2-D shape that reshape_2_2D produces for each group size in grp_size.
# NOTE(review): the enumerate index `i` is never used, and `w_i` is only an
# alias of `w_l`; both are left in place because this is top-level cell code
# whose names stay bound after the loop.
for i, g in enumerate(grp_size):
    w_i = w_l
    
    reshape_layer = reshape_2_2D(w_i, g)
    print(f"group size={g}, shape={list(reshape_layer.size())}")

group size=2, shape=[512, 18]
group size=4, shape=[256, 36]
group size=8, shape=[128, 72]
group size=16, shape=[64, 144]


In [6]:
def forward(input, col_size=4, alpha=0.5, gamma=0.5, verbose=True):
    """Build a targeted-dropout keep mask over groups of a 4-D weight tensor.

    The weight is reshaped to 2-D with ``reshape_2_2D`` so that each row is
    one group. Rows are ranked by their L2 norm; every row whose norm is less
    than or equal to the norm at sorted position ``int(num_groups * gamma)``
    is a drop candidate, and each candidate is dropped when a uniform random
    draw falls below ``alpha``.

    Args:
        input: 4-D weight tensor accepted by ``reshape_2_2D``.
        col_size: Group size forwarded to ``reshape_2_2D``.
        alpha: Drop probability applied to each candidate group.
        gamma: Fraction of the sorted group norms used to pick the threshold.
        verbose: When true (default), print the same diagnostics the
            notebook cell shows; set to false to silence them.

    Returns:
        A tuple ``(mask_keep_original, mask_keep_2d)`` of float masks
        (1 = keep, 0 = drop): the first has the shape of ``input``, the
        second the shape of the 2-D reshaped weight.
    """
    def _log(*args):
        if verbose:
            print(*args)

    w_i = reshape_2_2D(input, col_size)
    _log(f"group size={col_size}, shape={list(w_i.size())}")

    grp_values = w_i.norm(p=2, dim=1)
    _log(f'grp values size={grp_values.size()}')

    sorted_col, _ = torch.sort(grp_values.contiguous().view(-1), dim=0)
    _log(sorted_col.size())

    # Clamp the index so gamma=1.0 (or a negative gamma) cannot run off
    # either end of the sorted norms.
    num_groups = grp_values.numel()
    th_idx = min(max(int(num_groups * gamma), 0), num_groups - 1)
    threshold = sorted_col[th_idx]
    _log(f"threshold L2 norm: {threshold}, idx={th_idx}")

    mask_small = 1 - grp_values.gt(threshold).float()  # mask for blocks candidates for pruning
    mask_dropout = torch.rand_like(grp_values).lt(alpha).float()

    # A group is dropped only if it is both a candidate and selected by the draw.
    mask_keep = 1 - mask_small * mask_dropout

    # Broadcast the per-group decision across every element of the group's row.
    mask_keep_2d = mask_keep.view(w_i.size(0), 1).expand(w_i.size())
    _log(mask_keep)
    if mask_keep_2d.size(0) > 6:
        # Sample row shown in the notebook; skipped when there are too few groups.
        _log(mask_keep_2d[6, :])

    # The expanded mask is not contiguous, so reshape (which copies when
    # needed) is used to map it back to the weight's original shape.
    mask_keep_original = mask_keep_2d.reshape(input.size())
    # Non-mutating reshape keeps mask_keep_original in the input's shape while
    # still returning a 2-D version for inspection.
    mask_keep_2d_test = mask_keep_original.reshape(w_i.size())

    return mask_keep_original, mask_keep_2d_test

In [None]:
# Run forward on the example layer with its default col_size/alpha/gamma,
# then show row 4 of the returned 2-D mask.
mask_keep_original_test, mask_keep_2d_test = forward(w_l)
print(mask_keep_2d_test[4,:].float())

In [None]:
# 2D to 4D: Try with random tensor
# Small random tensor of shape (4, 4, 2, 2); print its first slice along dim 0.
x = torch.randn((4,4,2,2))

print(x[0])

In [None]:
# Reshape the random 4-D tensor into a 2-D matrix with one row per group.
grp_size = 2
num_group = x.size(0) * x.size(1) // grp_size

# The row width is derived from grp_size so that rows * columns equals the
# number of elements in x; view returns a new 2-D tensor and leaves x 4-D.
x_2d = x.view(num_group, grp_size * x.size(2) * x.size(3))
print(x_2d.size())