In [1]:
import torch
from torch import nn
import torch.nn.utils.prune as prune
import torch.nn.functional as F

In [2]:
torch.manual_seed(777)
torch.cuda.manual_seed_all(777)

GPU_NUM = 1 # 사용 할 GPU Num 설정
device = torch.device(f'cuda:{GPU_NUM}' if torch.cuda.is_available() else 'cpu')
torch.cuda.set_device(device)

print ('Available devices ', torch.cuda.device_count())
print ('Current cuda device ', torch.cuda.current_device())
print(torch.cuda.get_device_name(device))

print("cpu와 cuda 중 다음 기기로 학습함:", device, '\n')

Available devices  2
Current cuda device  1
GeForce RTX 2080 Ti
cpu와 cuda 중 다음 기기로 학습함: cuda:1 



In [3]:
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        # 1 input image channel, 6 output channels, 3x3 square conv kernel
        self.conv1 = nn.Conv2d(1, 6, 3)
        self.conv2 = nn.Conv2d(6, 16, 3)
        self.fc1 = nn.Linear(16 * 5 * 5, 120) # 5x5 image dimension
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        
    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, int(x.nelement() / x.shape[0]))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [4]:
# 서로 다른 2개의 model 생성
model1 = LeNet().to(device)
model2 = LeNet().to(device)

In [5]:
model1

LeNet(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [6]:
model2

LeNet(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [7]:
module1 = model1.conv1
print(list(module1.named_parameters()))

[('weight', Parameter containing:
tensor([[[[-0.2787, -0.0059, -0.0645],
          [-0.0761,  0.2542,  0.2541],
          [ 0.1495,  0.0022,  0.2166]]],


        [[[-0.1577, -0.1258,  0.0632],
          [-0.2605,  0.0809,  0.1567],
          [ 0.3265,  0.2777, -0.1169]]],


        [[[ 0.2477,  0.2583,  0.2690],
          [-0.1864, -0.0669, -0.1780],
          [ 0.2941,  0.2786,  0.2635]]],


        [[[-0.0440, -0.2318, -0.1231],
          [-0.0819,  0.0778,  0.0237],
          [ 0.3314, -0.0815,  0.3027]]],


        [[[ 0.2188,  0.0973,  0.0363],
          [-0.0865, -0.2869, -0.0754],
          [ 0.1529,  0.2711, -0.3294]]],


        [[[-0.3166, -0.0336, -0.1902],
          [-0.0712,  0.0088,  0.3326],
          [-0.2331, -0.1208,  0.2693]]]], device='cuda:1', requires_grad=True)), ('bias', Parameter containing:
tensor([ 0.1990,  0.2356, -0.2919,  0.1017,  0.3330,  0.0286], device='cuda:1',
       requires_grad=True))]


In [8]:
module2 = model2.conv1
print(list(module2.named_parameters()))

[('weight', Parameter containing:
tensor([[[[-0.0514, -0.2635,  0.2106],
          [-0.1219,  0.2052,  0.0304],
          [ 0.3253, -0.0604,  0.1738]]],


        [[[ 0.2386, -0.0527,  0.1259],
          [-0.0564, -0.2797,  0.0983],
          [ 0.0872, -0.0875, -0.1945]]],


        [[[-0.3241,  0.2657, -0.0745],
          [ 0.1609, -0.2248, -0.1198],
          [-0.2865, -0.2915, -0.1375]]],


        [[[-0.3062,  0.2593, -0.2413],
          [-0.1232,  0.1282,  0.2384],
          [-0.2139, -0.1481, -0.2750]]],


        [[[-0.3030, -0.1592,  0.2061],
          [-0.1907, -0.0852,  0.2517],
          [ 0.1138,  0.2115, -0.0989]]],


        [[[ 0.3036,  0.1325,  0.1421],
          [ 0.2825,  0.0858, -0.3250],
          [-0.0609,  0.3233, -0.2708]]]], device='cuda:1', requires_grad=True)), ('bias', Parameter containing:
tensor([ 0.1596, -0.2679, -0.2416,  0.1971, -0.1939,  0.2151], device='cuda:1',
       requires_grad=True))]


In [9]:
# model1 conv1에 대하여 random pruning 진행
prune.random_unstructured(module1, name='weight', amount=0.3)
prune.random_unstructured(module1, name='bias', amount=0.3)

Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))

In [10]:
# mask 확인
print(list(module1.named_buffers()))

[('weight_mask', tensor([[[[1., 1., 0.],
          [0., 1., 1.],
          [1., 1., 0.]]],


        [[[1., 1., 1.],
          [1., 1., 1.],
          [0., 0., 0.]]],


        [[[1., 1., 0.],
          [1., 0., 1.],
          [1., 1., 1.]]],


        [[[1., 0., 0.],
          [0., 1., 1.],
          [1., 1., 1.]]],


        [[[1., 1., 1.],
          [1., 0., 1.],
          [1., 1., 0.]]],


        [[[0., 0., 1.],
          [1., 1., 1.],
          [1., 0., 1.]]]], device='cuda:1')), ('bias_mask', tensor([1., 1., 1., 0., 0., 1.], device='cuda:1'))]


In [11]:
# mask에 따라 pruning된 parameter 확인
print(module1.weight)

tensor([[[[-0.2787, -0.0059, -0.0000],
          [-0.0000,  0.2542,  0.2541],
          [ 0.1495,  0.0022,  0.0000]]],


        [[[-0.1577, -0.1258,  0.0632],
          [-0.2605,  0.0809,  0.1567],
          [ 0.0000,  0.0000, -0.0000]]],


        [[[ 0.2477,  0.2583,  0.0000],
          [-0.1864, -0.0000, -0.1780],
          [ 0.2941,  0.2786,  0.2635]]],


        [[[-0.0440, -0.0000, -0.0000],
          [-0.0000,  0.0778,  0.0237],
          [ 0.3314, -0.0815,  0.3027]]],


        [[[ 0.2188,  0.0973,  0.0363],
          [-0.0865, -0.0000, -0.0754],
          [ 0.1529,  0.2711, -0.0000]]],


        [[[-0.0000, -0.0000, -0.1902],
          [-0.0712,  0.0088,  0.3326],
          [-0.2331, -0.0000,  0.2693]]]], device='cuda:1',
       grad_fn=<MulBackward0>)


In [12]:
print(module1.bias)

tensor([ 0.1990,  0.2356, -0.2919,  0.0000,  0.0000,  0.0286], device='cuda:1',
       grad_fn=<MulBackward0>)


In [13]:
print(model1.state_dict().keys())

odict_keys(['conv1.weight_orig', 'conv1.bias_orig', 'conv1.weight_mask', 'conv1.bias_mask', 'conv2.weight', 'conv2.bias', 'fc1.weight', 'fc1.bias', 'fc2.weight', 'fc2.bias', 'fc3.weight', 'fc3.bias'])


In [14]:
print(module1.state_dict().keys())

odict_keys(['weight_orig', 'bias_orig', 'weight_mask', 'bias_mask'])


In [15]:
print(model2.conv1.bias_mask)

AttributeError: 'Conv2d' object has no attribute 'bias_mask'

In [16]:
class FooBarPruningMethod(prune.BasePruningMethod):
    """Prune every other entry in a tensor
    """
    PRUNING_TYPE = 'structured'

    def compute_mask(self, t, default_mask):
        mask = default_mask.clone()
        mask.view(-1)[::2] = 0
        return mask
    
def foobar_unstructured(module, name):
    """Prunes tensor corresponding to parameter called `name` in `module`
    by removing every other entry in the tensors.
    Modifies module in place (and also return the modified module)
    by:
    1) adding a named buffer called `name+'_mask'` corresponding to the
    binary mask applied to the parameter `name` by the pruning method.
    The parameter `name` is replaced by its pruned version, while the
    original (unpruned) parameter is stored in a new parameter named
    `name+'_orig'`.

    Args:
        module (nn.Module): module containing the tensor to prune
        name (string): parameter name within `module` on which pruning
                will act.

    Returns:
        module (nn.Module): modified (i.e. pruned) version of the input
            module

    Examples:
        >>> m = nn.Linear(3, 4)
        >>> foobar_unstructured(m, name='bias')
    """
    FooBarPruningMethod.apply(module, name)
    return module

In [17]:
foobar_unstructured(model2.conv1, name='bias')
print(model2.conv1.bias_mask)

tensor([0., 1., 0., 1., 0., 1.], device='cuda:1')


In [18]:
model2.conv1.bias_mask = model1.conv1.bias_mask

In [19]:
print(model2.conv1.bias_mask)

tensor([1., 1., 1., 0., 0., 1.], device='cuda:1')


In [None]:
print(module1.state_dict().keys())

In [20]:
print(module2.state_dict().keys())

odict_keys(['weight', 'bias_orig', 'bias_mask'])


In [23]:
print(module2.bias_mask)

tensor([ 0.0000, -0.2679, -0.0000,  0.1971, -0.0000,  0.2151], device='cuda:1',
       grad_fn=<MulBackward0>)


In [24]:
module2.bias = module1.bias

In [25]:
print(module2.bias)

tensor([ 0.1990,  0.2356, -0.2919,  0.0000,  0.0000,  0.0286], device='cuda:1',
       grad_fn=<MulBackward0>)


In [26]:
print(module2.bias)

tensor([ 0.1990,  0.2356, -0.2919,  0.0000,  0.0000,  0.0286], device='cuda:1',
       grad_fn=<MulBackward0>)


In [28]:
print(module2._forward_pre_hooks)

OrderedDict([(2, <__main__.FooBarPruningMethod object at 0x7f3b91e98810>)])


In [29]:
print(module2.bias)

tensor([ 0.1990,  0.2356, -0.2919,  0.0000,  0.0000,  0.0286], device='cuda:1',
       grad_fn=<MulBackward0>)


In [None]:
foobar_unstructured(model2.conv1, name='weight')

In [None]:
print(model2.conv1.weight_mask)

In [None]:
print(module2.state_dict().keys())

In [None]:
model2.conv1.weight_mask = model1.conv1.weight_mask

In [None]:
print(model2.conv1.weight_mask)

In [None]:
print(model2.conv1.weight)

In [None]:
print(module1.weight)

In [None]:
print()