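<b>Pruning</b><br>
Train a small CNN on Fashion-MNIST, globally prune 90% of its conv/linear weights by L1 magnitude, then retrain and compare the first conv layer's weights at each stage.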

In [1]:
import numpy as np
import torch

# Seed NumPy and PyTorch (default generator and the CUDA generator) with one
# shared value so repeated runs of the notebook start from the same state.
seed = 100
for seed_rng in (np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
  seed_rng(seed)

In [2]:
# Dataset

from torchvision import datasets, transforms
import torch.utils as utils

# ToTensor is the only preprocessing step applied to the images.
transform = transforms.Compose([transforms.ToTensor()])

# Fashion-MNIST train / test splits; both are stored under ./data and
# downloaded if requested files are missing (download=True).
dataset_train = datasets.FashionMNIST(
    './data', train=True, download=True, transform=transform)
dataset_test = datasets.FashionMNIST(
    './data', train=False, download=True, transform=transform)

batch_size = 1000

# Both loaders use identical settings, so the options are spelled out once.
loader_options = dict(batch_size=batch_size, shuffle=True, num_workers=4)
dataloader_train = utils.data.DataLoader(dataset_train, **loader_options)
dataloader_test = utils.data.DataLoader(dataset_test, **loader_options)

In [3]:
# Network

import torch.nn as nn
import torch.nn.functional as F
from collections import OrderedDict

class Net(nn.Module):
  """Small CNN classifier: a convolutional feature extractor followed by a
  two-layer fully connected head that emits 10 scores per sample.

  Every layer is registered under an explicit name (conv.conv1, conv.conv2,
  fc.fc1, fc.fc2, ...) so individual layers can be addressed as attributes.
  """

  def __init__(self):
    super().__init__()

    # Feature extractor: (5x5 conv -> ReLU -> 2x2 max-pool) applied twice,
    # growing the channel count 1 -> 4 -> 8.
    conv_layers = OrderedDict()
    conv_layers["conv1"] = nn.Conv2d(1, 4, kernel_size=5)
    conv_layers["relu1"] = nn.ReLU()
    conv_layers["pool1"] = nn.MaxPool2d(2)
    conv_layers["conv2"] = nn.Conv2d(4, 8, kernel_size=5)
    conv_layers["relu2"] = nn.ReLU()
    conv_layers["pool2"] = nn.MaxPool2d(2)
    self.conv = nn.Sequential(conv_layers)

    # Classifier head: fc1 consumes the flattened 8 * 4 * 4 feature map.
    fc_layers = OrderedDict()
    fc_layers["fc1"] = nn.Linear(8 * 4 * 4, 100)
    fc_layers["relu1"] = nn.ReLU()
    fc_layers["fc2"] = nn.Linear(100, 10)
    self.fc = nn.Sequential(fc_layers)

  def forward(self, x1):
    """Run the feature extractor and the head on the batch x1.

    The conv output is flattened to one row per sample before it is passed
    to the fully connected layers; the result of the last linear layer is
    returned unchanged (no softmax is applied here).
    """
    feature_maps = self.conv(x1)
    n_samples = feature_maps.size(0)
    flattened = feature_maps.view(n_samples, -1)
    return self.fc(flattened)

In [4]:
# Training

from torch import optim

# Train on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Net().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Each epoch: one optimisation pass over the training set, then an
# evaluation pass over the training set and over the test set.
for i in range(20):
  print(f"EPOCH: {i+1}")

  ### Train ###
  model.train()
  for x, t in dataloader_train:
    x = x.to(device)
    t = t.to(device)
    model.zero_grad()
    y = model(x)
    loss = criterion(y, t)
    loss.backward()
    optimizer.step()

  ### Evaluate (train split first, then test split) ###
  model.eval()
  # No gradients are needed for evaluation; disabling autograd avoids
  # building graphs that would never be back-propagated.
  with torch.no_grad():
    # The trailing space in "test " keeps the printed columns aligned.
    for split, loader in (("train", dataloader_train), ("test ", dataloader_test)):
      sum_loss = 0.0
      sum_correct = 0
      sum_iter = 0
      sum_samples = 0
      for x, t in loader:
        x = x.to(device)
        t = t.to(device)
        y = model(x)
        loss = criterion(y, t)
        _, predicted = y.max(1)
        sum_loss += loss.item()
        sum_correct += (predicted == t).sum().item()
        sum_iter += 1
        # Count the samples actually seen so the accuracy stays correct
        # even if the last batch is smaller than batch_size.
        sum_samples += t.size(0)
      # Loss is the mean of the per-batch losses; accuracy is per sample.
      print(f"  {split} loss: {sum_loss/sum_iter}")
      print(f"  {split} acc : {sum_correct/sum_samples}")

EPOCH: 1
  train loss: 0.916900364557902
  train acc : 0.6814
  test  loss: 0.9292918026447297
  test  acc : 0.6745
EPOCH: 2
  train loss: 0.6961195518573126
  train acc : 0.7428
  test  loss: 0.7134989440441132
  test  acc : 0.7348
EPOCH: 3
  train loss: 0.6218432913223902
  train acc : 0.7687333333333334
  test  loss: 0.6417601704597473
  test  acc : 0.7603
EPOCH: 4
  train loss: 0.5812292496363322
  train acc : 0.7846833333333333
  test  loss: 0.6023806750774383
  test  acc : 0.7779
EPOCH: 5
  train loss: 0.5511354799071948
  train acc : 0.7942166666666667
  test  loss: 0.5718656063079834
  test  acc : 0.786
EPOCH: 6
  train loss: 0.5284350504477818
  train acc : 0.8028166666666666
  test  loss: 0.5491849839687347
  test  acc : 0.7965
EPOCH: 7
  train loss: 0.5044272114833196
  train acc : 0.8188666666666666
  test  loss: 0.5298647999763488
  test  acc : 0.8099
EPOCH: 8
  train loss: 0.4894048606355985
  train acc : 0.8225
  test  loss: 0.5142547398805618
  test  acc : 0.8147
EPOCH:

In [5]:
# Check Parameter after Training
# Print the first conv layer's kernels as a baseline; the same tensor is
# printed again after pruning and after retraining for comparison.

print(model.conv.conv1.weight)

Parameter containing:
tensor([[[[-0.3643, -0.0470, -0.3058, -0.2179, -0.1069],
          [ 0.0260, -0.1455, -0.2726, -0.2356, -0.2645],
          [-0.1380,  0.1869,  0.2053,  0.1421, -0.0680],
          [ 0.2623,  0.3449,  0.2439, -0.0539, -0.0551],
          [-0.0584,  0.2615,  0.1393,  0.2441,  0.0101]]],


        [[[ 0.2303,  0.0603,  0.1114, -0.0284,  0.1350],
          [ 0.0573, -0.1712,  0.2318,  0.0889, -0.0943],
          [-0.2137, -0.0237,  0.1287, -0.0534,  0.2629],
          [-0.1983, -0.5207,  0.0641,  0.0929,  0.1013],
          [-0.2412, -0.5509, -0.0127,  0.3171,  0.1571]]],


        [[[ 0.0833,  0.2642,  0.1289,  0.1272,  0.2217],
          [ 0.1154,  0.2130,  0.0286,  0.2911,  0.2739],
          [-0.0471,  0.1686,  0.1004,  0.2821,  0.2578],
          [ 0.2181,  0.2648,  0.2042,  0.0778,  0.0007],
          [ 0.3234,  0.2484, -0.0234,  0.2185,  0.0533]]],


        [[[-0.2854,  0.3726,  0.2014,  0.3022,  0.1529],
          [-0.2578,  0.3927,  0.1203, -0.0054,  0.3152

In [6]:
# Pruning

import torch.nn.utils.prune as prune

# Prune the "weight" tensor of every conv and linear layer; biases are not
# listed and are therefore left untouched.
parameters_to_prune = tuple(
    (layer, "weight")
    for layer in (
        model.conv.conv1,
        model.conv.conv2,
        model.fc.fc1,
        model.fc.fc2,
    )
)

# Global L1 unstructured pruning with amount=0.9: the pruning budget is
# shared across all listed tensors rather than applied layer by layer.
prune.global_unstructured(
    parameters_to_prune, pruning_method=prune.L1Unstructured, amount=0.9)

In [7]:
# Check Parameter after Pruning
# Print the same conv1 kernels again: in the recorded output the pruned
# entries appear as 0.0000 / -0.0000 while the remaining values are unchanged.

print(model.conv.conv1.weight)

tensor([[[[-0.3643, -0.0000, -0.3058, -0.2179, -0.0000],
          [ 0.0000, -0.0000, -0.2726, -0.2356, -0.2645],
          [-0.0000,  0.1869,  0.2053,  0.0000, -0.0000],
          [ 0.2623,  0.3449,  0.2439, -0.0000, -0.0000],
          [-0.0000,  0.2615,  0.0000,  0.2441,  0.0000]]],


        [[[ 0.2303,  0.0000,  0.0000, -0.0000,  0.0000],
          [ 0.0000, -0.1712,  0.2318,  0.0000, -0.0000],
          [-0.2137, -0.0000,  0.0000, -0.0000,  0.2629],
          [-0.1983, -0.5207,  0.0000,  0.0000,  0.0000],
          [-0.2412, -0.5509, -0.0000,  0.3171,  0.1571]]],


        [[[ 0.0000,  0.2642,  0.0000,  0.0000,  0.2217],
          [ 0.0000,  0.2130,  0.0000,  0.2911,  0.2739],
          [-0.0000,  0.1686,  0.0000,  0.2821,  0.2578],
          [ 0.2181,  0.2648,  0.2042,  0.0000,  0.0000],
          [ 0.3234,  0.2484, -0.0000,  0.2185,  0.0000]]],


        [[[-0.2854,  0.3726,  0.2014,  0.3022,  0.0000],
          [-0.2578,  0.3927,  0.0000, -0.0000,  0.3152],
          [-0.0000,

In [8]:
# Retraining

# Fine-tune the pruned model with the same schedule as the initial training.
# NOTE(review): this reuses the `optimizer` and `criterion` objects created in
# the training cell; no new optimizer is built after pruning.
for i in range(20):
  print(f"EPOCH: {i+1}")

  ### Train ###
  model.train()
  for x, t in dataloader_train:
    x = x.to(device)
    t = t.to(device)
    model.zero_grad()
    y = model(x)
    loss = criterion(y, t)
    loss.backward()
    optimizer.step()

  ### Evaluate (train split first, then test split) ###
  model.eval()
  # No gradients are needed for evaluation; disabling autograd avoids
  # building graphs that would never be back-propagated.
  with torch.no_grad():
    # The trailing space in "test " keeps the printed columns aligned.
    for split, loader in (("train", dataloader_train), ("test ", dataloader_test)):
      sum_loss = 0.0
      sum_correct = 0
      sum_iter = 0
      sum_samples = 0
      for x, t in loader:
        x = x.to(device)
        t = t.to(device)
        y = model(x)
        loss = criterion(y, t)
        _, predicted = y.max(1)
        sum_loss += loss.item()
        sum_correct += (predicted == t).sum().item()
        sum_iter += 1
        # Count the samples actually seen so the accuracy stays correct
        # even if the last batch is smaller than batch_size.
        sum_samples += t.size(0)
      # Loss is the mean of the per-batch losses; accuracy is per sample.
      print(f"  {split} loss: {sum_loss/sum_iter}")
      print(f"  {split} acc : {sum_correct/sum_samples}")

EPOCH: 1
  train loss: 0.6670460641384125
  train acc : 0.7783333333333333
  test  loss: 0.6772859036922455
  test  acc : 0.7749
EPOCH: 2
  train loss: 0.5438341716925303
  train acc : 0.8072
  test  loss: 0.5615907907485962
  test  acc : 0.8017
EPOCH: 3
  train loss: 0.5016207868854204
  train acc : 0.8201333333333334
  test  loss: 0.5223968356847764
  test  acc : 0.8145
EPOCH: 4
  train loss: 0.48009795347849527
  train acc : 0.8272333333333334
  test  loss: 0.5029191106557847
  test  acc : 0.8206
EPOCH: 5
  train loss: 0.4636027877529462
  train acc : 0.8336666666666667
  test  loss: 0.4869105309247971
  test  acc : 0.8245
EPOCH: 6
  train loss: 0.45313053379456203
  train acc : 0.8367833333333333
  test  loss: 0.47716168463230135
  test  acc : 0.8267
EPOCH: 7
  train loss: 0.44468495398759844
  train acc : 0.84295
  test  loss: 0.47002136409282685
  test  acc : 0.8326
EPOCH: 8
  train loss: 0.4373081142703692
  train acc : 0.84595
  test  loss: 0.461372309923172
  test  acc : 0.834

In [9]:
# Check Parameter after Retraining
# Print conv1 once more to compare with the post-pruning values: in the
# recorded output the surviving weights have changed while the pruned
# positions are still zero.

print(model.conv.conv1.weight)

tensor([[[[-0.5071, -0.0000, -0.3762, -0.3596, -0.0000],
          [ 0.0000, -0.0000, -0.3131, -0.3259, -0.2814],
          [-0.0000,  0.2081,  0.2791,  0.0000, -0.0000],
          [ 0.3514,  0.3764,  0.3313, -0.0000, -0.0000],
          [-0.0000,  0.3709,  0.0000,  0.3380,  0.0000]]],


        [[[ 0.2791,  0.0000,  0.0000, -0.0000,  0.0000],
          [ 0.0000, -0.1832,  0.3585,  0.0000, -0.0000],
          [-0.2535, -0.0000,  0.0000, -0.0000,  0.4631],
          [-0.1945, -0.5588,  0.0000,  0.0000,  0.0000],
          [-0.3488, -0.7348, -0.0000,  0.5591,  0.3718]]],


        [[[ 0.0000,  0.4769,  0.0000,  0.0000,  0.4905],
          [ 0.0000,  0.3406,  0.0000,  0.5878,  0.5339],
          [-0.0000,  0.3267,  0.0000,  0.5620,  0.4551],
          [ 0.3397,  0.4492,  0.5112,  0.0000, -0.0000],
          [ 0.4447,  0.3937, -0.0000,  0.4002,  0.0000]]],


        [[[-0.1943,  0.4619,  0.4217,  0.5668,  0.0000],
          [-0.2825,  0.4541,  0.0000, -0.0000,  0.4516],
          [-0.0000,

参考<br>
https://pytorch.org/tutorials/intermediate/pruning_tutorial.html<br>
https://pytorch.org/docs/master/generated/torch.nn.utils.prune.global_unstructured.html