Pruners refactor (#1820)
tanglang96 authored and chicm-ms committed Dec 12, 2019
1 parent 9484efb commit ac6f420
Showing 6 changed files with 295 additions and 311 deletions.
@@ -4,59 +4,7 @@
import torch.nn.functional as F
from torchvision import datasets, transforms
from nni.compression.torch import L1FilterPruner


class vgg(nn.Module):
    def __init__(self, init_weights=True):
        super(vgg, self).__init__()
        cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512]
        self.cfg = cfg
        self.feature = self.make_layers(cfg, True)
        num_classes = 10
        self.classifier = nn.Sequential(
            nn.Linear(cfg[-1], 512),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Linear(512, num_classes)
        )
        if init_weights:
            self._initialize_weights()

    def make_layers(self, cfg, batch_norm=True):
        layers = []
        in_channels = 3
        for v in cfg:
            if v == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1, bias=False)
                if batch_norm:
                    layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
                else:
                    layers += [conv2d, nn.ReLU(inplace=True)]
                in_channels = v
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.feature(x)
        x = nn.AvgPool2d(2)(x)
        x = x.view(x.size(0), -1)
        y = self.classifier(x)
        return y

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(0.5)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
from models.cifar10.vgg import VGG


def train(model, device, train_loader, optimizer):
@@ -111,7 +59,7 @@ def main():
        ])),
        batch_size=200, shuffle=False)

    model = vgg()
    model = VGG(depth=16)
    model.to(device)

    # Train the base VGG-16 model
@@ -162,7 +110,7 @@ def main():

    # Test the exported model
    print('=' * 10 + 'Test on the pruned model after fine tune' + '=' * 10)
    new_model = vgg()
    new_model = VGG(depth=16)
    new_model.to(device)
    new_model.load_state_dict(torch.load('pruned_vgg16_cifar10.pth'))
    test(new_model, device, test_loader)
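Note (not part of this diff): the 'pruned_vgg16_cifar10.pth' checkpoint loaded above is produced earlier in this script with NNI's L1FilterPruner. A minimal sketch of that flow is shown below; the sparsity value and config keys are assumptions for illustration, since the example's real config_list sits outside this hunk.

from nni.compression.torch import L1FilterPruner
from models.cifar10.vgg import VGG

model = VGG(depth=16)
config_list = [{'sparsity': 0.5, 'op_types': ['Conv2d']}]   # assumed sparsity / op types
pruner = L1FilterPruner(model, config_list)
model = pruner.compress()            # wraps the selected layers with pruning masks
# ... train / fine-tune the masked model here ...
pruner.export_model(model_path='pruned_vgg16_cifar10.pth', mask_path='mask_vgg16_cifar10.pth')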
63 changes: 63 additions & 0 deletions examples/model_compress/models/cifar10/vgg.py
@@ -0,0 +1,63 @@
import math
import torch
import torch.nn as nn
import torch.nn.functional as F

defaultcfg = {
    11: [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512],
    13: [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512],
    16: [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512],
    19: [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512],
}


class VGG(nn.Module):
    def __init__(self, depth=16):
        super(VGG, self).__init__()
        cfg = defaultcfg[depth]
        self.cfg = cfg
        self.feature = self.make_layers(cfg, True)
        num_classes = 10
        self.classifier = nn.Sequential(
            nn.Linear(cfg[-1], 512),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Linear(512, num_classes)
        )
        self._initialize_weights()

    def make_layers(self, cfg, batch_norm=False):
        layers = []
        in_channels = 3
        for v in cfg:
            if v == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1, bias=False)
                if batch_norm:
                    layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
                else:
                    layers += [conv2d, nn.ReLU(inplace=True)]
                in_channels = v
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.feature(x)
        x = nn.AvgPool2d(2)(x)
        x = x.view(x.size(0), -1)
        y = self.classifier(x)
        return y

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(0.5)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
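As a quick sanity check (not part of the committed file), the depth-16 variant can be exercised on a CIFAR-10 sized batch: four max-pools bring the 32x32 input down to 2x2, the forward pass average-pools to 1x1, and the classifier maps the resulting 512-d vector to 10 classes.

import torch
from models.cifar10.vgg import VGG

net = VGG(depth=16)
dummy = torch.randn(8, 3, 32, 32)   # a CIFAR-10 shaped batch
print(net(dummy).shape)             # torch.Size([8, 10])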
60 changes: 4 additions & 56 deletions examples/model_compress/pruning_kd.py
@@ -5,59 +5,7 @@
from torchvision import datasets, transforms
from nni.compression.torch import L1FilterPruner
from knowledge_distill.knowledge_distill import KnowledgeDistill


class vgg(nn.Module):
    def __init__(self, init_weights=True):
        super(vgg, self).__init__()
        cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512]
        self.cfg = cfg
        self.feature = self.make_layers(cfg, True)
        num_classes = 10
        self.classifier = nn.Sequential(
            nn.Linear(cfg[-1], 512),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Linear(512, num_classes)
        )
        if init_weights:
            self._initialize_weights()

    def make_layers(self, cfg, batch_norm=True):
        layers = []
        in_channels = 3
        for v in cfg:
            if v == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1, bias=False)
                if batch_norm:
                    layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
                else:
                    layers += [conv2d, nn.ReLU(inplace=True)]
                in_channels = v
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.feature(x)
        x = nn.AvgPool2d(2)(x)
        x = x.view(x.size(0), -1)
        y = self.classifier(x)
        return y

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(0.5)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
from models.cifar10.vgg import VGG


def train(model, device, train_loader, optimizer, kd=None):
@@ -119,7 +67,7 @@ def main():
        ])),
        batch_size=200, shuffle=False)

    model = vgg()
    model = VGG(depth=16)
    model.to(device)

    # Train the base VGG-16 model
@@ -156,7 +104,7 @@ def main():
    print('=' * 10 + 'Fine tuning' + '=' * 10)
    optimizer_finetune = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-4)
    best_top1 = 0
    kd_teacher_model = vgg()
    kd_teacher_model = VGG(depth=16)
    kd_teacher_model.to(device)
    kd_teacher_model.load_state_dict(torch.load('vgg16_cifar10.pth'))
    kd = KnowledgeDistill(kd_teacher_model, kd_T=5)
@@ -173,7 +121,7 @@ def main():

    # Test the exported model
    print('=' * 10 + 'Test on the pruned model after fine tune' + '=' * 10)
    new_model = vgg()
    new_model = VGG(depth=16)
    new_model.to(device)
    new_model.load_state_dict(torch.load('pruned_vgg16_cifar10.pth'))
    test(new_model, device, test_loader)
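For reference, the KnowledgeDistill helper used above (with temperature kd_T=5) corresponds conceptually to the Hinton-style soft-target loss. The sketch below expresses that loss in plain PyTorch; it is not the helper's actual API, which this diff does not show.

import torch.nn.functional as F

def distillation_loss(student_logits, teacher_logits, T=5.0):
    # KL divergence between temperature-softened teacher and student distributions,
    # scaled by T^2 so gradient magnitudes stay comparable to the hard-label loss.
    soft_teacher = F.softmax(teacher_logits / T, dim=1)
    log_soft_student = F.log_softmax(student_logits / T, dim=1)
    return F.kl_div(log_soft_student, soft_teacher, reduction='batchmean') * (T * T)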
@@ -4,53 +4,7 @@
import torch.nn.functional as F
from torchvision import datasets, transforms
from nni.compression.torch import SlimPruner


class vgg(nn.Module):
    def __init__(self, init_weights=True):
        super(vgg, self).__init__()
        cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512]
        self.feature = self.make_layers(cfg, True)
        num_classes = 10
        self.classifier = nn.Linear(cfg[-1], num_classes)
        if init_weights:
            self._initialize_weights()

    def make_layers(self, cfg, batch_norm=False):
        layers = []
        in_channels = 3
        for v in cfg:
            if v == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1, bias=False)
                if batch_norm:
                    layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
                else:
                    layers += [conv2d, nn.ReLU(inplace=True)]
                in_channels = v
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.feature(x)
        x = nn.AvgPool2d(2)(x)
        x = x.view(x.size(0), -1)
        y = self.classifier(x)
        return y

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(0.5)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
from models.cifar10.vgg import VGG


def updateBN(model):
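The body of updateBN is collapsed in this view. In the Network Slimming recipe this step usually adds an L1 subgradient on every BatchNorm scaling factor during training, so unimportant channels shrink toward zero before SlimPruner removes them. A typical implementation looks like the sketch below; the signature with an explicit coefficient `s` and its value are assumptions, not taken from this file.

import torch
import torch.nn as nn

def updateBN(model, s=1e-4):
    # add the subgradient of s * |gamma| to the existing BN-scale gradients
    for m in model.modules():
        if isinstance(m, nn.BatchNorm2d):
            m.weight.grad.data.add_(s * torch.sign(m.weight.data))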
@@ -114,7 +68,7 @@ def main():
        ])),
        batch_size=200, shuffle=False)

    model = vgg()
    model = VGG(depth=19)
    model.to(device)

    # Train the base VGG-19 model
@@ -165,7 +119,7 @@ def main():

    # Test the exported model
    print('=' * 10 + 'Test the export pruned model after fine tune' + '=' * 10)
    new_model = vgg()
    new_model = VGG(depth=19)
    new_model.to(device)
    new_model.load_state_dict(torch.load('pruned_vgg19_cifar10.pth'))
    test(new_model, device, test_loader)
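Unlike the L1FilterPruner example, SlimPruner (imported at the top of this file) acts on the BatchNorm scaling factors that updateBN sparsifies. A minimal configuration sketch follows; the sparsity value is an assumption, not copied from this example.

from nni.compression.torch import SlimPruner
from models.cifar10.vgg import VGG

model = VGG(depth=19)
config_list = [{'sparsity': 0.7, 'op_types': ['BatchNorm2d']}]   # assumed sparsity value
pruner = SlimPruner(model, config_list)
model = pruner.compress()            # masks channels whose BN scale falls below the global threshold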