In [73]:
'''ResNet in PyTorch.

For Pre-activation ResNet, see 'preact_resnet.py'.

Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Deep Residual Learning for Image Recognition. arXiv:1512.03385
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicBlock(nn.Module):
    """Residual block built from two 3x3 convolutions, each followed by batch norm.

    The forward pass computes ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``.
    ``shortcut`` is an empty ``nn.Sequential`` (i.e. the identity) unless the
    stride differs from 1 or the channel count changes; in that case it is a
    strided 1x1 convolution followed by batch norm.

    Args:
        in_planes: number of channels of the input tensor.
        planes: number of channels produced by the two 3x3 convolutions.
        stride: stride of the first convolution and of the projection shortcut.
    """

    # The shortcut branch emits ``expansion * planes`` channels.
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        out_planes = self.expansion * planes

        # Main branch: only the first convolution may down-sample.
        self.conv1 = nn.Conv2d(
            in_planes, planes,
            kernel_size=3, stride=stride, padding=1, bias=False,
        )
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(
            planes, planes,
            kernel_size=3, stride=1, padding=1, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(planes)

        # Shortcut branch: project only when the shapes would not line up.
        shape_changes = stride != 1 or in_planes != out_planes
        if shape_changes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return the activated sum of the main branch and the shortcut of ``x``."""
        hidden = self.conv1(x)
        hidden = F.relu(self.bn1(hidden))
        out = self.bn2(self.conv2(hidden))
        out += self.shortcut(x)
        return F.relu(out)


class Bottleneck(nn.Module):
    """Residual block with a 1x1 -> 3x3 -> 1x1 convolution stack.

    ``conv1`` maps ``in_planes`` to ``planes`` channels, ``conv2`` is the 3x3
    convolution that carries the stride, and ``conv3`` widens the result to
    ``expansion * planes`` channels.  Every convolution is followed by batch
    norm.  ``shortcut`` is an empty ``nn.Sequential`` (identity) unless the
    stride differs from 1 or ``in_planes != expansion * planes``; then it is a
    strided 1x1 convolution followed by batch norm.

    Args:
        in_planes: number of channels of the input tensor.
        planes: width of the two inner convolutions.
        stride: stride of the 3x3 convolution and of the projection shortcut.
    """

    # The block emits ``expansion * planes`` channels.
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        out_planes = self.expansion * planes

        # Main branch.
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(
            planes, planes,
            kernel_size=3, stride=stride, padding=1, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # Shortcut branch: project only when the shapes would not line up.
        shape_changes = stride != 1 or in_planes != out_planes
        if shape_changes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return the activated sum of the main branch and the shortcut of ``x``."""
        hidden = F.relu(self.bn1(self.conv1(x)))
        hidden = F.relu(self.bn2(self.conv2(hidden)))
        out = self.bn3(self.conv3(hidden))
        out += self.shortcut(x)
        return F.relu(out)


class ResNet2(nn.Module):
    """ResNet variant whose four stages all use 16 planes and stride 1.

    Layout: 3x3 stem convolution (3 -> 16 channels) + batch norm + ReLU, four
    residual stages (``layer1`` .. ``layer4``), 4x4 average pooling, and a
    linear classifier.  The classifier expects ``1024 * block.expansion``
    features, which is what a 32x32 input yields (16 * expansion channels on
    an 8x8 pooled map).

    Args:
        block: residual block class; must expose ``expansion`` and accept
            ``(in_planes, planes, stride)``.
        num_blocks: sequence giving the number of blocks in each of the four stages.
        num_classes: size of the classifier output.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet2, self).__init__()
        # Channel count fed to the next block; advanced by ``_make_layer``.
        self.in_planes = 16

        # Stem.
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)

        # (planes, stride) of layer1 .. layer4, built in order.
        stage_specs = ((16, 1), (16, 1), (16, 1), (16, 1))
        for idx, (planes, first_stride) in enumerate(stage_specs):
            stage = self._make_layer(block, planes, num_blocks[idx], stride=first_stride)
            setattr(self, 'layer{}'.format(idx + 1), stage)

        self.linear = nn.Linear(1024 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage; only its first block uses ``stride``, the rest use 1."""
        per_block_strides = [stride]
        per_block_strides.extend([1] * (num_blocks - 1))

        stage = []
        for block_stride in per_block_strides:
            stage.append(block(self.in_planes, planes, block_stride))
            # The next block consumes what this one produced.
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return the class scores for a batch of images ``x``."""
        out = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            out = stage(out)
        pooled = F.avg_pool2d(out, 4)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)

class ResNet3(nn.Module):
    """ResNet variant with stage widths 16/32/64/128 and stage strides 2/2/1/1.

    Layout: 3x3 stem convolution (3 -> 16 channels) + batch norm + ReLU, four
    residual stages (``layer1`` .. ``layer4``), 4x4 average pooling, and a
    linear classifier.  The classifier expects ``512 * block.expansion``
    features, which is what a 32x32 input yields (128 * expansion channels on
    a 2x2 pooled map).

    Args:
        block: residual block class; must expose ``expansion`` and accept
            ``(in_planes, planes, stride)``.
        num_blocks: sequence giving the number of blocks in each of the four stages.
        num_classes: size of the classifier output.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet3, self).__init__()
        # Channel count fed to the next block; advanced by ``_make_layer``.
        self.in_planes = 16

        # Stem.
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)

        # (planes, stride) of layer1 .. layer4, built in order.
        stage_specs = ((16, 2), (32, 2), (64, 1), (128, 1))
        for idx, (planes, first_stride) in enumerate(stage_specs):
            stage = self._make_layer(block, planes, num_blocks[idx], stride=first_stride)
            setattr(self, 'layer{}'.format(idx + 1), stage)

        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage; only its first block uses ``stride``, the rest use 1."""
        per_block_strides = [stride]
        per_block_strides.extend([1] * (num_blocks - 1))

        stage = []
        for block_stride in per_block_strides:
            stage.append(block(self.in_planes, planes, block_stride))
            # The next block consumes what this one produced.
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return the class scores for a batch of images ``x``."""
        out = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            out = stage(out)
        pooled = F.avg_pool2d(out, 4)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)

class ResNet4(nn.Module):
    """ResNet variant with stage widths 64/64/64/128 and stage strides 2/2/1/1.

    Layout: 3x3 stem convolution (3 -> 16 channels) + batch norm + ReLU, four
    residual stages (``layer1`` .. ``layer4``), 4x4 average pooling, and a
    linear classifier.  The classifier expects ``512 * block.expansion``
    features, which is what a 32x32 input yields (128 * expansion channels on
    a 2x2 pooled map).

    Args:
        block: residual block class; must expose ``expansion`` and accept
            ``(in_planes, planes, stride)``.
        num_blocks: sequence giving the number of blocks in each of the four stages.
        num_classes: size of the classifier output.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet4, self).__init__()
        # Channel count fed to the next block; advanced by ``_make_layer``.
        self.in_planes = 16

        # Stem.
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)

        # (planes, stride) of layer1 .. layer4, built in order.
        stage_specs = ((64, 2), (64, 2), (64, 1), (128, 1))
        for idx, (planes, first_stride) in enumerate(stage_specs):
            stage = self._make_layer(block, planes, num_blocks[idx], stride=first_stride)
            setattr(self, 'layer{}'.format(idx + 1), stage)

        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage; only its first block uses ``stride``, the rest use 1."""
        per_block_strides = [stride]
        per_block_strides.extend([1] * (num_blocks - 1))

        stage = []
        for block_stride in per_block_strides:
            stage.append(block(self.in_planes, planes, block_stride))
            # The next block consumes what this one produced.
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return the class scores for a batch of images ``x``."""
        out = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            out = stage(out)
        pooled = F.avg_pool2d(out, 4)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)

class ResNet5(nn.Module):
    """ResNet variant with stage widths 32/32/64/64 and stage strides 2/2/1/1.

    Layout: 3x3 stem convolution (3 -> 16 channels) + batch norm + ReLU, four
    residual stages (``layer1`` .. ``layer4``), 4x4 average pooling, and a
    linear classifier.  The classifier expects ``256 * block.expansion``
    features, which is what a 32x32 input yields (64 * expansion channels on
    a 2x2 pooled map).

    Args:
        block: residual block class; must expose ``expansion`` and accept
            ``(in_planes, planes, stride)``.
        num_blocks: sequence giving the number of blocks in each of the four stages.
        num_classes: size of the classifier output.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet5, self).__init__()
        # Channel count fed to the next block; advanced by ``_make_layer``.
        self.in_planes = 16

        # Stem.
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)

        # (planes, stride) of layer1 .. layer4, built in order.
        stage_specs = ((32, 2), (32, 2), (64, 1), (64, 1))
        for idx, (planes, first_stride) in enumerate(stage_specs):
            stage = self._make_layer(block, planes, num_blocks[idx], stride=first_stride)
            setattr(self, 'layer{}'.format(idx + 1), stage)

        self.linear = nn.Linear(256 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage; only its first block uses ``stride``, the rest use 1."""
        per_block_strides = [stride]
        per_block_strides.extend([1] * (num_blocks - 1))

        stage = []
        for block_stride in per_block_strides:
            stage.append(block(self.in_planes, planes, block_stride))
            # The next block consumes what this one produced.
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return the class scores for a batch of images ``x``."""
        out = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            out = stage(out)
        pooled = F.avg_pool2d(out, 4)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)

def ResNet_4():
    """Build a ``ResNet4`` from ``BasicBlock`` with two blocks in each of its four stages."""
    blocks_per_stage = [2, 2, 2, 2]
    return ResNet4(BasicBlock, blocks_per_stage)
def ResNet_5():
    """Build a ``ResNet5`` from ``BasicBlock`` with two blocks in each of its four stages."""
    blocks_per_stage = [2, 2, 2, 2]
    return ResNet5(BasicBlock, blocks_per_stage)
def ResNet_2():
    """Build a ``ResNet2`` from ``BasicBlock`` with two blocks in each of its four stages."""
    blocks_per_stage = [2, 2, 2, 2]
    return ResNet2(BasicBlock, blocks_per_stage)

def ResNet_3():
    """Build a ``ResNet3`` from ``BasicBlock`` with two blocks in each of its four stages."""
    blocks_per_stage = [2, 2, 2, 2]
    return ResNet3(BasicBlock, blocks_per_stage)

def ResNet18_original():
    """Build a ``ResNet`` from ``BasicBlock`` with stage depths [2, 2, 2, 2].

    The body is the same as ``ResNet18``.
    """
    blocks_per_stage = [2, 2, 2, 2]
    return ResNet(BasicBlock, blocks_per_stage)

def ResNet18():
    """Build a ``ResNet`` from ``BasicBlock`` with stage depths [2, 2, 2, 2]."""
    blocks_per_stage = [2, 2, 2, 2]
    return ResNet(BasicBlock, blocks_per_stage)

def ResNet34():
    """Build a ``ResNet`` from ``BasicBlock`` with stage depths [3, 4, 6, 3]."""
    blocks_per_stage = [3, 4, 6, 3]
    return ResNet(BasicBlock, blocks_per_stage)

def ResNet50():
    """Build a ``ResNet`` from ``Bottleneck`` with stage depths [3, 4, 6, 3]."""
    blocks_per_stage = [3, 4, 6, 3]
    return ResNet(Bottleneck, blocks_per_stage)

def ResNet101():
    """Build a ``ResNet`` from ``Bottleneck`` with stage depths [3, 4, 23, 3]."""
    blocks_per_stage = [3, 4, 23, 3]
    return ResNet(Bottleneck, blocks_per_stage)

def ResNet152():
    """Build a ``ResNet`` from ``Bottleneck`` with stage depths [3, 8, 36, 3]."""
    blocks_per_stage = [3, 8, 36, 3]
    return ResNet(Bottleneck, blocks_per_stage)


def test():
    """Smoke test: push one random 3x32x32 image through ``ResNet18`` and print the output size."""
    model = ResNet18()
    sample = torch.randn(1, 3, 32, 32)
    scores = model(sample)
    print(scores.size())

    
def test4():
    """Smoke test for ``ResNet_4``: print the output size and the trainable parameter count."""
    model = ResNet_4()
    sample = torch.randn(1, 3, 32, 32)
    scores = model(sample)
    print(scores.size())
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('size = {}'.format(trainable))
    
def test5():
    """Smoke test for ``ResNet_5``: print the output size and the trainable parameter count."""
    model = ResNet_5()
    sample = torch.randn(1, 3, 32, 32)
    scores = model(sample)
    print(scores.size())
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('size = {}'.format(trainable))
# test()

In [74]:
# Run the ResNet_4 and ResNet_5 smoke tests, printing a label before each one.
print ('4')
test4()
print ('\n5')
test5()

4
torch.Size([1, 10])
size = 952794

5
torch.Size([1, 10])
size = 354010


In [28]:
'''ResNet in PyTorch.

For Pre-activation ResNet, see 'preact_resnet.py'.

Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Deep Residual Learning for Image Recognition. arXiv:1512.03385
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class Bottleneck(nn.Module):
    """Three-layer residual block: 1x1 reduce, 3x3 (strided), 1x1 expand.

    Each convolution is followed by batch norm; the block output has
    ``expansion * planes`` channels.  The skip path is an empty
    ``nn.Sequential`` (identity) when ``stride == 1`` and the input already
    has ``expansion * planes`` channels; otherwise it is a strided 1x1
    convolution followed by batch norm.

    Args:
        in_planes: channels of the incoming tensor.
        planes: channels of the two inner convolutions.
        stride: stride applied by the 3x3 convolution and by the projection shortcut.
    """

    # Channel multiplier applied to ``planes`` for the block output.
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        widened = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, widened, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(widened)

        # Identity skip by default; a projection is only built when required.
        skip_layers = []
        if not (stride == 1 and in_planes == widened):
            skip_layers.append(
                nn.Conv2d(in_planes, widened, kernel_size=1, stride=stride, bias=False)
            )
            skip_layers.append(nn.BatchNorm2d(widened))
        self.shortcut = nn.Sequential(*skip_layers)

    def forward(self, x):
        """Apply the three conv/bn layers, add the skip path and activate."""
        y = F.relu(self.bn1(self.conv1(x)))
        y = F.relu(self.bn2(self.conv2(y)))
        y = self.conv3(y)
        y = self.bn3(y)
        y += self.shortcut(x)
        return F.relu(y)


class ResNet(nn.Module):
    """ResNet with stage widths 16/32/64/128 and stage strides 2/2/1/1.

    Layout: 3x3 stem convolution (3 -> 16 channels) + batch norm + ReLU, four
    residual stages (``layer1`` .. ``layer4``), 4x4 average pooling, and a
    linear classifier.  The classifier expects ``512 * block.expansion``
    features, which is what a 32x32 input yields (128 * expansion channels on
    a 2x2 pooled map).

    Args:
        block: residual block class; must expose ``expansion`` and accept
            ``(in_planes, planes, stride)``.
        num_blocks: sequence giving the number of blocks in each of the four stages.
        num_classes: size of the classifier output.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        # Channel count fed to the next block; advanced by ``_make_layer``.
        self.in_planes = 16

        # Stem.
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)

        # (planes, stride) of layer1 .. layer4, built in order.
        stage_specs = ((16, 2), (32, 2), (64, 1), (128, 1))
        for idx, (planes, first_stride) in enumerate(stage_specs):
            stage = self._make_layer(block, planes, num_blocks[idx], stride=first_stride)
            setattr(self, 'layer{}'.format(idx + 1), stage)

        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage; only its first block uses ``stride``, the rest use 1."""
        per_block_strides = [stride]
        per_block_strides.extend([1] * (num_blocks - 1))

        stage = []
        for block_stride in per_block_strides:
            stage.append(block(self.in_planes, planes, block_stride))
            # The next block consumes what this one produced.
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return the class scores for a batch of images ``x``."""
        out = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            out = stage(out)
        pooled = F.avg_pool2d(out, 4)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)
    

def ResNet18_original():
    """Build a ``ResNet`` from ``BasicBlock`` with stage depths [2, 2, 2, 2].

    The body is the same as ``ResNet18``.
    """
    depths = [2, 2, 2, 2]
    return ResNet(BasicBlock, depths)

def ResNet18():
    """Build a ``ResNet`` from ``BasicBlock`` with stage depths [2, 2, 2, 2]."""
    depths = [2, 2, 2, 2]
    return ResNet(BasicBlock, depths)

def ResNet34():
    """Build a ``ResNet`` from ``BasicBlock`` with stage depths [3, 4, 6, 3]."""
    depths = [3, 4, 6, 3]
    return ResNet(BasicBlock, depths)

def ResNet50():
    """Build a ``ResNet`` from ``Bottleneck`` with stage depths [3, 4, 6, 3]."""
    depths = [3, 4, 6, 3]
    return ResNet(Bottleneck, depths)

def ResNet101():
    """Build a ``ResNet`` from ``Bottleneck`` with stage depths [3, 4, 23, 3]."""
    depths = [3, 4, 23, 3]
    return ResNet(Bottleneck, depths)

def ResNet152():
    """Build a ``ResNet`` from ``Bottleneck`` with stage depths [3, 8, 36, 3]."""
    depths = [3, 8, 36, 3]
    return ResNet(Bottleneck, depths)


def test():
    """Smoke test: push one random 3x32x32 image through ``ResNet18`` and print the output size."""
    model = ResNet18()
    sample = torch.randn(1, 3, 32, 32)
    scores = model(sample)
    print(scores.size())

# test()

In [39]:
# Instantiate the ResNet_3 variant as `teacher_model` and count its trainable
# parameters (the cell's value is that count).
teacher_model = ResNet_3()

sum(p.numel() for p in teacher_model.parameters() if p.requires_grad)

705594

In [102]:
# NOTE(review): presumably the ratio of two model parameter counts — TODO confirm.
# The recorded result is the integer 232, i.e. `/` truncated (Python 2 semantics);
# under Python 3 this expression yields a float.
11173962 / 48090

232

In [99]:
# Run the ResNet18 smoke test.
# NOTE(review): the recorded output includes 'out = ...', '2d = ...' and 'view = ...'
# lines; those prints are commented out in the forward() methods visible in this
# file, so the output was presumably produced by an earlier revision — TODO confirm.
test()

out = torch.Size([1, 16, 32, 32])
2d = torch.Size([1, 16, 8, 8])
view = torch.Size([1, 1024])
torch.Size([1, 10])


In [108]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
class Net2(nn.Module):
    """Small two-convolution CNN classifier (default width: 32 channels).

    Pipeline: ``conv1 -> bn1 -> 2x2 max-pool -> ReLU -> conv2 (stride 2) -> bn2
    -> 2x2 max-pool -> ReLU -> flatten -> fc1``.  ``fc1`` is sized for a 4x4
    final feature map, which is what a 32x32 input produces; any other
    spatial size makes ``fc1`` raise a shape error.

    Args:
        num_channels: output channels of ``conv1``; ``conv2`` produces four
            times as many.
        num_classes: number of output scores per sample.
    """

    def __init__(self, num_channels=32, num_classes=10):
        super(Net2, self).__init__()
        self.num_channels = num_channels
        wide = self.num_channels * 4

        self.conv1 = nn.Conv2d(3, self.num_channels, 3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(self.num_channels)
        self.conv2 = nn.Conv2d(self.num_channels, wide, 3, stride=2, padding=1)
        self.bn2 = nn.BatchNorm2d(wide)
        self.fc1 = nn.Linear(4 * 4 * wide, num_classes)
        # Not used by forward(); kept because external code may read it.
        self.dropout_rate = 0.5

    def forward(self, x):
        """Return ``batch_size x num_classes`` scores; shape comments assume 32x32 input."""
        x = self.bn1(self.conv1(x))     # batch_size x num_channels x 32 x 32
        x = F.relu(F.max_pool2d(x, 2))  # batch_size x num_channels x 16 x 16
        x = self.bn2(self.conv2(x))     # batch_size x num_channels*4 x 8 x 8
        x = F.relu(F.max_pool2d(x, 2))  # batch_size x num_channels*4 x 4 x 4
        # Flatten per sample.  A ``view(-1, features)`` here would silently
        # fold several samples into one row when the spatial size is wrong.
        x = x.view(x.size(0), -1)       # batch_size x 4*4*num_channels*4
        return self.fc1(x)

class Net4(nn.Module):
    """Small two-convolution CNN classifier (default width: 150 channels).

    Pipeline: ``conv1 -> bn1 -> 2x2 max-pool -> ReLU -> conv2 (stride 2) -> bn2
    -> 2x2 max-pool -> ReLU -> flatten -> fc1``.  ``fc1`` is sized for a 4x4
    final feature map, which is what a 32x32 input produces; any other
    spatial size makes ``fc1`` raise a shape error.

    Args:
        num_channels: output channels of ``conv1``; ``conv2`` produces four
            times as many.
        num_classes: number of output scores per sample.
    """

    def __init__(self, num_channels=150, num_classes=10):
        super(Net4, self).__init__()
        self.num_channels = num_channels
        wide = self.num_channels * 4

        self.conv1 = nn.Conv2d(3, self.num_channels, 3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(self.num_channels)
        self.conv2 = nn.Conv2d(self.num_channels, wide, 3, stride=2, padding=1)
        self.bn2 = nn.BatchNorm2d(wide)
        self.fc1 = nn.Linear(4 * 4 * wide, num_classes)
        # Not used by forward(); kept because external code may read it.
        self.dropout_rate = 0.5

    def forward(self, x):
        """Return ``batch_size x num_classes`` scores; shape comments assume 32x32 input."""
        x = self.bn1(self.conv1(x))     # batch_size x num_channels x 32 x 32
        x = F.relu(F.max_pool2d(x, 2))  # batch_size x num_channels x 16 x 16
        x = self.bn2(self.conv2(x))     # batch_size x num_channels*4 x 8 x 8
        x = F.relu(F.max_pool2d(x, 2))  # batch_size x num_channels*4 x 4 x 4
        # Flatten per sample.  A ``view(-1, features)`` here would silently
        # fold several samples into one row when the spatial size is wrong.
        x = x.view(x.size(0), -1)       # batch_size x 4*4*num_channels*4
        return self.fc1(x)

class Net5(nn.Module):
    """Small two-convolution CNN classifier (default width: 90 channels).

    Pipeline: ``conv1 -> bn1 -> 2x2 max-pool -> ReLU -> conv2 (stride 2) -> bn2
    -> 2x2 max-pool -> ReLU -> flatten -> fc1``.  ``fc1`` is sized for a 4x4
    final feature map, which is what a 32x32 input produces; any other
    spatial size makes ``fc1`` raise a shape error.

    Args:
        num_channels: output channels of ``conv1``; ``conv2`` produces four
            times as many.
        num_classes: number of output scores per sample.
    """

    def __init__(self, num_channels=90, num_classes=10):
        super(Net5, self).__init__()
        self.num_channels = num_channels
        wide = self.num_channels * 4

        self.conv1 = nn.Conv2d(3, self.num_channels, 3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(self.num_channels)
        self.conv2 = nn.Conv2d(self.num_channels, wide, 3, stride=2, padding=1)
        self.bn2 = nn.BatchNorm2d(wide)
        self.fc1 = nn.Linear(4 * 4 * wide, num_classes)
        # Not used by forward(); kept because external code may read it.
        self.dropout_rate = 0.5

    def forward(self, x):
        """Return ``batch_size x num_classes`` scores; shape comments assume 32x32 input."""
        x = self.bn1(self.conv1(x))     # batch_size x num_channels x 32 x 32
        x = F.relu(F.max_pool2d(x, 2))  # batch_size x num_channels x 16 x 16
        x = self.bn2(self.conv2(x))     # batch_size x num_channels*4 x 8 x 8
        x = F.relu(F.max_pool2d(x, 2))  # batch_size x num_channels*4 x 4 x 4
        # Flatten per sample.  A ``view(-1, features)`` here would silently
        # fold several samples into one row when the spatial size is wrong.
        x = x.view(x.size(0), -1)       # batch_size x 4*4*num_channels*4
        return self.fc1(x)

def t():
    """Smoke-test ``Net4`` and then ``Net5`` on a random batch of two 3x32x32 images.

    For each network, prints the number of trainable parameters followed by
    the output size; a blank separator is printed between the two networks.
    All ``print`` calls use the single-argument call form, which is valid on
    both Python 2 and Python 3 (the former ``print sum(...)`` statement is a
    SyntaxError on Python 3).
    """
    net = Net4()
    y = net(torch.randn(2, 3, 32, 32))
    print(sum(p.numel() for p in net.parameters() if p.requires_grad))
    print(y.size())
    print('\n')
    net = Net5()
    y = net(torch.randn(2, 3, 32, 32))
    print(sum(p.numel() for p in net.parameters() if p.requires_grad))
    print(y.size())

In [109]:
# Run the Net4/Net5 smoke test; the recorded output below lists each network's
# trainable-parameter count and output size.
t()

912310
torch.Size([2, 10])


352990
torch.Size([2, 10])


In [None]:
# NOTE(review): bare tuple expression in an unexecuted cell; nothing in this file
# uses it and its meaning is unclear — TODO confirm or remove.
580, 128, 32