<a href="https://colab.research.google.com/github/moh2236945/CNNs/blob/master/Implementations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch.nn as nn
import torch.utils.model_zoo as model_zoo

# Free-form usage note for VGGnet(); nothing in the visible code reads it.
__help__="you can call VGGnet(kind='vgg16',num_classes=1000,batch_norm=False,pretrained=False) to get a vgg net,\
         you can use __all__ to get the compelete vggnet choose.\
         if you want to use vggxx_bn you should not give the parameter kind='vggxx_bn',\
         you should also give the kind='vggxx_bn' but another parameter batch_norm=True"

# Names advertised as the public API of this cell.
# NOTE(review): apart from 'VGG', none of these names is defined in the visible
# code (the only factory here is VGGnet) — confirm before relying on
# `from <module> import *`.
__all__=[
    'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn',
    'vgg19_bn', 'vgg19',
]


# Checkpoint URLs keyed by architecture name. VGGnet() indexes this dict with
# `kind`, with '_bn' appended when batch_norm is True.
model_urls = {
    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth',
    'vgg11_bn': 'https://download.pytorch.org/models/vgg11_bn-6002323d.pth',
    'vgg13_bn': 'https://download.pytorch.org/models/vgg13_bn-abd245e5.pth',
    'vgg16_bn': 'https://download.pytorch.org/models/vgg16_bn-6c64b313.pth',
    'vgg19_bn': 'https://download.pytorch.org/models/vgg19_bn-c79401a0.pth',
}

class VGG(nn.Module):
    """VGG-style network: a convolutional feature stack plus a 1x1-conv head.

    Args:
        features: module applied first to the input (typically the result of
            ``make_layers``); its output must have 512 channels.
        num_classes: number of output channels of the 1x1-conv head.
        init_weights: when true, run ``_initialize_weights`` after construction.

    Note:
        The fully connected ``classifier`` stack is built (and initialised)
        but ``forward`` does not call it; predictions come from ``conv1x1``.
    """

    def __init__(self,features,num_classes=1000,init_weights=True):
        super().__init__()
        self.features=features
        fc_stack=[
            nn.Linear(512*7*7,4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096,4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096,num_classes),
        ]
        self.classifier=nn.Sequential(*fc_stack)
        self.conv1x1=nn.Conv2d(512,num_classes,kernel_size=1,stride=1)
        if init_weights:
            self._initialize_weights()

    def forward(self,x):
        """Run features -> 1x1 conv and flatten everything but the batch dim."""
        feature_map=self.features(x)
        scores=self.conv1x1(feature_map)
        return scores.view(scores.size(0),-1)

    def _initialize_weights(self):
        """Initialise every Conv2d, BatchNorm2d and Linear submodule in place."""
        for layer in self.modules():
            if isinstance(layer,nn.Conv2d):
                nn.init.kaiming_normal_(layer.weight,mode='fan_out',nonlinearity='relu')
                # Convs created with bias=False expose `bias` as None.
                if layer.bias is not None:
                    nn.init.constant_(layer.bias,0)
                continue
            if isinstance(layer,nn.BatchNorm2d):
                nn.init.constant_(layer.weight,1)
                nn.init.constant_(layer.bias,0)
                continue
            if isinstance(layer,nn.Linear):
                nn.init.normal_(layer.weight,0,0.01)
                nn.init.constant_(layer.bias,0)

# Layer layouts consumed by make_layers(): an integer is the output-channel
# count of a 3x3 convolution, 'M' inserts a 2x2 max-pool with stride 2.
cfg={
    'vgg11':[64,'M',128,'M',256,256,'M',512,512,'M',512,512,'M'], #11 weight layers
    'vgg13':[64,64,'M',128,128,'M',256,256,'M',512,512,'M',512,512,'M'], #13 weight layers
    'vgg16':[64,64,'M',128,128,'M',256,256,256,'M',512,512,512,'M',512,512,512,'M'], #16 weight layers
    'vgg19':[64,64,'M',128,128,'M',256,256,256,256,'M',512,512,512,512,'M',512,512,512,512,'M'], #19 weight layers
}
def make_layers(cfg,batch_norm=False):
    """Translate a layout list into an ``nn.Sequential`` feature extractor.

    Args:
        cfg: iterable whose items are either ``'M'`` (2x2 max-pool, stride 2)
            or an int giving the output channels of a 3x3, padding-1 conv.
        batch_norm: when truthy, each conv is created without bias and is
            followed by ``BatchNorm2d`` before the in-place ReLU.

    Returns:
        ``nn.Sequential`` of the generated layers; the first conv takes
        3 input channels.
    """
    stack=[]
    prev_channels=3
    for entry in cfg:
        if entry=='M':
            stack.append(nn.MaxPool2d(kernel_size=2,stride=2))
            continue
        # The conv bias is only kept when no batch-norm layer follows it.
        stack.append(nn.Conv2d(prev_channels,entry,kernel_size=3,padding=1,stride=1,
                               bias=not batch_norm))
        if batch_norm:
            stack.append(nn.BatchNorm2d(entry))
        stack.append(nn.ReLU(True))
        prev_channels=entry
    return nn.Sequential(*stack)

def VGGnet(kind='vgg16',num_classes=1000,batch_norm=False,pretrained=False,**kwargs):
    """Build a VGG model from one of the layouts in ``cfg``.

    Args:
        kind: key into ``cfg`` ('vgg11', 'vgg13', 'vgg16' or 'vgg19').
        num_classes: forwarded to ``VGG``; must be 1000 when ``pretrained``.
        batch_norm: forwarded to ``make_layers``; when equal to True it also
            selects the '<kind>_bn' checkpoint URL.
        pretrained: when truthy, weight initialisation is skipped and the
            checkpoint from ``model_urls`` is loaded into the model.
        **kwargs: extra keyword arguments for ``VGG``.

    Returns:
        The constructed ``VGG`` instance.

    NOTE(review): ``VGG`` registers a ``conv1x1`` head in addition to
    ``features``/``classifier`` — confirm that the downloaded checkpoints
    contain matching keys before relying on ``pretrained=True``.
    """
    if pretrained:
        assert num_classes==1000,\
            'pretrained model only on ImageNet which num classes is 1000 but got{}'.format(num_classes)
        # Random initialisation would be overwritten by the checkpoint anyway.
        kwargs['init_weights']=False

    backbone=make_layers(cfg[kind],batch_norm)
    model=VGG(backbone,num_classes,**kwargs)
    if not pretrained:
        return model

    checkpoint_key=kind+'_bn' if batch_norm==True else kind
    state=model_zoo.load_url(model_urls[checkpoint_key])
    model.load_state_dict(state)
    return model

# Script entry point: a quick manual smoke test.
if __name__ == '__main__':
    # A conv created with bias=False has `bias` set to None; this prints "None".
    a=nn.Conv2d(1,2,kernel_size=1,bias=False)
    print(a.bias)
    # Build a batch-norm VGG-16 with a 10-way head and dump its structure.
    model=VGGnet(kind='vgg16',num_classes=10,batch_norm=True)
    print(model)

None
VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, d

In [1]:
from collections import OrderedDict

import torch
import torch.nn as nn
import torch.nn.functional as F

# Public names of the DenseNet cell. DenseNet_CIFAR10 itself is not listed;
# only its densenet29/45/85 factory functions are.
__all__ = ['DenseNet', 'densenet121', 'densenet169', 'densenet201', 'densenet264', 'densenet29', 'densenet45',
           'densenet85']


class _DenseLayer(nn.Sequential):
    def __init__(self, num_input_features, growth_rate, bn_size, drop_rate):
        super(_DenseLayer, self).__init__()
        self.add_module('norm1', nn.BatchNorm2d(num_input_features))
        self.add_module('relu1', nn.ReLU(inplace=True))
        self.add_module('conv1', nn.Conv2d(num_input_features,
                                           bn_size * growth_rate, kernel_size=1, stride=1, bias=False))
        self.add_module('norm2', nn.BatchNorm2d(bn_size * growth_rate))
        self.add_module('relu2', nn.ReLU(inplace=True))
        self.add_module('conv2', nn.Conv2d(bn_size * growth_rate, growth_rate,
                                           kernel_size=3, stride=1, padding=1, bias=False))
        self.drop_rate = drop_rate

    def forward(self, input):
        new_features = super(_DenseLayer, self).forward(input)
        if self.drop_rate > 0:
            new_features = F.dropout(new_features, p=self.drop_rate, training=self.training)
        return torch.cat([input, new_features], 1)


class _DenseBlock(nn.Sequential):
    """Sequence of ``num_layers`` dense layers named ``denselayer1``, ``denselayer2``, ...

    Each layer's input width grows by ``growth_rate`` relative to the previous
    one, starting from ``num_input_features``.
    """

    def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate):
        super().__init__()
        in_width = num_input_features
        for index in range(1, num_layers + 1):
            self.add_module('denselayer%d' % index,
                            _DenseLayer(in_width, growth_rate, bn_size, drop_rate))
            in_width += growth_rate


class _Transition(nn.Sequential):
    def __init__(self, num_input_features, num_output_features):
        super(_Transition, self).__init__()
        self.add_module('norm', nn.BatchNorm2d(num_input_features))
        self.add_module('relu', nn.ReLU(inplace=True))
        self.add_module('conv', nn.Conv2d(num_input_features, num_output_features,
                                          kernel_size=1, stride=1, bias=False))
        self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2))


class DenseNet(nn.Module):
    """DenseNet with a 7x7/stride-2 stem and a 1x1-conv classifier.

    Args:
        growth_rate (int): how many filters each dense layer adds (`k` in paper).
        block_config (sequence of ints): number of layers in each dense block;
            a transition (channel halving + 2x2 pooling) follows every block
            except the last.
        num_init_feature (int): number of filters in the first convolution.
        bn_size (int): multiplicative factor for the bottleneck width
            (i.e. bn_size * k features in the bottleneck layer).
        drop_rate (float): dropout rate after each dense layer.
        num_classes (int): number of classification classes.
    """

    def __init__(self, growth_rate=12, block_config=(6, 12, 24, 16),
                 num_init_feature=24, bn_size=4, drop_rate=0, num_classes=1000):
        super(DenseNet, self).__init__()

        # First convolution before the dense blocks
        self.features = nn.Sequential(OrderedDict([
            ('conv0', nn.Conv2d(3, num_init_feature, kernel_size=7, stride=2, padding=3, bias=False)),
            ('norm0', nn.BatchNorm2d(num_init_feature)),
            ('relu0', nn.ReLU(inplace=True)),
            ('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1))])
        )

        num_features = num_init_feature
        for i, num_layers in enumerate(block_config):
            block = _DenseBlock(num_layers=num_layers, num_input_features=num_features,
                                bn_size=bn_size, growth_rate=growth_rate, drop_rate=drop_rate)
            self.features.add_module('denseblock%d' % (i + 1), block)
            num_features = num_features + num_layers * growth_rate
            if i != len(block_config) - 1:
                trans = _Transition(num_input_features=num_features, num_output_features=num_features // 2)
                self.features.add_module('transition%d' % (i + 1), trans)
                num_features = num_features // 2

        self.features.add_module('norm5', nn.BatchNorm2d(num_features))
        self.features.add_module('relu5', nn.ReLU(inplace=True))
        self.features.add_module('avgpool', nn.AdaptiveAvgPool2d((1, 1)))

        # The features end in a 1x1 spatial map, so a 1x1 convolution acts as
        # the final linear classifier. kernel_size is a required argument of
        # nn.Conv2d; omitting it made construction fail.
        self.classifier = nn.Conv2d(num_features, num_classes, kernel_size=1, stride=1)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                # Most convs here are bias-free; only touch a bias that exists.
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                nn.init.constant_(m.bias, 0)

    def forward(self, input):
        """Return class scores flattened to ``(input.size(0), -1)``."""
        features = self.features(input)
        out = self.classifier(features).view(input.size(0), -1)
        return out


def densenet121(pretrained=False, **kwargs):
    """Build a ``DenseNet`` with block_config (6, 12, 24, 16), growth rate 32 and 64 stem filters.

    ``pretrained`` is accepted but not used; ``kwargs`` are forwarded to ``DenseNet``.
    """
    return DenseNet(block_config=(6, 12, 24, 16), growth_rate=32, num_init_feature=64, **kwargs)


def densenet169(pretrained=False, **kwargs):
    """Build a ``DenseNet`` with block_config (6, 12, 32, 32), growth rate 32 and 64 stem filters.

    ``pretrained`` is accepted but not used; ``kwargs`` are forwarded to ``DenseNet``.
    """
    return DenseNet(block_config=(6, 12, 32, 32), growth_rate=32, num_init_feature=64, **kwargs)


def densenet201(pretrained=False, **kwargs):
    """Build a ``DenseNet`` with block_config (6, 12, 48, 32), growth rate 32 and 64 stem filters.

    ``pretrained`` is accepted but not used; ``kwargs`` are forwarded to ``DenseNet``.
    """
    return DenseNet(block_config=(6, 12, 48, 32), growth_rate=32, num_init_feature=64, **kwargs)


def densenet264(pretrained=False, **kwargs):
    """Build a ``DenseNet`` with block_config (6, 12, 64, 48), growth rate 32 and 64 stem filters.

    ``pretrained`` is accepted but not used; ``kwargs`` are forwarded to ``DenseNet``.
    """
    return DenseNet(block_config=(6, 12, 64, 48), growth_rate=32, num_init_feature=64, **kwargs)


class DenseNet_CIFAR10(nn.Module):
    """DenseNet variant with a 3x3/stride-1 stem and no stem pooling.

    Args:
        growth_rate (int): how many filters each dense layer adds (`k` in paper).
        block_config (sequence of ints): number of layers in each dense block;
            a transition follows every block except the last.
        num_init_feature (int): number of filters in the first convolution.
        bn_size (int): multiplicative factor for the bottleneck width
            (i.e. bn_size * k features in the bottleneck layer).
        drop_rate (float): dropout rate after each dense layer.
        num_classes (int): number of classification classes.
    """

    def __init__(self, growth_rate=12, block_config=(6, 12, 24, 12),
                 num_init_feature=24, bn_size=4, drop_rate=0, num_classes=10):
        super().__init__()

        # Stem: a single 3x3 convolution that keeps the spatial resolution.
        stem = OrderedDict()
        stem['conv0'] = nn.Conv2d(3, num_init_feature, kernel_size=3, stride=1, padding=1, bias=False)
        stem['norm0'] = nn.BatchNorm2d(num_init_feature)
        stem['relu0'] = nn.ReLU(inplace=True)
        self.features = nn.Sequential(stem)

        width = num_init_feature
        last_stage = len(block_config) - 1
        for stage, depth in enumerate(block_config):
            self.features.add_module(
                'denseblock%d' % (stage + 1),
                _DenseBlock(num_layers=depth, num_input_features=width,
                            bn_size=bn_size, growth_rate=growth_rate, drop_rate=drop_rate))
            width += depth * growth_rate
            if stage == last_stage:
                continue
            # Every block but the last is followed by a channel-halving transition.
            self.features.add_module(
                'transition%d' % (stage + 1),
                _Transition(num_input_features=width, num_output_features=width // 2))
            width //= 2

        self.features.add_module('norm5', nn.BatchNorm2d(width))
        self.features.add_module('relu5', nn.ReLU(inplace=True))
        self.features.add_module('avgpool', nn.AdaptiveAvgPool2d((1, 1)))

        # 1x1 convolution over the pooled 1x1 map serves as the classifier.
        self.classifier = nn.Conv2d(width, num_classes, kernel_size=1, stride=1)

        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                # Only convs created with a bias have one to reset.
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.Linear):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                nn.init.constant_(module.bias, 0)

    def forward(self, input):
        """Return class scores flattened to ``(input.size(0), -1)``."""
        pooled = self.features(input)
        scores = self.classifier(pooled)
        return scores.view(input.size(0), -1)


def densenet29(pretrained=False, **kwargs):
    """Build a ``DenseNet_CIFAR10`` with four 6-layer blocks, growth rate 24 and 48 stem filters.

    ``pretrained`` is accepted but not used; ``kwargs`` are forwarded to the model.
    """
    return DenseNet_CIFAR10(block_config=(6, 6, 6, 6), growth_rate=24, num_init_feature=48, **kwargs)


def densenet45(pretrained=False, **kwargs):
    """Build a ``DenseNet_CIFAR10`` with four 10-layer blocks, growth rate 24 and 48 stem filters.

    ``pretrained`` is accepted but not used; ``kwargs`` are forwarded to the model.
    """
    return DenseNet_CIFAR10(block_config=(10, 10, 10, 10), growth_rate=24, num_init_feature=48, **kwargs)


def densenet85(pretrained=False, **kwargs):
    """Build a ``DenseNet_CIFAR10`` with four 20-layer blocks, growth rate 24 and 48 stem filters.

    ``pretrained`` is accepted but not used; ``kwargs`` are forwarded to the model.
    """
    return DenseNet_CIFAR10(block_config=(20, 20, 20, 20), growth_rate=24, num_init_feature=48, **kwargs)


# Script entry point: print a per-layer summary of densenet29 for 3x32x32 input.
# Requires the device "cuda:0" and the third-party `torchsummary` package.
if __name__ == '__main__':
    net = densenet29().to("cuda:0")
    import torchsummary

    torchsummary.summary(net, input_size=(3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 48, 32, 32]           1,296
       BatchNorm2d-2           [-1, 48, 32, 32]              96
              ReLU-3           [-1, 48, 32, 32]               0
       BatchNorm2d-4           [-1, 48, 32, 32]              96
              ReLU-5           [-1, 48, 32, 32]               0
            Conv2d-6           [-1, 96, 32, 32]           4,608
       BatchNorm2d-7           [-1, 96, 32, 32]             192
              ReLU-8           [-1, 96, 32, 32]               0
            Conv2d-9           [-1, 24, 32, 32]          20,736
      BatchNorm2d-10           [-1, 72, 32, 32]             144
             ReLU-11           [-1, 72, 32, 32]               0
           Conv2d-12           [-1, 96, 32, 32]           6,912
      BatchNorm2d-13           [-1, 96, 32, 32]             192
             ReLU-14           [-1, 96,

In [2]:
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
import torch

class BasicConv2d(nn.Module):
    """Bias-free ``Conv2d`` followed by ``BatchNorm2d`` (no activation).

    Extra keyword arguments (kernel_size, stride, padding, ...) are passed
    straight to ``nn.Conv2d``.
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        return self.bn(self.conv(x))

class Inception(nn.Module):
    """Inception block with four parallel branches concatenated on dim 1.

    Args:
        channel: indexable of seven ints —
            [0] input channels, [1] 1x1 branch output,
            [2] 3x3 branch reduction, [3] 3x3 branch output,
            [4] 5x5 branch reduction, [5] 5x5 branch output,
            [6] output of the 1x1 conv after the max-pool branch.
        batch_norm: when equal to False plain ``nn.Conv2d`` layers are used,
            otherwise ``BasicConv2d`` (conv + batch norm).

    The output has ``channel[1] + channel[3] + channel[5] + channel[6]``
    channels and the same spatial size as the input.
    """

    def __init__(self,channel,batch_norm=False):
        super().__init__()
        # `== False` (rather than `not batch_norm`) is deliberate: values such
        # as None still select the batch-norm variant.
        conv=nn.Conv2d if batch_norm==False else BasicConv2d

        self.branch1x1=conv(channel[0],channel[1],kernel_size=(1,1),stride=1)

        self.branch3x3_1=conv(channel[0],channel[2],kernel_size=(1,1),stride=1)
        self.branch3x3_2=conv(channel[2],channel[3],kernel_size=(3,3),stride=1,padding=1)

        self.branch5x5_1=conv(channel[0],channel[4],kernel_size=(1,1),stride=1)
        self.branch5x5_2=conv(channel[4],channel[5],kernel_size=(5,5),stride=1,padding=2)

        self.branchM_1=nn.MaxPool2d(kernel_size=3,stride=1,padding=1)
        self.branchM_2=conv(channel[0],channel[6],kernel_size=(1,1),stride=1)

        self.relu=nn.ReLU(True)

    def forward(self,x):
        act=self.relu
        path_1x1=act(self.branch1x1(x))
        path_3x3=act(self.branch3x3_2(act(self.branch3x3_1(x))))
        path_5x5=act(self.branch5x5_2(act(self.branch5x5_1(x))))
        # The ReLU is also applied directly to the pooled input, as in the other branches.
        path_pool=act(self.branchM_2(act(self.branchM_1(x))))
        return torch.cat([path_1x1,path_3x3,path_5x5,path_pool],1)


# Channel widths of the nine Inception blocks, one row per block. Columns, as
# read by Inception.__init__:
# [input, 1x1 out, 3x3 reduce, 3x3 out, 5x5 reduce, 5x5 out, pool-branch 1x1 out]
channel=[
    [192, 64, 96,128, 16, 32, 32],#3a
    [256,128,128,192, 32, 96, 64],#3b
    [480,192, 96,208, 16, 48, 64],#4a
    [512,160,112,224, 24, 64, 64],#4b
    [512,128,128,256, 24, 64, 64],#4c
    [512,112,144,288, 32, 64, 64],#4d
    [528,256,160,320, 32,128,128],#4e
    [832,256,160,320, 32,128,128],#5a
    [832,384,192,384, 48,128,128] #5b
]
class InceptionNet(nn.Module):
    """GoogLeNet-style network returning the main output and two auxiliary outputs.

    Args:
        num_classes (int): number of output classes. The value 10 selects a
            small-image stem (two unpadded 3x3 convs with 64 channels) and
            fully convolutional auxiliary heads; any other value selects the
            7x7/stride-2 stem with max-pooling and auxiliary heads that end in
            fully connected layers.
        batch_norm (bool): forwarded to every ``Inception`` block.

    ``forward`` returns ``(outputs, auxout1, auxout2)``, each flattened to
    ``(batch, -1)``.
    """

    def __init__(self,num_classes=1000,batch_norm=False):
        super(InceptionNet, self).__init__()

        # Work on a private copy of the channel table: the 10-class variant
        # changes the input width of block 3a, and writing that into the
        # module-level `channel` list would corrupt every model built later.
        stage_channels=[list(row) for row in channel]

        # Only the non-10-class variant has fully connected auxiliary heads
        # (auxout12 / auxout22); forward() uses this flag to apply them.
        self._use_aux_fc=num_classes!=10

        if num_classes==10:
            stage_channels[0][0]=64
            self.begin=nn.Sequential(
                nn.Conv2d(3,64,kernel_size=3,stride=1),
                nn.ReLU(True),
                nn.Conv2d(64,64,kernel_size=3,stride=1),
                nn.ReLU(True)
            )

            # Shape comments below assume a 32x32 input image.
            self.auxout1=nn.Sequential(
                nn.Conv2d(512,512,kernel_size=5,stride=3), #4x4x512
                nn.ReLU(True),
                nn.Conv2d(512,128,kernel_size=1),          #4x4x128
                nn.ReLU(True),
                nn.Conv2d(128, 10,kernel_size=4)           #1x1x10
            )
            self.auxout2=nn.Sequential(
                nn.Conv2d(528,528,kernel_size=5,stride=3), #4x4x528,
                nn.ReLU(True),
                nn.Conv2d(528,128,kernel_size=1),          #4x4x128,
                nn.ReLU(True),
                nn.Conv2d(128, 10,kernel_size=4)           #1x1x10
            )
        else:
            self.begin=nn.Sequential(
                nn.Conv2d(3,64,kernel_size=7,stride=2,padding=3),
                nn.ReLU(True),
                nn.MaxPool2d(kernel_size=3,stride=2,padding=1),
                nn.Conv2d(64,192,kernel_size=3,stride=1,padding=1),
                nn.ReLU(True),
                nn.MaxPool2d(kernel_size=3,stride=2,padding=1),
            )
            # Shape comments below assume a 224x224 input image; the
            # Linear(2048, ...) layers rely on the 4x4x128 feature map.
            self.auxout1=nn.Sequential(
                nn.Conv2d(512,512,kernel_size=5,stride=3),#4x4x512
                nn.ReLU(True),
                nn.Conv2d(512,128,kernel_size=1),        #4x4x128
                nn.ReLU(True)
            )
            self.auxout12=nn.Sequential(
                nn.Linear(2048,1024),
                nn.Dropout(0.5),
                nn.Linear(1024,num_classes)
            )

            self.auxout2=nn.Sequential(
                nn.Conv2d(528,528,kernel_size=5,stride=3),#4x4x528
                nn.ReLU(True),
                nn.Conv2d(528,128,kernel_size=1),         #4x4x128
                nn.ReLU(True)
            )
            self.auxout22=nn.Sequential(
                nn.Linear(2048,1024),
                nn.Dropout(0.5),
                nn.Linear(1024,num_classes)
            )

        self.maxpool=nn.MaxPool2d(kernel_size=3,stride=2,padding=1)

        self.inception3a=Inception(stage_channels[0],batch_norm)
        self.inception3b=Inception(stage_channels[1],batch_norm)

        self.inception4a=Inception(stage_channels[2],batch_norm)
        self.inception4b=Inception(stage_channels[3],batch_norm)
        self.inception4c=Inception(stage_channels[4],batch_norm)
        self.inception4d=Inception(stage_channels[5],batch_norm)
        self.inception4e=Inception(stage_channels[6],batch_norm)

        self.inception5a=Inception(stage_channels[7],batch_norm)
        self.inception5b=Inception(stage_channels[8],batch_norm)

        self.avgpool=nn.AdaptiveAvgPool2d((1,1))

        # A 1x1 conv on the pooled 1x1 map replaces the dropout + linear layer
        # of the original paper (chosen by the author to save computation).
        self.conv1x1=nn.Conv2d(1024,num_classes,kernel_size=1)

        self._initialize_weights()

    def _initialize_weights(self):
        """Initialise every Conv2d, BatchNorm2d and Linear submodule in place."""
        for m in self.modules():
            if isinstance(m,nn.Conv2d):
                nn.init.kaiming_normal_(m.weight,mode='fan_out',nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias,0)
            elif isinstance(m,nn.BatchNorm2d):
                nn.init.constant_(m.weight,1)
                nn.init.constant_(m.bias,0)
            elif isinstance(m,nn.Linear):
                nn.init.normal_(m.weight,0,0.01)
                nn.init.constant_(m.bias,0)

    def forward(self,x):
        """Return ``(outputs, auxout1, auxout2)``; aux heads tap blocks 4a and 4d."""
        x=self.begin(x)

        x=self.inception3a(x)
        x=self.inception3b(x)
        x=self.maxpool(x)

        x=self.inception4a(x)
        auxout1=self.auxout1(x)
        auxout1=auxout1.view(auxout1.size(0),-1)
        # The non-10-class variant finishes the auxiliary head with FC layers.
        if self._use_aux_fc:
            auxout1=self.auxout12(auxout1)
        x=self.inception4b(x)
        x=self.inception4c(x)
        x=self.inception4d(x)

        auxout2=self.auxout2(x)
        auxout2=auxout2.view(auxout2.size(0),-1)
        if self._use_aux_fc:
            auxout2=self.auxout22(auxout2)
        x=self.inception4e(x)
        x=self.maxpool(x)

        x=self.inception5a(x)
        x=self.inception5b(x)
        x=self.avgpool(x)

        outputs=self.conv1x1(x)
        outputs=outputs.view(outputs.size(0),-1)

        return outputs,auxout1,auxout2

# Script entry point: build the 10-class, batch-norm variant and print its structure.
if __name__ == '__main__':
    net=InceptionNet(num_classes=10,batch_norm=True)
    print(net)

InceptionNet(
  (begin): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
    (3): ReLU(inplace=True)
  )
  (auxout1): Sequential(
    (0): Conv2d(512, 512, kernel_size=(5, 5), stride=(3, 3))
    (1): ReLU(inplace=True)
    (2): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
    (3): ReLU(inplace=True)
    (4): Conv2d(128, 10, kernel_size=(4, 4), stride=(1, 1))
  )
  (auxout2): Sequential(
    (0): Conv2d(528, 528, kernel_size=(5, 5), stride=(3, 3))
    (1): ReLU(inplace=True)
    (2): Conv2d(528, 128, kernel_size=(1, 1), stride=(1, 1))
    (3): ReLU(inplace=True)
    (4): Conv2d(128, 10, kernel_size=(4, 4), stride=(1, 1))
  )
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (inception3a): Inception(
    (branch1x1): BasicConv2d(
      (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, ep

In [8]:
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.utils.model_zoo as model_zoo


# Public names of the SqueezeNet cell.
__all__ = ['SqueezeNet', 'squeezenet1_0', 'squeezenet1_1']


# Checkpoint URLs used by squeezenet1_0() / squeezenet1_1() when pretrained=True.
model_urls = {
    'squeezenet1_0': 'https://download.pytorch.org/models/squeezenet1_0-a815701f.pth',
    'squeezenet1_1': 'https://download.pytorch.org/models/squeezenet1_1-f364aa15.pth',
}


class Fire(nn.Module):
    """Fire module: a 1x1 "squeeze" conv feeding parallel 1x1 and 3x3 "expand" convs.

    The two expand outputs (each followed by ReLU) are concatenated along
    dimension 1, giving ``expand1x1_planes + expand3x3_planes`` channels.
    """

    def __init__(self, inplanes, squeeze_planes,
                 expand1x1_planes, expand3x3_planes):
        super().__init__()
        self.inplanes = inplanes

        # Squeeze stage.
        self.squeeze = nn.Conv2d(inplanes, squeeze_planes, kernel_size=1)
        self.squeeze_activation = nn.ReLU(inplace=True)

        # Expand stage: the 3x3 path is padded so both paths keep the spatial size.
        self.expand1x1 = nn.Conv2d(squeeze_planes, expand1x1_planes, kernel_size=1)
        self.expand1x1_activation = nn.ReLU(inplace=True)
        self.expand3x3 = nn.Conv2d(squeeze_planes, expand3x3_planes, kernel_size=3, padding=1)
        self.expand3x3_activation = nn.ReLU(inplace=True)

    def forward(self, x):
        squeezed = self.squeeze_activation(self.squeeze(x))
        wide_1x1 = self.expand1x1_activation(self.expand1x1(squeezed))
        wide_3x3 = self.expand3x3_activation(self.expand3x3(squeezed))
        return torch.cat([wide_1x1, wide_3x3], 1)


class SqueezeNet(nn.Module):
    """SqueezeNet classifier in its 1.0 or 1.1 layout.

    Args:
        version: ``1.0`` or ``1.1``; anything else raises ``ValueError``.
        num_classes: number of output classes.

    ``forward`` returns a tensor of shape ``(batch, num_classes)``.
    """

    def __init__(self, version=1.0, num_classes=1000):
        super().__init__()
        if version not in [1.0, 1.1]:
            raise ValueError("Unsupported SqueezeNet version {version}:"
                             "1.0 or 1.1 expected".format(version=version))
        self.num_classes = num_classes

        def pool():
            # Every down-sampling step in both layouts uses this same pooling layer.
            return nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        if version == 1.0:
            layers = [
                nn.Conv2d(3, 96, kernel_size=7, stride=2),
                nn.ReLU(inplace=True),
                pool(),
                Fire(96, 16, 64, 64),
                Fire(128, 16, 64, 64),
                Fire(128, 32, 128, 128),
                pool(),
                Fire(256, 32, 128, 128),
                Fire(256, 48, 192, 192),
                Fire(384, 48, 192, 192),
                Fire(384, 64, 256, 256),
                pool(),
                Fire(512, 64, 256, 256),
            ]
        else:
            layers = [
                nn.Conv2d(3, 64, kernel_size=3, stride=2),
                nn.ReLU(inplace=True),
                pool(),
                Fire(64, 16, 64, 64),
                Fire(128, 16, 64, 64),
                pool(),
                Fire(128, 32, 128, 128),
                Fire(256, 32, 128, 128),
                pool(),
                Fire(256, 48, 192, 192),
                Fire(384, 48, 192, 192),
                Fire(384, 64, 256, 256),
                Fire(512, 64, 256, 256),
            ]
        self.features = nn.Sequential(*layers)

        # The final convolution gets its own initialisation (see the loop below).
        final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1)
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            final_conv,
            nn.ReLU(inplace=True),
            nn.AdaptiveAvgPool2d((1, 1)),
        )

        for module in self.modules():
            if not isinstance(module, nn.Conv2d):
                continue
            if module is final_conv:
                init.normal_(module.weight, mean=0.0, std=0.01)
            else:
                init.kaiming_uniform_(module.weight)
            if module.bias is not None:
                init.constant_(module.bias, 0)

    def forward(self, x):
        scores = self.classifier(self.features(x))
        return scores.view(scores.size(0), self.num_classes)


def squeezenet1_0(pretrained=False, **kwargs):
    r"""Build the SqueezeNet 1.0 architecture from the paper `"SqueezeNet:
    AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size"
    <https://arxiv.org/abs/1602.07360>`_.

    Args:
        pretrained (bool): If True, load the weights found at
            ``model_urls['squeezenet1_0']`` into the model.
        **kwargs: forwarded to ``SqueezeNet``.
    """
    net = SqueezeNet(version=1.0, **kwargs)
    if not pretrained:
        return net
    weights = model_zoo.load_url(model_urls['squeezenet1_0'])
    net.load_state_dict(weights)
    return net


def squeezenet1_1(pretrained=False, **kwargs):
    r"""SqueezeNet 1.1 model from the `official SqueezeNet repo
    <https://github.com/DeepScale/SqueezeNet/tree/master/SqueezeNet_v1.1>`_.
    SqueezeNet 1.1 has 2.4x less computation and slightly fewer parameters
    than SqueezeNet 1.0, without sacrificing accuracy.

    Args:
        pretrained (bool): If True, load the weights found at
            ``model_urls['squeezenet1_1']`` into the model.
        **kwargs: forwarded to ``SqueezeNet``.

    Returns:
        The constructed ``SqueezeNet`` instance.
    """
    model = SqueezeNet(version=1.1, **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['squeezenet1_1']))
    return model


# Script entry point. This guard used to be indented inside squeezenet1_1()
# after its `return`, where it could never run.
if __name__ == '__main__':
    net=SqueezeNet(version=1.1, num_classes=100)
    print(net)