In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

https://www.bigdata-navi.com/aidrops/2611/
を実装する

![](https://www.bigdata-navi.com/aidrops/wp-content/uploads/2020/03/2-1.png)

# 設計方針
* blockを 3x, 4x, 6x, 3x回繰り返す。

* 最後にaverage poolする。

* res34はvgg34でres50は1x1で小さくしてからconvしてFLOP増加を抑えている。

# basic blockを作る

![](https://www.bigdata-navi.com/aidrops/wp-content/uploads/2020/03/3-300x266.png)

In [25]:
class block(nn.Module):
    def __init__(self, inchannel, outchannel):
        super().__init__()
        channel = outchannel//4
        
        # 1x1 conv
        self.conv1 = nn.Conv2d(inchannel, channel, 1)
        self.bn1 = nn.BatchNorm2d(channel)
        self.relu1 = nn.ReLU(True)
        
        # 3x3 conv
        self.conv2 = nn.Conv2d(channel, channel, 3, padding=1)
        self.bn2 = nn.BatchNorm2d(channel)
        self.relu2 = nn.ReLU(True)
        
        # 1x1 conv
        self.conv3 = nn.Conv2d(channel, outchannel, 1)
        self.bn3 = nn.BatchNorm2d(outchannel)
        self.relu3 = nn.ReLU(True)
        
        # residual
        if inchannel == outchannel:
            self.shortcut = lambda x: x # just copy
        else:
            self.shortcut = nn.Conv2d(inchannel, outchannel, 1, padding=0) # 1x1 to adjust channels
        
    def forward(self, x):
        x_cpy = x
        # 1x1
        x = self.relu1(self.bn1(self.conv1(x)))
        # 3x3
        x = self.relu2(self.bn2(self.conv2(x)))
        # 1x1
        x = self.bn3(self.conv3(x))
        # residual
        x += self.shortcut(x_cpy)
        x = self.relu3(x)
        return x

In [27]:
# same channels
model = block(128, 128)
model(torch.rand(1,128,100,100)).size()

torch.Size([1, 128, 100, 100])

In [29]:
# dif channels
model = block(64, 128)
model(torch.rand(1,64,100,100)).size()

torch.Size([1, 128, 100, 100])

# Make basic block

In [84]:
class ResNet50(nn.Module):
    def __init__(self, ch=1):
        super().__init__()
        
        # 1st conv
        self.conv1 = nn.Conv2d(3, 64, 3, padding=1, stride=2)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu1 = nn.ReLU()
        
        # 2nd block 3 times
        layers = []
        layers += [self._building_block(256, 64)]
        layers += [self._building_block(256) for _ in range(2)]
        self.block2 = nn.ModuleList(layers)
        
        # 3rd block 4 times
        layers = []
        layers += [self._building_block(512, 256)]
        layers += [self._building_block(512) for _ in range(3)]
        self.block3 = nn.ModuleList(layers)
        
        # 4rd block 6 times
        layers = []
        layers += [self._building_block(1024, 512)]
        layers += [self._building_block(1024) for _ in range(5)]
        self.block4 = nn.ModuleList(layers)
        
        # 5th block 3 times
        layers = []
        layers += [self._building_block(2048, 1024)]
        layers += [self._building_block(2048) for _ in range(2)]
        self.block5 = nn.ModuleList(layers)
        
        self.gap = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(2048, ch)
        
    def forward(self, x):
        # 1st
        x = self.relu1(self.bn1(self.conv1(x)))
        
        # 2nd
        for block in self.block2:
            x = block(x)
        x = x[:,:,::2,::2]
        
        # 3rd
        for block in self.block3:
            x = block(x)
        x = x[:,:,::2,::2]
        print(x.size())
        
        # 4th
        for block in self.block4:
            x = block(x)
        x = x[:,:,::2,::2]
        
        # 5th
        for block in self.block5:
            x = block(x)
        
        # fc
        x = self.gap(x).squeeze(2).squeeze(2)
        x = self.fc(x)        
        return x
    
    def _make_layer(self):
        layers = []
    
    def _building_block(self,
                        channel_out,
                        channel_in=None):
        if channel_in is None:
            channel_in = channel_out
        return block(channel_in, channel_out)

In [85]:
model = ResNet50()
model

ResNet50(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU()
  (block2): ModuleList(
    (0): block(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu1): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu2): ReLU(inplace=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu3): ReLU(inplace=True)
      (shortcut): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
    )
    (1): block(
      (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1))
      (bn1): Bat

In [86]:
model(torch.rand(1,3,128,128)).size()

torch.Size([1, 512, 16, 16])


torch.Size([1, 1])

# PACT resnet

In [93]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
from module import ActFn, Conv2d, Linear
from torch.autograd import Variable

__all__ = ['ResNet', 'resnet20', 'resnet34', "resnet50"]

def _weights_init(m):
    classname = m.__class__.__name__
    #print(classname)
    if isinstance(m, nn.Linear) or isinstance(m, nn.Conv2d):
        init.kaiming_normal_(m.weight)

class LambdaLayer(nn.Module):
    def __init__(self, lambd):
        super(LambdaLayer, self).__init__()
        self.lambd = lambd

    def forward(self, x):
        return self.lambd(x)



class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1, k=8, expansion=1, snr=0.1, inference=False, res50=False):
        super(BasicBlock, self).__init__()
        self.k = k
        self.expansion = expansion
        self.res50 = res50
        
        if not res50:
            self.conv1 = Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False, bitwidth = k, noise=snr, inference=inference)
            self.bn1 = nn.BatchNorm2d(planes)           
            self.conv2 = Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False, bitwidth = k, noise=snr, inference=inference)
            self.bn2 = nn.BatchNorm2d(planes)
        else:
            # 1x1, 3x3, 1x1 block
            outchannel = planes
            inchannel = in_planes
            channel = outchannel//4            
            # 1x1 conv
            self.conv1 = nn.Conv2d(inchannel, channel, 1)
            self.bn1 = nn.BatchNorm2d(channel)
            self.relu1 = nn.ReLU(True)
            # 3x3 conv
            self.conv2 = nn.Conv2d(channel, channel, 3, padding=1, stride=stride,)
            self.bn2 = nn.BatchNorm2d(channel)
            self.relu2 = nn.ReLU(True)
            # 1x1 conv
            self.conv3 = nn.Conv2d(channel, outchannel, 1)
            self.bn3 = nn.BatchNorm2d(outchannel)
            self.relu3 = nn.ReLU(True)
        
        # PACT
        self.alpha1 = nn.Parameter(torch.tensor(10.))
        self.alpha2 = nn.Parameter(torch.tensor(10.))
        self.ActFn = ActFn.apply
        self.snr = snr

        if stride != 1 or in_planes != planes:
              # original resnet shortcut
              self.shortcut = nn.Sequential(
                    # nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                    Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                    nn.BatchNorm2d(self.expansion * planes)
              )
        else: # nothing done if stride or inplanes do not differ
          self.shortcut = nn.Sequential()

    def forward(self, x):
        if not self.res50:
            out = self.ActFn(self.bn1(self.conv1(x)), self.alpha1, self.k)
            out = self.bn2(self.conv2(out))
        else:
            # 1x1
            out = self.relu1(self.bn1(self.conv1(x)))
            # 3x3
            out = self.relu2(self.bn2(self.conv2(out)))
            # 1x1
            out = self.bn3(self.conv3(out))
        # residue
        out += self.shortcut(x)
        out = self.ActFn(out, self.alpha2, self.k)

        return out


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10, K=8, snr=0, inference=False, conv1_noise=True, linear_noise=True, res50=False):
        super(ResNet, self).__init__()
        self.in_planes = 64
        self.k = K
        self.snr = snr
        self.inference = inference
        self.res50 = res50

        # 1st layers
        self.conv1 = Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False, bitwidth = 8, noise=snr*float(conv1_noise), inference=inference)
        self.bn1 = nn.BatchNorm2d(64)
        self.alpha1 = nn.Parameter(torch.tensor(10.))
        self.ActFn = ActFn.apply
        
        # Blocks
        if not res50:
            self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1, expansion=1)
            self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2, expansion=1)
            self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2, expansion=1)
            self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2, expansion=1)
        else:
            self.layer1 = self._make_layer(block, 256, num_blocks[0], stride=1, expansion=1)
            self.layer2 = self._make_layer(block, 512, num_blocks[1], stride=2, expansion=1)
            self.layer3 = self._make_layer(block, 1024, num_blocks[2], stride=2, expansion=1)
            self.layer4 = self._make_layer(block, 2048, num_blocks[3], stride=2, expansion=1)
        
        # FCs
        if not res50:
            self.linear = Linear(512, num_classes, bitwidth = 8, noise=snr*float(linear_noise), inference=inference)
        else:
            self.linear = Linear(2048, num_classes, bitwidth = 8, noise=snr*float(linear_noise), inference=inference)
        self.apply(_weights_init)       

    def _make_layer(self, block, planes, num_blocks, stride, expansion):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride, self.k, expansion, self.snr, inference=self.inference, res50=self.res50))
            self.in_planes = planes * block.expansion

        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.ActFn(self.bn1(self.conv1(x)), self.alpha1, self.k)
        # layers
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, out.size()[3])
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

In [94]:
model = ResNet(BasicBlock, [3, 4, 6, 3], res50=True)

In [95]:
model(torch.rand(1,3,28,28))

tensor([[-6.1927,  7.8933, -1.7759,  5.1656, -1.0439, -3.0961, -5.2665,  5.1290,
         18.7735,  5.0228]], grad_fn=<AddmmBackward>)