# ResNet (Deep Residual Learning for Image Recognition)  
paper URL: https://arxiv.org/abs/1512.03385  

ショートカットにはprojectionを用いている. 

$$\text{projection:}\quad y = F(x, \{W_i\}) + W_s x$$  

$$\text{identity:}\quad y = F(x, \{W_i\}) + x.$$  

identityでは入力サイズと出力サイズが異なっていると計算ができないため, その際にはzero paddingを用いてサイズをそろえる.  

In [1]:
import typing 

import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, BatchNormalization,\
    Activation, Conv2D, MaxPooling2D, Add, GlobalAveragePooling2D
from tensorflow.keras.models import Model

In [2]:
class Redisual_block(Model):
    """Basic residual block: two 3x3 convolutions plus a shortcut connection.

    The main path is ``conv3x3 -> BN -> ReLU -> conv3x3 -> BN``. Its output is
    added to the shortcut and passed through a final ReLU. All convolutions
    use stride 1 and ``'same'`` padding, so the spatial size is unchanged.

    When ``input_channel != output_channel`` the shortcut is a 1x1 convolution
    followed by batch normalization and ReLU (projection shortcut); otherwise
    the input is passed through unchanged (identity shortcut).

    Args:
        input_channel: Number of channels of the incoming tensor. Also used as
            the number of filters of the first convolution.
        output_channel: Number of channels produced by the block.
    """

    def __init__(self, input_channel, output_channel):
        super().__init__()
        self.input_channel = input_channel
        self.output_channel = output_channel

        # Main (residual) path.
        self.conv1 = Conv2D(filters=self.input_channel, kernel_size=(3, 3),
                            strides=1, padding='same', use_bias=False)
        self.bn1 = BatchNormalization()
        self.av1 = Activation('relu')
        self.conv2 = Conv2D(filters=self.output_channel, kernel_size=(3, 3),
                            strides=1, padding='same', use_bias=False)
        self.bn2 = BatchNormalization()

        # Projection shortcut layers are created once here, not inside
        # `call`. Layers instantiated during the forward pass would get fresh
        # weights on every call instead of being built once as sublayers.
        if self.input_channel != self.output_channel:
            self.shortcut_conv = Conv2D(filters=self.output_channel,
                                        kernel_size=(1, 1), strides=1,
                                        padding='same', use_bias=False)
            self.shortcut_bn = BatchNormalization()
            # NOTE(review): the projection formula y = F(x, {W_i}) + W_s x has
            # no non-linearity on the shortcut. The ReLU is kept to preserve
            # the existing behaviour - confirm whether it is intended.
            self.shortcut_av = Activation('relu')

        self.add = Add()
        self.out_av = Activation('relu')

    def _shortcut(self, x):
        """Return the shortcut branch for ``x``.

        Identity when the channel counts match, otherwise the 1x1 projection
        (conv -> BN -> ReLU) built in ``__init__``.
        """
        if self.input_channel == self.output_channel:
            return x
        x = self.shortcut_conv(x)
        x = self.shortcut_bn(x)
        return self.shortcut_av(x)

    def call(self, x):
        """Apply the residual block to ``x`` and return the activated sum."""
        h1 = self.conv1(x)
        h1 = self.bn1(h1)
        h1 = self.av1(h1)
        h1 = self.conv2(h1)
        h1 = self.bn2(h1)
        x = self._shortcut(x)
        y = self.add([x, h1])
        y = self.out_av(y)
        return y

In [3]:
class Bottleneck_block(Model):
    """Bottleneck residual block: 1x1 -> 3x3 -> 1x1 convolutions plus shortcut.

    The main path is
    ``conv1x1 -> BN -> ReLU -> conv3x3 -> BN -> ReLU -> conv1x1 -> BN``.
    Its output is added to the shortcut and passed through a final ReLU. All
    convolutions use stride 1, so the spatial size is unchanged.

    When ``input_channel != output_channel`` the shortcut is a 1x1 convolution
    followed by batch normalization and ReLU (projection shortcut); otherwise
    the input is passed through unchanged (identity shortcut).

    Args:
        input_channel: Number of channels of the incoming tensor. The two
            inner convolutions use ``input_channel // 4`` filters.
        output_channel: Number of channels produced by the block.
    """

    def __init__(self, input_channel, output_channel):
        super().__init__()
        self.input_channel = input_channel
        self.output_channel = output_channel

        # Integer division: `filters` must be an integer, while `/` would
        # hand Conv2D a float (e.g. 256 / 4 == 64.0).
        # NOTE(review): the width is derived from input_channel. The ResNet
        # paper's bottleneck (e.g. 64-64-256) appears to be a quarter of the
        # output width - confirm which is intended.
        bottleneck_channel = self.input_channel // 4

        # Main (residual) path.
        self.conv1 = Conv2D(filters=bottleneck_channel, kernel_size=(1, 1),
                            strides=1, padding='valid', use_bias=False)
        self.bn1 = BatchNormalization()
        self.av1 = Activation('relu')
        self.conv2 = Conv2D(filters=bottleneck_channel, kernel_size=(3, 3),
                            strides=1, padding='same', use_bias=False)
        self.bn2 = BatchNormalization()
        self.av2 = Activation('relu')
        self.conv3 = Conv2D(filters=self.output_channel, kernel_size=(1, 1),
                            strides=1, padding='valid', use_bias=False)
        self.bn3 = BatchNormalization()

        # Projection shortcut layers are created once here, not inside
        # `call`. Layers instantiated during the forward pass would get fresh
        # weights on every call instead of being built once as sublayers.
        if self.input_channel != self.output_channel:
            self.shortcut_conv = Conv2D(filters=self.output_channel,
                                        kernel_size=(1, 1), strides=1,
                                        padding='same', use_bias=False)
            self.shortcut_bn = BatchNormalization()
            # NOTE(review): the projection formula y = F(x, {W_i}) + W_s x has
            # no non-linearity on the shortcut. The ReLU is kept to preserve
            # the existing behaviour - confirm whether it is intended.
            self.shortcut_av = Activation('relu')

        self.add = Add()
        self.out_av = Activation('relu')

    def _shortcut(self, x):
        """Return the shortcut branch for ``x``.

        Identity when the channel counts match, otherwise the 1x1 projection
        (conv -> BN -> ReLU) built in ``__init__``.
        """
        if self.input_channel == self.output_channel:
            return x
        x = self.shortcut_conv(x)
        x = self.shortcut_bn(x)
        return self.shortcut_av(x)

    def call(self, x):
        """Apply the bottleneck block to ``x`` and return the activated sum."""
        h1 = self.conv1(x)
        h1 = self.bn1(h1)
        h1 = self.av1(h1)
        h2 = self.conv2(h1)
        h2 = self.bn2(h2)
        h2 = self.av2(h2)
        h3 = self.conv3(h2)
        h3 = self.bn3(h3)
        x = self._shortcut(x)
        y = self.add([x, h3])
        y = self.out_av(y)
        return y

とりあえず愚直に層を重ねて, モデルを作る. 繰り返しなどでコードを見やすくできるところがあれば随時修正する.  

論文を読み込んで, 画像サイズの変更をどうするのかを考える.  