In [4]:
import tensorflow as tf
import numpy as np
import os
# Print the installed TensorFlow version so the recorded run documents its environment.
print(tf.__version__)

2.0.0


In [7]:
class BasicBlock(tf.keras.layers.Layer):
    """Basic residual block built from two 3x3 convolutions.

    Each convolution is followed by batch normalization. The block input is
    added back to the result through a shortcut before the final ReLU.

    Args:
        in_channels: Number of channels of the incoming feature map.
        out_channels: Number of filters of both 3x3 convolutions.
        stride: Stride of the first convolution; a value other than 1
            down-samples the feature map.
        use_bias: Whether the convolutions carry a bias term.
    """

    # Ratio between the block's output channels and ``out_channels``.
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1, use_bias=False):
        super().__init__()
        layers = tf.keras.layers
        expanded_channels = self.expansion * out_channels

        self.conv1 = layers.Conv2D(
            out_channels,
            kernel_size=3,
            strides=stride,
            padding="same",
            use_bias=use_bias,
        )
        self.bn1 = layers.BatchNormalization()
        self.conv2 = layers.Conv2D(
            out_channels,
            kernel_size=3,
            strides=1,
            padding="same",
            use_bias=use_bias,
        )
        self.bn2 = layers.BatchNormalization()

        # The identity shortcut is only valid when the block neither
        # down-samples nor changes the channel count; otherwise the input is
        # projected with a strided 1x1 convolution + batch normalization.
        shape_preserved = stride == 1 and in_channels == expanded_channels
        if shape_preserved:
            # Same call signature as the Sequential below: (inputs, training).
            self.shortcut = lambda tensor, _training: tensor
        else:
            projection = layers.Conv2D(
                expanded_channels,
                kernel_size=1,
                strides=stride,
                use_bias=use_bias,
            )
            self.shortcut = tf.keras.Sequential(
                [projection, layers.BatchNormalization()]
            )

    def call(self, inputs, training=False):
        """Run the residual block on ``inputs`` and return the activated sum."""
        features = self.conv1(inputs)
        features = tf.nn.relu(self.bn1(features, training=training))
        features = self.bn2(self.conv2(features), training=training)
        features = features + self.shortcut(inputs, training)
        return tf.nn.relu(features)


class Bottleneck(tf.keras.layers.Layer):
    """Bottleneck residual block: 1x1 conv -> 3x3 conv -> 1x1 conv.

    The first 1x1 convolution produces ``out_channels`` feature maps, the 3x3
    convolution (optionally strided) keeps that width, and the last 1x1
    convolution expands to ``out_channels * expansion`` channels. Every
    convolution is followed by batch normalization.

    Args:
        in_channels: Number of channels of the incoming feature map.
        out_channels: Width of the first two convolutions; the block outputs
            ``out_channels * expansion`` channels.
        strides: Stride of the 3x3 convolution; a value other than 1
            down-samples the feature map.
        use_bias: Whether the convolutions carry a bias term.
    """

    # Ratio between the block's output channels and ``out_channels``.
    expansion = 4

    def __init__(self, in_channels, out_channels, strides=1, use_bias=False):
        super(Bottleneck, self).__init__()

        self.conv1 = tf.keras.layers.Conv2D(out_channels, 1, 1, use_bias=use_bias)
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.conv2 = tf.keras.layers.Conv2D(out_channels, 3, strides, padding="same", use_bias=use_bias)
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.conv3 = tf.keras.layers.Conv2D(out_channels * self.expansion, 1, 1, use_bias=use_bias)
        self.bn3 = tf.keras.layers.BatchNormalization()

        # A projection shortcut (strided 1x1 conv + BN) is required whenever
        # the block down-samples or changes the channel count; otherwise the
        # input can be added back unchanged.
        if strides != 1 or in_channels != self.expansion * out_channels:
            self.shortcut = tf.keras.Sequential([tf.keras.layers.Conv2D(self.expansion * out_channels, kernel_size=1, strides=strides, use_bias=use_bias),
                                        tf.keras.layers.BatchNormalization()])
        else:
            # Same call signature as the Sequential above: (inputs, training).
            self.shortcut = lambda x, _: x

    def call(self, inputs, training=False):
        """Run the bottleneck block on ``inputs`` and return the activated sum."""
        out = self.conv1(inputs)
        out = self.bn1(out, training=training)
        out = tf.nn.relu(out)
        out = self.conv2(out)
        out = self.bn2(out, training=training)
        out = tf.nn.relu(out)
        out = self.conv3(out)
        out = self.bn3(out, training=training)
        # No ReLU here: the residual branch must stay un-rectified until after
        # the shortcut is added (as BasicBlock does). Rectifying first would
        # restrict the residual to non-negative corrections.
        # NOTE: this does not change any variable, but checkpoints trained with
        # the earlier extra ReLU will produce different activations.
        out += self.shortcut(inputs, training)
        out = tf.nn.relu(out)

        return out


class ResNet(tf.keras.Model):
    """Configurable ResNet classifier made of a stem and four residual stages.

    Args:
        blocks: Residual block class to stack; must be ``BasicBlock`` or
            ``Bottleneck``.
        layer_dims: Four ``[filters, num_blocks, stride]`` triples, one per
            stage, e.g. ``[[32, 2, 1], [64, 2, 2], [128, 2, 2], [256, 2, 2]]``.
            ``filters`` is the block width, ``num_blocks`` the number of
            blocks in the stage and ``stride`` the stride of its first block.
        num_classes: Size of the final softmax layer (number of classes).
        use_bias: Whether the convolutions carry a bias term.

    Raises:
        ValueError: If any argument fails ``check_param``.
    """

    def __init__(self, blocks, layer_dims, num_classes=10, use_bias=False):
        super(ResNet, self).__init__()

        # Fail fast on bad arguments. Silently returning here would hand the
        # caller a half-initialised model that only breaks later with an
        # unrelated AttributeError.
        if not self.check_param(blocks, layer_dims, num_classes, use_bias):
            raise ValueError(
                "Invalid ResNet arguments; see the 'Parameter Error' message printed above."
            )

        # Channel count of the tensor fed to the next block; updated by
        # build_resblock as blocks are appended.
        self.in_channels = layer_dims[0][0]
        self.use_bias = use_bias

        # 0. Stem: a 3x3 convolution + batch normalization, followed (in call)
        #    by ReLU and a stride-2 max-pool. The stem width matches the first
        #    stage's width so the first block sees the expected channel count.
        self.stem = tf.keras.Sequential([tf.keras.layers.Conv2D(self.in_channels, 3, 1, padding="same", use_bias=use_bias),
                                tf.keras.layers.BatchNormalization()])
        self.pool = tf.keras.layers.MaxPool2D(pool_size=[3, 3], strides=2, padding='same')

        # 1. The four residual stages.
        self.layer1 = self.build_resblock(blocks, out_channels=layer_dims[0][0], num_blocks=layer_dims[0][1], stride=layer_dims[0][2])
        self.layer2 = self.build_resblock(blocks, out_channels=layer_dims[1][0], num_blocks=layer_dims[1][1], stride=layer_dims[1][2])
        self.layer3 = self.build_resblock(blocks, out_channels=layer_dims[2][0], num_blocks=layer_dims[2][1], stride=layer_dims[2][2])
        self.layer4 = self.build_resblock(blocks, out_channels=layer_dims[3][0], num_blocks=layer_dims[3][1], stride=layer_dims[3][2])

        self.final_bn = tf.keras.layers.BatchNormalization()

        self.avgpool = tf.keras.layers.GlobalAveragePooling2D()
        self.dense = tf.keras.layers.Dense(num_classes, activation=tf.nn.softmax)

    def build_resblock(self, blocks, out_channels, num_blocks, stride):
        """Build one stage: ``num_blocks`` residual blocks in a Sequential.

        Only the first block of the stage uses ``stride``; the remaining
        blocks use stride 1.
        """
        strides = [stride] + [1] * (num_blocks - 1)
        res_blocks = tf.keras.Sequential()
        for block_stride in strides:
            res_blocks.add(blocks(self.in_channels, out_channels, block_stride, self.use_bias))
            # A block outputs out_channels * expansion channels (4x for
            # Bottleneck). Tracking the true width lets later blocks in the
            # stage use the identity shortcut instead of a needless
            # projection. NOTE: this changes the variable set of Bottleneck
            # models, so checkpoints saved before this fix will not match.
            self.in_channels = out_channels * blocks.expansion

        return res_blocks

    def call(self, inputs, training=False):
        """Forward pass: stem -> four stages -> BN/ReLU -> pooling -> softmax."""
        out = self.stem(inputs, training=training)
        out = tf.nn.relu(out)
        out = self.pool(out)

        out = self.layer1(out, training=training)
        out = self.layer2(out, training=training)
        out = self.layer3(out, training=training)
        out = self.layer4(out, training=training)

        out = self.final_bn(out, training=training)

        out = tf.nn.relu(out)
        out = self.avgpool(out)
        out = self.dense(out)

        return out

    def check_param(self, blocks, layer_dims, num_classes, use_bias):
        """Validate constructor arguments.

        Prints a message describing the first problem found.

        Returns:
            True when every argument is acceptable, False otherwise.
        """
        if blocks not in (BasicBlock, Bottleneck):
            print('Parameter Error: blocks must be BasicBlock or Bottleneck.')
            return False
        if np.shape(layer_dims) != (4, 3):
            print('Parameter Error: layer_dims shape must be (4, 3).')
            return False
        # Exact type checks (not isinstance) so that bool is rejected as a
        # class count and only real bools are accepted for use_bias.
        if type(num_classes) is not int or num_classes <= 0:
            print('Parameter Error: num_classes must be greater than  0.')
            return False
        if type(use_bias) is not bool:
            print('Parameter Error: use_bias must be True or False.')
            return False

        return True
        

########################################################################################################################
def ResNet18():
    """ResNet-18: BasicBlock stages with 2, 2, 2 and 2 blocks."""
    stage_config = [[64, 2, 1], [128, 2, 2], [256, 2, 2], [512, 2, 2]]
    return ResNet(BasicBlock, stage_config)


def ResNet34():
    """ResNet-34: BasicBlock stages with 3, 4, 6 and 3 blocks."""
    stage_config = [[64, 3, 1], [128, 4, 2], [256, 6, 2], [512, 3, 2]]
    return ResNet(BasicBlock, stage_config)


def ResNet50():
    """ResNet-50: Bottleneck stages with 3, 4, 6 and 3 blocks."""
    stage_config = [[64, 3, 1], [128, 4, 2], [256, 6, 2], [512, 3, 2]]
    return ResNet(Bottleneck, stage_config)


def ResNet101():
    """ResNet-101: Bottleneck stages with 3, 4, 23 and 3 blocks."""
    stage_config = [[64, 3, 1], [128, 4, 2], [256, 23, 2], [512, 3, 2]]
    return ResNet(Bottleneck, stage_config)


def ResNet152():
    """ResNet-152: Bottleneck stages with 3, 8, 36 and 3 blocks."""
    stage_config = [[64, 3, 1], [128, 8, 2], [256, 36, 2], [512, 3, 2]]
    return ResNet(Bottleneck, stage_config)


def ResNet24_bb():
    """Small BasicBlock network: widths 8/16/32/64 with 2, 2, 4 and 3 blocks."""
    stage_config = [[8, 2, 1], [16, 2, 2], [32, 4, 2], [64, 3, 2]]
    return ResNet(BasicBlock, stage_config)


def ResNet24_bn():
    """Small Bottleneck network: widths 8/16/32/64 with 2, 2, 4 and 3 blocks."""
    stage_config = [[8, 2, 1], [16, 2, 2], [32, 4, 2], [64, 3, 2]]
    return ResNet(Bottleneck, stage_config)
########################################################################################################################

class MNISTLoader():
    """Loads MNIST through Keras and exposes it as normalized NumPy arrays.

    Attributes set by the constructor:
        train_data / test_data: float32 images scaled to [0, 1] with a
            trailing channel axis appended.
        train_label / test_label: int32 class labels.
        num_train_data / num_test_data: number of samples in each split.
    """

    def __init__(self):
        (train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()

        # The raw images are uint8 in 0-255: convert to float32 in [0, 1] and
        # append a channel axis as the last dimension.
        self.train_data = np.expand_dims(train_images.astype(np.float32) / 255.0, axis=-1)
        self.test_data = np.expand_dims(test_images.astype(np.float32) / 255.0, axis=-1)

        self.train_label = train_labels.astype(np.int32)
        self.test_label = test_labels.astype(np.int32)

        self.num_train_data = self.train_data.shape[0]
        self.num_test_data = self.test_data.shape[0]

    def get_batch(self, batch_size):
        """Return ``batch_size`` training samples drawn at random with replacement.

        Returns:
            A ``(images, labels)`` tuple whose rows correspond to each other.
        """
        sample_count = self.train_data.shape[0]
        picks = np.random.randint(0, sample_count, size=batch_size)
        batch_images = self.train_data[picks, :]
        batch_labels = self.train_label[picks]
        return batch_images, batch_labels

# Training hyperparameters shared by the training functions and the input pipeline.
num_epochs = 5          # passed to model.fit as the number of epochs
batch_size = 128        # batch size applied to the tf.data pipelines
learning_rate = 0.001   # learning rate handed to the Adam optimizer

def show_sumary():
    """Build a ResNet34 for 256x256 three-channel inputs and print its summary."""
    image_batch_shape = (None, 256, 256, 3)
    net = ResNet34()
    net.build(input_shape=image_batch_shape)
    net.summary()

def train_ResNet24_bb(dataset=None):
    """Train the ResNet24_bb model on 28x28 single-channel images.

    Resumes from the newest checkpoint in the checkpoint directory when one
    exists, and saves the weights after every epoch.

    Args:
        dataset: Batched ``tf.data.Dataset`` of ``(image, label)`` pairs. When
            omitted, the module-level ``train_dataset`` is used, so that
            variable must already exist (it is created by the ``__main__``
            cell).
    """
    checkpoint_path = "training_ResNet_sample_ResNet24_bb_1/cp-{epoch:04d}.ckpt"
    checkpoint_dir = os.path.dirname(checkpoint_path)
    model = ResNet24_bb()
    model.build(input_shape=(None, 28, 28, 1))
    model.summary()

    cp_callback_mc = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                        save_weights_only=True,
                                                        verbose=0)
    # latest_checkpoint returns None when the directory holds no checkpoint.
    latest = tf.train.latest_checkpoint(checkpoint_dir)
    if latest is not None:
        model.load_weights(latest)

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  loss=tf.keras.losses.sparse_categorical_crossentropy,
                  metrics=[tf.keras.metrics.sparse_categorical_accuracy])

    # Keep the original behaviour (global dataset) unless a dataset is given.
    if dataset is None:
        dataset = train_dataset

    model.fit(
        dataset,
        epochs=num_epochs,
        verbose=1,
        callbacks=[cp_callback_mc]
    )

def train_ResNet50(dataset=None):
    """Train the ResNet50 model on 28x28 single-channel images.

    Resumes from the newest checkpoint in the checkpoint directory when one
    exists, and saves the weights after every epoch.

    Args:
        dataset: Batched ``tf.data.Dataset`` of ``(image, label)`` pairs. When
            omitted, the module-level ``train_dataset`` is used, so that
            variable must already exist (it is created by the ``__main__``
            cell).
    """
    checkpoint_path = "training_ResNet_sample_ResNet50_1/cp-{epoch:04d}.ckpt"
    checkpoint_dir = os.path.dirname(checkpoint_path)
    model = ResNet50()
    model.build(input_shape=(None, 28, 28, 1))
    model.summary()

    cp_callback_mc = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                        save_weights_only=True,
                                                        verbose=0)
    # latest_checkpoint returns None when the directory holds no checkpoint.
    latest = tf.train.latest_checkpoint(checkpoint_dir)
    if latest is not None:
        model.load_weights(latest)

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  loss=tf.keras.losses.sparse_categorical_crossentropy,
                  metrics=[tf.keras.metrics.sparse_categorical_accuracy])

    # Keep the original behaviour (global dataset) unless a dataset is given.
    if dataset is None:
        dataset = train_dataset

    model.fit(
        dataset,
        epochs=num_epochs,
        verbose=1,
        callbacks=[cp_callback_mc]
    )

def test(dataset=None):
    """Evaluate a ResNet34 on the test set and print ``[loss, accuracy]``.

    The newest checkpoint found in the ResNet34 checkpoint directory is
    restored first; when none exists the freshly initialised weights are
    evaluated.

    NOTE(review): the training functions in this notebook write to the
    ResNet24_bb / ResNet50 checkpoint directories, not to the ResNet34 one
    read here -- confirm which model is meant to be evaluated.

    Args:
        dataset: Batched ``tf.data.Dataset`` of ``(image, label)`` pairs. When
            omitted, the module-level ``test_dataset`` is used, so that
            variable must already exist (it is created by the ``__main__``
            cell).
    """
    model = ResNet34()
    # Create the variables up front, as the training functions do, so the
    # checkpoint is restored into concrete weights.
    model.build(input_shape=(None, 28, 28, 1))

    checkpoint_path = "training_ResNet_sample_ResNet34_1/cp-{epoch:04d}.ckpt"
    #checkpoint_path = "training_ResNet_sample_ResNet50_1/cp-{epoch:04d}.ckpt"
    checkpoint_dir = os.path.dirname(checkpoint_path)
    # latest_checkpoint returns None when the directory holds no checkpoint.
    latest = tf.train.latest_checkpoint(checkpoint_dir)
    if latest is not None:
        model.load_weights(latest)

    # evaluate() requires a compiled model; use the same loss and metric as
    # the training functions so the printed numbers are comparable.
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  loss=tf.keras.losses.sparse_categorical_crossentropy,
                  metrics=[tf.keras.metrics.sparse_categorical_accuracy])

    # Keep the original behaviour (global dataset) unless a dataset is given.
    if dataset is None:
        dataset = test_dataset

    loss_and_metrics = model.evaluate(dataset)
    print(loss_and_metrics)

In [2]:
if __name__ == '__main__':
    #checkpoint_path = "training_bn_sample_1/cp-{epoch:04d}.ckpt"
    #checkpoint_dir = os.path.dirname(checkpoint_path)

    # Load MNIST once, then wrap each split in a shuffled, batched and
    # prefetched tf.data pipeline. The training and evaluation functions read
    # train_dataset / test_dataset as module-level names.
    data_loader = MNISTLoader()

    train_dataset = (
        tf.data.Dataset.from_tensor_slices((data_loader.train_data, data_loader.train_label))
        .shuffle(buffer_size=23000)
        .batch(batch_size)
        .prefetch(tf.data.experimental.AUTOTUNE)
    )

    test_dataset = (
        tf.data.Dataset.from_tensor_slices((data_loader.test_data, data_loader.test_label))
        .shuffle(buffer_size=23000)
        .batch(batch_size)
        .prefetch(tf.data.experimental.AUTOTUNE)
    )
"""  
    cp_callback_mc = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                        save_weights_only=True,
                                                        verbose=0)
    latest = tf.train.latest_checkpoint(checkpoint_dir)
    if(latest != None):
        model.load_weights(latest) 
"""

NameError: name 'MNISTLoader' is not defined

显存占用计算方法：
训练参数：Trainable params * 4 * 4字节
   每个参数占用4字节，再加上adam训练所需
每一层输出用到的参数：Non-trainable params * 4 * batch_size * 2
   训练时需要正传播和反向传播所以乘以2
两者相加为所需要的显存数量

例：ResNet152()
Total params: 105,526,720
Trainable params: 105,283,136
Non-trainable params: 243,584
105,283,136 * 4 * 4 + 243,584 * 4 * 2 * 128 = 1933960192 (1.8GB)

In [3]:
    # Train the ResNet24_bb model; this reads the train_dataset created by the __main__ cell.
    train_ResNet24_bb()

NameError: name 'train_ResNet24_bb' is not defined

In [31]:
    # NOTE(review): no train_ResNet24_bn function is defined in the cells shown in this
    # notebook (only the ResNet24_bn model factory) -- the output recorded below presumably
    # came from an earlier kernel state; confirm before re-running.
    train_ResNet24_bn()

Model: "res_net_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential_237 (Sequential)  multiple                  832       
_________________________________________________________________
sequential_238 (Sequential)  multiple                  351744    
_________________________________________________________________
sequential_242 (Sequential)  multiple                  2019328   
_________________________________________________________________
sequential_247 (Sequential)  multiple                  12382208  
_________________________________________________________________
sequential_254 (Sequential)  multiple                  23392256  
_________________________________________________________________
batch_normalization_972 (Bat multiple                  8192      
Total params: 38,154,560
Trainable params: 38,074,816
Non-trainable params: 79,744
_______________________________________

InvalidArgumentError:  Incompatible shapes: [128,1] vs. [128,4,4]
	 [[node metrics/sparse_categorical_accuracy/Equal (defined at e:\program files\python\python37-64\lib\site-packages\tensorflow_core\python\framework\ops.py:1751) ]] [Op:__inference_distributed_function_167795]

Function call stack:
distributed_function


In [11]:
# Print the layer summary of a ResNet34 built for 256x256 three-channel inputs.
show_sumary()

Model: "res_net_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential_32 (Sequential)   multiple                  1984      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 multiple                  0         
_________________________________________________________________
sequential_33 (Sequential)   multiple                  222720    
_________________________________________________________________
sequential_34 (Sequential)   multiple                  1118720   
_________________________________________________________________
sequential_36 (Sequential)   multiple                  6829056   
_________________________________________________________________
sequential_38 (Sequential)   multiple                  13121536  
_________________________________________________________________
batch_normalization_184 (Bat multiple                  20