In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers,Sequential,losses,optimizers,datasets

In [2]:
# 获取所有GPU 设备列表
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # 设置GPU 显存占用为按需分配
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # 异常处理
        print(e)

1 Physical GPUs, 1 Logical GPUs


## 全连接网络的问题

In [3]:
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

In [4]:
model=keras.Sequential([
    layers.Flatten(input_shape=(28, 28)),
    layers.Dense(256,activation='relu'),
    layers.Dense(256,activation='relu'),
    layers.Dense(256,activation='relu'),
    layers.Dense(10),
]
)

In [5]:
model.build(input_shape=(4,784))
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 256)               200960    
_________________________________________________________________
dense_1 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_2 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_3 (Dense)              (None, 10)                2570      
Total params: 335,114
Trainable params: 335,114
Non-trainable params: 0
_________________________________________________________________


In [13]:
model.compile(optimizer='adam',
              loss=losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'],)

In [14]:
model.fit(x_train, y_train, epochs=5)

model.evaluate(x_test,  y_test, verbose=2)

Train on 60000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
10000/1 - 1s - loss: 0.0489 - accuracy: 0.9741


[0.09734694455809659, 0.9741]

## 卷积层实现

### 自定义 张量形式实现

In [7]:
x = tf.random.normal([2,5,5,4]) # 模拟输入，3通道，高宽为5
# 需要根据[k,k,cin,cout]格式创建，4个卷积核
w = tf.random.normal([3,3,4,4]) 
# 步长为1, padding为0,
out = tf.nn.conv2d(x,w,strides=1,padding=[[0,0],[0,0],[0,0],[0,0]])
print(out.shape)

(2, 3, 3, 4)


In [16]:
x = tf.random.normal([2,5,5,3]) # 模拟输入，3通道，高宽为5
# 需要根据[k,k,cin,cout]格式创建，4个卷积核
w = tf.random.normal([3,3,3,4])
# 步长为1, padding为1,
out = tf.nn.conv2d(x,w,strides=1,padding=[[0,0],[1,1],[1,1],[0,0]])
print(out.shape)

(2, 5, 5, 4)


In [17]:
x = tf.random.normal([2,5,5,3]) # 模拟输入，3通道，高宽为5
w = tf.random.normal([3,3,3,4]) # 4个3x3大小的卷积核
# 步长为,padding设置为输出、输入同大小
# 需要注意的是, padding=same只有在strides=1时才是同大小
out = tf.nn.conv2d(x,w,strides=1,padding='SAME')
print(out.shape)

(2, 5, 5, 4)


In [18]:
x = tf.random.normal([2,5,5,3])
w = tf.random.normal([3,3,3,4])
# 高宽按3倍减少
out = tf.nn.conv2d(x,w,strides=3,padding='SAME')
print(out.shape)

(2, 2, 2, 4)


In [19]:
# 根据[cout]格式创建偏置向量
b = tf.zeros([4])
# 在卷积输出上叠加偏置向量，它会自动broadcasting为[b,h',w',cout]
out = out + b
print(out.shape)

(2, 2, 2, 4)


### 卷积层类实现

In [13]:
# 创建卷积层类
layer = layers.Conv2D(4,kernel_size=(3,4),strides=(2,1),padding='SAME')
out = layer(x) # 前向计算
print(out.shape)

(2, 3, 5, 4)


In [25]:
layer.kernel,layer.bias
# 返回所有待优化张量列表
layer.trainable_variables

[<tf.Variable 'conv2d_1/kernel:0' shape=(3, 4, 3, 4) dtype=float32, numpy=
 array([[[[ 0.02870092,  0.05202577, -0.23144433, -0.11545415],
          [-0.05260587, -0.25014585,  0.00097996,  0.20519614],
          [-0.08497144,  0.09365889, -0.14309603, -0.25663304]],
 
         [[ 0.23720178,  0.04914492,  0.2582697 , -0.11836813],
          [ 0.16496718,  0.11108315,  0.10814771, -0.08780807],
          [ 0.21826416, -0.05005611,  0.04483968,  0.11844498]],
 
         [[-0.10930988, -0.11488794, -0.2559034 , -0.23735589],
          [ 0.11511153,  0.11254972, -0.24527349,  0.06609675],
          [ 0.02759695,  0.16435611, -0.20593114, -0.06365964]],
 
         [[ 0.15198109, -0.02942209, -0.15500818, -0.09969759],
          [-0.0033367 , -0.03676398,  0.21261054, -0.10208614],
          [ 0.10273162,  0.256018  , -0.06240423,  0.08979797]]],
 
 
        [[[-0.16935061, -0.1285987 ,  0.01809508,  0.12173331],
          [ 0.20492691, -0.14370725,  0.2656764 , -0.09339543],
          [-0.

## LeNet-5 实战

### keras高级API实现

In [29]:
network = Sequential([ # 网络容器
    layers.Conv2D(6,kernel_size=3,strides=1), # 第一个卷积层, 6个3x3卷积核
    layers.MaxPooling2D(pool_size=2,strides=2), # 高宽各减半的池化层
    layers.ReLU(), # 激活函数
    layers.Conv2D(16,kernel_size=3,strides=1), # 第二个卷积层, 16个3x3卷积核
    layers.MaxPooling2D(pool_size=2,strides=2), # 高宽各减半的池化层
    layers.ReLU(), # 激活函数
    layers.Flatten(), # 打平层，方便全连接层处理

    layers.Dense(120, activation='relu'), # 全连接层，120个节点
    layers.Dense(84, activation='relu'), # 全连接层，84节点
    layers.Dense(10) # 全连接层，10个节点
                    ])
# build一次网络模型，给输入X的形状，其中4为随意给的batchsz
network.build(input_shape=(4, 28, 28, 1))
# 统计网络信息
network.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            multiple                  60        
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 multiple                  0         
_________________________________________________________________
re_lu_2 (ReLU)               multiple                  0         
_________________________________________________________________
conv2d_5 (Conv2D)            multiple                  880       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 multiple                  0         
_________________________________________________________________
re_lu_3 (ReLU)               multiple                  0         
_________________________________________________________________
flatten_3 (Flatten)          multiple                 

In [27]:
network.compile(optimizer='adam',
              loss=losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'],)

In [28]:
network.fit(x_train, y_train, epochs=5)

network.evaluate(x_test,  y_test, verbose=2)

Train on 60000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
10000/1 - 1s - loss: 0.0465 - accuracy: 0.9783


[0.09296019066644755, 0.9783]

### 手动实现

In [14]:
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

In [15]:
train_ds = tf.data.Dataset.from_tensor_slices(
    (x_train, y_train)).shuffle(10000).batch(32)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)

In [16]:
network = Sequential([ # 网络容器
    layers.Conv2D(6,kernel_size=3,strides=1), # 第一个卷积层, 6个3x3卷积核
    layers.MaxPooling2D(pool_size=2,strides=2), # 高宽各减半的池化层
    layers.ReLU(), # 激活函数
    layers.Conv2D(16,kernel_size=3,strides=1), # 第二个卷积层, 16个3x3卷积核
    layers.MaxPooling2D(pool_size=2,strides=2), # 高宽各减半的池化层
    layers.ReLU(), # 激活函数
    layers.Flatten(), # 打平层，方便全连接层处理

    layers.Dense(120, activation='relu'), # 全连接层，120个节点
    layers.Dense(84, activation='relu'), # 全连接层，84节点
    layers.Dense(10) # 全连接层，10个节点
                    ])
# build一次网络模型，给输入X的形状，其中4为随意给的batchsz
network.build(input_shape=(4, 28, 28, 1))
# 统计网络信息
network.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            multiple                  60        
_________________________________________________________________
max_pooling2d (MaxPooling2D) multiple                  0         
_________________________________________________________________
re_lu (ReLU)                 multiple                  0         
_________________________________________________________________
conv2d_7 (Conv2D)            multiple                  880       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 multiple                  0         
_________________________________________________________________
re_lu_1 (ReLU)               multiple                  0         
_________________________________________________________________
flatten_1 (Flatten)          multiple                 

In [17]:
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

optimizer = tf.keras.optimizers.Adam()

In [18]:
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

In [19]:
@tf.function
def train_step(images, labels):
    with tf.GradientTape() as tape:
        predictions = network(images)
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, network.trainable_variables)
    optimizer.apply_gradients(zip(gradients, network.trainable_variables))

    train_loss(loss)
    train_accuracy(labels, predictions)

In [20]:
@tf.function
def test_step(images, labels):
    predictions = network(images)
    t_loss = loss_object(labels, predictions)

    test_loss(t_loss)
    test_accuracy(labels, predictions)

In [21]:
EPOCHS = 5

for epoch in range(EPOCHS):
    for images, labels in train_ds:
        images=tf.expand_dims(images,axis=3)
        train_step(images, labels)

    for test_images, test_labels in test_ds:
        test_images=tf.expand_dims(test_images,axis=3)
        test_step(test_images, test_labels)

    template = 'Epoch {}, Loss: {:.4f}, Accuracy: {:.4f}, Test Loss: {:.4f}, Test Accuracy: {:.4f}'
    print (template.format(epoch+1,
                         train_loss.result(),
                         train_accuracy.result()*100,
                         test_loss.result(),
                         test_accuracy.result()*100))



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epoch 1, Loss: 0.1978, Accuracy: 93.9900, Test Loss: 0.0790, Test Accuracy: 97.4700
Epoch 2, Loss: 0.1331, Accuracy: 95.9350, Test Loss: 0.0686, Test Accuracy: 97.7400
Epoch 3, Loss: 0.1056, Accuracy: 96.7700, Test Loss: 0.0581, Test Accuracy: 98.0833
Epoch 4, Loss: 0.0887, Accuracy: 97.2950, Test Loss: 0.0543, Test Accuracy: 98.2025
Epoch 5, Loss: 0.0771, Accuracy: 97.6383, Test Loss: 0.0516, Test Accuracy: 98.2900


### 加入BN层

In [60]:
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

In [61]:
network = Sequential([ # 网络容器
    layers.Conv2D(6,kernel_size=3,strides=1),
    # 插入BN层
    layers.BatchNormalization(),
    layers.MaxPooling2D(pool_size=2,strides=2),
    layers.ReLU(),
    layers.Conv2D(16,kernel_size=3,strides=1),
    # 插入BN层
    layers.BatchNormalization(),
    layers.MaxPooling2D(pool_size=2,strides=2),
    layers.ReLU(),
    layers.Flatten(),
    layers.Dense(120, activation='relu'),
    # 此处也可以插入BN层
    layers.Dense(84, activation='relu'), 
    # 此处也可以插入BN层
    layers.Dense(10)
])
# build一次网络模型，给输入X的形状，其中4为随意给的batchsz
network.build(input_shape=(4, 28, 28, 1))
# 统计网络信息
network.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_12 (Conv2D)           multiple                  60        
_________________________________________________________________
batch_normalization (BatchNo multiple                  24        
_________________________________________________________________
max_pooling2d_10 (MaxPooling multiple                  0         
_________________________________________________________________
re_lu_10 (ReLU)              multiple                  0         
_________________________________________________________________
conv2d_13 (Conv2D)           multiple                  880       
_________________________________________________________________
batch_normalization_1 (Batch multiple                  64        
_________________________________________________________________
max_pooling2d_11 (MaxPooling multiple                 

In [62]:
model.compile(optimizer='adam',
              loss=losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'],)

In [63]:
model.fit(x_train, y_train, epochs=5)

model.evaluate(x_test,  y_test, verbose=2)

Train on 60000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
10000/1 - 1s - loss: 0.0713 - accuracy: 0.9811


[0.1426826808776987, 0.9811]

## cifar10+VGG

In [24]:
import  tensorflow as tf
from    tensorflow.keras import layers, optimizers, datasets, Sequential
import  os

os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
tf.random.set_seed(2345)


In [25]:
def preprocess(x, y):
    # [0~1]
    x = 2*tf.cast(x, dtype=tf.float32) / 255.-1
    y = tf.cast(y, dtype=tf.int32)
    return x,y

In [26]:
(x,y), (x_test, y_test) = datasets.cifar10.load_data()
y = tf.squeeze(y, axis=1)
y_test = tf.squeeze(y_test, axis=1)
print(x.shape, y.shape, x_test.shape, y_test.shape)

(50000, 32, 32, 3) (50000,) (10000, 32, 32, 3) (10000,)


In [27]:
train_db = tf.data.Dataset.from_tensor_slices((x,y))
train_db = train_db.shuffle(1000).map(preprocess).batch(128)

test_db = tf.data.Dataset.from_tensor_slices((x_test,y_test))
test_db = test_db.map(preprocess).batch(64)

sample = next(iter(train_db))
print('sample:', sample[0].shape, sample[1].shape,
      tf.reduce_min(sample[0]), tf.reduce_max(sample[0]))

sample: (128, 32, 32, 3) (128,) tf.Tensor(-1.0, shape=(), dtype=float32) tf.Tensor(1.0, shape=(), dtype=float32)


In [153]:
network=Sequential([
     # unit 1
    layers.Conv2D(64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(64, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),

    # unit 2
    layers.Conv2D(128, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(128, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),

    # unit 3
    layers.Conv2D(256, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(256, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),

    # unit 4
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),

    # unit 5
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.Conv2D(512, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
    layers.MaxPool2D(pool_size=[2, 2], strides=2, padding='same'),
    layers.Reshape([512]),
    
    layers.Dense(256, activation=tf.nn.relu),
    layers.Dense(128, activation=tf.nn.relu),
    layers.Dense(10, activation=None),
])

In [154]:
network.compile(optimizer='adam',
              loss=losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'],)

In [156]:
network.fit(train_db, epochs=50,validation_data=test_db,validation_freq=5)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x20ec57cff98>