In [2]:
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
import tensorflow as tf

from tensorflow import keras

# Record the interpreter and library versions this notebook was executed with,
# so results can be reproduced against the same stack.
print(tf.__version__)
print(sys.version_info)
for library in (mpl, np, pd, sklearn, tf, keras):
    print(library.__name__, library.__version__)


2.0.0
sys.version_info(major=3, minor=7, micro=6, releaselevel='final', serial=0)
matplotlib 3.1.3
numpy 1.18.1
pandas 1.0.1
sklearn 0.22.1
tensorflow 2.0.0
tensorflow_core.keras 2.2.4-tf


## 设置GPU

In [None]:
# Log the device every op is placed on, so the GPU distribution of the model is visible.
tf.debugging.set_log_device_placement(True)

# All physical GPUs visible to TensorFlow.
gpus = tf.config.experimental.list_physical_devices('GPU')

# Let each GPU allocate memory on demand instead of reserving it all up front.
# Memory growth can only be configured before the GPUs are initialised; listing
# the logical devices below initialises them, so re-running this cell would
# otherwise raise RuntimeError. Report it and carry on with the existing setup.
try:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    print(err)
print(len(gpus))

# All logical GPUs (what TensorFlow actually schedules work on).
logical_gpu = tf.config.experimental.list_logical_devices('GPU')
print(len(logical_gpu))

## 处理数据

### 读取数据

In [None]:
# Load Fashion-MNIST and hold out the first 5000 training examples for validation.
fashion_mnist = keras.datasets.fashion_mnist
(x_train_all, y_train_all), (x_test, y_test) = fashion_mnist.load_data()

# np.split at index 5000 yields the same two slices as [:5000] and [5000:].
x_valid, x_train = np.split(x_train_all, [5000])
y_valid, y_train = np.split(y_train_all, [5000])

print(x_valid.shape, y_valid.shape)
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

In [None]:
from sklearn.preprocessing import StandardScaler

def _as_pixel_column(images):
    """Cast an image stack to float32 and flatten it into a single column of pixel values."""
    return images.astype(np.float32).reshape(-1, 1)


def _as_image_batch(pixel_column):
    """Reshape a column of pixel values back into (N, 28, 28, 1) single-channel images."""
    return pixel_column.reshape(-1, 28, 28, 1)


# Standardise with one global mean/std: the scaler is fitted on the training
# pixels only and then reused unchanged for the validation and test sets.
scaler = StandardScaler()
x_train_scaled = _as_image_batch(scaler.fit_transform(_as_pixel_column(x_train)))
x_valid_scaled = _as_image_batch(scaler.transform(_as_pixel_column(x_valid)))
x_test_scaled = _as_image_batch(scaler.transform(_as_pixel_column(x_test)))


### 生成dataset

In [None]:
def make_dataset(images, labels, epochs, batch_size, shuffle=True):
    """Build a batched, prefetched tf.data pipeline from in-memory arrays.

    Args:
        images: Array of input images; sliced along its first axis.
        labels: Array of labels aligned with ``images`` along the first axis.
        epochs: Number of times the data is repeated.
        batch_size: Number of examples per batch.
        shuffle: When True, shuffle examples with a 10000-element buffer
            before repeating and batching.

    Returns:
        A ``tf.data.Dataset`` yielding ``(images, labels)`` batches.
    """
    dataset = tf.data.Dataset.from_tensor_slices((images, labels))
    if shuffle:
        dataset = dataset.shuffle(10000)
    # prefetch keeps up to 50 batches prepared ahead of the training step,
    # overlapping input preparation with computation.
    dataset = dataset.repeat(epochs).batch(batch_size).prefetch(50)
    return dataset

# MirroredStrategy drives synchronous data-parallel training on this machine.
strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    # The datasets hold a single pass over the data; batches contain 128 examples.
    epochs, batch_size = 1, 128

    # Plain tf.data pipelines for the training and validation splits (in that order).
    train_dataset, valid_dataset = (
        make_dataset(split_images, split_labels, epochs, batch_size)
        for split_images, split_labels in (
            (x_train_scaled, y_train),
            (x_valid_scaled, y_valid),
        )
    )

    # Distributed views of the same pipelines, for use with the strategy.
    train_dataset_distribute, valid_dataset_distribute = map(
        strategy.experimental_distribute_dataset,
        (train_dataset, valid_dataset),
    )

## 实现卷积神经网络模型

In [None]:
# Build the CNN under the strategy scope so its variables are created by the strategy.
with strategy.scope():
    # Settings shared by every convolution in the network.
    conv_kwargs = dict(kernel_size=3, padding='same', activation='relu')

    model = keras.models.Sequential()

    # Block 1: two 32-filter convolutions followed by 2x2 max pooling.
    # Only the first layer of a Sequential model needs input_shape; later
    # layers infer their input from the layer before them.
    model.add(keras.layers.Conv2D(filters=32, input_shape=(28, 28, 1), **conv_kwargs))
    model.add(keras.layers.Conv2D(filters=32, **conv_kwargs))
    model.add(keras.layers.MaxPool2D(pool_size=2))

    # Blocks 2 and 3: same Conv-Conv-Pool layout. The filter count is doubled
    # after each pooling step, a common convention to offset what pooling discards.
    for filters in (64, 128):
        model.add(keras.layers.Conv2D(filters=filters, **conv_kwargs))
        model.add(keras.layers.Conv2D(filters=filters, **conv_kwargs))
        model.add(keras.layers.MaxPool2D(pool_size=2))

    # Classifier head: flatten the feature maps, one hidden layer, 10-way softmax.
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(128, activation='relu'))
    model.add(keras.layers.Dense(10, activation='softmax'))

## 自定义流程去掉model.compile函数，并手写fit函数

In [None]:
# Custom training procedure (replaces model.compile / model.fit):
# 1. define the loss function
# 2. define train_step (training on one replica) and its distributed wrapper
# 3. define test_step (evaluation on one replica) and its distributed wrapper
# 4. run the training for-loop

with strategy.scope():
    # 1. define loss function
    # Reduction.NONE keeps one loss value per example. Under a distribution
    # strategy the aggregation must be done explicitly (see compute_loss), so
    # that the loss is scaled by the global batch size rather than by each
    # replica's share of it.
    loss_func = keras.losses.SparseCategoricalCrossentropy(
        reduction=keras.losses.Reduction.NONE
    )

    def compute_loss(labels, predictions):
        """Return this replica's contribution to the global-batch mean loss (a scalar)."""
        per_example_loss = loss_func(labels, predictions)
        return tf.nn.compute_average_loss(per_example_loss,
                                          global_batch_size=batch_size)

    # Accumulating metrics (every keras.metrics object keeps running state).
    test_loss = keras.metrics.Mean(name='test_loss')
    train_accuracy = keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
    test_accuracy = keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

    optimizer = keras.optimizers.SGD(learning_rate=0.01)

    # Strategy.run replaced experimental_run_v2 in later TF 2.x releases;
    # use whichever one the installed version provides.
    run_on_replicas = getattr(strategy, 'run', None) or strategy.experimental_run_v2

    # 2. define function train_step (for training)
    def train_step(inputs):
        """Run one optimisation step on a single replica and return its scaled loss."""
        images, labels = inputs
        # Record the forward pass so gradients can be computed.
        with tf.GradientTape() as tape:
            predictions = model(images, training=True)
            loss = compute_loss(labels, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        # Update the running training accuracy.
        train_accuracy.update_state(labels, predictions)
        return loss

    @tf.function  # compile to a graph for speed
    def distributed_train_step(inputs):
        """Run train_step on every replica and sum the per-replica losses."""
        per_replica_loss = run_on_replicas(train_step, args=(inputs,))
        return strategy.reduce(tf.distribute.ReduceOp.SUM,
                               per_replica_loss, axis=None)

    # 3. define function test_step (for evaluation)
    def test_step(inputs):
        """Evaluate one batch on a single replica and update the test metrics."""
        images, labels = inputs
        predictions = model(images, training=False)
        # The Mean metric averages the per-example losses it is given.
        test_loss.update_state(loss_func(labels, predictions))
        test_accuracy.update_state(labels, predictions)

    @tf.function
    def distributed_test_step(inputs):
        """Run test_step on every replica."""
        run_on_replicas(test_step, args=(inputs,))

    # 4. training loop
    epochs = 10
    for epoch in range(epochs):
        total_loss = 0.0
        num_batches = 0
        for x in train_dataset_distribute:
            start_time = time.time()
            total_loss += float(distributed_train_step(x))
            runtime = time.time()-start_time
            num_batches += 1
            print('\rtotal loss: %3.3f, num_batches: %d, average_loss: %3.3f, time: %3.3f' % 
                  (total_loss, num_batches, total_loss / num_batches, runtime), end='')
        train_loss = total_loss / num_batches
        for x in valid_dataset_distribute:
            distributed_test_step(x)
        print('\rEpoch: %d, Loss: %3.3f, Acc: %3.3f, Val_Loss: %3.3f, Val_Acc: %3.3f' % 
             (epoch+1, train_loss, float(train_accuracy.result()),
              float(test_loss.result()), float(test_accuracy.result())))
        # Start every epoch with fresh metric state.
        test_loss.reset_states()
        train_accuracy.reset_states()
        test_accuracy.reset_states()