# 函数式API简介

In [3]:
from keras import Input, layers
from keras.models import Sequential, Model

In [4]:
seq_model = Sequential()
seq_model.add(layers.Dense(32, activation='relu', input_shape=(64,)))
seq_model.add(layers.Dense(32, activation='relu'))
seq_model.add(layers.Dense(10, activation='softmax'))
seq_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 32)                2080      
_________________________________________________________________
dense_1 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_2 (Dense)              (None, 10)                330       
Total params: 3,466
Trainable params: 3,466
Non-trainable params: 0
_________________________________________________________________


In [5]:
input_tensor = Input(shape=(64,))
x = layers.Dense(32, activation='relu')(input_tensor)
x = layers.Dense(32, activation='relu')(x)
output_tensor = layers.Dense(10, activation='softmax')(x)
model = Model(input_tensor, output_tensor)
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 64)]              0         
_________________________________________________________________
dense_3 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_4 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_5 (Dense)              (None, 10)                330       
Total params: 3,466
Trainable params: 3,466
Non-trainable params: 0
_________________________________________________________________


## 用函数式API实现双输入问答模型

In [6]:
from keras.models import Model
from keras import layers
from keras import Input

In [9]:
text_vocab_size = 10000
ques_vocab_size = 10000
ans_vocab_size = 500

text_input = Input(shape=(None,), dtype='int32', name='text')
embedding_text = layers.Embedding(text_vocab_size, 64)(text_input)
encoded_text = layers.LSTM(32)(embedding_text)

ques_input = Input(shape=(None,), dtype='int32', name='question')
embedded_ques = layers.Embedding(ques_vocab_size, 32)(ques_input)
encoded_ques = layers.LSTM(16)(embedded_ques)

concatenated = layers.concatenate([encoded_text, encoded_ques], axis=-1)
answer = layers.Dense(ans_vocab_size, activation='softmax')(concatenated)

model = Model([text_input, ques_input], answer)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['acc'])

model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
text (InputLayer)               [(None, None)]       0                                            
__________________________________________________________________________________________________
question (InputLayer)           [(None, None)]       0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, None, 64)     640000      text[0][0]                       
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, None, 32)     320000      question[0][0]                   
____________________________________________________________________________________________

## 将数据输入到多输入模型中

In [14]:
import numpy as np
import keras


num_samples = 10000
max_length = 100

text = np.random.randint(1, text_vocab_size, size=(num_samples, max_length))  # 生成虚构的Numpy数据
ques = np.random.randint(1, ques_vocab_size, size=(num_samples, max_length))

ans = np.random.randint(ans_vocab_size, size=(num_samples))
ans = keras.utils.to_categorical(ans, ans_vocab_size)

model.fit([text, ques], ans, epochs=10, batch_size=128)
# model.fit({'text':text, 'question':ques}, ans, epochs=10, batch_size=128)  # 对输入有命名时可以这样用

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fa36aae1750>

# 多输出模型

In [16]:
from keras import layers
from keras import Input
from keras.models import Model

vocab_size = 50000
num_income_groups = 10

posts_input = Input(shape=(None,), dtype='int32', name='posts')
embedding_posts = layers.Embedding(vocab_size, 256)(posts_input)

x = layers.Conv1D(128, 5, activation='relu')(embedding_posts)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.GlobalMaxPooling1D()(x)
x = layers.Dense(128, activation='relu')(x)

age_prediction = layers.Dense(1, name='age')(x)  # 注意，输出层都具有名字
income_prediction = layers.Dense(num_income_groups, activation='softmax', name='income')(x)
gender_prediction = layers.Dense(1, activation='sigmoid', name='gender')(x)

model = Model(posts_input, [age_prediction, income_prediction, gender_prediction])

In [17]:
model.summary()

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
posts (InputLayer)              [(None, None)]       0                                            
__________________________________________________________________________________________________
embedding_4 (Embedding)         (None, None, 256)    12800000    posts[0][0]                      
__________________________________________________________________________________________________
conv1d_5 (Conv1D)               (None, None, 128)    163968      embedding_4[0][0]                
__________________________________________________________________________________________________
max_pooling1d_2 (MaxPooling1D)  (None, None, 128)    0           conv1d_5[0][0]                   
____________________________________________________________________________________________

## 多输出模型的多重损失

In [18]:
model.compile(optimizer='rmsprop', loss=['mse', 'categorical_crossentropy', 'binary_crossentropy'])

# model.compile(optimizer='rmsprop', loss={'age':'mse', 'income':'categorical_crossentropy', 'gender':'binary_crossentropy'})

## 损失加权

In [19]:
model.compile(optimizer='rmsprop', loss=['mse', 'categorical_crossentropy', 'binary_crossentropy'], loss_weights=[0.25, 1., 10.])

In [20]:
# model.compile(optimizer='rmsprop', loss={'age':'mse', 'income':'categorical_crossentropy', 'gender':'binary_crossentropy'}, loss_weights={'age':0.25, 'income':1., 'gender':10.})

## 将数据输入到多输出模型中

In [21]:
model.fit(posts, [age_targets, income_targets, gender_targets], epochs=10, batch_size=64)

NameError: name 'posts' is not defined

# 使用Keras回调函数和TensorBoard来检查并监控深度学习模型

In [22]:
import keras

In [23]:
callbacks_list = [
    keras.callbacks.EarlyStopping(  # 如果不再改善就中断训练
        monitor='acc',  #监控模型的验证精度
        patience=1,  # 如果精度在多于一轮的时间（即2轮）内不再改善就中断训练
    ),
    keras.callbacks.ModelCheckpoint(
        filepath='my_model.h5',  # 在每轮过后保存当前权重
        monitor='val_loss',  # 若val_loss没有改善，那么不需要覆盖模型文件，始终保存在训练过程中见到的最佳模型
        save_best_only=True,
    )
]

model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

In [None]:
model.fit(x, y, epochs=10, batch_size=32, callbacks=callbacks_list, validation_data=(x_val, y_val))

ReduceLROnPlateau：若验证损失不再改善，可以使用这个回调函数来降低学习率

In [24]:
callback_list = [
    keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',  # 监控模型的验证损失
        factor=0.1,  # 触发时，将学习率*0.1
        patience=10,  # 若验证损失在10轮内都没有改善，那么就触发这个回调函数
    )
]

In [None]:
model.fit(x, y, epochs=10, batch_size=32, callbacks=callbacks_list, validation_data=(x_val, y_val))

## 编写自己的回调函数

In [25]:
class ActivationLogger(keras.callbacks.Callback):
    def set_model(self, model):
        self.model = model  # 训练之前由父模型调用，告诉回调函数是哪个模型在调用它
        layer_outputs = [layer.output for layer in model.layers]
        self.activations_model = keras.models.Model(model.input, layer_outputs)  # 模型实例，返回每层的激活
    
    def on_epoch_end(self, epoch, logs=None):
        if self.validation_data is None:
            raise RuntimeError('Requires validation_data.')
        validation_sample = self.validation_data[0][0:1]  # 获取验证数据的第一个输入样本
        activations = self.activations_model.predict(validation_sample)
        f = open('activations_at_epoch_' + str(epoch) + '.npz', 'w')  # 将数据保存到硬盘
        np.savez(f, activations)
        f.close()

# TensorBoard介绍
Tensorflow的可视化框架

In [26]:
import keras
from keras import layers
from keras.datasets import imdb
from keras.preprocessing import sequence

In [27]:
max_features = 2000
max_len = 500

(train_x, train_y), (test_x, test_y) = imdb.load_data(num_words=max_features)
train_x = sequence.pad_sequences(train_x, maxlen=max_len)
test_x = sequence.pad_sequences(test_x, maxlen=max_len)

  x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx])
  x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:])


In [29]:
model = keras.models.Sequential()
model.add(layers.Embedding(max_features, 128,
                          input_length=max_len,
                          name='embed'))
model.add(layers.Conv1D(32, 7, activation='relu'))
model.add(layers.MaxPooling1D(5))
model.add(layers.Conv1D(32, 7, activation='relu'))
model.add(layers.GlobalMaxPooling1D())
model.add(layers.Dense(1))
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embed (Embedding)            (None, 500, 128)          256000    
_________________________________________________________________
conv1d_10 (Conv1D)           (None, 494, 32)           28704     
_________________________________________________________________
max_pooling1d_4 (MaxPooling1 (None, 98, 32)            0         
_________________________________________________________________
conv1d_11 (Conv1D)           (None, 92, 32)            7200      
_________________________________________________________________
global_max_pooling1d_2 (Glob (None, 32)                0         
_________________________________________________________________
dense_8 (Dense)              (None, 1)                 33        
Total params: 291,937
Trainable params: 291,937
Non-trainable params: 0
________________________________________________

In [30]:
model.compile(optimizer='rmsprop',
             loss='binary_crossentropy',
             metrics=['acc'])

In [31]:
mkdir my_log_dir

In [32]:
ls

L2_神经网络与数学基础.ipynb
L3_神经网络入门.ipynb
L6_深度学习用于文本和序列.ipynb
L7_高级的深度学习最佳实践.ipynb
[34mmy_log_dir[m[m/


## 使用一个TensorBoard回调函数来训练模型

In [34]:
callbacks = [
    keras.callbacks.TensorBoard(
        log_dir = 'my_log_dir',  # 日志文件将被写入这个位置
        histogram_freq=1,  # 每一轮之后记录激活直方图
        embeddings_freq=1  # 每一轮之后记录嵌入数据
    )
]

In [35]:
history = model.fit(train_x, train_y,
                   epochs=20,
                   batch_size=128,
                   validation_split=0.2,
                   callbacks=callbacks)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


## 通过命令行启动TensorBoard服务器
$ tensorboard --logfir=my_log_dir  
然后可以用浏览器打开 http://localhost:6006 查看模型的训练过程

## 显示模型结构
可以使用keras.utils.plot_model函数绘制由层组成的图，使用这个函数需要安装Python的pydot库和pydot-ng库还需要安装graphviz库

# 让模型性能发挥到极致
1. 在网络每一次变换之后都应该考虑数据标准化，即使输入层中的数据均值为0、方差为1，也没有理由假定网络输出的数据也是这样。可以使用批标准化（batch normalization），即使在训练过程中均值和方差随时间发生变化，也可以适应性的将数据标准化。 标准化的工作原理是，训练过程中在内部保存已读取每批数据均值和方差的指数移动平均值。标准化的主要效果是有助于梯度传播，因此运训更深的网络

In [None]:
conv_model.add(layers.Conv2D(32, 3, activation='relu'))
conv_model.add(layers.BatchNormalization())  # 在卷积层之后使用

dense_model.add(layers.Dense(32, activation='relu'))
dense_model.add(layers.BatchNormalization())  # 在Dense层后使用