# 在MNIST数据处理中加入注意力层

In [1]:
from tensorflow.keras.models import *
from tensorflow.keras.layers import Input, Dense, Multiply
import numpy as np
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from keras import Model

### 1.加载数据

In [2]:
((x_train, y_train), (x_test, y_test)) = mnist.load_data()
x_train.shape

### 2.数据变换

In [3]:
x_train = x_train.reshape(60000,784)
x_test = x_test.reshape(10000,784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train = x_train/255.0
x_test = x_test/255.0

y_train = to_categorical(y_train,10)
y_test = to_categorical(y_test, 10)

### 3.构建模型

In [4]:
#输入层
inputs=Input(shape=(784,))
#w
probs=Dense(784,activation='softmax')(inputs)
mul=Multiply()([inputs,probs])
#fc层
mul=Dense(64)(mul)
#fc层
output=Dense(10,activation='relu')(mul)
model=Model(inputs=[inputs],outputs=output)
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 784)]        0                                            
__________________________________________________________________________________________________
dense (Dense)                   (None, 784)          615440      input_1[0][0]                    
__________________________________________________________________________________________________
multiply (Multiply)             (None, 784)          0           input_1[0][0]                    
                                                                 dense[0][0]                      
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 64)           50240       multiply[0][0]               

### 4.编译、训练

In [5]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=100, batch_size=32) 

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x253d0371cc0>

### 5.评估

In [6]:
model.evaluate(x_test, y_test, batch_size=32)



[0.1000538244843483, 0.9501000046730042]

# 用自注意力机制处理IMDB影评数据

In [10]:
from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences 
from keras.layers import Embedding ,Dense,Input,LSTM,Permute,Softmax,Lambda,Flatten, GRU
from keras import Model
from tensorflow.keras.utils import to_categorical

### 1.加载并格式化数据

In [11]:
max_len=200
(train_data, train_labels), (test_data, test_labels) = imdb.load_data()
train_data_pad = pad_sequences(train_data,padding="post",maxlen = max_len )
test_data_pad = pad_sequences(test_data,padding="post",maxlen = max_len )
train_labels_input = to_categorical(train_labels)
test_labels_input = to_categorical(test_labels)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


### 2.构建模型

In [13]:
input_=Input(shape=(max_len,))
words=Embedding(100000,64)(input_)
sen=GRU(64,return_sequences=True)(words)

#注意力层
attention_pre=Dense(64,name='attention_vec')(sen)
attention_probs=Softmax()(attention_pre)
attention_mul=Lambda(lambda x:x[0]*x[1])([attention_probs,sen])

output=Flatten()(attention_mul)
output=Dense(32,activation="relu")(output)
output = Dense(2, activation='softmax')(output)
model=Model(inputs=input_,outputs=output)

In [14]:
model.compile(loss="categorical_crossentropy",optimizer="adam",metrics=["acc"])
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 200)]        0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 200, 64)      6400000     input_3[0][0]                    
__________________________________________________________________________________________________
gru_1 (GRU)                     (None, 200, 64)      24960       embedding_1[0][0]                
__________________________________________________________________________________________________
attention_vec (Dense)           (None, 200, 64)      4160        gru_1[0][0]                      
____________________________________________________________________________________________

### 3.训练模型

In [15]:
model.fit(train_data_pad,train_labels_input,batch_size=64,epochs=5,
          validation_data=(test_data_pad,test_labels_input))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x253da667da0>

### 4.评估模型

In [16]:
test_loss, test_acc = model.evaluate(test_data_pad,test_labels_input,verbose=2) #损失值和准确率
print('\nTest accuracy:', test_acc)
print('\nTest loss:', test_loss)

782/782 - 35s - loss: 0.7006 - acc: 0.8509

Test accuracy: 0.8509200215339661

Test loss: 0.7006161212921143
