In [29]:
import tensorflow as tf

# Vocabulary cap; handed to `num_words` when the dataset is loaded below.
top_words = 1000

# Load the IMDB dataset bundled with Keras as two (inputs, labels) pairs.
train_split, test_split = tf.keras.datasets.imdb.load_data(num_words=top_words)
train_x, train_y = train_split
test_x, test_y = test_split

# Show the array shapes of the training split, then the test split.
for inputs, labels in (train_split, test_split):
    print(inputs.shape, labels.shape)


(25000,) (25000,)
(25000,) (25000,)


In [30]:
from keras_preprocessing.sequence import pad_sequences

# Common sequence length that both splits are brought to.
max_len_word = 100

# Apply the same pad_sequences call to each split; padding/truncating modes
# are not specified, so the library defaults are used.
train_x, test_x = (
    pad_sequences(sequences=split, maxlen=max_len_word)
    for split in (train_x, test_x)
)

# Show the resulting shapes next to the (unchanged) label shapes.
for inputs, labels in ((train_x, train_y), (test_x, test_y)):
    print(inputs.shape, labels.shape)

(25000, 100) (25000,)
(25000, 100) (25000,)


In [3]:
from keras.layers import Input, Dense, Reshape, Softmax
from keras.models import Model

# Dense autoencoder over the padded word-index sequences: the encoder reduces
# each length-`max_len_word` input to `endcoding_dim` values, and the decoder
# predicts one distribution over the vocabulary for every sequence position.
endcoding_dim = 16

# encoder
encoder_input = Input(shape = (max_len_word, ))
encoder_layer_1 = Dense(units = endcoding_dim * 4)(encoder_input)
encoder_layer_2 = Dense(units = endcoding_dim * 2)(encoder_layer_1)
encoder_output = Dense(units = endcoding_dim, activation = 'relu')(encoder_layer_2)

# Number of output classes per position. Tied to the vocabulary cap passed to
# load_data(num_words=...) so the two cannot drift apart.
vocab_size = top_words
# decoder
decoder_layer_1 = Dense(units = endcoding_dim * 2)(encoder_output)
decoder_layer_2 = Dense(units = endcoding_dim * 4)(decoder_layer_1)
# Produce raw scores for every (position, word) pair. No activation here:
# a softmax on this flat vector would normalise across all
# max_len_word * vocab_size units at once instead of per position.
decoder_layer_3 = Dense(units = max_len_word * vocab_size)(decoder_layer_2)
decoder_scores = Reshape(target_shape = (max_len_word, vocab_size))(decoder_layer_3)
# Normalise over the vocabulary axis so each position carries its own
# probability distribution, which is what sparse_categorical_crossentropy
# (with its default from_logits=False) expects.
decoder_output = Softmax(axis = -1)(decoder_scores)

autoencoder_dnn = Model(encoder_input, decoder_output)
autoencoder_dnn.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
autoencoder_dnn.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 100)]             0         
                                                                 
 dense (Dense)               (None, 64)                6464      
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dense_2 (Dense)             (None, 16)                528       
                                                                 
 dense_3 (Dense)             (None, 32)                544       
                                                                 
 dense_4 (Dense)             (None, 64)                2112      
                                                                 
 dense_5 (Dense)             (None, 100000)            650000

In [4]:
# Train the dense autoencoder to reproduce its own input; the last 20% of the
# training data is held out for validation.
autoencoder_dnn.fit(
    x=train_x,
    y=train_x,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x21d99b8b970>

In [5]:
# Stand-alone encoder: a model from `encoder_input` to `encoder_output`, built
# on the same layer tensors so it reuses whatever weights those layers hold.
encoder = Model(inputs=encoder_input, outputs=encoder_output)
encoder.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 100)]             0         
                                                                 
 dense (Dense)               (None, 64)                6464      
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dense_2 (Dense)             (None, 16)                528       
                                                                 
Total params: 9,072
Trainable params: 9,072
Non-trainable params: 0
_________________________________________________________________


In [6]:
# Classification head: takes an `endcoding_dim`-sized vector and outputs a
# 2-way softmax. The hidden Dense layer is given no activation argument.
class_model_input = Input(shape=(endcoding_dim,))
class_model_layer_1 = Dense(endcoding_dim * 2)(class_model_input)
class_model_output = Dense(2, activation='softmax')(class_model_layer_1)

class_model = Model(inputs=class_model_input, outputs=class_model_output)
class_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
class_model.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 16)]              0         
                                                                 
 dense_6 (Dense)             (None, 32)                544       
                                                                 
 dense_7 (Dense)             (None, 2)                 66        
                                                                 
Total params: 610
Trainable params: 610
Non-trainable params: 0
_________________________________________________________________


In [7]:
# Run the training inputs through `encoder` and fit the classifier on the
# resulting codes against the training labels (20% held out for validation).
class_model.fit(
    x=encoder.predict(train_x),
    y=train_y,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x21da12dbbe0>

In [26]:
# LSTM
from keras.layers import LSTM, RepeatVector, Embedding

# Sequence-to-sequence LSTM autoencoder over the padded word-index sequences.
# NOTE: this cell rebinds `encoder_input` / `encoder_output` (and the
# layer variables) that the dense-autoencoder cell also defines; re-running
# the earlier `encoder = Model(encoder_input, encoder_output)` cell after this
# one would wrap the LSTM encoder instead of the dense one.

# Sequence length must equal the padded input length, so reuse that setting
# instead of repeating the literal.
timesteps = max_len_word
lastent_dim = 64

# encoder
encoder_input = Input(shape = (timesteps, ))
# input_length describes the sequence length fed to the embedding, i.e.
# `timesteps` -- not the dense model's bottleneck size `endcoding_dim`.
encoder_layer_1 = Embedding(top_words, lastent_dim, input_length = timesteps)(encoder_input)
# Without return_sequences the LSTM emits a single vector per input sequence.
encoder_output = LSTM(units = lastent_dim)(encoder_layer_1)

# decoder
# Repeat the encoded vector once per timestep so the decoder LSTM has a
# sequence to unroll over.
rv_layer = RepeatVector(n = timesteps)(encoder_output)
decoder_layer_1 = LSTM(units = lastent_dim, return_sequences = True)(rv_layer)
# Dense acts on the last axis, so the softmax is taken over the vocabulary
# independently at every timestep.
decoder_output = Dense(units = top_words, activation = 'softmax')(decoder_layer_1)

lstm_autoencoder = Model(encoder_input, decoder_output)
lstm_autoencoder.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
lstm_autoencoder.summary()

Model: "model_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_7 (InputLayer)        [(None, 100)]             0         
                                                                 
 embedding_4 (Embedding)     (None, 100, 64)           64000     
                                                                 
 lstm_8 (LSTM)               (None, 64)                33024     
                                                                 
 repeat_vector_4 (RepeatVect  (None, 100, 64)          0         
 or)                                                             
                                                                 
 lstm_9 (LSTM)               (None, 100, 64)           33024     
                                                                 
 dense_12 (Dense)            (None, 100, 1000)         65000     
                                                           

In [27]:
# Train the LSTM autoencoder to reproduce its own input; the last 20% of the
# training data is held out for validation.
lstm_autoencoder.fit(
    x=train_x,
    y=train_x,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x21ff3dc96c0>

In [35]:
# Score the LSTM autoencoder on the test inputs, using them as their own
# targets. The model was compiled without metrics, so this is the loss value.
lstm_result = lstm_autoencoder.evaluate(x=test_x, y=test_x)
print(lstm_result)

4.6845831871032715
