In [1]:
import tensorflow as tf
import numpy as np
from tensorflow import keras

In [2]:
imdb = keras.datasets.imdb
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10000)

In [4]:
word_index = imdb.get_word_index() 

In [6]:
# 처음 몇 개 인덱스는 사전에 정의되어 있음.
word_index = {k:(v+3) for k,v in word_index.items()}
word_index["<PAD>"] = 0
word_index["<START>"] = 1
word_index["<UNK>"] = 2  # unknown
word_index["<UNUSED>"] = 3

In [7]:
train_data = keras.utils.pad_sequences(train_data, value=word_index["<PAD>"],
                                       padding='post')  # post : 뒤에 PAD를 붙이는 방식

test_data = keras.utils.pad_sequences(test_data, value=word_index["<PAD>"],
                                      padding='post')

In [31]:
train_data.shape

(25000, 2494)

In [12]:
vocab_size = 10000

In [45]:
model = keras.Sequential([
    keras.layers.Embedding(input_dim=vocab_size, output_dim=16),

    # 시퀀스 전체 출력
    keras.layers.LSTM(8, return_sequences=True),

    # LSTM 시퀀스 출력 평균Pooling
    keras.layers.GlobalAveragePooling1D(),

    # Dense 층
    keras.layers.Dense(16, activation='relu'),

    # 출력층
    keras.layers.Dense(1, activation='sigmoid')
])


In [46]:
model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_8 (Embedding)     (None, None, 8)           80000     
                                                                 
 lstm_9 (LSTM)               (None, None, 8)           544       
                                                                 
 global_average_pooling1d_3   (None, 8)                0         
 (GlobalAveragePooling1D)                                        
                                                                 
 dense_6 (Dense)             (None, 16)                144       
                                                                 
 dense_7 (Dense)             (None, 1)                 17        
                                                                 
Total params: 80,705
Trainable params: 80,705
Non-trainable params: 0
__________________________________________________

In [47]:
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [48]:
x_val = train_data[:10000]
partial_x_train = train_data[10000:]

y_val = train_labels[:10000]
partial_y_train = train_labels[10000:]

In [49]:
history = model.fit(partial_x_train,
                    partial_y_train,
                    epochs=40,
                    batch_size=512,
                    validation_data=(x_val, y_val),
                    verbose=1)

Epoch 1/40
Epoch 2/40
 6/30 [=====>........................] - ETA: 1:42 - loss: 0.6931 - accuracy: 0.5003

KeyboardInterrupt: 