## GRU로 IMDB 리뷰 감성 분류
- 전체 단어가 아니라 등장 빈도 상위 5,000개 단어만 사용
- 리뷰 전체가 아니라 리뷰당 최대 500 단어까지만 사용

In [13]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from tensorflow import keras
from keras.datasets import imdb

In [14]:
from keras.models import Sequential
from keras.layers import Dense, GRU, Embedding
from keras.preprocessing.sequence import pad_sequences

In [15]:
# Load the IMDB reviews, keeping only the 5,000 most frequent words
# (num_words=5000).
train_split, test_split = imdb.load_data(num_words=5000)
X_train, y_train = train_split
X_test, y_test = test_split

In [16]:
# Bring every review fed to the model to the same length of 500.
max_len = 500
X_train, X_test = (
    pad_sequences(reviews, maxlen=max_len) for reviews in (X_train, X_test)
)

In [17]:
# GRU model: embedding -> GRU -> single sigmoid output unit.
model = Sequential([
    # 5000 is the vocabulary size; it matches num_words=5000 used when
    # loading the data. Each word index is embedded into 120 dimensions.
    Embedding(5000, 120),
    GRU(120),
    # One sigmoid unit for the binary sentiment label.
    Dense(1, activation='sigmoid'),
])
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, None, 120)         600000    
_________________________________________________________________
gru_2 (GRU)                  (None, 120)               86760     
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 121       
Total params: 686,881
Trainable params: 686,881
Non-trainable params: 0
_________________________________________________________________


In [18]:
# Compile the model: binary cross-entropy loss, Adam optimizer, and
# accuracy reported as the metric.
compile_settings = {
    'loss': 'binary_crossentropy',
    'optimizer': 'adam',
    'metrics': ['accuracy'],
}
model.compile(**compile_settings)

In [19]:
import os

from keras.callbacks import ModelCheckpoint, EarlyStopping

# The checkpoint is written as an HDF5 file under model/. Create the
# directory up front so that saving does not fail when it is missing.
os.makedirs('model', exist_ok=True)

# Stop training once val_loss has failed to improve for 4 epochs.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=4)
# Save the model only on epochs where val_accuracy reaches a new maximum.
# Note that this monitors a different quantity than the early stopping above.
mc = ModelCheckpoint('model/imdb_gru_best_model.h5', monitor='val_accuracy',
                     verbose=1, mode='max', save_best_only=True)

In [None]:
# Train for up to 10 epochs in batches of 64, with the checkpoint and
# early-stopping callbacks attached.
# NOTE(review): the test split doubles as the validation data here, so the
# checkpoint / early-stopping decisions are made on the same data that is
# later reported as test accuracy. Consider holding out part of the training
# set for validation instead.
history = model.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=10,
    validation_data=(X_test, y_test),
    callbacks=[mc, es],
)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 25000 samples, validate on 25000 samples
Epoch 1/10

Epoch 00001: val_accuracy improved from -inf to 0.86060, saving model to model/imdb_gru_best_model.h5
Epoch 2/10

Epoch 00002: val_accuracy did not improve from 0.86060
Epoch 3/10

Epoch 00003: val_accuracy improved from 0.86060 to 0.87436, saving model to model/imdb_gru_best_model.h5
Epoch 4/10

Epoch 00004: val_accuracy improved from 0.87436 to 0.88492, saving model to model/imdb_gru_best_model.h5
Epoch 5/10

Epoch 00005: val_accuracy did not improve from 0.88492
Epoch 6/10

Epoch 00006: val_accuracy did not improve from 0.88492
Epoch 7/10

In [20]:
# Select the best model: drop the in-memory model and reload the checkpoint
# that was saved during training.
from keras.models import load_model

best_model_path = 'model/imdb_gru_best_model.h5'
del model
model = load_model(best_model_path)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [21]:
# Evaluate on the test split. Element 1 of the result is printed as the
# accuracy (element 0 is presumably the loss, given the single 'accuracy'
# metric used at compile time).
test_scores = model.evaluate(X_test, y_test, verbose=2)
print("\n 테스트 정확도: %.4f" % test_scores[1])


 테스트 정확도: 0.8849


In [None]:
# Per-epoch curves recorded by fit().
curves = history.history
# Validation curves (the test split was passed as validation_data).
y_vloss, y_vacc = curves['val_loss'], curves['val_accuracy']
# Training curves.
y_loss, y_acc = curves['loss'], curves['accuracy']

In [None]:
# Plot loss (left y-axis) and accuracy (right y-axis) against the epoch number.
x_len = np.arange(1, len(y_loss) + 1)
fig, ax0 = plt.subplots(figsize=(10, 8))
ax1 = ax0.twinx()  # second y-axis sharing the same x-axis

# (axes, values, colour, legend label) for each curve, in drawing order.
curve_specs = (
    (ax0, y_loss, "blue", 'Trainset_loss'),
    (ax0, y_vloss, "red", 'Testset_loss'),
    (ax1, y_acc, "darkgreen", 'Trainset_acc'),
    (ax1, y_vacc, "magenta", 'Testset_acc'),
)
for target_ax, values, colour, curve_label in curve_specs:
    target_ax.plot(x_len, values, c=colour, label=curve_label)

ax0.set_xlabel('epoch')
ax0.set_ylabel('loss')
ax1.set_ylabel('accuracy')

# Separate legends per axis, placed apart so they do not cover each other.
ax0.legend(loc='lower center')
ax1.legend(loc='upper center')

plt.grid()
plt.show()