In [1]:
from __future__ import print_function
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Embedding
from keras.layers import LSTM,SimpleRNN,GRU
from keras.datasets import imdb
import numpy as np

Using TensorFlow backend.


In [6]:
# Settings shared by the model cells below.
max_features = 20000  # passed to imdb.load_data as num_words and to Embedding as its input size
maxlen = 80  # target length handed to sequence.pad_sequences
batch_size = 256  # batch size used for model.fit and model.evaluate

# Load the full IMDB train/test splits. They are kept under the "_L" names;
# a later cell derives the smaller x_train / x_test subsets from them.
print('Loading data...')
(x_train_L, y_train_L), (x_test_L, y_test_L) = imdb.load_data(num_words=max_features)
print(len(x_train_L), 'train sequences')
print(len(x_test_L), 'test sequences')

Loading data...
25000 train sequences
25000 test sequences


In [7]:
# For this run, randomly keep 50% of the data. The same 50% split is applied
# to the test set too, so both x_train and x_test are half of the originals.
# random_state=42 pins the shuffle so the same subset is selected on every run.
# NOTE(review): this import would normally sit with the other imports in the first cell.
from sklearn.model_selection import train_test_split
# The halves that are not kept are bound to _x/_y and _X/_Y; no cell shown here reads them.
x_train, _x, y_train, _y = train_test_split(x_train_L, y_train_L, test_size=0.5, random_state=42)
x_test, _X, y_test, _Y = train_test_split(x_test_L, y_test_L, test_size=0.5, random_state=42)
x_train.shape  # last expression: displayed as the cell's output

(12500,)

In [9]:
# SimpleRNN: train and evaluate a sentiment classifier with a plain recurrent layer.
print('Pad sequences (samples x time)')
# Bring every review to exactly `maxlen` word indices so the data forms a
# rectangular (samples, maxlen) array: short reviews are padded, long ones cut.
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

print('Build model...')
model = Sequential([
    # Word index -> 128-dimensional dense vector.
    Embedding(max_features, 128),
    # Recurrent encoder with dropout on both its inputs and its recurrent state.
    SimpleRNN(128, dropout=0.2, recurrent_dropout=0.2),
    # Fully connected output unit; sigmoid matches the binary loss used below.
    Dense(1, activation='sigmoid'),
])

# Binary cross-entropy is the loss for this two-class problem.
# Other optimizers and optimizer settings are worth trying here.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

print('Train...')
# NOTE(review): the test split is also used as validation data, so the
# evaluate call below scores the same samples that fit already reports on.
model.fit(
    x_train, y_train,
    validation_data=(x_test, y_test),
    epochs=3,
    batch_size=batch_size)
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)

Pad sequences (samples x time)
x_train shape: (12500, 80)
x_test shape: (12500, 80)
Build model...
Train...
Train on 12500 samples, validate on 12500 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Test score: 0.6694292150878907
Test accuracy: 0.5904799995231629


In [11]:
# GRU: same pipeline as the SimpleRNN cell, with a GRU as the recurrent layer.
print('Pad sequences (samples x time)')
# Each sequence is one review. Every review is brought to exactly `maxlen`
# word indices (padded if shorter, cut if longer), not to the length of the
# longest review, so all samples have the same number of tokens.
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

print('Build model...')
model = Sequential([
    # Positive integer word indices -> dense vectors of fixed size 128.
    Embedding(max_features, 128),
    # Recurrent encoder with dropout on both its inputs and its recurrent state.
    GRU(128, dropout=0.2, recurrent_dropout=0.2),
    # Fully connected output unit; sigmoid matches the binary loss used below.
    Dense(1, activation='sigmoid'),
])

# Binary cross-entropy is the loss for this two-class problem.
# Other optimizers and optimizer settings are worth trying here.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

print('Train...')
# NOTE(review): the test split is also used as validation data, so the
# evaluate call below scores the same samples that fit already reports on.
model.fit(
    x_train, y_train,
    validation_data=(x_test, y_test),
    epochs=3,
    batch_size=batch_size)
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)

Pad sequences (samples x time)
x_train shape: (12500, 80)
x_test shape: (12500, 80)
Build model...
Train...
Train on 12500 samples, validate on 12500 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Test score: 0.46665385058403014
Test accuracy: 0.807040000076294
