In [None]:
# RNN: IMDB movie-review sentiment classification (DNN, RNN, and CNN models compared)

In [None]:
## Load the data
from tensorflow.keras.datasets import imdb

# num_words=10000 is the vocabulary limit passed to the loader; see the Keras
# `imdb.load_data` documentation for how words outside that limit are encoded.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=10000)

# Report how many reviews each split holds, then the test arrays' shapes.
print("Number of training samples:", len(X_train))
print("Number of test samples:", len(X_test))
print(*(split.shape for split in (X_test, y_test)))



In [None]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Train and test reviews get identical treatment: each sequence is brought to
# a length of 80, with both padding and truncation applied at the end ('post').
X_train_pad, X_test_pad = (
    pad_sequences(reviews, maxlen=80, padding='post', truncating='post')
    for reviews in (X_train, X_test)
)

In [None]:
## Convert a review's integer array back to text to inspect it
word_index = imdb.get_word_index()
print("Word index size:", len(word_index))
print(type(word_index))

# Invert the word -> index mapping so that indices can be looked up.
# NOTE(review): the name is presumably meant to be `reverse_word_index`; it is
# kept as-is because other cells may refer to it.
reserve_word_index = dict(zip(word_index.values(), word_index.keys()))

# Each stored index is shifted down by 3 before the lookup (presumably to undo
# load_data's default `index_from=3` offset - confirm against the Keras docs);
# any index without a matching word is rendered as '?'.
decoded_review = ' '.join(
    reserve_word_index.get(token_id - 3, '?') for token_id in X_train[0]
)
print("Decoded review:", decoded_review)

In [None]:
## Movie-review classification with a DNN - 0.7653200030326843
## (number recorded by the author; presumably the test accuracy from evaluate)

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, Input, Flatten

# 80-token input -> 32-dim embedding per token -> flattened vector
# -> 256-unit ReLU hidden layer -> 2-way softmax output.
model_dnn = Sequential([
    Input(shape=(80,)),
    Embedding(input_dim=10000, output_dim=32),
    Flatten(),
    Dense(256, activation='relu'),
    Dense(2, activation='softmax'),
])
model_dnn.summary()

In [None]:
# The sparse categorical loss expects integer class ids as labels
# (assumes y_train holds 0/1 ids - TODO confirm).
model_dnn.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# Train on the padded training reviews: 10 epochs, mini-batches of 64.
model_dnn.fit(x=X_train_pad, y=y_train, batch_size=64, epochs=10)

In [None]:
# Score the trained DNN on the padded test reviews; the cell displays the
# value returned by evaluate (the loss plus the compiled metrics).
model_dnn.evaluate(x=X_test_pad, y=y_test)

In [None]:
## Movie-review classification with an RNN - 0.7666800022125244
## (number recorded by the author before masking was enabled; re-run the
## notebook to refresh it)

from tensorflow.keras import Sequential, layers

# 80-token input -> 32-dim embedding -> 64-unit SimpleRNN -> 2-way softmax.
model = Sequential([
    layers.Input(shape=(80,)),
    # The vocabulary includes the <OOV> and <PAD> tokens.
    # mask_zero=True marks index 0 (the value pad_sequences pads with by
    # default) as padding. X_train_pad / X_test_pad are padded at the END of
    # each review, so without a mask the RNN would keep updating its state over
    # the trailing zeros and its final state would reflect padding rather than
    # the last real word.
    layers.Embedding(input_dim=10000, output_dim=32, mask_zero=True),
    layers.SimpleRNN(64, activation='relu'),
    layers.Dense(2, activation='softmax')  # outputs negative / positive probabilities
])
model.summary()

In [None]:
# The sparse categorical loss expects integer class ids as labels
# (assumes y_train holds 0/1 ids - TODO confirm).
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# Train for 10 epochs with mini-batches of 200, then score on the test set.
model.fit(x=X_train_pad, y=y_train, batch_size=200, epochs=10)
model.evaluate(x=X_test_pad, y=y_test)

In [None]:
## Movie-review classification with a CNN - 0.77183997631073
## (number recorded by the author; presumably the test accuracy from evaluate)

# 80-token input -> 32-dim embedding -> 64 Conv1D filters of width 3 (ReLU)
# -> global max over the sequence axis -> 2-way softmax.
model_cnn = Sequential()
model_cnn.add(layers.Input(shape=(80,)))
model_cnn.add(layers.Embedding(input_dim=10000, output_dim=32))
model_cnn.add(layers.Conv1D(64, 3, activation='relu'))
model_cnn.add(layers.GlobalMaxPooling1D())
model_cnn.add(layers.Dense(2, activation='softmax'))
model_cnn.summary()

model_cnn.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# Train for 10 epochs with mini-batches of 200, then score on the test set.
model_cnn.fit(x=X_train_pad, y=y_train, batch_size=200, epochs=10)
model_cnn.evaluate(x=X_test_pad, y=y_test)

In [None]:
## Adding a second RNN layer - 0.7457200288772583
## (number recorded by the author before masking was enabled; re-run the
## notebook to refresh it)

from tensorflow.keras import Sequential, layers

# 80-token input -> 32-dim embedding -> SimpleRNN(64) emitting every timestep
# -> SimpleRNN(128) emitting only its final state -> 2-way softmax.
# NOTE: this rebinds `model`, replacing the single-layer RNN built earlier.
model = Sequential([
    layers.Input(shape=(80,)),
    # The vocabulary includes the <OOV> and <PAD> tokens.
    # mask_zero=True marks index 0 (the value pad_sequences pads with by
    # default) as padding. The inputs are padded at the END of each review, so
    # without a mask both RNN layers would keep updating their state over the
    # trailing zeros. The mask is passed on through the return_sequences=True
    # layer to the second RNN.
    layers.Embedding(input_dim=10000, output_dim=32, mask_zero=True),
    # return_sequences=True so the next RNN layer receives the full sequence.
    layers.SimpleRNN(64, return_sequences=True, activation='relu'),
    layers.SimpleRNN(128, activation='relu'),
    layers.Dense(2, activation='softmax')  # outputs negative / positive probabilities
])
model.summary()

In [None]:
# The sparse categorical loss expects integer class ids as labels
# (assumes y_train holds 0/1 ids - TODO confirm).
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# Train for 10 epochs with mini-batches of 200, then score on the test set.
model.fit(x=X_train_pad, y=y_train, batch_size=200, epochs=10)
model.evaluate(x=X_test_pad, y=y_test)