# Deep Learning - RNN(LSTM) + CNN

---

# 1 영화 리뷰(IMDB) 감성 분석 - LSTM + CNN

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from matplotlib import rcParams

# Notebook-wide matplotlib defaults, applied in one validated update:
#   - default figure size of 10x6 inches
#   - 'New Gulim' font at size 10 (presumably chosen so the Korean plot
#     titles used later render correctly -- confirm the font is installed)
#   - ASCII hyphen instead of the Unicode minus sign on axis ticks
rcParams.update({
    'figure.figsize': (10, 6),
    'font.family': 'New Gulim',
    'font.size': 10,
    'axes.unicode_minus': False,
})

#### 패키지 임포트

In [None]:
from keras.preprocessing import sequence

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers import Embedding
from keras.layers import LSTM
from keras.layers import Conv1D, MaxPooling1D

import tensorflow as tf

In [None]:
# Fix the random seeds of both libraries used below. The two generators are
# independent, so each keeps its own seed value.
tf.random.set_seed(3)
np.random.seed(0)

#### 데이터 로드 및 분할

In [None]:
# Load the IMDB movie-review dataset (not Reuters), already split into
# train/test pairs of (sequences, labels).
# num_words=5000 limits the vocabulary to 5000 word indices.
from keras.datasets import imdb

(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=5000)

#### 데이터 확인하기

In [None]:
# Quick sanity check of the loaded IMDB data.
# The number of classes is derived from the largest label value
# (assumes labels are consecutive integers starting at 0).
category = np.max(y_train) + 1
print('카테고리: ', category)
# The samples are movie reviews, so the labels say "영화 리뷰" rather than
# "뉴스 기사" (news articles).
print('학습용 영화 리뷰: ', len(X_train))
print('테스트용 영화 리뷰: ', len(X_test))
# Show one raw training sample exactly as returned by the loader.
print(X_train[0])

#### 데이터 전처리

In [None]:
# Apply the same pad_sequences call (maxlen=100) to both splits so the train
# and test inputs end up with an identical fixed sequence length.
X_train, X_test = (
    sequence.pad_sequences(split, maxlen=100) for split in (X_train, X_test)
)

#### LSTM + CNN 모델 생성 및 설정

In [None]:
# CNN + LSTM binary classifier, declared as a single ordered layer list:
# embedding -> dropout -> 1D convolution -> max pooling -> LSTM -> 1-unit
# dense output followed by a separate sigmoid activation layer.
model = Sequential([
    # input_dim=5000 matches the num_words=5000 used when loading the data.
    Embedding(input_dim=5000, output_dim=100),
    Dropout(rate=0.5),
    Conv1D(filters=64, kernel_size=5, strides=1, padding='valid', activation='relu'),
    MaxPooling1D(pool_size=4),
    LSTM(units=55),
    Dense(units=1),
    Activation('sigmoid'),
])

#### 모델 계층 확인

In [None]:
# Print the layer-by-layer summary of the model defined above.
model.summary()

#### 모델 컴파일

In [None]:
# Configure training: binary cross-entropy loss (the model ends in a single
# sigmoid output), the Adam optimizer, and accuracy as the reported metric.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

#### 모델 학습 실행

In [None]:
%%time
# Train for 5 epochs with mini-batches of 100 samples and keep the per-epoch
# metrics in `history` for plotting.
# NOTE(review): validation_data is the test split itself, so the val_loss /
# val_accuracy values are computed on the same data that is later reported as
# "Test Accuracy". Consider holding out part of the training data instead
# (e.g. validation_split) if an unbiased test score is needed.
history = model.fit(X_train, y_train, batch_size=100, epochs=5, validation_data=(X_test, y_test))

#### 모델 평가

In [None]:
# Print the test accuracy. evaluate() returns the loss followed by the
# compiled metrics, so index 1 is the accuracy value.
print(f"\n Test Accuracy: {model.evaluate(X_test, y_test)[1]:.4f}")

#### 학습 진행 과정
- history
 - loss: 훈련 손실값
 - accuracy: 훈련 정확도
 - val_loss: 검증 손실값
 - val_accuracy: 검증 정확도

In [None]:
# Plot the per-epoch validation and training loss recorded in `history`.
# Uses the current Axes directly instead of the pyplot state-machine calls.
ax = plt.gca()
ax.plot(history.history['val_loss'], label='Validation Loss', color='red', marker='.')
ax.plot(history.history['loss'], label='Train Loss', color='blue', marker='.')

ax.set_title('학습 진행에 따른 학습 데이터와 검증 데이터의 에러')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend(loc='upper right')
ax.grid()

plt.show()

#### 결과 예측

In [None]:
# Predicted probabilities: the raw sigmoid outputs of the model for the test
# inputs.
pred_prob = model.predict(X_test)
# Bare expression so the notebook displays the array.
pred_prob

In [None]:
# Turn probabilities into hard 0/1 labels: values strictly above 0.5 become 1,
# everything else 0. The result is flattened to a 1-D array.
pred = (pred_prob > 0.5).astype(int).flatten()
# Bare expression so the notebook displays the array.
pred

#### 결과 평가

In [None]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 on the test labels.
# zero_division=1 is the documented numeric form of the previous boolean
# value: a metric whose denominator is zero is reported as 1 without a
# warning.
print(
    classification_report(
        y_true=y_test,
        y_pred=pred,
        zero_division=1,
    )
)

---

In [None]:
# End of file