In [3]:
import re
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import load_model
from sklearn.linear_model import LogisticRegression

vocab_size = 10000
max_len = 500

(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=vocab_size)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [4]:
X_train = pad_sequences(X_train, maxlen=max_len)
X_test = pad_sequences(X_test, maxlen=max_len)

In [5]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# 로지스틱 회귀 모델
lr_clf = LogisticRegression(random_state=42, solver='liblinear', max_iter=1000)
lr_clf.fit(X_train, y_train)
lr_pred = lr_clf.predict(X_test)
lr_prob = lr_clf.predict_proba(X_test)[:, 1]

print(f"Accuracy: {accuracy_score(y_test, lr_pred):.4f}")
print(f"Precision: {precision_score(y_test, lr_pred):.4f}")
print(f"Recall: {recall_score(y_test, lr_pred):.4f}")
print(f"F1-Score: {f1_score(y_test, lr_pred):.4f}")
print(f"ROC AUC: {roc_auc_score(y_test, lr_prob):.4f}")

Accuracy: 0.5146
Precision: 0.5176
Recall: 0.4291
F1-Score: 0.4692
ROC AUC: 0.5183


In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# CNN 모델
embedding_dim = 100

cnn_model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=max_len),
    Conv1D(128, 3, activation='relu'),
    GlobalMaxPooling1D(),
    Dense(1, activation='sigmoid')
])

cnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=3)
mc = ModelCheckpoint('best_cnn_model.h5', monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)

cnn_model.summary()

# 모델 학습
history_cnn = cnn_model.fit(X_train, y_train, epochs=10, callbacks=[es, mc],
                            batch_size=64, validation_split=0.2)

# 모델 평가
from tensorflow.keras.models import load_model
loaded_cnn_model = load_model('best_cnn_model.h5')
cnn_loss, cnn_accuracy = loaded_cnn_model.evaluate(X_test, y_test, verbose=0)

cnn_prob = loaded_cnn_model.predict(X_test)
cnn_pred = (cnn_prob > 0.5).astype(int)

print(f"Accuracy: {cnn_accuracy:.4f}")
print(f"Precision: {precision_score(y_test, cnn_pred):.4f}")
print(f"Recall: {recall_score(y_test, cnn_pred):.4f}")
print(f"F1-Score: {f1_score(y_test, cnn_pred):.4f}")
print(f"ROC AUC: {roc_auc_score(y_test, cnn_prob):.4f}")



Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7319 - loss: 0.5304
Epoch 1: val_accuracy improved from -inf to 0.87940, saving model to best_cnn_model.h5




[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 6ms/step - accuracy: 0.7320 - loss: 0.5302 - val_accuracy: 0.8794 - val_loss: 0.2915
Epoch 2/10
[1m621/625[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.9107 - loss: 0.2272
Epoch 2: val_accuracy improved from 0.87940 to 0.89360, saving model to best_cnn_model.h5




[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.9107 - loss: 0.2271 - val_accuracy: 0.8936 - val_loss: 0.2595
Epoch 3/10
[1m617/625[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 5ms/step - accuracy: 0.9645 - loss: 0.1139
Epoch 3: val_accuracy did not improve from 0.89360
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.9645 - loss: 0.1140 - val_accuracy: 0.8926 - val_loss: 0.2726
Epoch 4/10
[1m619/625[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.9898 - loss: 0.0467
Epoch 4: val_accuracy did not improve from 0.89360
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.9898 - loss: 0.0468 - val_accuracy: 0.8922 - val_loss: 0.3018
Epoch 5/10
[1m617/625[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.9987 - loss: 0.0167
Epoch 5: v



[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step
Accuracy: 0.8909
Precision: 0.8879
Recall: 0.8947
F1-Score: 0.8913
ROC AUC: 0.9569


In [7]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

embedding_dim = 100
hidden_state_size = 128

lstm_model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=max_len),
    LSTM(hidden_state_size),
    Dense(1, activation='sigmoid')
])

lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=3)
mc = ModelCheckpoint('best_lstm_model.h5', monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)

lstm_model.summary()

# 모델 학습
history_lstm = lstm_model.fit(X_train, y_train, epochs=10, callbacks=[es, mc],
                             batch_size=64, validation_split=0.2)

# 모델 평가
from tensorflow.keras.models import load_model
loaded_lstm_model = load_model('best_lstm_model.h5')
lstm_loss, lstm_accuracy = loaded_lstm_model.evaluate(X_test, y_test, verbose=0)

lstm_prob = loaded_lstm_model.predict(X_test)
lstm_pred = (lstm_prob > 0.5).astype(int)

print(f"Accuracy: {lstm_accuracy:.4f}")
print(f"Precision: {precision_score(y_test, lstm_pred):.4f}")
print(f"Recall: {recall_score(y_test, lstm_pred):.4f}")
print(f"F1-Score: {f1_score(y_test, lstm_pred):.4f}")
print(f"ROC AUC: {roc_auc_score(y_test, lstm_prob):.4f}")



Epoch 1/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.6842 - loss: 0.5699
Epoch 1: val_accuracy improved from -inf to 0.82640, saving model to best_lstm_model.h5




[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 27ms/step - accuracy: 0.6845 - loss: 0.5696 - val_accuracy: 0.8264 - val_loss: 0.3939
Epoch 2/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.8762 - loss: 0.3167
Epoch 2: val_accuracy improved from 0.82640 to 0.87260, saving model to best_lstm_model.h5




[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 25ms/step - accuracy: 0.8762 - loss: 0.3167 - val_accuracy: 0.8726 - val_loss: 0.3142
Epoch 3/10
[1m312/313[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 23ms/step - accuracy: 0.9178 - loss: 0.2171
Epoch 3: val_accuracy did not improve from 0.87260
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 26ms/step - accuracy: 0.9178 - loss: 0.2172 - val_accuracy: 0.8666 - val_loss: 0.3233
Epoch 4/10
[1m311/313[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 24ms/step - accuracy: 0.9438 - loss: 0.1566
Epoch 4: val_accuracy did not improve from 0.87260
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 26ms/step - accuracy: 0.9438 - loss: 0.1567 - val_accuracy: 0.8450 - val_loss: 0.3797
Epoch 5/10
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.9191 - loss: 0.2068
Epo



[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step
Accuracy: 0.8688
Precision: 0.8881
Recall: 0.8441
F1-Score: 0.8655
ROC AUC: 0.9405
