In [28]:
# tensorflow.keras의 데이터셋 중 reuters 데이터셋을 사용해서 분류 모델 개발
# 1. 데이터 가져오기
# 2. 데이터의 특성 파악
# 3. 입력 데이터 구성
# 4. 모델 개발
# 5. 모델 평가

In [29]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras as tf_keras

In [30]:
# 데이터 준비
(X_train, y_train), (X_test, y_test) = tf_keras.datasets.reuters.load_data(num_words=10000) # 10000 개의 단어 집합 사용

In [31]:
# 단어 사전 확인
word_to_index = tf_keras.datasets.reuters.get_word_index()

In [32]:
# 단어 번호로 단어를 찾기 위한 맵 만들기
index_to_word = { value: key for key, value in word_to_index.items() }

In [33]:
# 각 문장을 BOW 형식으로 변환 -> (1, 10000)
def vectorize_sentences(sentences, dimension=10000, bow=True): # dimension : column, 전체단어갯수, sentences : 행, 문장들
    results = np.zeros((len(sentences), dimension))

    for i, sentence in enumerate(sentences):
        for word in sentence:
            if bow:
              results[i, word] += 1.  # 단어 위치에 발생 빈도 encoding
            else:
              results[i, word] = 1.   # 단어 위치에 1 encoding

    return results

In [34]:
# 입력 데이터 변환
X_train2 = vectorize_sentences(X_train, bow=False)
X_test2 = vectorize_sentences(X_test, bow=False)

X_train3 = vectorize_sentences(X_train, bow=True)
X_test3 = vectorize_sentences(X_test, bow=True)

In [35]:
# target 데이터 특성 확인
print(y_train.shape)
np.unique(y_train, return_counts=True)

(8982,)


(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
        34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45]),
 array([  55,  432,   74, 3159, 1949,   17,   48,   16,  139,  101,  124,
         390,   49,  172,   26,   20,  444,   39,   66,  549,  269,  100,
          15,   41,   62,   92,   24,   15,   48,   19,   45,   39,   32,
          11,   50,   10,   49,   19,   19,   24,   36,   30,   13,   21,
          12,   18]))

In [36]:
# 모델 구조 설계 1

base_model = tf_keras.models.Sequential()
base_model.add(tf_keras.layers.Input(shape=(10000,)))
base_model.add(tf_keras.layers.Dense(512, activation='relu'))
base_model.add(tf_keras.layers.Dense(256, activation='relu'))
base_model.add(tf_keras.layers.Dense(128, activation='relu'))
base_model.add(tf_keras.layers.Dense(46, activation='softmax')) # 다중분류인 경우 activation=softmax, 유닛갯수는 분류 갯수 사용

base_model.summary()

In [37]:
# 모델 구조 설계 2 : 가중치 초기화 설정

model1 = tf_keras.models.Sequential()
model1.add(tf_keras.layers.Input(shape=(10000,)))
model1.add(tf_keras.layers.Dense(512, activation='relu', kernel_initializer="he_uniform"))
model1.add(tf_keras.layers.Dense(256, activation='relu', kernel_initializer="he_uniform"))
model1.add(tf_keras.layers.Dense(128, activation='relu', kernel_initializer=tf_keras.initializers.HeUniform(seed=42)))
model1.add(tf_keras.layers.Dense(46, activation='softmax')) # 다중분류인 경우 activation=softmax, 유닛갯수는 분류 갯수 사용

model1.summary()

In [38]:
# 모델 구조 설계 3 : 배치 정규화 설정

model2 = tf_keras.models.Sequential()
model2.add(tf_keras.layers.Input(shape=(10000,)))
model2.add(tf_keras.layers.Dense(512))
model2.add(tf_keras.layers.BatchNormalization())
model2.add(tf_keras.layers.Activation('relu'))

model2.add(tf_keras.layers.Dense(256))
model2.add(tf_keras.layers.BatchNormalization())
model2.add(tf_keras.layers.Activation('relu'))

model2.add(tf_keras.layers.Dense(128))
model2.add(tf_keras.layers.BatchNormalization())
model2.add(tf_keras.layers.Activation('relu'))

model2.add(tf_keras.layers.Dense(46, activation='softmax')) # 다중분류인 경우 activation=softmax, 유닛갯수는 분류 갯수 사용

model2.summary()

In [48]:
# 모델 구조 설계 4 : 가중치 규제 (L1, L2)

model3 = tf_keras.models.Sequential()
model3.add(tf_keras.layers.Input(shape=(10000,)))
model3.add(tf_keras.layers.Dense(512, activation="relu", kernel_regularizer=tf_keras.regularizers.L2(0.001)))
model3.add(tf_keras.layers.Dense(256, activation="relu", kernel_regularizer=tf_keras.regularizers.L2(0.001)))
model3.add(tf_keras.layers.Dense(128, activation="relu", kernel_regularizer=tf_keras.regularizers.L2(0.001)))
model3.add(tf_keras.layers.Dense(46, activation='softmax')) # 다중분류인 경우 activation=softmax, 유닛갯수는 분류 갯수 사용

model3.summary()

In [52]:
# 모델 구조 설계 5 : 드롭아웃 적용

model4 = tf_keras.models.Sequential()
model4.add(tf_keras.layers.Input(shape=(10000,)))
model4.add(tf_keras.layers.Dense(512, activation="relu"))
model4.add(tf_keras.layers.Dropout(0.5, seed=42))
model4.add(tf_keras.layers.Dense(256, activation="relu"))
model4.add(tf_keras.layers.Dropout(0.5, seed=42))
model4.add(tf_keras.layers.Dense(128, activation="relu"))
model4.add(tf_keras.layers.Dropout(0.5, seed=42))
model4.add(tf_keras.layers.Dense(46, activation='softmax')) # 다중분류인 경우 activation=softmax, 유닛갯수는 분류 갯수 사용

model4.summary()

In [53]:
# 모델 훈련 설계

base_model.compile(loss='sparse_categorical_crossentropy',
                   optimizer='adam',
                   metrics=['accuracy'])
model1.compile(loss='sparse_categorical_crossentropy',
               optimizer='adam',
               metrics=['accuracy'])
model2.compile(loss='sparse_categorical_crossentropy',
               optimizer='adam',
               metrics=['accuracy'])
model3.compile(loss='sparse_categorical_crossentropy',
               optimizer='adam',
               metrics=['accuracy'])
model4.compile(loss='sparse_categorical_crossentropy',
               optimizer='adam',
               metrics=['accuracy'])

In [54]:
# fit_history_base = base_model.fit(X_train2, y_train, epochs=10, batch_size=256, validation_split=0.2)
# fit_history_1 = model1.fit(X_train2, y_train, epochs=10, batch_size=256, validation_split=0.2)
# fit_history_2 = model2.fit(X_train2, y_train, epochs=10, batch_size=256, validation_split=0.2)
# fit_history_3 = model3.fit(X_train2, y_train, epochs=10, batch_size=256, validation_split=0.2)
fit_history_4 = model4.fit(X_train2, y_train, epochs=10, batch_size=256, validation_split=0.2)

Epoch 1/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 208ms/step - accuracy: 0.3104 - loss: 3.0356 - val_accuracy: 0.5882 - val_loss: 1.6857
Epoch 2/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 156ms/step - accuracy: 0.5906 - loss: 1.6949 - val_accuracy: 0.7017 - val_loss: 1.3630
Epoch 3/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 205ms/step - accuracy: 0.6834 - loss: 1.3553 - val_accuracy: 0.7168 - val_loss: 1.2249
Epoch 4/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 161ms/step - accuracy: 0.7271 - loss: 1.1270 - val_accuracy: 0.7318 - val_loss: 1.1818
Epoch 5/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 162ms/step - accuracy: 0.7591 - loss: 0.9619 - val_accuracy: 0.7618 - val_loss: 1.1346
Epoch 6/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 208ms/step - accuracy: 0.8030 - loss: 0.7926 - val_accuracy: 0.7718 - val_loss: 1.1114
Epoch 7/10
[1m29/29[0m [3

In [55]:
print( base_model.evaluate(X_train2, y_train), base_model.evaluate(X_test2, y_test) )
print( model1.evaluate(X_train2, y_train), model1.evaluate(X_test2, y_test) )
print( model2.evaluate(X_train2, y_train), model2.evaluate(X_test2, y_test) )
print( model3.evaluate(X_train2, y_train), model3.evaluate(X_test2, y_test) )
print( model4.evaluate(X_train2, y_train), model4.evaluate(X_test2, y_test) )

[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.0054 - loss: 3.8533
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.0070 - loss: 3.8496
[3.851823329925537, 0.006346025504171848] [3.850097179412842, 0.008014247752726078]
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 15ms/step - accuracy: 0.0233 - loss: 3.8524
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.0290 - loss: 3.8485
[3.8518729209899902, 0.02326875925064087] [3.851130247116089, 0.028495103120803833]
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.9670 - loss: 0.1112
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.7772 - loss: 0.9449
[0.2613303065299988, 0.9357604384422302] [0.9574209451675415, 0.7782725095748901]
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 21ms/step - accuracy: 0.9656 -