In [1]:
import tensorflow as tf
from tensorflow.keras import preprocessing

In [2]:
# Toy sentiment corpus kept as (sentence, label) pairs so that each text sits
# next to its label; 1 marks a positive sentence and 0 a negative one.
_labeled_samples = [
    ('너 오늘 이뻐 보인다', 1),
    ('나는 오늘 기분이 더러워', 0),
    ('끝내주는데, 좋은 일이 있나봐', 1),
    ('나 좋은 일이 생겼어', 1),
    ('아 오늘 진짜 짜증나', 0),
    ('환상적인데, 정말 좋은거 같아', 1),
]

# Raw sentences, in corpus order.
samples = [text for text, _ in _labeled_samples]

# One single-element list per sentence, aligned index-for-index with `samples`.
labels = [[label] for _, label in _labeled_samples]

In [7]:
# Learn the vocabulary from the sample sentences.
tokenizer = preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(samples)

# word -> integer id mapping learned by the tokenizer.
word_index = tokenizer.word_index

# Each sentence encoded as a list of integer ids.
sequences = tokenizer.texts_to_sequences(samples)

# Show the encoded sentences first, then the vocabulary, one per line.
print(sequences, word_index, sep='\n')

[[4, 1, 5, 6], [7, 1, 8, 9], [10, 2, 3, 11], [12, 2, 3, 13], [14, 1, 15, 16], [17, 18, 19, 20]]
{'오늘': 1, '좋은': 2, '일이': 3, '너': 4, '이뻐': 5, '보인다': 6, '나는': 7, '기분이': 8, '더러워': 9, '끝내주는데': 10, '있나봐': 11, '나': 12, '생겼어': 13, '아': 14, '진짜': 15, '짜증나': 16, '환상적인데': 17, '정말': 18, '좋은거': 19, '같아': 20}


In [12]:
# Build the training dataset.
# Number of times the sample set is repeated inside the dataset itself.
EPOCH = 100

def train_input_fn():
    """Return the ``tf.data`` pipeline used for training.

    Pairs the module-level ``sequences`` and ``labels`` one sample per
    element, shuffles the samples, repeats the sample set ``EPOCH`` times and
    emits batches of size 1.

    Returns:
        tf.data.Dataset: batches of ``(sequence, label)``.
    """
    dataset = tf.data.Dataset.from_tensor_slices((sequences, labels))

    # Shuffle individual samples *before* repeating and batching, with a
    # buffer that covers the whole sample set: every repetition is then a
    # complete permutation of the data. Shuffling after repeat/batch would
    # instead shuffle batches through a small buffer that straddles the
    # boundaries between repetitions.
    dataset = dataset.shuffle(len(sequences))
    dataset = dataset.repeat(EPOCH)
    dataset = dataset.batch(1)

    return dataset
    

In [28]:
import tensorflow as tf

# Width of each word-embedding vector.
EMB_SIZE = 128

# One more than the number of known words: the tokenizer's ids start at 1,
# so the embedding table also needs a row for id 0.
VOCAB_SIZE = 1 + len(word_index)

def build_model(vocab_size, emb_size):
    """Assemble and compile the averaged-embedding sentence classifier.

    Args:
        vocab_size: Number of rows in the embedding table.
        emb_size: Width of each embedding vector.

    Returns:
        A compiled ``tf.keras.Model`` that maps a batch of integer token
        sequences to a single sigmoid output per sequence.
    """
    layers = tf.keras.layers

    # Variable-length sequences of int32 token ids.
    token_ids = layers.Input(shape=(None,), dtype=tf.int32)

    # Token ids -> embedding vectors.
    embedded = layers.Embedding(vocab_size, emb_size)(token_ids)

    # Collapse each sequence into one vector with global average pooling.
    sentence_vector = layers.GlobalAveragePooling1D()(embedded)

    # Hidden layer followed by the single-unit sigmoid output.
    hidden = layers.Dense(128, activation='relu')(sentence_vector)
    probability = layers.Dense(1, activation='sigmoid')(hidden)

    classifier = tf.keras.Model(inputs=token_ids, outputs=probability)

    # NOTE(review): the loss is mean squared error on the sigmoid output;
    # binary cross-entropy is the more usual pairing for 0/1 labels.
    classifier.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
        loss=tf.keras.losses.MeanSquaredError(),
        metrics=['accuracy'],
    )

    return classifier

# Instantiate the classifier with the vocabulary and embedding sizes above.
model = build_model(vocab_size=VOCAB_SIZE, emb_size=EMB_SIZE)

# Print the layer-by-layer structure of the model.
model.summary()


In [31]:
import os

DATA_OUT_PATH = './data_in/'

# Full path of the saved model file.
MODEL_SAVE_PATH = os.path.join(DATA_OUT_PATH, 'checkpoint/dnn.keras')

# Create the directory the model file actually lives in (this also creates
# DATA_OUT_PATH). Creating only DATA_OUT_PATH would leave 'checkpoint/'
# missing, and model.save() is not relied on to create parent directories.
# exist_ok=True keeps the cell safe to re-run.
os.makedirs(os.path.dirname(MODEL_SAVE_PATH), exist_ok=True)

# The TensorFlow 1.x flow (tf.estimator.Estimator(...).train(train_input_fn))
# is replaced by the Keras fit/save calls below in TensorFlow 2.x.

# Build the training dataset.
train_dataset = train_input_fn()

# Train the model. The dataset already repeats the samples EPOCH times, so
# each of the 10 fit epochs makes that many passes over the data.
model.fit(train_dataset, epochs=10)

# Save the trained model.
model.save(MODEL_SAVE_PATH)

Epoch 1/10
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 388us/step - accuracy: 1.0000 - loss: 9.6910e-09
Epoch 2/10
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 323us/step - accuracy: 1.0000 - loss: 7.1938e-09
Epoch 3/10
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 319us/step - accuracy: 1.0000 - loss: 5.4248e-09
Epoch 4/10
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 324us/step - accuracy: 1.0000 - loss: 4.0818e-09
Epoch 5/10
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 319us/step - accuracy: 1.0000 - loss: 3.0924e-09
Epoch 6/10
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 313us/step - accuracy: 1.0000 - loss: 2.3587e-09
Epoch 7/10
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 317us/step - accuracy: 1.0000 - loss: 1.8411e-09
Epoch 8/10
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 315us/step - accuracy: 1.0000 - loss: 1.4