In [1]:
# imdb dataset으로 감성 분류
# Transformer는 입력 데이터들간의 상호작용을 고려하는 self-attention

import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras.models import Sequential

In [8]:
#encoder block
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    # Attention
    x = layers.LayerNormalization(epsilon=1e-6)(inputs)
    x = layers.MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout)(x, x)
    res = x + inputs  # Ensure shapes are compatible

    # Feed Forward NN
    x = layers.LayerNormalization(epsilon=1e-6)(res)
    x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation='relu')(x)
    x = layers.Dropout(dropout)(x)
    x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    return x + res  # Ensure shapes are compatible

def build_sentiment_model(input_shape, head_size, num_heads, ff_dim, num_transformer_blocks, mlp_units, dropout=0, mlp_dropout=0):
    inputs = keras.Input(shape=input_shape)
    embedding_layer = layers.Embedding(input_dim=10000, output_dim=64, input_length=input_shape[0])(inputs)
    x = embedding_layer + tf.random.normal(shape=tf.shape(embedding_layer))

    # Transformer blocks
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    x = layers.GlobalAveragePooling1D()(x)

    # MLP
    for dim in mlp_units:
        x = layers.Dense(dim, activation='relu')(x)
        x = layers.Dropout(mlp_dropout)(x)

    outputs = layers.Dense(1, activation='sigmoid')(x)

    return keras.Model(inputs=inputs, outputs=outputs)

In [9]:
(x_train, y_train),(x_val, y_val) = keras.datasets.imdb.load_data(num_words=10000)
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=100)
x_val = keras.preprocessing.sequence.pad_sequences(x_val, maxlen=100)

# example usage
input_shape=(100,)
head_size = 256
num_heads = 4
ft_dim = 4
num_transformer_blocks = 4
mlp_units = [128]
dropout = 0.25
mlp_dropout=0.25

model = build_sentiment_model(
    input_shape, head_size, num_heads, ff_dim, num_transformer_blocks, mlp_units, dropout, mlp_dropout
)

print(model.summary())

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_4 (InputLayer)        [(None, 100)]                0         []                            
                                                                                                  
 embedding_3 (Embedding)     (None, 100, 64)              640000    ['input_4[0][0]']             
                                                                                                  
 tf.compat.v1.shape_3 (TFOp  (3,)                         0         ['embedding_3[0][0]']         
 Lambda)                                                                                          
                                                                                                  
 tf.random.normal_3 (TFOpLa  (None, 100, 64)              0         ['tf.compat.v1.shape_3[0][

In [10]:
model.compile(loss='binary_crossentropy', optimizer=keras.optimizers.Adam(learning_rate=1e-4), metrics=['acc'])

history = model.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=10, batch_size=32, verbose=2)
print(history.history)

Epoch 1/10
782/782 - 86s - loss: 0.6959 - acc: 0.5033 - val_loss: 0.6919 - val_acc: 0.5229 - 86s/epoch - 110ms/step
Epoch 2/10
782/782 - 42s - loss: 0.6833 - acc: 0.5524 - val_loss: 0.6641 - val_acc: 0.5930 - 42s/epoch - 54ms/step
Epoch 3/10
782/782 - 40s - loss: 0.6418 - acc: 0.6302 - val_loss: 0.6302 - val_acc: 0.6510 - 40s/epoch - 51ms/step
Epoch 4/10
782/782 - 39s - loss: 0.6041 - acc: 0.6715 - val_loss: 0.6000 - val_acc: 0.6752 - 39s/epoch - 49ms/step
Epoch 5/10
782/782 - 37s - loss: 0.5733 - acc: 0.7004 - val_loss: 0.5780 - val_acc: 0.6989 - 37s/epoch - 47ms/step
Epoch 6/10
782/782 - 35s - loss: 0.5530 - acc: 0.7170 - val_loss: 0.5577 - val_acc: 0.7166 - 35s/epoch - 45ms/step
Epoch 7/10
782/782 - 39s - loss: 0.5292 - acc: 0.7360 - val_loss: 0.5378 - val_acc: 0.7278 - 39s/epoch - 50ms/step
Epoch 8/10
782/782 - 38s - loss: 0.5121 - acc: 0.7455 - val_loss: 0.5191 - val_acc: 0.7414 - 38s/epoch - 49ms/step
Epoch 9/10
782/782 - 36s - loss: 0.4933 - acc: 0.7602 - val_loss: 0.5086 - val_

In [12]:
from sre_constants import MAX_REPEAT
text_sample = ["I loved movie", "Tis movie sucks, It's so boring."]
max_len=100
tok = keras.datasets.imdb.get_word_index()
text_sequence = [[tok[word] if word in tok else 0 for word in sample.split()] for sample in text_sample]
text_sequence = keras.preprocessing.sequence.pad_sequences(text_sequence, maxlen=max_len)
# print(text_sequence)

pred = model.predict(text_sequence)

b_pred = (pred > 0.5).astype(int)

for i, samp in enumerate(text_sample):
  print(f"sample : {samp}")
  print(f"predicted : {'Positive' if b_pred[i] == 1 else 'Negative'}")
  print(f"confidence : {pred[i][0]:.3f}\n")

sample : I loved movie
predicted : Positive
confidence : 0.725

sample : Tis movie sucks, It's so boring.
predicted : Positive
confidence : 0.866

