<a href="https://colab.research.google.com/github/juhumkwon/source_code/blob/main/Transfomer_%ED%95%99%EC%8A%B5%EC%98%88%EC%A0%9C.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

import tensorflow as tf
import numpy as np

# 1. 데이터 준비
raw_data = [
    ("나는 학생이다.", "I am a student."),
    ("그는 집에 갔다.", "He went home."),
    ("오늘 날씨가 좋다.", "The weather is nice today."),
    ("그녀는 책을 읽고 있다.", "She is reading a book."),
    ("우리는 학교에 간다.", "We go to school."),
]

input_texts, target_texts = zip(*raw_data)

# 2. 토큰화
tokenizer_ko = tf.keras.preprocessing.text.Tokenizer(filters='', oov_token='<unk>')
tokenizer_en = tf.keras.preprocessing.text.Tokenizer(filters='', oov_token='<unk>')

tokenizer_ko.fit_on_texts(input_texts)
tokenizer_en.fit_on_texts(target_texts)

input_sequences = tokenizer_ko.texts_to_sequences(input_texts)
target_sequences = tokenizer_en.texts_to_sequences(target_texts)

max_len = max(max(len(seq) for seq in input_sequences), max(len(seq) for seq in target_sequences))
input_sequences = tf.keras.preprocessing.sequence.pad_sequences(input_sequences, maxlen=max_len, padding='post')
target_sequences = tf.keras.preprocessing.sequence.pad_sequences(target_sequences, maxlen=max_len, padding='post')

vocab_size_ko = len(tokenizer_ko.word_index) + 1
vocab_size_en = len(tokenizer_en.word_index) + 1

# 3. Transformer 모델 정의
class Transformer(tf.keras.Model):
    def __init__(self, vocab_size_ko, vocab_size_en, d_model=128, num_heads=4, dff=512):
        super(Transformer, self).__init__()
        self.embedding_ko = tf.keras.layers.Embedding(vocab_size_ko, d_model)
        self.embedding_en = tf.keras.layers.Embedding(vocab_size_en, d_model)
        self.encoder_layer = tf.keras.layers.MultiHeadAttention(num_heads, key_dim=d_model)
        self.decoder_layer = tf.keras.layers.MultiHeadAttention(num_heads, key_dim=d_model)
        self.ffn = tf.keras.Sequential([
            tf.keras.layers.Dense(dff, activation='relu'),
            tf.keras.layers.Dense(d_model)
        ])
        self.final_layer = tf.keras.layers.Dense(vocab_size_en)

    def call(self, encoder_input, decoder_input):
        enc_embed = self.embedding_ko(encoder_input)
        enc_output = self.encoder_layer(enc_embed, enc_embed, enc_embed)
        dec_embed = self.embedding_en(decoder_input)
        dec_output = self.decoder_layer(dec_embed, enc_output, enc_output)
        final_output = self.ffn(dec_output)
        return self.final_layer(final_output)

# 4. 모델 생성 및 학습 설정
transformer = Transformer(vocab_size_ko, vocab_size_en)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam()

input_sequences = np.array(input_sequences)
target_sequences = np.array(target_sequences)

# 5. 학습 실행
epochs = 100
for epoch in range(epochs):
    with tf.GradientTape() as tape:
        predictions = transformer(input_sequences, target_sequences[:, :-1])
        loss = loss_fn(target_sequences[:, 1:], predictions)
    gradients = tape.gradient(loss, transformer.trainable_variables)
    optimizer.apply_gradients(zip(gradients, transformer.trainable_variables))
    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {loss.numpy():.4f}")

# 6. 번역 함수
def translate(input_sentence):
    input_seq = tokenizer_ko.texts_to_sequences([input_sentence])
    input_seq = tf.keras.preprocessing.sequence.pad_sequences(input_seq, maxlen=max_len, padding='post')
    decoder_input = np.zeros((1, max_len))
    for i in range(max_len):
        predictions = transformer(input_seq, decoder_input)
        predicted_id = np.argmax(predictions[0, i])
        decoder_input[0, i] = predicted_id
        if predicted_id == 0:
            break
    translated_tokens = tokenizer_en.sequences_to_texts(decoder_input)
    return translated_tokens[0]

# 7. 테스트 실행
print(translate("나는 학생이다."))
print(translate("그는 집에 갔다."))

Epoch 0, Loss: 3.0445
Epoch 10, Loss: 2.6871
Epoch 20, Loss: 2.3337
Epoch 30, Loss: 1.8612
Epoch 40, Loss: 1.4572
Epoch 50, Loss: 1.3457
Epoch 60, Loss: 1.3257
Epoch 70, Loss: 1.3214
Epoch 80, Loss: 1.3192
Epoch 90, Loss: 1.3179
student. student. student. student. student.
<unk> <unk> <unk> <unk> <unk>
