<a href="https://colab.research.google.com/github/juhumkwon/source_code/blob/main/Transformer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Embedding, Dense, LayerNormalization, MultiHeadAttention
import numpy as np

# **Data preparation**
def tokenize_sentence(sentence, max_length=10):
    """Encode ``sentence`` as an array of character indices.

    The character vocabulary is rebuilt on every call from the module-level
    ``source_sentences`` and ``target_sentences`` lists. They are joined with
    spaces, so the space used for padding is always part of the vocabulary.

    Args:
        sentence: Text to encode; every character becomes one token.
        max_length: Minimum length of the result. Shorter sentences are
            right-padded with spaces; longer sentences are NOT truncated.

    Returns:
        A 1-D ``np.int32`` array of length ``max(len(sentence), max_length)``.
        Index 0 is reserved for characters outside the vocabulary; known
        characters are numbered from 1 in sorted order.
    """
    # Sort the characters: iterating a bare set of strings follows hash order,
    # which differs between interpreter runs and made token ids irreproducible.
    vocabulary = sorted(set(" ".join(source_sentences + target_sentences)))
    # Number from 1 so that no real character shares index 0 with the
    # "unknown character" fallback used below.
    char_to_index = {char: index for index, char in enumerate(vocabulary, start=1)}
    padded = sentence.ljust(max_length)
    tokens = [char_to_index.get(char, 0) for char in padded]
    return np.array(tokens, dtype=np.int32)

# Toy parallel corpus: each source sentence is paired with the target
# sentence at the same position.
source_sentences = ["I am a student", "Hello world"]
target_sentences = ["Je suis étudiant", "Bonjour le monde"]

# Pad every sentence to the length of the longest one so that the encoded
# rows can be stacked into rectangular arrays.
max_length = max(map(len, source_sentences + target_sentences))

src_data = np.stack(
    [tokenize_sentence(text, max_length=max_length) for text in source_sentences]
)
tgt_data = np.stack(
    [tokenize_sentence(text, max_length=max_length) for text in target_sentences]
)


# **Transformer layer definition**
class TransformerBlock(tf.keras.layers.Layer):
    """Self-attention followed by a two-layer feed-forward network.

    Each sub-layer's output is added to its input and then layer-normalized.

    Args:
        embed_dim: Size of the last axis of the input; also the output size
            of the feed-forward network so the residual additions line up.
        num_heads: Number of attention heads.
        ff_dim: Width of the hidden (ReLU) feed-forward layer.
        causal: When True, self-attention is causally masked so a position
            can only attend to itself and earlier positions. Defaults to
            False, which keeps the original unmasked behaviour.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, causal=False):
        super().__init__()
        self.use_causal_mask = causal
        self.attention = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential([
            Dense(ff_dim, activation="relu"),
            Dense(embed_dim)
        ])
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)

    def call(self, x, training=None):
        """Apply the block to ``x`` and return a tensor of the same shape.

        In this file ``x`` is always the output of the shared embedding
        layer. ``training`` is forwarded to the sub-layers.
        """
        # Query and value are both x, i.e. self-attention.
        attn_output = self.attention(
            x, x, training=training, use_causal_mask=self.use_causal_mask
        )
        out1 = self.layernorm1(x + attn_output)
        ffn_output = self.ffn(out1, training=training)
        return self.layernorm2(out1 + ffn_output)

# **Transformer model**
class Transformer(tf.keras.Model):
    """Minimal model: shared embedding, one encoder block, one decoder block.

    NOTE(review): the encoder output is computed but never passed to the
    decoder (there is no cross-attention), so the returned logits depend only
    on ``tgt``. The encoder call is kept so the layer is still built and
    remains reachable as ``model.encoder``.

    Args:
        vocab_size: Number of rows in the embedding table and number of
            output logits per position.
        embed_dim: Embedding size used throughout the model.
        num_heads: Attention heads per block.
        ff_dim: Hidden width of each block's feed-forward network.
    """

    def __init__(self, vocab_size, embed_dim=16, num_heads=2, ff_dim=64):
        super().__init__()
        self.embedding = Embedding(vocab_size, embed_dim)
        self.encoder = TransformerBlock(embed_dim, num_heads, ff_dim)
        # The decoder is trained to predict token t+1 from tokens <= t.
        # Without a causal mask it could attend to the very token it is
        # asked to predict, which inference (prefix only) cannot reproduce.
        self.decoder = TransformerBlock(embed_dim, num_heads, ff_dim, causal=True)
        self.final_layer = Dense(vocab_size)

    def call(self, src, tgt, training=False):
        """Return per-position logits over the vocabulary for ``tgt``.

        Args:
            src: Integer token ids of the source sentences.
            tgt: Integer token ids fed to the decoder.
            training: Forwarded to the encoder and decoder blocks.
        """
        src = self.embedding(src)
        src = self.encoder(src, training=training)  # result currently unused

        tgt = self.embedding(tgt)
        tgt = self.decoder(tgt, training=training)

        return self.final_layer(tgt)

# **Model initialization**
# Size of the embedding table and of the output layer. 128 is the number of
# ASCII characters, but the token ids fed to the model are vocabulary indices
# produced by tokenize_sentence, so this only needs to exceed the largest id.
vocab_size = 128
model = Transformer(vocab_size)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam()

# **Model training**
batch_size = 2  # not read by the loop below; all sentence pairs form one batch
epochs = 1000

# The decoder input is the target without its last token and the label is the
# target shifted left by one, so position t is trained to predict token t+1.
# Both slices are loop-invariant, so they are taken once up front.
decoder_input = tgt_data[:, :-1]
decoder_labels = tgt_data[:, 1:]

for epoch in range(epochs):
    with tf.GradientTape() as tape:
        # training=True so any training-only layer behaviour is active while
        # the gradients are being recorded.
        output = model(src_data, decoder_input, training=True)
        loss = loss_fn(decoder_labels, output)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    print(f"Epoch {epoch+1}: Loss = {loss.numpy():.4f}")

# **Inference function**
def translate(model, sentence, max_length=10):
    """Greedily decode a character-level translation of ``sentence``.

    Args:
        model: Trained model, called as ``model(src, tgt, training=False)``
            and expected to return logits indexed as (batch, position, vocab).
        sentence: Source text; encoded with ``tokenize_sentence``.
        max_length: Padding length for the source and the maximum number of
            characters to generate.

    Returns:
        The generated text, without the start marker. Indices that do not map
        to a known character are dropped.
    """
    src = tokenize_sentence(sentence, max_length).reshape(1, -1)

    # The tokenizer works per character, so "<sos>" and "<eos>" are not single
    # vocabulary entries: only the id of their first character is used.
    # Computed once here instead of on every decoding step.
    start_token = int(tokenize_sentence("<sos>", max_length)[0])
    end_token = int(tokenize_sentence("<eos>", max_length)[0])

    # Token ids are vocabulary indices, not Unicode code points, so chr()
    # cannot decode them. Rebuild index -> character by passing every known
    # character through the tokenizer itself; this stays correct however the
    # tokenizer assigns its indices.
    known_chars = set(" ".join(source_sentences + target_sentences))
    index_to_char = {int(tokenize_sentence(ch, 1)[0]): ch for ch in known_chars}

    tgt_tokens = [start_token]
    for _ in range(max_length):
        tgt = np.array(tgt_tokens, dtype=np.int32).reshape(1, -1)
        # Use the model's own forward pass so inference follows exactly the
        # computation that was trained, including the source sentence.
        output = model(src, tgt, training=False)
        next_token = int(np.argmax(output[:, -1, :]))
        if next_token == end_token:
            break
        tgt_tokens.append(next_token)

    # Skip the start marker; it is a decoding aid, not part of the output.
    return "".join(index_to_char.get(token, "") for token in tgt_tokens[1:])


# **Run translation**
sentence = "I am a student"
# max_length is not passed, so translate uses its own default.
translation = translate(model, sentence)
print("Translated:", translation)




Epoch 1: Loss = 4.9523
Epoch 2: Loss = 4.8139
Epoch 3: Loss = 4.6896
Epoch 4: Loss = 4.5804
Epoch 5: Loss = 4.4868
Epoch 6: Loss = 4.4068
Epoch 7: Loss = 4.3384
Epoch 8: Loss = 4.2780
Epoch 9: Loss = 4.2234
Epoch 10: Loss = 4.1737
Epoch 11: Loss = 4.1273
Epoch 12: Loss = 4.0830
Epoch 13: Loss = 4.0405
Epoch 14: Loss = 3.9987
Epoch 15: Loss = 3.9575
Epoch 16: Loss = 3.9167
Epoch 17: Loss = 3.8762
Epoch 18: Loss = 3.8360
Epoch 19: Loss = 3.7961
Epoch 20: Loss = 3.7571
Epoch 21: Loss = 3.7188
Epoch 22: Loss = 3.6813
Epoch 23: Loss = 3.6448
Epoch 24: Loss = 3.6091
Epoch 25: Loss = 3.5740
Epoch 26: Loss = 3.5393
Epoch 27: Loss = 3.5051
Epoch 28: Loss = 3.4713
Epoch 29: Loss = 3.4384
Epoch 30: Loss = 3.4064
Epoch 31: Loss = 3.3753
Epoch 32: Loss = 3.3452
Epoch 33: Loss = 3.3157
Epoch 34: Loss = 3.2866
Epoch 35: Loss = 3.2578
Epoch 36: Loss = 3.2291
Epoch 37: Loss = 3.2005
Epoch 38: Loss = 3.1721
Epoch 39: Loss = 3.1440
Epoch 40: Loss = 3.1163
Epoch 41: Loss = 3.0891
Epoch 42: Loss = 3.0623
E