<a href="https://colab.research.google.com/github/juhumkwon/source_code/blob/main/Transformer_%EC%98%88%EC%A0%9C.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

import tensorflow as tf
from tensorflow.keras.layers import Embedding, Dense, LayerNormalization, MultiHeadAttention
import numpy as np

# **데이터 준비**
def tokenize_sentence(sentence, max_length=10):
    tokens = [ord(c) for c in sentence.ljust(max_length)]
    return np.array(tokens, dtype=np.int32)

source_sentences = ["I am a student", "Hello world"]
target_sentences = ["Je suis étudiant", "Bonjour le monde"]

src_data = np.array([tokenize_sentence(s) for s in source_sentences])
tgt_data = np.array([tokenize_sentence(s) for s in target_sentences])

# **트랜스포머 레이어 정의**
class TransformerBlock(tf.keras.layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim):
        super(TransformerBlock, self).__init__()
        self.attention = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential([
            Dense(ff_dim, activation="relu"),
            Dense(embed_dim)
        ])
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)

    def call(self, x, training):
        attn_output = self.attention(x, x)
        out1 = self.layernorm1(x + attn_output)
        ffn_output = self.ffn(out1)
        return self.layernorm2(out1 + ffn_output)

# **트랜스포머 모델**
class Transformer(tf.keras.Model):
    def __init__(self, vocab_size, embed_dim=16, num_heads=2, ff_dim=64):
        super(Transformer, self).__init__()
        self.embedding = Embedding(vocab_size, embed_dim)
        self.encoder = TransformerBlock(embed_dim, num_heads, ff_dim)
        self.decoder = TransformerBlock(embed_dim, num_heads, ff_dim)
        self.final_layer = Dense(vocab_size)

    def call(self, src, tgt):
        src = self.embedding(src)
        src = self.encoder(src)

        tgt = self.embedding(tgt)
        tgt = self.decoder(tgt)

        return self.final_layer(tgt)

# **모델 초기화**
vocab_size = 128  # ASCII 문자 개수
model = Transformer(vocab_size)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam()

# **모델 학습 (Training)**
batch_size = 2
epochs = 5
for epoch in range(epochs):
    with tf.GradientTape() as tape:
        output = model(src_data, tgt_data[:, :-1])
        loss = loss_fn(tgt_data[:, 1:], output)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    print(f"Epoch {epoch+1}: Loss = {loss.numpy():.4f}")

# **추론 함수 (Inference)**
def translate(model, sentence, max_length=10):
    src = tokenize_sentence(sentence, max_length).reshape(1, -1)
    src_emb = model.embedding(src)
    src_enc = model.encoder(src_emb)

    tgt_tokens = [tokenize_sentence("<sos>", max_length)[0]]
    for _ in range(max_length):
        tgt = np.array(tgt_tokens).reshape(1, -1)
        tgt_emb = model.embedding(tgt)
        tgt_dec = model.decoder(tgt_emb)
        output = model.final_layer(tgt_dec)
        next_token = np.argmax(output[:, -1, :])
        if next_token == tokenize_sentence("<eos>", max_length)[0]:
            break
        tgt_tokens.append(next_token)

    return "".join([chr(i) for i in tgt_tokens])

# **번역 실행**
sentence = "I am a student"
translation = translate(model, sentence)
print("Translated:", translation)

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part.