<a href="https://colab.research.google.com/github/juhumkwon/Defense_Cloud/blob/main/9_1_Transformaer_Encoder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# 1. 필요한 라이브러리 불러오기

import tensorflow as tf
from tensorflow.keras.layers import Dense, LayerNormalization, Dropout, Embedding
import numpy as np

In [5]:
# 2. 스케일드 닷-프로덕트 어텐션

def scaled_dot_product_attention(q, k, v, mask=None):
    """Compute scaled dot-product attention.

    The logits are ``q @ k^T`` divided by ``sqrt(d_k)``, where ``d_k`` is the
    size of the last axis of ``k``. A softmax over the last axis of the logits
    gives the attention weights, which are then multiplied with ``v``.

    Args:
        q: Query tensor.
        k: Key tensor; its last axis must match the last axis of ``q``.
        v: Value tensor.
        mask: Optional tensor broadcastable to the logits. Entries equal to 1
            (or ``True``) receive a ``-1e9`` bias before the softmax, which
            drives their attention weight towards zero. Boolean, integer and
            floating-point masks are accepted.

    Returns:
        A tuple ``(output, attention_weights)``.
    """
    matmul_qk = tf.matmul(q, k, transpose_b=True)

    # Take the scale's dtype from the logits instead of hard-coding float32,
    # so that e.g. float64 inputs do not fail with a dtype mismatch.
    d_k = tf.cast(tf.shape(k)[-1], matmul_qk.dtype)
    scaled_attention_logits = matmul_qk / tf.math.sqrt(d_k)

    if mask is not None:
        # Bool/int masks cannot be multiplied by a float constant directly;
        # cast to the logits dtype first.
        mask = tf.cast(mask, scaled_attention_logits.dtype)
        scaled_attention_logits += mask * -1e9

    attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1)
    output = tf.matmul(attention_weights, v)
    return output, attention_weights


In [6]:
# 3. 멀티헤드 어텐션 레이어

class MultiHeadAttention(tf.keras.layers.Layer):
    """Multi-head attention built on ``scaled_dot_product_attention``.

    Queries, keys and values are each projected by a ``Dense(d_model)`` layer,
    split into ``num_heads`` heads of size ``d_model // num_heads``, attended
    per head, merged back and passed through a final ``Dense(d_model)`` layer.
    """

    def __init__(self, d_model, num_heads):
        """Create the projection layers.

        Args:
            d_model: Width of the projections; must be divisible by
                ``num_heads``.
            num_heads: Number of attention heads.
        """
        super().__init__()
        assert d_model % num_heads == 0

        self.num_heads = num_heads
        self.depth = d_model // num_heads

        # Input projections for query / key / value, then the output projection.
        self.wq = Dense(d_model)
        self.wk = Dense(d_model)
        self.wv = Dense(d_model)
        self.dense = Dense(d_model)

    def split_heads(self, x, batch_size):
        """Reshape ``x`` to [B, heads, seq_len, depth]."""
        per_head_shape = (batch_size, -1, self.num_heads, self.depth)
        return tf.transpose(tf.reshape(x, per_head_shape), perm=[0, 2, 1, 3])

    def call(self, q, k, v, mask=None):
        """Run attention and return the projected output.

        The attention weights returned by ``scaled_dot_product_attention``
        are discarded; only the output tensor is returned.
        """
        batch_size = tf.shape(q)[0]

        # Project each input, then split the result into heads.
        projections = ((self.wq, q), (self.wk, k), (self.wv, v))
        q_heads, k_heads, v_heads = [
            self.split_heads(project(tensor), batch_size)
            for project, tensor in projections
        ]

        context, _ = scaled_dot_product_attention(q_heads, k_heads, v_heads, mask)

        # [B, heads, seq_len, depth] -> [B, seq_len, heads, depth] -> [B, seq_len, heads * depth]
        merged = tf.reshape(
            tf.transpose(context, perm=[0, 2, 1, 3]),
            (batch_size, -1, self.num_heads * self.depth),
        )
        return self.dense(merged)


In [7]:
# 4. 포지션 와이즈 피드포워드 네트워크

def point_wise_feed_forward_network(d_model, dff):
    """Build the two-layer feed-forward sub-network.

    Args:
        d_model: Output width of the second Dense layer.
        dff: Width of the first (ReLU) Dense layer.

    Returns:
        A ``tf.keras.Sequential`` of ``Dense(dff, relu)`` followed by
        ``Dense(d_model)``.
    """
    expand = Dense(dff, activation='relu')  # expand to dff units
    project = Dense(d_model)                # project back to d_model units
    return tf.keras.Sequential([expand, project])


In [8]:
# 5. 인코더 레이어

class EncoderLayer(tf.keras.layers.Layer):
    """One encoder block: self-attention and feed-forward sub-layers.

    Each sub-layer's output goes through dropout, is added to the sub-layer's
    input (residual connection) and is then layer-normalized.
    """

    def __init__(self, d_model, num_heads, dff, dropout_rate=0.1):
        """Create the sub-layers.

        Args:
            d_model: Model width passed to the attention and feed-forward
                sub-layers.
            num_heads: Number of attention heads.
            dff: Hidden width of the feed-forward sub-layer.
            dropout_rate: Dropout rate applied after each sub-layer.
        """
        super().__init__()
        self.mha = MultiHeadAttention(d_model, num_heads)
        self.ffn = point_wise_feed_forward_network(d_model, dff)

        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)

        self.dropout1 = Dropout(dropout_rate)
        self.dropout2 = Dropout(dropout_rate)

    def call(self, x, training=None, mask=None):
        """Apply the encoder block to ``x``.

        Args:
            x: Input tensor; used as query, key and value of the attention.
            training: Forwarded to the dropout layers. Defaults to ``None``
                so the layer can be called as ``layer(x)`` and Keras can
                resolve the training mode itself.
            mask: Optional attention mask forwarded to the attention layer.

        Returns:
            The layer-normalized output of the feed-forward sub-layer.
        """
        # Self-attention sub-layer with residual connection.
        attn_output = self.mha(x, x, x, mask)
        out1 = self.layernorm1(x + self.dropout1(attn_output, training=training))

        # Feed-forward sub-layer with residual connection.
        ffn_output = self.ffn(out1)
        out2 = self.layernorm2(out1 + self.dropout2(ffn_output, training=training))
        return out2


In [9]:
# 6. 포지셔널 인코딩

def positional_encoding(position, d_model):
    """Build the sinusoidal positional-encoding table.

    Args:
        position: Number of positions (rows) to generate.
        d_model: Number of encoding dimensions (columns).

    Returns:
        A float32 tensor of shape ``(1, position, d_model)`` with sine values
        in the even columns and cosine values in the odd columns.
    """
    pos_idx = np.arange(position)[:, np.newaxis]   # (position, 1)
    dim_idx = np.arange(d_model)[np.newaxis, :]    # (1, d_model)

    # Columns 2j and 2j+1 share the same frequency exponent 2j / d_model.
    exponents = (2 * (dim_idx // 2)) / np.float32(d_model)
    table = pos_idx / np.power(10000, exponents)

    table[:, 0::2] = np.sin(table[:, 0::2])  # even columns
    table[:, 1::2] = np.cos(table[:, 1::2])  # odd columns

    # Add a leading axis of size 1 so the table broadcasts over the batch.
    return tf.cast(table[np.newaxis, ...], dtype=tf.float32)


In [13]:
# 7. 전체 인코더

class Encoder(tf.keras.layers.Layer):
    """Stack of encoder layers on top of scaled embeddings plus positional encoding."""

    def __init__(self, num_layers, d_model, num_heads, dff,
                 input_vocab_size, maximum_position_encoding, dropout_rate=0.1):
        """Create the embedding, positional table and encoder layers.

        Args:
            num_layers: Number of ``EncoderLayer`` instances to stack.
            d_model: Embedding / model width.
            num_heads: Number of attention heads per encoder layer.
            dff: Hidden width of each feed-forward sub-layer.
            input_vocab_size: Size of the embedding vocabulary.
            maximum_position_encoding: Number of positions precomputed in the
                positional-encoding table.
            dropout_rate: Dropout rate used on the embeddings and inside each
                encoder layer.
        """
        super().__init__()

        self.d_model = d_model
        self.num_layers = num_layers

        self.embedding = Embedding(input_vocab_size, d_model)
        self.pos_encoding = positional_encoding(maximum_position_encoding, d_model)

        self.enc_layers = [EncoderLayer(d_model, num_heads, dff, dropout_rate)
                           for _ in range(num_layers)]
        self.dropout = Dropout(dropout_rate)

    def call(self, x, training=None, mask=None):
        """Encode a batch of token ids.

        Args:
            x: Integer token ids, indexed as [batch, seq_len].
            training: Forwarded to dropout and to every encoder layer.
                Defaults to ``None`` so the layer can be called as
                ``encoder(x)`` and Keras can resolve the training mode.
            mask: Optional attention mask forwarded to every encoder layer.

        Returns:
            Tensor of shape [batch_size, input_seq_len, d_model].
        """
        seq_len = tf.shape(x)[1]
        x = self.embedding(x)
        # Scale the embeddings by sqrt(d_model) before adding the positional table.
        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        # NOTE(review): assumes seq_len <= maximum_position_encoding; the slice
        # is shorter than the input otherwise.
        x += self.pos_encoding[:, :seq_len, :]

        x = self.dropout(x, training=training)

        for enc_layer in self.enc_layers:
            # Pass 'training' as a keyword argument
            x = enc_layer(x, training=training, mask=mask)

        return x  # [batch_size, input_seq_len, d_model]

In [14]:
import tensorflow as tf
import numpy as np

# ======== (1) Encoder class and helper definitions (use the code above) ========
# scaled_dot_product_attention, MultiHeadAttention, point_wise_feed_forward_network,
# EncoderLayer, positional_encoding, Encoder
# => Copy the code written above and paste it here as-is.

# ======== (2) Minimal example setup ========
# A made-up text input (tokens already converted to integers)
sample_input = tf.constant([[1, 2, 3, 4, 0, 0]])  # [batch_size=1, seq_len=6]

# Hyperparameters
num_layers, d_model, num_heads, dff = 2, 64, 8, 256
input_vocab_size = 1000
max_pos_encoding = 100
dropout_rate = 0.1

# ======== (3) Build the encoder ========
encoder = Encoder(
    num_layers=num_layers,
    d_model=d_model,
    num_heads=num_heads,
    dff=dff,
    input_vocab_size=input_vocab_size,
    maximum_position_encoding=max_pos_encoding,
    dropout_rate=dropout_rate,
)

# ======== (4) Inspect the output ========
# training=False disables dropout for this forward pass.
output = encoder(sample_input, training=False)

print("입력 시퀀스:")
print(sample_input.numpy())
print("\n인코더 출력 (shape: {}):".format(output.shape))
print(output.numpy())


입력 시퀀스:
[[1 2 3 4 0 0]]

인코더 출력 (shape: (1, 6, 64)):
[[[-4.76292521e-03 -1.08566022e+00  2.85200998e-02  6.39593065e-01
   -2.90596890e+00 -5.76012373e-01  7.56474257e-01  3.68133754e-01
   -1.66109371e+00  1.72644258e+00  4.97078151e-03  1.18015218e+00
    1.92148185e+00  3.89756441e-01  1.43419832e-01 -3.34532201e-01
   -1.08149743e+00  1.21578038e-01 -2.36467868e-01  1.73903227e+00
    1.34589624e+00 -5.03473461e-01 -1.82961822e+00  3.21822971e-01
    9.41980243e-01  2.44196907e-01 -4.23899770e-01  1.56017140e-01
   -3.99980664e-01 -6.05757982e-02 -4.19822156e-01  8.44729185e-01
   -1.37926832e-01 -4.01581883e-01 -1.84646398e-01  2.05500650e+00
    1.21487379e+00 -5.68237603e-01 -6.80037439e-01 -6.98643267e-01
    6.54899031e-02  1.36365438e+00 -1.12029672e+00  2.11123109e+00
   -4.97993052e-01 -6.70786798e-01 -7.34367013e-01  8.93040717e-01
   -7.96531737e-01 -9.12059128e-01 -1.55349398e+00  1.58619002e-01
   -6.14569783e-01  2.02495670e+00 -2.33574808e-01  5.68173170e-01
   -1.503

In [None]:
# 7. 전체 인코더

class Encoder(tf.keras.layers.Layer):
    """Stack of encoder layers on top of scaled embeddings plus positional encoding."""

    def __init__(self, num_layers, d_model, num_heads, dff,
                 input_vocab_size, maximum_position_encoding, dropout_rate=0.1):
        """Create the embedding, positional table and encoder layers.

        Args:
            num_layers: Number of ``EncoderLayer`` instances to stack.
            d_model: Embedding / model width.
            num_heads: Number of attention heads per encoder layer.
            dff: Hidden width of each feed-forward sub-layer.
            input_vocab_size: Size of the embedding vocabulary.
            maximum_position_encoding: Number of positions precomputed in the
                positional-encoding table.
            dropout_rate: Dropout rate used on the embeddings and inside each
                encoder layer.
        """
        super().__init__()

        self.d_model = d_model
        self.num_layers = num_layers

        self.embedding = Embedding(input_vocab_size, d_model)
        self.pos_encoding = positional_encoding(maximum_position_encoding, d_model)

        self.enc_layers = [EncoderLayer(d_model, num_heads, dff, dropout_rate)
                           for _ in range(num_layers)]
        self.dropout = Dropout(dropout_rate)

    def call(self, x, training=None, mask=None):
        """Encode a batch of token ids.

        Args:
            x: Integer token ids, indexed as [batch, seq_len].
            training: Forwarded to dropout and to every encoder layer.
                Defaults to ``None`` so the layer can be called as
                ``encoder(x)`` and Keras can resolve the training mode.
            mask: Optional attention mask forwarded to every encoder layer.

        Returns:
            Tensor of shape [batch_size, input_seq_len, d_model].
        """
        seq_len = tf.shape(x)[1]
        x = self.embedding(x)
        # Scale the embeddings by sqrt(d_model) before adding the positional table.
        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        # NOTE(review): assumes seq_len <= maximum_position_encoding; the slice
        # is shorter than the input otherwise.
        x += self.pos_encoding[:, :seq_len, :]

        x = self.dropout(x, training=training)

        for enc_layer in self.enc_layers:
            # Pass 'training' as a keyword argument
            x = enc_layer(x, training=training, mask=mask)

        return x  # [batch_size, input_seq_len, d_model]