<a href="https://colab.research.google.com/github/juhumkwon/source_code/blob/main/Attention_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

import tensorflow as tf
import numpy as np

def scaled_dot_product_attention(Q, K, V, mask=None):
    """Compute scaled dot-product attention: softmax(Q K^T / sqrt(d_k)) V.

    Args:
        Q: Query tensor, e.g. [batch, seq_len_q, depth].
        K: Key tensor, e.g. [batch, seq_len_k, depth]. Its last dimension
            is the key depth d_k used for scaling.
        V: Value tensor, e.g. [batch, seq_len_k, depth_v].
        mask: Optional tensor that broadcasts against the attention logits
            ([batch, seq_len_q, seq_len_k]). Entries equal to 1 (or True)
            mark positions to suppress; entries equal to 0 are left
            untouched. May be bool, integer or floating point.

    Returns:
        A tuple ``(output, attention_weights)`` where ``output`` is
        ``attention_weights @ V`` and ``attention_weights`` is the softmax
        over the last (key) axis of the scaled logits.
    """
    matmul_qk = tf.matmul(Q, K, transpose_b=True)  # [batch, seq_len_q, seq_len_k]

    # Scale by sqrt(d_k). Cast to the logits dtype rather than a hard-coded
    # float32 so that float64 (or other float) inputs do not fail with a
    # dtype mismatch on the division.
    dk = tf.cast(tf.shape(K)[-1], matmul_qk.dtype)
    scaled_attention_logits = matmul_qk / tf.math.sqrt(dk)

    if mask is not None:
        # Push masked positions towards -inf so softmax assigns them ~0 weight.
        # The cast lets bool/int masks (and float masks of another precision)
        # be combined with the logits.
        # NOTE(review): -1e9 is outside the float16 range; confirm behaviour
        # before using this with half-precision inputs.
        mask = tf.cast(mask, scaled_attention_logits.dtype)
        scaled_attention_logits += (mask * -1e9)

    # Normalise over the key axis so each query's weights sum to 1.
    attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1)

    # Weighted sum of the values.
    output = tf.matmul(attention_weights, V)  # [batch, seq_len_q, depth_v]
    return output, attention_weights

# Example inputs (batch=1, seq_len=3, depth=4).
# Queries and keys hold the same three rows, so each token is scored against
# itself and the two other tokens.
Q = tf.constant(
    [[
        [1.0, 0.0, 1.0, 0.0],
        [0.0, 2.0, 0.0, 2.0],
        [1.0, 1.0, 1.0, 1.0],
    ]],
    dtype=tf.float32,
)  # shape (1, 3, 4)

K = tf.constant(
    [[
        [1.0, 0.0, 1.0, 0.0],
        [0.0, 2.0, 0.0, 2.0],
        [1.0, 1.0, 1.0, 1.0],
    ]],
    dtype=tf.float32,
)  # shape (1, 3, 4)

# One 2-dimensional value vector per key position.
V = tf.constant(
    [[
        [0.1, 0.2],
        [0.3, 0.4],
        [0.5, 0.6],
    ]],
    dtype=tf.float32,
)  # shape (1, 3, 2)

# No mask: every query attends to every key.
output, attn_weights = scaled_dot_product_attention(Q=Q, K=K, V=V)

# Show the attended values first, then the weight matrix that produced them.
print("Attention Output:")
print(output.numpy())

print("\nAttention Weights:")
print(attn_weights.numpy())

Attention Output:
[[[0.3        0.40000004]
  [0.32028684 0.42028683]
  [0.3533913  0.4533913 ]]]

Attention Weights:
[[[0.42231882 0.15536241 0.42231882]
  [0.01587624 0.8668133  0.11731042]
  [0.15536241 0.42231882 0.42231882]]]
