<a href="https://colab.research.google.com/github/rafiqulcse/Natural-Language-Processing-Project/blob/main/Self_Attension.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Google Colab Link: https://colab.research.google.com/drive/18OJ6eX5sS9EBO9tVAIMkjk6geQYxsnXs?usp=sharing

In [None]:
import numpy as np
from scipy.special import softmax

In [None]:
def scaled_dot_product_attention(Q, K, V, mask=None):
    """
    Scaled dot-product attention: softmax(Q @ K^T / sqrt(d_k)) @ V.

    The last two axes of every input are (sequence, feature). Any leading
    axes are treated as batch axes, so unbatched 2-D inputs, batched 3-D
    inputs and multi-head 4-D inputs are all accepted.

    Args:
        Q (array_like): Queries of shape (..., seq_length_q, d_k).
        K (array_like): Keys of shape (..., seq_length_k, d_k).
        V (array_like): Values of shape (..., seq_length_k, d_v).
        mask (array_like or None): Optional mask that must be broadcastable
            to the logits shape (..., seq_length_q, seq_length_k). Entries
            equal to 1 (or True) mark positions to hide: the mask is
            multiplied by -1e9 and added to the logits before the softmax.
            Entries equal to 0 (or False) leave the logits unchanged.

    Returns:
        output (np.ndarray): Attention output of shape (..., seq_length_q, d_v).
        attention_weights (np.ndarray): Softmax weights of shape
            (..., seq_length_q, seq_length_k); each row sums to 1.
    """
    Q, K, V = np.asarray(Q), np.asarray(K), np.asarray(V)

    # Swap only the last two axes of K so the product works for any number
    # of leading batch axes (a fixed (0, 2, 1) permutation only fits 3-D).
    matmul_qk = np.matmul(Q, np.swapaxes(K, -1, -2))

    # Scale by sqrt(d_k) so the logits do not grow with the feature size,
    # which would push the softmax into saturated regions.
    d_k = K.shape[-1]
    scaled_attention_logits = matmul_qk / np.sqrt(d_k)

    # Apply the mask to the scaled attention logits if provided.
    if mask is not None:
        # The logits array was freshly allocated by the division above, so
        # updating it in place never touches the caller's Q or K.
        scaled_attention_logits += (np.asarray(mask) * -1e9)

    # Normalise over the key axis to obtain the attention weights.
    attention_weights = softmax(scaled_attention_logits, axis=-1)

    # Each output row is the attention-weighted average of the value rows.
    output = np.matmul(attention_weights, V)

    return output, attention_weights

In [None]:
# Example usage:
# Build random Q, K and V matrices with entries drawn uniformly from [0, 1).
# A seeded generator keeps the values identical on every Restart & Run All.
SEED = 42
rng = np.random.default_rng(SEED)

batch_size, seq_length, d_model = 2, 4, 64
Q = rng.random((batch_size, seq_length, d_model))
K = rng.random((batch_size, seq_length, d_model))
V = rng.random((batch_size, seq_length, d_model))

In [None]:
# Run attention over the example inputs; Q, K and V share one sequence
# length here, so the weight matrix is square per batch element.
output, attention_weights = scaled_dot_product_attention(Q=Q, K=K, V=V)

# Report the shapes of both results.
print("Output shape:", np.shape(output))
print("Attention weights shape:", np.shape(attention_weights))

Output shape: (2, 4, 64)
Attention weights shape: (2, 4, 4)
