In [2]:
# Import PyTorch and display the installed build string to record the environment.
import torch
torch.__version__

'2.7.1+cpu'

### Self Attention
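- Single-head scaled dot-product attention: $\mathrm{softmax}\left(QK^\top / \sqrt{d}\right)V$, where $Q$, $K$ and $V$ are learned linear projections of the input embeddings and $d$ is the embedding dimension.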

In [30]:
from typing_extensions import Annotated
import torch.nn as nn
import math

class SelfAttention(nn.Module):
    """Single-head scaled dot-product self-attention.

    The input embeddings are projected into queries, keys and values by three
    independent ``nn.Linear`` layers (``embed_dimension -> embed_dimension``).
    The result is ``softmax(Q @ K^T / sqrt(embed_dimension)) @ V``.

    Args:
        embed_dimension: Size of the last dimension of the input embeddings;
            also the width of the projected queries, keys and values.
        uses_bias: Whether the three projection layers learn a bias term.
    """

    def __init__(
        self,
        embed_dimension: Annotated[int, "Dimension of embeddings for each word"],
        uses_bias: Annotated[bool, "Requires bias or not"] = True,
    ) -> None:
        print("Self Attention under construction...")
        super().__init__()
        self.embed_dimension = embed_dimension
        self.uses_bias = uses_bias
        # The q, k, v creation order is kept fixed so that seeded weight
        # initialisation stays reproducible.
        self.w_q = nn.Linear(embed_dimension, embed_dimension, bias=uses_bias)
        self.w_k = nn.Linear(embed_dimension, embed_dimension, bias=uses_bias)
        self.w_v = nn.Linear(embed_dimension, embed_dimension, bias=uses_bias)

    def forward(
        self,
        sentence_sequences_embeddings: Annotated[torch.Tensor, "Batch of vectors represent each & every words of a sentence"],
        attention_mask: "torch.Tensor | None" = None,
    ) -> torch.Tensor:
        """Compute attention-weighted embeddings for every position.

        Args:
            sentence_sequences_embeddings: Tensor whose last two dimensions are
                ``(seq_len, embed_dimension)``; any leading dimensions are
                treated as batch dimensions.
            attention_mask: Optional mask broadcastable to
                ``(..., seq_len, seq_len)``. Entries that are ``True`` (or
                non-zero) may be attended to; all other entries are excluded.
                ``None`` (the default) attends to every position.

        Returns:
            Tensor with the same shape as the input.

        Note:
            A query row whose mask entries are all ``False`` has no valid
            target, so its softmax (and output) is NaN.
        """
        q = self.w_q(sentence_sequences_embeddings)
        k = self.w_k(sentence_sequences_embeddings)
        v = self.w_v(sentence_sequences_embeddings)

        # e.g. q, k, v of shape (5, 6): (5, 6) @ (6, 5) -> (5, 5) score matrix,
        # scaled by sqrt(embed_dimension) as in standard scaled dot-product attention.
        attention_scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.embed_dimension)

        if attention_mask is not None:
            keep = attention_mask.to(device=attention_scores.device, dtype=torch.bool)
            # -inf logits become exactly zero weight after the softmax.
            attention_scores = attention_scores.masked_fill(~keep, float("-inf"))

        normalized_attention_scores = torch.softmax(attention_scores, dim=-1)
        new_embeddings = normalized_attention_scores @ v
        return new_embeddings






In [31]:
# Build a layer for 6-dimensional embeddings; the constructor prints a status line.
sa = SelfAttention(6)

Self Attention under construction...


In [32]:
import torch

# Seed the global RNG so the dummy input and the layer's randomly initialised
# weights (and therefore the printed tensor) repeat across runs.
torch.manual_seed(0)

# Define the parameters
batch_size = 2        # Number of sentences
seq_len = 3           # Number of tokens per sentence
embed_dim = 4         # Embedding dimension (must match the layer's embed_dimension)

# Create dummy data: shape = (batch_size, seq_len, embed_dim)
dummy_input = torch.randn(batch_size, seq_len, embed_dim)

# Instantiate your SelfAttention class
attention_layer = SelfAttention(embed_dimension=embed_dim)

# Pass the dummy input through the attention layer
output = attention_layer(dummy_input)

# Self-attention returns one vector of the same width per token, so the
# shape must be preserved; check it instead of relying on a comment.
assert output.shape == dummy_input.shape, (
    f"expected {tuple(dummy_input.shape)}, got {tuple(output.shape)}"
)

# Print output
print("Input shape :", dummy_input.shape)     # (2, 3, 4)
print("Output shape:", output.shape)          # (2, 3, 4)
print("Output tensor:")
print(output)


Self Attention under construction...
Input shape : torch.Size([2, 3, 4])
Output shape: torch.Size([2, 3, 4])
Output tensor:
tensor([[[ 0.2281,  0.3294,  0.5354,  0.5081],
         [ 0.2245,  0.3259,  0.5349,  0.5073],
         [ 0.2182,  0.3278,  0.5342,  0.4850]],

        [[-0.0706,  0.0859,  0.4647,  0.5463],
         [-0.1122,  0.0315,  0.4701,  0.4877],
         [ 0.1383,  0.0632,  0.3782,  0.8658]]], grad_fn=<UnsafeViewBackward0>)


In [35]:
# Hand-written example of shape (1, 2, 3, 4): one outer group holding two
# identical 3x4 matrices whose rows are labelled as word vectors.
# torch.tensor is the recommended factory for literal data; the explicit
# float32 dtype matches what the legacy torch.Tensor constructor returns by default.
q = torch.tensor(
    [
        [
            [
                [1, 0, 1, 0],  # Word 1
                [0, 1, 0, 1],  # Word 2
                [1, 1, 1, 1],  # Word 3
            ],
            [
                [1, 0, 1, 0],  # Word 1
                [0, 1, 0, 1],  # Word 2
                [1, 1, 1, 1],  # Word 3
            ],
        ]
    ],
    dtype=torch.float32,
)
q.shape

torch.Size([1, 2, 3, 4])

In [None]:
# Three outer entries, each holding two rows of five values.
torch.tensor([
    [[1,2,3,4,5], [2,3,4,5,6,]],
    [[1,2,3,4,5], [2,3,4,5,6,]],
    [[1,2,3,4,5], [2,3,4,5,6,]]
]
)
# Shape = (3, 2, 5)

torch.Size([3, 2, 5])

In [48]:
# y holds 8 elements arranged as (2, 2, 2).
y = torch.tensor([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
# -1 asks view to infer the last dimension: 8 / (2 * 2) = 2, so the shape is unchanged.
y.view( 2, 2, -1)

tensor([[[1, 2],
         [3, 4]],

        [[5, 6],
         [7, 8]]])

In [49]:
# 1-D tensor with 12 elements.
data = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
# Fix 2 columns and let -1 infer the row count: 12 / 2 = 6 rows.
data.view(-1, 2)

tensor([[ 1,  2],
        [ 3,  4],
        [ 5,  6],
        [ 7,  8],
        [ 9, 10],
        [11, 12]])

In [50]:
# Redefine the (2, 2, 2) example tensor used by the view examples that follow.
y = torch.tensor([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
# A lone -1 flattens the tensor into 1-D with all 8 elements.
y.view(-1)

tensor([1, 2, 3, 4, 5, 6, 7, 8])

In [51]:
# Two rows; -1 infers 4 columns from y's 8 elements.
y.view(2, -1)

tensor([[1, 2, 3, 4],
        [5, 6, 7, 8]])

In [52]:
# Two columns; -1 infers 4 rows from y's 8 elements.
y.view(-1, 2)

tensor([[1, 2],
        [3, 4],
        [5, 6],
        [7, 8]])

In [60]:
# Four slices of shape (1, 2); -1 infers the trailing 2, giving shape (4, 1, 2).
y.view(4,1, -1)

tensor([[[1, 2]],

        [[3, 4]],

        [[5, 6]],

        [[7, 8]]])

In [59]:
# Literal with shape (4, 1, 2): the same values that y.view(4, 1, -1) displays.
torch.tensor([
    [[1, 2]],
    [[3, 4]],
    [[5, 6]],
    [[7, 8]]

])

tensor([[[1, 2]],

        [[3, 4]],

        [[5, 6]],

        [[7, 8]]])

In [65]:
import torch

# 1-D tensor with 4 elements, used by the unsqueeze examples below.
x = torch.tensor([1, 2, 3, 4])


In [70]:
# Insert a size-1 axis at position 0: (4,) -> (1, 4).
x.unsqueeze(0).shape

torch.Size([1, 4])

In [72]:
# Insert a size-1 axis at position 1: (4,) -> (4, 1).
x.unsqueeze(1).shape

torch.Size([4, 1])

In [None]:
# Two 2x2 blocks, used by the unsqueeze examples below.
y = torch.tensor([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]) # shape (2, 2, 2)

y.shape

torch.Size([2, 2, 2])

In [76]:
# New leading axis: (2, 2, 2) -> (1, 2, 2, 2).
y.unsqueeze(0).shape

torch.Size([1, 2, 2, 2])

In [81]:
# Same operation, showing the values: the whole tensor gains one extra outer bracket.
y.unsqueeze(0)

tensor([[[[1, 2],
          [3, 4]],

         [[5, 6],
          [7, 8]]]])

In [82]:
# New axis at position 1: (2, 2, 2) -> (2, 1, 2, 2); each 2x2 block gets its own wrapper.
y.unsqueeze(1)

tensor([[[[1, 2],
          [3, 4]]],


        [[[5, 6],
          [7, 8]]]])

In [87]:
# x has shape (1, 1, 4); each squeeze(0) drops the leading size-1 axis, leaving shape (4,).
x = torch.tensor([[[1, 2, 3, 4]]])
x.squeeze(0).squeeze(0)

tensor([1, 2, 3, 4])

In [88]:
# Generate the 40 consecutive integers 0..39 (4 * 2 * 5 of them), then
# arrange them in row-major order as 4 blocks of 2 rows by 5 columns.
x = torch.arange(40)
x = x.reshape(4, 2, 5)
print(x)
print("Shape:", x.shape)

tensor([[[ 0,  1,  2,  3,  4],
         [ 5,  6,  7,  8,  9]],

        [[10, 11, 12, 13, 14],
         [15, 16, 17, 18, 19]],

        [[20, 21, 22, 23, 24],
         [25, 26, 27, 28, 29]],

        [[30, 31, 32, 33, 34],
         [35, 36, 37, 38, 39]]])
Shape: torch.Size([4, 2, 5])


In [2]:
import torch
# Turn the 12-element range into a column: (12,) -> (12, 1).
torch.arange(12).unsqueeze(1)

tensor([[ 0],
        [ 1],
        [ 2],
        [ 3],
        [ 4],
        [ 5],
        [ 6],
        [ 7],
        [ 8],
        [ 9],
        [10],
        [11]])