In [1]:
import torch
import torch.nn as nn
import math

In [2]:
class InputEmbedding(nn.Module):
    """Token-embedding layer whose output is scaled by sqrt(d_model).

    Wraps an ``nn.Embedding`` lookup table and multiplies every looked-up
    vector by ``math.sqrt(d_model)``.

    Args:
        vocab_size: Number of rows in the embedding table.
        d_model: Width of each embedding vector.
    """

    def __init__(self, vocab_size, d_model):
        super().__init__()
        # Kept on the instance because forward() derives its scale from it.
        self.d_model = d_model
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=d_model,
        )

    def forward(self, x):
        """Look up and scale the embeddings for a tensor of token indices.

        Args:
            x: Index tensor, e.g. ``[batch_size, seq_len]``.

        Returns:
            Tensor of scaled embeddings, e.g. ``[batch_size, seq_len, d_model]``.
        """
        scale = math.sqrt(self.d_model)
        token_vectors = self.embedding(x)  # x.shape + (d_model,)
        return token_vectors * scale

In [14]:
# Demo configuration: a 5000-word vocabulary mapped to 300-dimensional embeddings
vocab_size = 5000
d_model = 300

# Create an instance of our InputEmbedding class
input_embedding = InputEmbedding(vocab_size, d_model)

# A mini-batch of 2 sequences, each 4 tokens long.
# Every number is a word's index in the vocabulary, so it must lie in [0, vocab_size).
sequences = torch.tensor([[1, 2, 3, 4], [5, 6, 7, 8]])

# Pass our sequences through the input embedding
embedded_sequences = input_embedding(sequences)

# Check the expected shape instead of only stating it in a comment: the embedding
# keeps the input's [batch, seq_len] layout and appends a trailing d_model axis.
assert embedded_sequences.shape == (*sequences.shape, d_model), embedded_sequences.shape

print(embedded_sequences.shape)  # torch.Size([2, 4, 300])

torch.Size([2, 4, 300])


In [15]:
# Display the raw token-index batch that was fed to the embedding layer
sequences

tensor([[1, 2, 3, 4],
        [5, 6, 7, 8]])

In [16]:
# Shape of the index batch before embedding: 2 sequences x 4 tokens, no d_model axis yet
sequences.shape

torch.Size([2, 4])