In [3]:
import torch
import torch.nn as nn
# Display the installed PyTorch version string.
torch.__version__

'2.1.1+cu121'

In [8]:
# Lookup table holding 10 embedding vectors, each of length 3.
#   num_embeddings (int): size of the dictionary of embeddings
#   embedding_dim (int): the size of each embedding vector
embedding = nn.Embedding(num_embeddings=10, embedding_dim=3)

**weight (Tensor)**  
The learnable weights of the module, of shape `(num_embeddings, embedding_dim)`, initialized from $\mathcal{N}(0, 1)$.

In [12]:
# Show the module's learnable weight matrix, shape (num_embeddings, embedding_dim) = (10, 3).
embedding.weight

Parameter containing:
tensor([[ 1.4514, -0.1529,  0.9000],
        [ 1.0890,  1.1874,  0.0591],
        [-0.3689, -1.9050,  1.8975],
        [ 0.1008, -1.9508,  0.7087],
        [-0.6662, -0.1594,  0.6227],
        [ 0.5339,  2.3852, -0.7213],
        [-1.6299,  1.2636, -0.1996],
        [ 1.0740, -0.6117,  1.4679],
        [ 0.3938, -1.2449, -0.1087],
        [-0.5494, -0.6781,  0.5931]], requires_grad=True)

In [13]:
# A batch of 2 samples of 4 indices each -> index tensor of shape (2, 4).
# torch.tensor(..., dtype=torch.long) is used instead of the legacy
# torch.LongTensor constructor; it yields the same int64 tensor.
# NOTE(review): `input` shadows the Python builtin of the same name; the name
# is kept so that any cell reading it keeps working.
input = torch.tensor([[1, 2, 4, 5], [4, 3, 2, 9]], dtype=torch.long)
# Each index selects one row of embedding.weight -> result of shape (2, 4, 3).
embedding(input)

tensor([[[ 1.0890,  1.1874,  0.0591],
         [-0.3689, -1.9050,  1.8975],
         [-0.6662, -0.1594,  0.6227],
         [ 0.5339,  2.3852, -0.7213]],

        [[-0.6662, -0.1594,  0.6227],
         [ 0.1008, -1.9508,  0.7087],
         [-0.3689, -1.9050,  1.8975],
         [-0.5494, -0.6781,  0.5931]]], grad_fn=<EmbeddingBackward0>)

In [14]:
# Example with padding_idx: same 10 x 3 table, but index 0 is declared the
# padding entry, and its row shows up as all zeros in the weight matrix.
embedding = nn.Embedding(num_embeddings=10, embedding_dim=3, padding_idx=0)
embedding.weight

Parameter containing:
tensor([[ 0.0000,  0.0000,  0.0000],
        [ 0.8475, -1.8647, -1.2478],
        [-0.2338,  0.3185, -1.1883],
        [-0.7653,  0.1575,  1.1594],
        [ 0.3000, -0.6165,  0.1131],
        [ 1.2430, -1.4505,  0.5493],
        [ 0.9537, -0.8129,  0.5127],
        [ 0.2310,  2.2952,  1.0959],
        [ 0.6148,  0.9893, -2.5490],
        [ 0.3897,  0.0713,  1.7416]], requires_grad=True)

In [15]:
# One sample of 4 indices; positions 0 and 2 hold index 0, the padding index
# of the embedding created with padding_idx=0.
# torch.tensor(..., dtype=torch.long) replaces the legacy torch.LongTensor
# constructor and yields the same int64 tensor.
input = torch.tensor([[0, 2, 0, 5]], dtype=torch.long)
embedding(input)

tensor([[[ 0.0000,  0.0000,  0.0000],
         [-0.2338,  0.3185, -1.1883],
         [ 0.0000,  0.0000,  0.0000],
         [ 1.2430, -1.4505,  0.5493]]], grad_fn=<EmbeddingBackward0>)

In [6]:
# Example of changing the `pad` vector: overwrite the row stored at padding_idx.
padding_idx = 0
embedding = nn.Embedding(num_embeddings=3, embedding_dim=3, padding_idx=padding_idx)
# Write into the parameter in place under no_grad so autograd does not track
# the assignment.
with torch.no_grad():
    # Size the replacement from the module rather than repeating the literal 3.
    embedding.weight[padding_idx] = torch.ones(embedding.embedding_dim)
# Last expression of the cell: displays the weights with the new pad row.
embedding.weight

Parameter containing:
tensor([[ 1.0000,  1.0000,  1.0000],
        [-0.2458,  1.0273,  1.0195],
        [ 0.8632, -0.2585,  1.0999]], requires_grad=True)

## from_pretrained

In [7]:
# float32 tensor containing pretrained weights: 2 embeddings of size 3.
# torch.tensor(..., dtype=...) replaces the legacy torch.FloatTensor /
# torch.LongTensor constructors; dtypes and values are unchanged.
weight = torch.tensor([[1, 2.3, 3], [4, 5.1, 6.3]], dtype=torch.float32)
embedding = nn.Embedding.from_pretrained(weight)
# Get embeddings for index 1
input = torch.tensor([1], dtype=torch.long)
embedding(input)

tensor([[4.0000, 5.1000, 6.3000]])