## Comparing Embedding layer with dense + one-hot encoding
---
### 1. Using nn.Embedding

In [1]:
import torch
import torch.nn as nn

In [24]:
# Four example token indices; each one selects a row of the embedding table.
temp_input = torch.tensor([3, 2, 0, 1])

# Number of distinct indices = largest index + 1. Convert to a plain Python int:
# `temp_input.max()` is a 0-dim tensor, while nn.Embedding / nn.Linear document
# their size arguments as ints.
num_idx = int(temp_input.max()) + 1
# Dimensionality of each embedding vector.
out_dim = 5

In [25]:
# Fix the RNG seed so the randomly initialized embedding weights are the same
# on every Restart & Run All (same seed the Linear layer cell below uses).
torch.manual_seed(123)
embedding_layer = nn.Embedding(num_idx, out_dim)

In [26]:
# Inspect the lookup table: num_idx rows, each an out_dim-dimensional vector.
embedding_layer.weight

Parameter containing:
tensor([[ 0.3374, -0.1778, -0.3035, -0.5880,  1.5810],
        [ 1.3010,  1.2753, -0.2010, -0.1606, -0.4015],
        [ 0.6957, -1.8061, -1.1589,  0.3255, -0.6315],
        [-2.8400, -0.7849, -1.4096, -0.4076,  0.7953]], requires_grad=True)

In [27]:
embedding_layer(torch.tensor([1]))

# Looks up the embedding vector for index 1, i.e. returns row 1 of
# embedding_layer.weight (as a 1 x out_dim tensor, since the input holds one index).

tensor([[ 1.3010,  1.2753, -0.2010, -0.1606, -0.4015]],
       grad_fn=<EmbeddingBackward0>)

In [32]:
embedding_layer(torch.tensor([3, 2, 0, 1]))
# Each input index selects the matching row of the lookup table below, so the
# result stacks rows 3, 2, 0 and 1 in that order.
# Lookup table (embedding_layer.weight):
# tensor([[ 0.3374, -0.1778, -0.3035, -0.5880,  1.5810],
#         [ 1.3010,  1.2753, -0.2010, -0.1606, -0.4015],
#         [ 0.6957, -1.8061, -1.1589,  0.3255, -0.6315],
#         [-2.8400, -0.7849, -1.4096, -0.4076,  0.7953]], requires_grad=True)

tensor([[-2.8400, -0.7849, -1.4096, -0.4076,  0.7953],
        [ 0.6957, -1.8061, -1.1589,  0.3255, -0.6315],
        [ 0.3374, -0.1778, -0.3035, -0.5880,  1.5810],
        [ 1.3010,  1.2753, -0.2010, -0.1606, -0.4015]],
       grad_fn=<EmbeddingBackward0>)

### 2. Using nn.Linear

In [33]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [34]:
temp_input = torch.tensor([3, 2, 0, 1])
# Pin the one-hot width to num_idx instead of letting F.one_hot infer it from the
# largest value present, so it always matches the Linear layer's input size.
onehot = F.one_hot(temp_input, num_classes=int(num_idx))
onehot

tensor([[0, 0, 0, 1],
        [0, 0, 1, 0],
        [1, 0, 0, 0],
        [0, 1, 0, 0]])

Next, we initialize a `Linear` layer, which carries out a matrix multiplication $X W^T$:

In [41]:
# Seed the RNG so the Linear layer's initial weights are reproducible.
torch.manual_seed(123)

# Bias-free dense layer mapping num_idx inputs to out_dim outputs, so its forward
# pass is only the matrix product with the weight.
linear_layer = nn.Linear(in_features=num_idx, out_features=out_dim, bias=False)
linear_layer.weight

Parameter containing:
tensor([[-0.2039,  0.0166, -0.2483,  0.1886],
        [-0.4260,  0.3665, -0.3634, -0.3975],
        [-0.3159,  0.2264, -0.1847,  0.1871],
        [-0.4244, -0.3034, -0.1836, -0.0983],
        [-0.3814,  0.3274, -0.1179,  0.1605]], requires_grad=True)

In [42]:
# Load the embedding table into the Linear layer. nn.Linear keeps its weight as
# (out_features, in_features), hence the transpose. detach() + clone() gives the
# Linear layer its own copy of the values; wrapping the transposed view directly
# would leave both layers backed by the same memory.
linear_layer.weight = nn.Parameter(embedding_layer.weight.detach().T.clone())

In [43]:
# Confirm the weight is now the transposed embedding table.
linear_layer.weight

Parameter containing:
tensor([[ 0.3374,  1.3010,  0.6957, -2.8400],
        [-0.1778,  1.2753, -1.8061, -0.7849],
        [-0.3035, -0.2010, -1.1589, -1.4096],
        [-0.5880, -0.1606,  0.3255, -0.4076],
        [ 1.5810, -0.4015, -0.6315,  0.7953]], requires_grad=True)

In [46]:
# one_hot returns an integer tensor; cast to float to match the layer's float weights.
linear_layer(onehot.float())

tensor([[-2.8400, -0.7849, -1.4096, -0.4076,  0.7953],
        [ 0.6957, -1.8061, -1.1589,  0.3255, -0.6315],
        [ 0.3374, -0.1778, -0.3035, -0.5880,  1.5810],
        [ 1.3010,  1.2753, -0.2010, -0.1606, -0.4015]], grad_fn=<MmBackward0>)

In [48]:
# Same rows as the Linear-layer result above: the embedding lookup and the
# one-hot matrix multiplication produce identical values.
embedding_layer(temp_input)

tensor([[-2.8400, -0.7849, -1.4096, -0.4076,  0.7953],
        [ 0.6957, -1.8061, -1.1589,  0.3255, -0.6315],
        [ 0.3374, -0.1778, -0.3035, -0.5880,  1.5810],
        [ 1.3010,  1.2753, -0.2010, -0.1606, -0.4015]],
       grad_fn=<EmbeddingBackward0>)