##### What is this notebook about?
- This notebook covers miscellaneous topics related to neural networks:
    - Embeddings 



### Embedding layer
- Also implement the same lookup (inefficiently) using a fully connected (FC) layer applied to one-hot encoded token ids

In [1]:
import torch
# Fix the global RNG seed so the randomly initialised weights below are reproducible
torch.manual_seed(seed=2024)

<torch._C.Generator at 0x783e7c16f390>

In [2]:
# Hyperparameters: number of distinct token ids, and the size of each embedding vector
vocab_size, emb_dim = 5, 3

In [3]:
# Build the embedding table: one learnable row of length emb_dim per token id
emb_layer = torch.nn.Embedding(num_embeddings=vocab_size, embedding_dim=emb_dim)
# Show the shape of the table, (vocab_size, emb_dim), followed by its initial values
print(emb_layer.weight.shape, emb_layer.weight)

torch.Size([5, 3]) Parameter containing:
tensor([[-0.0404,  1.7260, -0.8140],
        [ 1.3722,  0.5060, -0.4823],
        [-0.7853,  0.6681, -0.4439],
        [ 0.1888,  0.5986,  0.6458],
        [ 0.6306, -1.4668, -0.6798]], requires_grad=True)


In [4]:
# Look up the embeddings of token ids 2 and 1
sample_token_ids = torch.tensor([2, 1])
# Calling the layer selects the rows indexed by the ids, i.e. the 3rd and 2nd rows of the table
sample_emb = emb_layer(sample_token_ids)
print(sample_token_ids.shape, sample_token_ids)
print(sample_emb.shape, sample_emb)
# Indexing the weight matrix directly with the same ids yields the same rows
print(emb_layer.weight[sample_token_ids] == sample_emb)


torch.Size([2]) tensor([2, 1])
torch.Size([2, 3]) tensor([[-0.7853,  0.6681, -0.4439],
        [ 1.3722,  0.5060, -0.4823]], grad_fn=<EmbeddingBackward0>)
tensor([[True, True, True],
        [True, True, True]])


In [5]:
# Implement the same lookup (inefficiently) using an FC layer

# Read the sizes from the embedding layer itself instead of re-hard-coding them,
# so the FC layer can never drift out of sync with the layer it mirrors
vocab_size = emb_layer.num_embeddings
emb_dim = emb_layer.embedding_dim

# FC layer without bias, so its output is exactly input @ weight.T
fc_layer = torch.nn.Linear(vocab_size, emb_dim, bias=False)
print(fc_layer.weight.shape, fc_layer.weight) # (emb_dim, vocab_size)

# Set same weights as emb layer.
# Copy the values rather than wrapping emb_layer.weight.T in a new Parameter:
# the transpose is a view, so a Parameter built from it would share memory with
# emb_layer.weight and an in-place update to one layer would silently change the other.
with torch.no_grad():
    fc_layer.weight.copy_(emb_layer.weight.T)
print(fc_layer.weight.shape, fc_layer.weight)

# Sample embedding for tokens 2, 1
sample_token_ids = torch.tensor([2, 1])
# num_classes must be passed explicitly: without it one_hot infers max id + 1 (= 3 here),
# which would not match the vocab_size inputs the FC layer expects
sample_token_onehot = torch.nn.functional.one_hot(sample_token_ids, num_classes=vocab_size)  # (num_tokens, vocab_size)
print(sample_token_onehot.shape, sample_token_onehot)

# sample_token_onehot @ fc_layer.weight.T => (num_tokens, emb_dim)
# one_hot returns an integer tensor, so cast to float to match the weight dtype
sample_emb_fc = fc_layer(sample_token_onehot.float())  # -> 3rd, 2nd columns in fc_layer weight
print(sample_emb_fc.shape, sample_emb_fc)

torch.Size([3, 5]) Parameter containing:
tensor([[ 0.4318,  0.1762,  0.1705, -0.0531,  0.0578],
        [-0.2760, -0.1286,  0.0560, -0.4053, -0.0430],
        [-0.0369,  0.2302,  0.4409,  0.0130, -0.0426]], requires_grad=True)
torch.Size([3, 5]) Parameter containing:
tensor([[-0.0404,  1.3722, -0.7853,  0.1888,  0.6306],
        [ 1.7260,  0.5060,  0.6681,  0.5986, -1.4668],
        [-0.8140, -0.4823, -0.4439,  0.6458, -0.6798]], requires_grad=True)
torch.Size([2, 5]) tensor([[0, 0, 1, 0, 0],
        [0, 1, 0, 0, 0]])
torch.Size([2, 3]) tensor([[-0.7853,  0.6681, -0.4439],
        [ 1.3722,  0.5060, -0.4823]], grad_fn=<MmBackward0>)


In [6]:
# Element-wise check that the FC-layer result matches the embedding-layer lookup
sample_emb == sample_emb_fc

tensor([[True, True, True],
        [True, True, True]])