<a href="https://colab.research.google.com/github/shaunak-badani/Duke-Fall-24-Assignments/blob/main/520_Modeling_Processes_and_Algorithms/snippets/Embeddings.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Embeddings in PyTorch are a lookup table that maps word indices to vectors.
The weight matrix usually has shape (VOCAB_SIZE, EMBEDDING_DIM).

You pass in the indices of the words whose embeddings you want, and the output contains one embedding vector per index.

W -> (VOCAB_SIZE, EMBEDDING_DIM)

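If you pass in the tensor [[0, 1]], it simply returns the corresponding row vectors of W.

The output is W[[0, 1]] (rows 0 and 1 of W) with an extra leading batch dimension. Note that W[0, 1] would instead be the single scalar at row 0, column 1.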

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim

# Toy vocabulary; the final entry "<PAD>" is the padding token.
vocab = ["hello", "world", "good", "day", "bad", "happy", "sad", "<PAD>"]
# A word's index is simply its position in `vocab`.
word_to_idx = dict(zip(vocab, range(len(vocab))))

# Define the model
class SimpleClassifier(nn.Module):
    """Mean-pooled bag-of-embeddings classifier.

    Token indices are embedded, averaged over the sequence dimension while
    ignoring padding positions, and passed through a two-layer MLP
    (Linear -> ReLU -> Linear) that produces one logit per class.

    Args:
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Size of each embedding vector.
        hidden_dim: Width of the hidden linear layer.
        output_dim: Number of output logits.
        padding_idx: Index of the padding token. When omitted, falls back to
            ``word_to_idx["<PAD>"]`` from the notebook-level vocabulary.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, padding_idx=None):
        super().__init__()
        if padding_idx is None:
            # Backward-compatible default: use the notebook's vocabulary mapping.
            padding_idx = word_to_idx["<PAD>"]
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=padding_idx)
        self.fc1 = nn.Linear(embedding_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Compute class logits for a batch of index sequences.

        Args:
            x: Integer tensor of token indices, expected shape (batch, seq_len).

        Returns:
            Tensor of logits with shape (batch, output_dim).
        """
        embedded = self.embedding(x)

        # Average over real tokens only. A plain mean over dim=1 would divide by
        # the padded length, so the same sentence padded to different lengths
        # would yield different pooled vectors.
        not_pad = (x != self.embedding.padding_idx).unsqueeze(-1).to(embedded.dtype)
        # clamp guards against division by zero for rows made up entirely of padding
        token_count = not_pad.sum(dim=1).clamp(min=1.0)
        pooled = (embedded * not_pad).sum(dim=1) / token_count

        hidden = self.relu(self.fc1(pooled))
        output = self.fc2(hidden)
        return output

# Model hyperparameters
vocab_size = len(vocab)
embedding_dim = 50
hidden_dim = 20
output_dim = 2

# Seed PyTorch's RNG before the weights are created so that the randomly
# initialised parameters are the same on every fresh run of the notebook.
torch.manual_seed(0)
model = SimpleClassifier(vocab_size, embedding_dim, hidden_dim, output_dim)

# Cross-entropy over the output logits, optimised with Adam at its default settings
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

In [9]:
# Toy training set: four two-word sentences, each with a binary label
sentences = [
    ["hello", "world"],
    ["good", "day"],
    ["sad", "world"],
    ["bad", "day"],
]
labels = torch.tensor([0, 1, 0, 1])

# Replace every word by its vocabulary index
indexed_sentences = [list(map(word_to_idx.__getitem__, words)) for words in sentences]

# Right-pad each row with the <PAD> index up to the longest sentence length
max_len = max(map(len, indexed_sentences))
padded_sentences = [
    ids + (max_len - len(ids)) * [word_to_idx["<PAD>"]]
    for ids in indexed_sentences
]
input_tensor = torch.tensor(padded_sentences)

In [10]:

# Show the padded index tensor: one row per sentence, one column per token position
input_tensor

tensor([[0, 1],
        [2, 3],
        [6, 1],
        [4, 3]])

In [11]:
# Look up the embeddings of word indices 0 and 1, passed as a batch holding one sentence
model.embedding(torch.tensor([[0, 1]], dtype=torch.long))

tensor([[[ 8.5968e-01,  1.1785e+00, -7.8363e-01,  4.0976e-01,  1.4115e+00,
           1.7898e+00, -1.4364e+00, -1.2868e+00,  1.1944e+00, -5.4213e-01,
          -6.0180e-01,  8.7879e-01,  1.4892e+00, -4.6131e-01, -1.6923e+00,
          -7.8046e-01, -5.9111e-01,  7.4712e-01,  2.0151e+00, -1.4224e+00,
           2.9555e+00,  3.2424e-01, -3.5774e-01, -9.1922e-01, -4.2938e-01,
           1.7404e-01, -8.3192e-01,  1.5080e-03, -8.3295e-01,  1.0297e+00,
          -4.2784e-01, -8.9101e-01,  1.1736e+00, -8.4671e-01, -1.2132e+00,
          -8.9476e-01,  7.9593e-01,  3.1484e-01, -5.8347e-01, -3.5326e-01,
          -1.8668e-01,  1.9908e-01, -2.2509e-01, -3.6270e-01, -7.9253e-01,
           2.6316e-01,  9.8670e-01,  8.5062e-01, -1.2813e+00,  4.2514e-01],
         [ 1.1730e+00,  3.0943e-01, -9.0146e-01, -1.0058e+00, -1.4448e+00,
          -1.1262e+00,  1.8576e-01, -1.1980e+00,  5.9337e-01, -1.5322e-01,
          -1.3244e+00, -8.1402e-02, -9.6001e-01, -2.1825e+00, -2.5827e-01,
           4.0446e-01,  

In [12]:
# Rows 0 and 1 of the embedding weight matrix, obtained by indexing it directly
model.embedding.weight[[0, 1]]

tensor([[ 8.5968e-01,  1.1785e+00, -7.8363e-01,  4.0976e-01,  1.4115e+00,
          1.7898e+00, -1.4364e+00, -1.2868e+00,  1.1944e+00, -5.4213e-01,
         -6.0180e-01,  8.7879e-01,  1.4892e+00, -4.6131e-01, -1.6923e+00,
         -7.8046e-01, -5.9111e-01,  7.4712e-01,  2.0151e+00, -1.4224e+00,
          2.9555e+00,  3.2424e-01, -3.5774e-01, -9.1922e-01, -4.2938e-01,
          1.7404e-01, -8.3192e-01,  1.5080e-03, -8.3295e-01,  1.0297e+00,
         -4.2784e-01, -8.9101e-01,  1.1736e+00, -8.4671e-01, -1.2132e+00,
         -8.9476e-01,  7.9593e-01,  3.1484e-01, -5.8347e-01, -3.5326e-01,
         -1.8668e-01,  1.9908e-01, -2.2509e-01, -3.6270e-01, -7.9253e-01,
          2.6316e-01,  9.8670e-01,  8.5062e-01, -1.2813e+00,  4.2514e-01],
        [ 1.1730e+00,  3.0943e-01, -9.0146e-01, -1.0058e+00, -1.4448e+00,
         -1.1262e+00,  1.8576e-01, -1.1980e+00,  5.9337e-01, -1.5322e-01,
         -1.3244e+00, -8.1402e-02, -9.6001e-01, -2.1825e+00, -2.5827e-01,
          4.0446e-01,  2.2148e-01, -3

In [13]:

# Training loop: full-batch updates on the toy dataset
num_epochs = 100
for epoch in range(num_epochs):
    # Forward pass and loss over all training sentences
    outputs = model(input_tensor)
    loss = criterion(outputs, labels)

    # Drop stale gradients, backpropagate, then apply the optimizer update
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report progress on every 10th epoch
    if epoch % 10 == 9:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

# Try the trained model on a sentence that is not among the training sentences
test_sentence = ["happy", "world"]
test_indexed = [word_to_idx[token] for token in test_sentence]
# Pad to the same length that was used for the training batch
test_padded = test_indexed + (max_len - len(test_indexed)) * [word_to_idx["<PAD>"]]
# Add a leading batch dimension of size one
test_tensor = torch.tensor(test_padded).unsqueeze(0)

# Inference only, so gradient tracking is switched off
with torch.no_grad():
    output = model(test_tensor)
    predicted = output.argmax(dim=1)
    print(f"Predicted class for 'happy world': {predicted.item()}")


Epoch [10/100], Loss: 0.5055
Epoch [20/100], Loss: 0.3706
Epoch [30/100], Loss: 0.2468
Epoch [40/100], Loss: 0.1540
Epoch [50/100], Loss: 0.0949
Epoch [60/100], Loss: 0.0595
Epoch [70/100], Loss: 0.0392
Epoch [80/100], Loss: 0.0275
Epoch [90/100], Loss: 0.0204
Epoch [100/100], Loss: 0.0159
Predicted class for 'happy world': 0


In [14]:
# Shape of the embedding table: (vocab_size, embedding_dim)
model.embedding.weight.shape

torch.Size([8, 50])

In [32]:
# Small embedding table (5 rows, 3 dimensions) used to inspect gradients
k = nn.Embedding(num_embeddings=5, embedding_dim=3)

# Look up rows 1 and 2 as a single batch of shape (1, 2)
i_tensor = torch.tensor([1, 2], dtype=torch.long).unsqueeze(0)
h = k(i_tensor)
# Reduce to a scalar so that backward() can be called on it
l = torch.sum(h)

In [33]:
# Backpropagate from the scalar sum; this fills in k.weight.grad
l.backward()

In [34]:
# Gradient of the sum w.r.t. the embedding table: only the looked-up rows (1 and 2) are nonzero
k.weight.grad

tensor([[0., 0., 0.],
        [1., 1., 1.],
        [1., 1., 1.],
        [0., 0., 0.],
        [0., 0., 0.]])