<a href="https://colab.research.google.com/github/mervegb/deep-learning/blob/main/bidirectional_rnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim

In [92]:
vocab = {'hello': 0, 'world': 1, 'good': 2, 'bad': 3, 'day': 4}
vocab_size = len(vocab)
print(vocab_size)

5


In [93]:
# Sentences: ['hello world', 'good day', 'bad day']
X_data = [[0, 1], [2, 4], [3, 4], [0, 3], [2, 3], [0, 1], [2, 4]]
y_data = [1, 1, 0, 0, 0, 1, 1]  # Labels: [Positive, Positive, Negative]

In [94]:
#Convert to pytorch tensors
X_data = torch.tensor(X_data, dtype=torch.long)
y_data = torch.tensor(y_data, dtype=torch.long)

print(X_data.shape)
print(y_data.shape)

torch.Size([7, 2])
torch.Size([7])


In [106]:
#Define model
class BiLSTMClassifier(nn.Module):
  def __init__(self, vocab_size, embed_dim, hidden_dim, output_dim):
    super(BiLSTMClassifier, self).__init__()

    #convert each word in to a fixed-size vector numbers,making it easier for the model to understand and process text
    self.embedding = nn.Embedding(vocab_size, embed_dim)

    #defines lstm layer for sequence processing
    # bidirectional, so it will have two hidden layers for each time step (one for the forward pass, one for the backward pass)
    self.lstm = nn.LSTM(embed_dim, hidden_dim, bidirectional=True)

    #maps the output of the LSTM to the output classes
    #since lstm is bidirectional, the dimensionality of the hidden state for each time step will be hidden_dim *2
    self.hidden2out = nn.Linear(hidden_dim * 2, output_dim)

    #converts it to log-probabilities
    #useful when you are dealing with classification task
    self.softmax = nn.LogSoftmax(dim=1)


  def forward(self, x):
    print("Input shape:", x.shape)
    embeds = self.embedding(x)
    print("Embeds shape:", embeds.shape)

    # Permute the tensor dimensions to match LSTM's expected input
    embeds = embeds.permute(1, 0, 2)

    lstm_out, _ = self.lstm(embeds)
    print("LSTM output shape:", lstm_out.shape)

    # Average along the sequence dimension
    lstm_out = torch.mean(lstm_out, dim=0)
    print("Averaged LSTM output shape:", lstm_out.shape)

    output = self.hidden2out(lstm_out)
    output = self.softmax(output)
    print("Final output shape:", output.shape)

    return output



# Embedding Layer
Imagine you're trying to teach a child different shapes like squares, circles, and triangles. Using one-hot encoding would be like showing the child every possible variation of a square, circle, or triangle you can think of. It's inefficient and overwhelming.

On the other hand, word embeddings would be to teaching the child the 'concept' of a square, circle, or triangle. Once the child understands the concept, they can easily identify these shapes in different sizes or orientations. In a similar way, word embeddings capture the essence or 'concept' of a word, making it easier for machine learning models to understand text.

In [107]:
embed_dim = 5
hidden_dim = 6
output_dim = 2

model = BiLSTMClassifier(vocab_size, embed_dim, hidden_dim,output_dim)
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

In [110]:
#Training Loop
epochs = 100

for epoch in range(epochs):
  optimizer.zero_grad()

  output = model(X_data)

  loss = loss_function(output, y_data)
  loss.backward()

  optimizer.step()

  if (epoch + 1) % 10 == 0:
    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

Input shape: torch.Size([7, 2])
Embeds shape: torch.Size([7, 2, 5])
LSTM output shape: torch.Size([2, 7, 12])
Averaged LSTM output shape: torch.Size([7, 12])
Final output shape: torch.Size([7, 2])
Input shape: torch.Size([7, 2])
Embeds shape: torch.Size([7, 2, 5])
LSTM output shape: torch.Size([2, 7, 12])
Averaged LSTM output shape: torch.Size([7, 12])
Final output shape: torch.Size([7, 2])
Input shape: torch.Size([7, 2])
Embeds shape: torch.Size([7, 2, 5])
LSTM output shape: torch.Size([2, 7, 12])
Averaged LSTM output shape: torch.Size([7, 12])
Final output shape: torch.Size([7, 2])
Input shape: torch.Size([7, 2])
Embeds shape: torch.Size([7, 2, 5])
LSTM output shape: torch.Size([2, 7, 12])
Averaged LSTM output shape: torch.Size([7, 12])
Final output shape: torch.Size([7, 2])
Input shape: torch.Size([7, 2])
Embeds shape: torch.Size([7, 2, 5])
LSTM output shape: torch.Size([2, 7, 12])
Averaged LSTM output shape: torch.Size([7, 12])
Final output shape: torch.Size([7, 2])
Input shape: to

In [111]:
# Evaluation
X_eval = [[0, 3], [4, 1]]  # Corresponding to ['hello bad', 'good bad', 'day world']
y_eval = [0,  1]  # Labels: [Negative, Negative, Positive]

X_eval = torch.tensor(X_eval, dtype=torch.long)
y_eval = torch.tensor(y_eval, dtype=torch.long)

def evaluate_model(model, X_data, y_data):
    with torch.no_grad():  # No need to calculate gradients for evaluation
        model.eval()  # Set the model to evaluation mode

        # Forward pass
        output = model(X_data)

        # Calculate loss
        loss = loss_function(output, y_data)

        # Get predictions
        _, predicted = torch.max(output, 1)

        # Calculate accuracy
        correct = (predicted == y_data).sum().item()
        total = y_data.size(0)
        accuracy = correct / total * 100

        print(f"Loss: {loss.item()}")
        print(f"Accuracy: {accuracy}%")

# Evaluate the model on training data (just to compare)
print("Evaluation on training data:")
evaluate_model(model, X_data, y_data)

# Evaluate the model on evaluation data
print("\nEvaluation on evaluation data:")
evaluate_model(model, X_eval, y_eval)

Evaluation on training data:
Input shape: torch.Size([7, 2])
Embeds shape: torch.Size([7, 2, 5])
LSTM output shape: torch.Size([2, 7, 12])
Averaged LSTM output shape: torch.Size([7, 12])
Final output shape: torch.Size([7, 2])
Loss: 0.24712011218070984
Accuracy: 100.0%

Evaluation on evaluation data:
Input shape: torch.Size([2, 2])
Embeds shape: torch.Size([2, 2, 5])
LSTM output shape: torch.Size([2, 2, 12])
Averaged LSTM output shape: torch.Size([2, 12])
Final output shape: torch.Size([2, 2])
Loss: 0.17033034563064575
Accuracy: 100.0%
