In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

class SimpleTextClassifier(nn.Module):
    """Bag-of-embeddings classifier: embed tokens, average them, apply a 2-layer MLP.

    Args:
        vocab_size: Number of rows in the embedding table (valid token ids are
            ``0 .. vocab_size - 1``).
        embedding_dim: Width of each embedding vector.
        hidden_dim: Number of units in the hidden fully connected layer.
        output_dim: Number of output units (each passed through a sigmoid).
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
        super().__init__()

        # Token id -> dense vector lookup table
        self.embedding = nn.Embedding(vocab_size, embedding_dim)

        # Two fully connected layers; the weight shapes are printed as a
        # construction-time sanity check (nn.Linear stores weights as (out, in)).
        self.fc1 = nn.Linear(embedding_dim, hidden_dim)
        print(self.fc1.weight.shape)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        print(self.fc2.weight.shape)

    def forward(self, x):
        """Score a batch of token-id sequences.

        Args:
            x: Integer tensor of token ids with shape ``(batch, seq_len)``.

        Returns:
            Tensor of shape ``(batch, output_dim)`` with values in ``(0, 1)``.
        """
        # (batch, seq_len) -> (batch, seq_len, embedding_dim)
        x = self.embedding(x)

        # Average over the sequence axis -> (batch, embedding_dim)
        x = torch.mean(x, dim=1)

        # The pooled tensor already has the layout nn.Linear expects, so it is
        # fed in as-is. A hard-coded reshape(1, 100) here would only work for a
        # single sequence with embedding_dim == 100.
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        print(f"x shape: {x.shape}")

        return torch.sigmoid(x)

# Example usage: build a small classifier and score one dummy sentence.
vocab_size = 10000    # number of distinct token ids the embedding table holds
embedding_dim = 100   # width of each embedding vector
hidden_dim = 64       # units in the hidden fully connected layer
output_dim = 1        # a single sigmoid output for binary sentiment classification

model = SimpleTextClassifier(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
)

# Seven arbitrary word indices standing in for a tokenised sentence
input_sequence = torch.tensor([0, 2, 5, 8, 9, 42, 128], dtype=torch.long)

# Prepend a batch axis (shape (1, 7)) and run the forward pass
output = model(input_sequence[None, :])

print("Sentiment score:", output.item())


torch.Size([64, 100])
torch.Size([1, 64])
x shape: torch.Size([1, 1])
Sentiment score: 0.4810532331466675


In [3]:
# Keep a direct handle on the classifier's embedding layer so it can be inspected on its own
embedding = model.embedding

In [9]:
# Look up seven token ids without a batch axis: the result has one embedding row per id
embedding(torch.tensor([0, 2, 5, 8, 9, 42, 128], dtype=torch.long)).shape

torch.Size([7, 100])

In [31]:
# element_size() is the per-element size in bytes; multiply by 8 to get the width in bits
8 * embedding.weight.element_size()

32