<a href="https://colab.research.google.com/github/mervegb/deep-learning/blob/main/text_classification_BoW.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
import torch
import torch.nn as nn
import torch.optim as optim

In [55]:
# Token -> column index in the bag-of-words vector; the position of each
# token in the tuple below is its index.
vocab = {
    token: index
    for index, token in enumerate(('love', 'programming', 'hate', 'bugs', 'I'))
}

# Labelled training examples as (sentence, sentiment label) pairs.
data = [
    ('I love programming', 'positive'),
    ('I hate bugs', 'negative'),
]

In [56]:
# Create a bag-of-words (BoW) count vector for a given sentence and vocabulary
def make_bow_vector(sentence, vocab):
    """Encode ``sentence`` as a bag-of-words count vector.

    Args:
        sentence: Text to encode. Tokens are separated by any run of
            whitespace (spaces, tabs, newlines).
        vocab: Mapping from token to its column index in the vector.

    Returns:
        A tensor of shape ``(1, len(vocab))`` in which the entry at
        ``vocab[token]`` counts how often ``token`` occurs in ``sentence``.
        Tokens that are not in ``vocab`` are ignored.
    """
    vec = torch.zeros(len(vocab))
    # split() without an argument splits on any whitespace and drops empty
    # tokens, so tabs, newlines and repeated spaces do not hide known words.
    for word in sentence.split():
        index = vocab.get(word)
        # Compare against None explicitly: index 0 is a valid column.
        if index is not None:
            vec[index] += 1
    # Add a leading batch dimension of size 1.
    return vec.view(1, -1)

#map label to a number
def make_target(label):
    """Convert a sentiment label into a class-index tensor.

    Args:
        label: Sentiment label string.

    Returns:
        A ``torch.LongTensor`` with one element: 1 when ``label`` equals
        ``'positive'``, 0 for any other value.
    """
    if label == 'positive':
        class_index = 1
    else:
        class_index = 0
    return torch.LongTensor([class_index])

In [57]:
#neural network definition
class BoWClassifier(nn.Module):
    """Linear classifier over bag-of-words feature vectors."""

    def __init__(self, vocab_size, num_labels):
        """Create the single linear layer.

        Args:
            vocab_size: Number of input features (one per vocabulary entry).
            num_labels: Number of output scores (one per class).
        """
        super().__init__()
        self.linear = nn.Linear(vocab_size, num_labels)

    def forward(self, x):
        """Return the linear layer's output for ``x``.

        No softmax or other normalisation is applied here.
        """
        scores = self.linear(x)
        return scores

In [58]:
# Seed torch's global RNG before building the model so the initial weights,
# and therefore the training run, are the same after Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

NUM_LABELS = 2        # two output classes
LEARNING_RATE = 0.1   # SGD step size

# One input feature per vocabulary entry, one output score per class.
model = BoWClassifier(len(vocab), NUM_LABELS)  # Initialize model

# CrossEntropyLoss takes the model's raw scores and an integer class index.
loss_function = nn.CrossEntropyLoss()  # Initialize loss

optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)  # Initialize optimizer

In [67]:
# Training Loop
# NOTE: `model` and `optimizer` are created in an earlier cell, so re-running
# this cell continues training from the current weights. Re-run the model
# initialisation cell first to start from scratch.
NUM_EPOCHS = 100

# The feature vectors and targets do not change between epochs, so build
# them once instead of on every pass over the data.
train_examples = [
    (make_bow_vector(sentence, vocab), make_target(label))
    for sentence, label in data
]

for epoch in range(NUM_EPOCHS):
    for bow_vec, target in train_examples:
        # Clear gradients left over from the previous step.
        model.zero_grad()

        output = model(bow_vec)

        loss = loss_function(output, target)
        loss.backward()
        optimizer.step()

# After training, use the model to classify new sentences
def classify_sentence(sentence, model, vocab):
    """Predict the sentiment label of ``sentence``.

    Args:
        sentence: Text to classify.
        vocab: Token-to-index mapping passed to ``make_bow_vector``.
        model: Callable that maps the bag-of-words vector to one row of
            per-class scores.

    Returns:
        ``'positive'`` when class index 1 has the highest score,
        otherwise ``'negative'``.
    """
    # Inference only: no gradients need to be tracked.
    with torch.no_grad():
        features = make_bow_vector(sentence, vocab)
        scores = model(features)
        predicted_class = scores.max(dim=1).indices.item()

    if predicted_class == 1:
        return 'positive'
    return 'negative'

# Test the classify_sentence function
new_sentence1 = "I hate the cold"
new_sentence2 = "I love cold"

# Classify each example sentence and report the predicted label.
for new_sentence in (new_sentence1, new_sentence2):
    prediction = classify_sentence(new_sentence, model, vocab)
    print(f"The sentence '{new_sentence}' is {prediction}.")

# Limitations of this model:
# - Any word outside the training vocabulary is ignored, so the model only recognizes words that were in its vocabulary.
# - BoW models don't consider the order of words.

The sentence 'I hate the cold' is negative.
The sentence 'I love cold' is positive.
