In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn import TransformerEncoder, TransformerEncoderLayer

# Creating a transformer model

At PyBooks, the recommendation engine you're working on needs more refined capabilities to understand the sentiments of user reviews. You believe that using transformers, a state-of-the-art architecture, can help achieve this. You decide to build a transformer model that can encode the sentiments in the reviews to kickstart the project.

The input data contains sentences such as : "I love this product", "This is terrible", "Could be better" … and their respective binary sentiment labels such as : 1, 0, 0, ...

The input data is split and converted to embeddings in the following variables: train_sentences, train_labels ,test_sentences,test_labels,token_embeddings

* Initialize the transformer encoder.
* Define the fully connected layer based on the number of sentiment classes.
* In the forward method, pass the input through the transformer encoder followed by the linear layer.

In [18]:
sentences = ["I love this product", "This is terrible", "Could be better", "This is the best",
             "Absolutely fantastic", "Not worth the money", "I am very satisfied", "Disappointed",
             "Exceeded my expectations", "Will not buy again", "Highly recommended", "Terrible experience",
             "Pretty good", "Not as described", "Works as expected", "Very bad", "Best purchase ever",
             "Not happy", "Awesome", "Regret buying this", "Satisfactory", "Amazing quality", "Bad quality",
             "Very useful", "Useless", "Happy with my purchase", "Total waste of money", "Excellent",
             "Would buy again", "Cheap and unreliable", "Great value", "Poor performance", "Love it",
             "Would not recommend", "Decent", "Waste of time", "Superb", "Not worth it", "I am impressed",
             "Very disappointing", "Good value for money", "Horrible", "Pleasantly surprised", "Awful",
             "Satisfied", "Worst purchase", "Great product", "Not as expected", "Top quality", "Terrible service",
             "Happy with it"]

labels = [1, 0, 0, 1, 
          1, 0, 1, 0, 
          1, 0, 1, 0, 
          1, 0, 1, 0, 
          1, 0, 1, 0, 
          1, 1, 0, 1, 
          0, 1, 0, 1, 
          1, 0, 1, 0, 
          1, 0, 1, 0, 
          1, 1, 0, 1, 
          0, 1, 0, 1, 
          1, 0, 1, 0, 
          1, 0, 1, 0, 
          1, 0, 1, 0, 
          1, 1]

train_sentences = sentences[:40]
train_labels = labels[:40]
test_sentences = sentences[40:]
test_labels = labels[40:]

# Example token embeddings with a size of 512
embedding_dim = 512
token_embeddings = {word: torch.randn(embedding_dim) for word in {
    "I", "love", "this", "product", "is", "terrible", "Could", "be", "better", 
    "the", "best", "Absolutely", "fantastic", "Not", "worth", "money", "am", 
    "very", "satisfied", "Disappointed", "Exceeded", "my", "expectations", 
    "Will", "not", "buy", "again", "Highly", "recommended", "experience", 
    "Pretty", "good", "as", "described", "Works", "expected", "bad", "Best", 
    "purchase", "ever", "happy", "Awesome", "Regret", "buying", "Satisfactory", 
    "Amazing", "quality", "Bad", "Very", "useful", "Useless", "Happy", "with", 
    "Total", "waste", "Excellent", "Would", "Cheap", "and", "unreliable", 
    "Great", "value", "Poor", "performance", "Love", "it", "recommend", "Decent", 
    "Waste", "time", "Superb", "worth", "impressed", "disappointing", "Good", 
    "for", "Horrible", "Pleasantly", "surprised", "Awful", "Satisfied", "Worst", 
    "service", "Top"
}}

In [24]:
# Ensure all tokens in sentences are in token_embeddings
all_tokens = set(token for sentence in sentences for token in sentence.split())
embedding_dim = 512

# Update token_embeddings to include all tokens
for token in all_tokens:
    if token not in token_embeddings:
        token_embeddings[token] = torch.randn(embedding_dim)

In [19]:
class TransformerEncoder(nn.Module):
    def __init__(self, embed_size, heads, num_layers, dropout):
        super(TransformerEncoder, self).__init__()
        # Initialize the encoder 
        self.encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=embed_size, nhead=heads),
            num_layers=num_layers)
        # Define the fully connected layer
        self.fc = nn.Linear(embed_size, 2)

    def forward(self, x):
        # Pass the input through the transformer encoder 
        x = self.encoder(x)
        x = x.mean(dim=1) 
        return self.fc(x)

model = TransformerEncoder(embed_size=512, heads=8, num_layers=3, dropout=0.5)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

You've successfully created a Transformer model for sentiment analysis. With this architecture, you can encode and understand the nuances of reviews more effectively. Let's move on to training this model.

In [20]:
train_sentences = ["I love this product", "This is terrible", "Could be better", "This is the best",
                   "Absolutely fantastic", "Not worth the money", "I am very satisfied", "Disappointed",
                   "Exceeded my expectations", "Will not buy again", "Highly recommended", "Terrible experience",
                   "Pretty good", "Not as described", "Works as expected", "Very bad", "Best purchase ever",
                   "Not happy", "Awesome", "Regret buying this", "Satisfactory", "Amazing quality", "Bad quality",
                   "Very useful", "Useless", "Happy with my purchase", "Total waste of money", "Excellent",
                   "Would buy again", "Cheap and unreliable", "Great value", "Poor performance", "Love it",
                   "Would not recommend", "Decent", "Waste of time", "Superb", "Not worth it", "I am impressed",
                   "Very disappointing", "Good value for money"]
train_labels = [1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0,
                1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]

# Training and testing the Transformer 

With the TransformerEncoder model in place, the next step at PyBooks is to train the model on sample reviews and evaluate its performance. Training on these sample reviews will help PyBooks understand the sentiment trends in their vast repository. By achieving a well-performing model, PyBooks can then automate sentiment analysis, ensuring readers get insightful recommendations and feedback.


The model instance of the TransformerEncoder class, token_embeddings, and the train_sentences, train_labels ,test_sentences,test_labels are preloaded for you.

* In the training loop, split the sentences into tokens and stack the embeddings.
* Zero the gradients and perform a backward pass.
* In the predict function, deactivate the gradient computations then get the sentiment prediction.

In [28]:
# Training loop
for epoch in range(5):
    for sentence, label in zip(train_sentences, train_labels):
        tokens = sentence.split()
        data = torch.stack([token_embeddings[token] for token in tokens], dim=0).unsqueeze(0)
        output = model(data)
        loss = criterion(output, torch.tensor([label]))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
     
    print(f"Epoch {epoch} , Loss: {loss.item()}")

Epoch 0 , Loss: 0.5028645396232605
Epoch 1 , Loss: 0.7015641927719116
Epoch 2 , Loss: 0.8257874250411987
Epoch 3 , Loss: 1.4521418809890747
Epoch 4 , Loss: 1.272108793258667


In [37]:
# Define accuracy calculation function
def calculate_accuracy(predictions, labels):
    preds = torch.argmax(predictions, dim=1)
    correct = (preds == labels).float()
    return correct.sum() / len(correct)

# Training loop
for epoch in range(10):  # Increase the number of epochs
    model.train()
    epoch_loss = 0
    epoch_acc = 0
    for sentence, label in zip(train_sentences, train_labels):
        tokens = sentence.split()
        data = torch.stack([token_embeddings.get(token, torch.rand(512)) for token in tokens], dim=0).unsqueeze(0)
        output = model(data)
        loss = criterion(output, torch.tensor([label]))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        epoch_acc += calculate_accuracy(output, torch.tensor([label])).item()
    print(f"Epoch {epoch}, Loss: {epoch_loss / len(train_sentences)}, Accuracy: {epoch_acc / len(train_sentences)}")

Epoch 0, Loss: 0.6464293909872451, Accuracy: 0.6097560975609756
Epoch 1, Loss: 0.6323856858582031, Accuracy: 0.6097560975609756
Epoch 2, Loss: 0.6254274499852482, Accuracy: 0.6097560975609756
Epoch 3, Loss: 0.6380398166252346, Accuracy: 0.6097560975609756
Epoch 4, Loss: 0.6281913168183187, Accuracy: 0.6097560975609756
Epoch 5, Loss: 0.6316367558589796, Accuracy: 0.6341463414634146
Epoch 6, Loss: 0.6271933113656393, Accuracy: 0.6097560975609756
Epoch 7, Loss: 0.6261856995704698, Accuracy: 0.6341463414634146
Epoch 8, Loss: 0.6340920427223531, Accuracy: 0.6341463414634146
Epoch 9, Loss: 0.6232872532635201, Accuracy: 0.6097560975609756


In [39]:
def predict(sentence):
    model.eval()
    with torch.no_grad():
        tokens = sentence.split()
        # Ensure each token embedding has the correct shape
        data = torch.stack([token_embeddings.get(token, torch.rand(512)) for token in tokens], dim=0).unsqueeze(0)
        output = model(data)
        predicted = torch.argmax(output, dim=1)
        return "Positive" if predicted.item() == 1 else "Negative"

sample_sentence = "This product is  good"
print(f"'{sample_sentence}' is {predict(sample_sentence)}")


'This product is  good' is Positive
