In [32]:
"""
DistilBERT: a distilled version of BERT: smaller, faster, cheaper and lighter - https://arxiv.org/abs/1910.01108
sst2: Stanford Sentiment Treebank (215,154 phrases) - https://huggingface.co/datasets/stanfordnlp/sst2
"""
try:
    import torch, transformers, datasets
except:
    %pip install transformers datasets torch

In [30]:
import torch
from transformers import AutoModelForSequenceClassification, DistilBertTokenizer


class MAI_SentimentAnalyzer:
    """
    Given a text, returns POSTIVE or NEGATIVE.
    
    Note: The probability values returned by this class may be extreme (close to 0 or 1), 
    indicating a high confidence in the predicted sentiment. This is due to the nature of the 
    fine-tuned DistilBERT model used, which has been trained on a specific dataset and may not 
    generalize well to other datasets or text styles.
    """
    def __init__(
        self, model_id="distilbert/distilbert-base-uncased-finetuned-sst-2-english"
    ):
        self.model_id = model_id
        self.model = AutoModelForSequenceClassification.from_pretrained(model_id)
        self.tokenizer = DistilBertTokenizer.from_pretrained(model_id)

    def get_prediction(self, review):
        """Given a review, return the predicted sentiment"""
        # Tokenize the review
        inputs = self.tokenizer(
            review,
            return_tensors="pt",
            max_length=512,
            padding="max_length",
            truncation=True,
        )

        # Predict
        with torch.no_grad():
            logits = self.model(**inputs).logits            
            probs = torch.softmax(logits, dim=-1)
            predicted_class_id = logits.argmax().item()
            predicted_prob = probs[0, predicted_class_id].item()
                
        # ['NEGATIVE', 'POSITIVE']
        return self.model.config.id2label[predicted_class_id], predicted_prob


model_sentiment = MAI_SentimentAnalyzer()

# Check for negative
review = "This is too long for me.."
sentiment, prob = model_sentiment.get_prediction(review)
print(f"Review: {review} --> Sentiment: {sentiment} {prob*100:.2f}")
assert sentiment == "NEGATIVE", "The prediction should be negative"

# Check for positive
review = "The movie was exciting!"
sentiment, prob = model_sentiment.get_prediction(review)
print(f"Review: {review} --> Sentiment: {sentiment} {prob*100:.2f}")
assert sentiment == "POSITIVE", "The prediction should be positive"

# Check for positive
review = "This food tastes slightly okay but would not be my favorite but not bad at all"
sentiment, prob = model_sentiment.get_prediction(review)
print(f"Review: {review} --> Sentiment: {sentiment} {prob*100:.2f}")
assert sentiment == "POSITIVE", "The prediction should be slightly positive"

Review: This is too long for me.. --> Sentiment: NEGATIVE 99.93
Review: The movie was exciting! --> Sentiment: POSITIVE 99.99
Review: This food tastes slightly okay but would not be my favorite but not bad at all --> Sentiment: POSITIVE 99.16


In [31]:
import random
from datasets import load_dataset

# Load the test split of the imdb dataset
dataset = load_dataset("imdb")["test"]

# Get the last few reviews
# reviews = dataset["text"][-10:]
# labels = dataset["label"][-10:]

# Get random reviews
random_indices = random.sample(range(len(dataset["text"])), 10)

# Select reviews and labels using the random indices
reviews = [dataset["text"][i] for i in random_indices]
labels = [dataset["label"][i] for i in random_indices]

# Predict Sentiment on above dataset
for review, label in zip(reviews, labels):
    sentiment = model_sentiment.get_prediction(review)

    print(f"Review: {review[:100]} \n... {review[-100:]}")
    print(f"Sentiment: {sentiment}\n")

Review: The aftermath of World War Two almost resulted in the death of Soviet cinema. In the early years of  
...  what the actor is unable to portray: his inner sensations. The cameraman must act with the actors."
Sentiment: ('POSITIVE', 0.9984467625617981)

Review: Give director Stanley Tong of Jackie Chan's Super Cop and Rumble in the Bronx, and what do you get?  
... and it should be recommended to young children because they will probably think that its very funny.
Sentiment: ('NEGATIVE', 0.9837911128997803)

Review: <br /><br />Well, great costumes and a wonderful `feel' for Pre WWII Italy. But what happened here?  
... young-things-in- pretty-clothes chick flick!<br /><br />"Tea With Mussolini" Gone Amuck!<br /><br />
Sentiment: ('NEGATIVE', 0.9988588094711304)

Review: One of the best war films I have ever seen, if not the best. It is very hard to talk about such film 
...  a soldier in the worst nightmare of warfare in human history and turning point of WWII: Stalingrad.
Senti