# 0. Import required libraries


In [None]:
import re
import torch
import pandas as pd
from typing import Union
from transformers import AutoTokenizer, AutoModelForSequenceClassification

______________
# 1. Define Sentiment Analysis Engine

In [None]:
class SentimentAnalysisEngine:
  """
  Wrap a Huggingface tokenizer and sequence-classification model behind a
  single `predict` call that returns one of "Negative", "Neutral", "Positive".
  """

  # Matches any run of whitespace (spaces, tabs, newlines); compiled once
  # because parse_df applies it to every cell of the dataframe.
  _WHITESPACE = re.compile(r"\s+")

  def __init__(self, tokenizer_pretrained: str = None, model_pretrained: str = None) -> None:
    """
    Initialize Sentiment Analysis Engine object.
    Args:
      tokenizer_pretrained: Huggingface pretrained identifier for tokenization (AutoTokenizer) object.
        Passed unchanged to `AutoTokenizer.from_pretrained`.
      model_pretrained: Huggingface pretrained identifier for model (AutoModelForSequenceClassification) object.
        Passed unchanged to `AutoModelForSequenceClassification.from_pretrained`.
    """
    super().__init__()
    # predict() indexes this list with the argmax of the model logits, so the
    # order has to line up with the model's label ids.
    # NOTE(review): confirm against the chosen checkpoint's id2label mapping.
    self.classes = ["Negative", "Neutral", "Positive"]
    self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_pretrained)
    self.model = AutoModelForSequenceClassification.from_pretrained(model_pretrained)

  def parse_df(self, df: pd.DataFrame) -> str:
    """
    Parse dataframe object into plain text.
    Every cell becomes a "<column>: <value>" entry followed by "\\n\\n ";
    entries are emitted row by row, left to right. Whitespace runs inside
    column names and values are collapsed to a single space and trimmed.
    Column labels and values that are not strings (numbers, NaN, dates, ...)
    are converted with `str()` first instead of raising a TypeError.
    Args:
      df: Dataframe object to be converted.
    Returns:
      A plain text string (empty when the dataframe has no rows or columns).
    """
    collapse = self._WHITESPACE
    entries = []

    # Iterate over the rows of the dataframe
    for _, row in df.iterrows():
      # Iterate over the columns to get key-value pairs
      for column, value in row.items():
        section = collapse.sub(" ", str(column)).strip()
        content = collapse.sub(" ", str(value)).strip()
        entries.append(f"{section}: {content}\n\n ")

    # Join once at the end rather than growing a string inside the loop.
    return "".join(entries)

  def predict(self, data: Union[str, pd.DataFrame]) -> str:
    """
    Predict the sentiment score for the input data. The sentiment score is one of Negative, Neutral, and Positive.
    Currently, the predict supports only one data instance at a time. Future support for batch input is yet to be implemented.
    Args:
      data: Input data of either plain text or dataframe input.
    Returns:
      A string showing the sentiment class of either Negative, Neutral, or Positive.
    """
    # Parse dataframe to plain text if not in plain text already
    text = data if isinstance(data, str) else self.parse_df(data)
    # Tokenize. truncation=True asks the tokenizer to cut the text to its
    # maximum length so long articles do not overflow the model input.
    encoded = self.tokenizer(text, return_tensors="pt", truncation=True)
    # Prediction. Forward every tensor the tokenizer produced (not only
    # input_ids) and skip autograd bookkeeping, which inference does not need.
    with torch.no_grad():
      output = self.model(**encoded)

    # Single instance in, so the argmax over the class axis has one element;
    # .item() turns it into a plain int usable as a list index.
    label_id = int(torch.argmax(output.logits, dim=-1).item())
    return self.classes[label_id]

# Build the shared analyzer; tokenizer and model are loaded from the same
# Huggingface checkpoint identifier.
checkpoint = "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"
sentiment_analyzer = SentimentAnalysisEngine(
    tokenizer_pretrained=checkpoint,
    model_pretrained=checkpoint,
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/333 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/933 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/328M [00:00<?, ?B/s]

______________
# 2. Usage


Example usage with plain text input

In [None]:
# Plain strings go to the model as-is; the cell echoes the returned label.
text = "This stock is going to crash very soon!"
sentiment_analyzer.predict(data=text)

'Negative'

Example usage with dataframe input

In [None]:
# One-row dataframe describing a news article; predict() flattens it to
# "column: value" text before classification.
data1 = pd.DataFrame([
    {
        "Title": "Retail Giant Faces Declining Sales and Rising Costs Amid Economic Slowdown",
        "Content": """
        Today, one of America’s largest retail chains, RetailX, reported a sharp drop in quarterly sales as inflationary pressures and changing consumer spending habits continue to impact its bottom line. The company’s Q3 results showed a 15% decline in revenue compared to the same period last year, significantly missing Wall Street expectations.
        Executives pointed to rising costs in logistics and inventory, coupled with weaker demand across key product categories, particularly in non-essential goods. The disappointing performance has prompted RetailX to cut its full-year revenue guidance by 8%, further unsettling investors.
        The retail sector has been broadly affected by ongoing supply chain disruptions and inflation, which has driven up costs while reducing consumer purchasing power. Analysts warn that if these trends persist, RetailX and similar retailers could see further financial strain, leading to potential store closures or restructuring efforts.
        """,
        "Date": "11/11/2024",
    },
])
sentiment_analyzer.predict(data=data1)

'Negative'