In [1]:
# Importing dependencies
import csv
import os
import random
import time
import urllib.request
from abc import ABC, abstractmethod
from typing import Tuple

import numpy as np
import requests
from scipy.special import softmax
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
from transformers import TFAutoModelForSequenceClassification

In [2]:
# Strategy Interface
# Define an abstract Strategy interface for sentiment analysis
class SentimentStrategy(ABC):
    """Common interface for interchangeable sentiment-analysis backends.

    Each concrete strategy hides one way of scoring a text behind a single
    ``analyze`` call, so callers can swap backends without changing code.
    """

    @abstractmethod
    def analyze(self, text: str) -> Tuple[str, float]:
        """Classify the sentiment of ``text``.

        Args:
            text: The raw text to classify.

        Returns:
            A ``(label, score)`` pair: the winning sentiment label and the
            score the backend assigned to it.
        """

In [3]:
# Local transformer model (RoBERTa - HuggingFace transformer model)
class TransformerSentimentStrategy(SentimentStrategy):
    """Sentiment strategy that runs a locally loaded classification model."""

    def __init__(self, model, tokenizer, labels):
        """Store the collaborators used by ``analyze``.

        Args:
            model: Callable model. ``analyze`` invokes it with the tokenizer
                output unpacked as keyword arguments and reads
                ``output[0][0]`` as the per-class raw scores.
            tokenizer: Callable that encodes a text; invoked with
                ``return_tensors='pt'``.
            labels: Sequence of label names indexed by class position
                (assumed to follow the model's class order — confirm against
                the mapping the labels were loaded from).
        """
        self.model = model
        self.tokenizer = tokenizer
        self.labels = labels

    def analyze(self, text: str) -> Tuple[str, float]:
        """Return the most probable label for ``text`` and its probability.

        Args:
            text: The raw text to classify.

        Returns:
            ``(label, score)`` where ``score`` is the softmax probability of
            the winning class rounded to 4 decimal places.
        """
        encoded_input = self.tokenizer(text, return_tensors='pt')  # Encode text into model input format
        output = self.model(**encoded_input)  # Run inference
        logits = output[0][0].detach().numpy()  # Raw scores of the first (only) sequence as numpy
        probabilities = softmax(logits)  # Raw scores -> probabilities
        top = int(np.argmax(probabilities))  # Index of the most probable class
        # Use the labels given to this instance rather than a module-level
        # ``labels`` name, so the class works with whatever mapping it was
        # constructed with.
        label = self.labels[top]
        score = float(np.round(float(probabilities[top]), 4))
        return label, score

In [4]:
# External API strategy (uses HuggingFace's hosted Inference API over HTTP)
class APISentimentStrategy(SentimentStrategy):
    """Sentiment strategy that delegates classification to an HTTP endpoint."""

    def __init__(self, hf_token, API_URL, headers, timeout=60):
        """Store the connection settings used by ``analyze``.

        Args:
            hf_token: API token; kept on the instance for reference only —
                the request itself authenticates through ``headers``.
            API_URL: Full URL the classification request is POSTed to.
            headers: HTTP headers sent with every request.
            timeout: Seconds to wait for the server before giving up.
                Without a timeout a stalled endpoint would block forever and
                a wrapping fallback strategy could never take over.
        """
        self.hf_token = hf_token
        self.API_URL = API_URL
        self.headers = headers
        self.timeout = timeout

    def analyze(self, text: str) -> Tuple[str, float]:
        """Classify ``text`` remotely and return the top ``(label, score)``.

        Args:
            text: The raw text to classify.

        Returns:
            The ``label`` and ``score`` of the highest-scoring entry in the
            first element of the JSON response.

        Raises:
            requests.RequestException: On connection problems, timeouts, or a
                non-2xx HTTP status.
        """
        payload = {"inputs": text, "options": {"wait_for_model": True}}
        response = requests.post(
            self.API_URL,
            headers=self.headers,
            json=payload,
            timeout=self.timeout,
        )
        # Fail with a clear HTTP error instead of tripping over the shape of
        # an error body further down.
        response.raise_for_status()
        sentiment_result = response.json()[0]  # Candidate list for the single input
        top_sentiment = max(sentiment_result, key=lambda entry: entry['score'])
        return top_sentiment['label'], top_sentiment['score']

In [5]:
# Circuit breaker pattern that falls back to a secondary strategy on failure
# The primary strategy is the external API
# The backup strategy is the local RoBERTa model
class CircuitBreakerStrategy(SentimentStrategy):
    """Try a primary strategy and fall back to a secondary one on failure.

    Once ``failure_count`` reaches ``failure_threshold`` the circuit is open
    and the primary strategy is no longer called. The counter is only reset
    by a successful primary call, so an open circuit stays open for the
    lifetime of the instance.
    """

    def __init__(self, primary_strategy, fallback_strategy, failure_threshold=1):
        """Configure the breaker.

        Args:
            primary_strategy: Strategy tried first while the circuit is closed.
            fallback_strategy: Strategy used when the primary fails or the
                circuit is open.
            failure_threshold: Number of consecutive primary failures after
                which the primary is skipped.
        """
        self.primary = primary_strategy
        self.fallback = fallback_strategy
        self.failure_threshold = failure_threshold
        self.failure_count = 0  # Consecutive primary failures seen so far

    def analyze(self, text: str) -> Tuple[str, float]:
        """Analyze ``text`` with the primary strategy, or the fallback.

        Args:
            text: The raw text to classify.

        Returns:
            Whatever the strategy that handled the call returned.

        Raises:
            Exception: Anything raised by the fallback strategy propagates.
        """
        if self.failure_count >= self.failure_threshold:
            # Circuit open: skip the primary entirely. This is not a new
            # primary failure, so the counter is left as it is.
            return self.fallback.analyze(text)

        try:
            result = self.primary.analyze(text)
        except Exception:
            # Only ordinary errors count as failures; KeyboardInterrupt and
            # SystemExit must still be able to stop the notebook.
            self.failure_count += 1
            return self.fallback.analyze(text)

        self.failure_count = 0  # A success clears the failure streak
        return result

In [6]:
# Template Method - defining the high-level steps of text analysis
class SentimentAnalyzer(ABC):
    """Template for a batch sentiment run: score every text, then report."""

    def analyze_text(self, texts):
        """Score each item of ``texts`` and hand the scores to the reporter.

        All texts are scored before anything is reported. Returns whatever
        ``aggregate_results`` returns.
        """
        scored = list(map(self.run_sentiment_model, texts))
        return self.aggregate_results(scored)

    @abstractmethod
    def run_sentiment_model(self, text):
        """Score a single text; subclasses choose the strategy to use."""

    def aggregate_results(self, sentiments):
        """Print one line per ``(sentiment, score)`` pair; returns ``None``."""
        for label, value in sentiments:
            print(f"Sentiment: {label}, Score: {round(value, 4)}")

In [7]:
# Final Analyzer using API with fallback to transformer
class CustomTextAnalyzer(SentimentAnalyzer):
    """Analyzer that prefers the remote API and falls back to the local model."""

    def __init__(self, tokenizer, model, labels, hf_token, API_URL, headers):
        """Wire the API strategy and the transformer strategy into a breaker."""
        remote = APISentimentStrategy(hf_token, API_URL, headers)
        local = TransformerSentimentStrategy(model, tokenizer, labels)
        self.strategy = CircuitBreakerStrategy(
            primary_strategy=remote,
            fallback_strategy=local,
        )

    def run_sentiment_model(self, text):
        """Score one text through the configured strategy."""
        return self.strategy.analyze(text)


In [8]:
# Define the task and load the pre-trained model and tokenizer
task = 'sentiment'
MODEL = f"cardiffnlp/twitter-roberta-base-{task}"  # Model name
tokenizer = AutoTokenizer.from_pretrained(MODEL)  # Loading tokenizer

# Loading sentiment labels from the tab-separated mapping file on GitHub
mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/{task}/mapping.txt"
with urllib.request.urlopen(mapping_link) as f:
    mapping_lines = f.read().decode('utf-8').split("\n")  # Reading and splitting by line
# Second column of every non-empty row is the label name
labels = [row[1] for row in csv.reader(mapping_lines, delimiter='\t') if len(row) > 1]

model = AutoModelForSequenceClassification.from_pretrained(MODEL)  # Loading actual sentiment classification model

# Optionally saving locally. The save directory has the same name as the hub
# id, so on a re-run `from_pretrained(MODEL)` resolves to this local folder.
# The tokenizer is saved alongside the model so that folder is complete and
# the cell can be run again.
model.save_pretrained(MODEL)
tokenizer.save_pretrained(MODEL)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/747 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

In [None]:
# API inferencing - Setting up the HuggingFace API access
model2 = "cardiffnlp/twitter-roberta-base-sentiment-latest"  # Updated version of the model
API_URL = "https://api-inference.huggingface.co/models/" + model2

# Read the token from the HF_TOKEN environment variable instead of writing it
# into the notebook, so it is never saved or shared with the file.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    headers = {"Authorization": f"Bearer {hf_token}"}  # Formatting the authorization header
else:
    # No token available: send no Authorization header rather than a bogus one.
    print("HF_TOKEN is not set; API requests will be sent without an Authorization header.")
    headers = {}

In [10]:
# Smoke test: three short sentences with clearly different tones
texts = [
    "I love this product!",
    "It’s okay, not great.",
    "Absolutely terrible service.",
]
# Build the analyzer once; the next cell reuses it
analyzer = CustomTextAnalyzer(
    tokenizer,
    model,
    labels,
    hf_token,
    API_URL,
    headers,
)
analyzer.analyze_text(texts)  # Prints one "Sentiment / Score" line per text

Sentiment: positive, Score: 0.9848
Sentiment: negative, Score: 0.564
Sentiment: negative, Score: 0.9185


In [11]:
# Second batch, scored with the analyzer built in the previous cell
texts = [
    "I love Ariana's new song!",
    "It’s okay, but can be better.",
]
analyzer.analyze_text(texts)  # Prints one "Sentiment / Score" line per text

Sentiment: positive, Score: 0.9905
Sentiment: positive, Score: 0.5517
