In [10]:
from nltk.sentiment import SentimentIntensityAnalyzer
from textblob import TextBlob

class LexiconSentimentAnalyzer:
    """Score text sentiment with VADER, TextBlob and a small financial lexicon.

    Three independent scorers are exposed individually and combined by
    ``analyze`` into a simple unweighted ensemble.

    Attributes:
        vader: An NLTK ``SentimentIntensityAnalyzer`` instance.
        custom_lexicon: Mapping of lowercase finance terms to a weight in
            [-1, 1] (positive = bullish, negative = bearish, 0 = neutral).
    """

    def __init__(self):
        self.vader = SentimentIntensityAnalyzer()
        # Expanded financial lexicon (>20 words). Keys are lowercase.
        self.custom_lexicon = {
            # positive
            "beat": 1.0, "beats": 1.0, "surge": 0.9, "soar": 1.0, "rally": 0.9,
            "growth": 0.9, "profit": 1.0, "upgrade": 0.8, "outperform": 0.9,
            "strong": 0.8, "gain": 0.8, "bullish": 0.9,
            # negative
            "miss": -1.0, "missed": -1.0, "decline": -0.9, "drop": -0.8,
            "fall": -0.8, "weak": -0.8, "loss": -1.0, "downgrade": -0.9,
            "underperform": -0.9, "plunge": -1.0, "slump": -0.9,
            # neutral/other
            "stable": 0.0, "unchanged": 0.0, "mixed": 0.0,
        }

    @staticmethod
    def _label(score, threshold):
        """Map a signed score to "positive", "negative" or "neutral".

        Scores strictly above ``threshold`` are positive, strictly below
        ``-threshold`` are negative, and everything else is neutral.
        """
        if score > threshold:
            return "positive"
        if score < -threshold:
            return "negative"
        return "neutral"

    def vader_sentiment(self, text: str) -> dict:
        """Return VADER's compound score for ``text`` plus a label.

        Returns:
            ``{"compound": float rounded to 3 places, "label": str}``. The
            label uses a +/-0.05 cut-off on the unrounded compound score.
        """
        compound = self.vader.polarity_scores(text)["compound"]
        return {
            "compound": round(compound, 3),
            "label": self._label(compound, 0.05),
        }

    def textblob_sentiment(self, text: str) -> dict:
        """Return TextBlob's polarity for ``text`` plus a label.

        Returns:
            ``{"polarity": float rounded to 3 places, "label": str}``. The
            label uses a +/-0.1 cut-off on the unrounded polarity.
        """
        polarity = TextBlob(text).sentiment.polarity  # [-1, 1]
        return {
            "polarity": round(polarity, 3),
            "label": self._label(polarity, 0.1),
        }

    def custom_lexicon_sentiment(self, tokens) -> float:
        """Return the mean lexicon weight of the tokens found in the lexicon.

        Args:
            tokens: Iterable of word tokens, or ``None``. Each token is looked
                up as given first; string tokens that do not match are then
                retried in lowercase, so "Beat" and "BEAT" score like "beat".

        Returns:
            The average weight of all matched tokens, rounded to 3 places, or
            ``0.0`` when ``tokens`` is ``None``/empty or nothing matches.
            Because every weight lies in [-1, 1], so does the mean. Neutral
            entries (weight 0.0) count as matches and pull the mean towards 0.
        """
        if tokens is None:
            return 0.0

        lexicon = self.custom_lexicon
        matched = []
        for token in tokens:
            weight = lexicon.get(token)
            # Exact match wins so custom mixed-case keys keep working; the
            # lowercase retry catches capitalised or upper-cased tokens.
            if weight is None and isinstance(token, str):
                weight = lexicon.get(token.lower())
            if weight is not None:
                matched.append(weight)

        if not matched:
            return 0.0
        return round(sum(matched) / len(matched), 3)

    def analyze(self, text: str, tokens) -> dict:
        """Run all three scorers and combine them into one result.

        Args:
            text: Raw text passed to VADER and TextBlob.
            tokens: Tokens of ``text`` passed to ``custom_lexicon_sentiment``.

        Returns:
            A dict with keys ``text``, ``vader``, ``textblob``,
            ``custom_score``, ``ensemble_score`` and ``confidence``.
        """
        v = self.vader_sentiment(text)
        tb = self.textblob_sentiment(text)
        custom = self.custom_lexicon_sentiment(tokens)

        # Ensemble: unweighted mean of the three reported (already rounded)
        # values. VADER compound, TextBlob polarity and the custom score all
        # lie in [-1, 1], so they can be averaged directly.
        # NOTE(review): the original author states this mirrors the README
        # example - confirm against the README.
        ensemble_score = round((v["compound"] + tb["polarity"] + custom) / 3.0, 3)

        # Confidence: magnitude of the strongest individual component.
        confidence = max(abs(v["compound"]), abs(tb["polarity"]), abs(custom))
        confidence = round(confidence, 3)

        return {
            "text": text,
            "vader": v,
            "textblob": tb,
            "custom_score": custom,
            "ensemble_score": ensemble_score,
            "confidence": confidence,
        }


In [11]:
# Demo: run the three-way sentiment ensemble on one sample headline.
lsa = LexiconSentimentAnalyzer()

# Raw text goes to VADER and TextBlob; the token list goes to the custom lexicon.
text = "Excellent earnings beat expectations"
# Presumably the output of an upstream tokenise/lowercase/lemmatise step
# ("expectations" -> "expectation") - confirm against the preprocessing code.
# Of these tokens only "beat" is a key in the custom lexicon.
tokens = ["excellent", "earnings", "beat", "expectation"]

# Prints the result dict returned by analyze().
print(lsa.analyze(text, tokens))


{'text': 'Excellent earnings beat expectations', 'vader': {'compound': 0.572, 'label': 'positive'}, 'textblob': {'polarity': 1.0, 'label': 'positive'}, 'custom_score': 1.0, 'ensemble_score': 0.857, 'confidence': 1.0}
