In [None]:
!pip install vader

In [None]:
import pandas as pd
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from transformers import pipeline



In [None]:
# Read the article table from disk into a DataFrame.
input_csv_path = "final_articles.csv"
df = pd.read_csv(input_csv_path)

In [None]:
# Collect every column whose lowercased name contains "newspaper_text".
matching_columns = [name for name in df.columns if "newspaper_text" in name.lower()]
if len(matching_columns) == 0:
    raise ValueError("No 'newspaper_text' column found in CSV.")

# Use the first match as the text column.
text_column = matching_columns[0]

# Missing values become empty strings and every entry is coerced to str, so
# the analyzers below always receive a string.
texts = df[text_column].fillna("").astype(str)



In [None]:
# Run TextBlob once per article and keep the resulting sentiment tuples.
blob_sentiments = [TextBlob(article_text).sentiment for article_text in texts]

# Split the tuples into the two numeric score lists.
polarity = [sentiment.polarity for sentiment in blob_sentiments]
subjectivity = [sentiment.subjectivity for sentiment in blob_sentiments]

# Label by the sign of the polarity: >0 POSITIVE, <0 NEGATIVE, exactly 0 NEUTRAL.
tb_sentiment = [
    "POSITIVE" if value > 0 else "NEGATIVE" if value < 0 else "NEUTRAL"
    for value in polarity
]



In [None]:
# Attach the TextBlob results to the DataFrame, one column per result list.
for column_name, column_values in (
    ('tb_polarity', polarity),
    ('tb_subjectivity', subjectivity),
    ('tb_sentiment', tb_sentiment),
):
    df[column_name] = column_values

# Create the VADER analyzer and the empty accumulators for its results.
vader_analyzer = SentimentIntensityAnalyzer()
vader_scores, vader_sentiment = [], []



In [None]:
# Score every article with VADER and label it from the compound score.
#
# The accumulators are (re)created here so that re-running this cell does not
# append a second copy of every result, which would make the column
# assignments below fail with a length mismatch.
vader_scores = []
vader_sentiment = []

for text in texts:
    # polarity_scores returns a dict of scores; 'compound' drives the label.
    score = vader_analyzer.polarity_scores(text)
    vader_scores.append(score)
    if score['compound'] >= 0.05:
        vader_sentiment.append("POSITIVE")
    elif score['compound'] <= -0.05:
        vader_sentiment.append("NEGATIVE")
    else:
        vader_sentiment.append("NEUTRAL")

# The full score dict is stored per row alongside the derived label.
df['vader_scores'] = vader_scores
df['vader_sentiment'] = vader_sentiment




In [None]:
# Classify every article with a Hugging Face sentiment model, attach the
# predicted label and its score to the DataFrame, then write the result to CSV.
hf_model_name = "cardiffnlp/twitter-roberta-base-sentiment-latest"

# truncation/max_length ask the tokenizer to cut inputs to at most 512 tokens.
hf_pipeline = pipeline("sentiment-analysis",
                       model=hf_model_name,
                       tokenizer=hf_model_name,
                       truncation=True, max_length=512)

hf_labels = []
hf_scores = []

batch_size = 16
for i in range(0, len(texts), batch_size):
    # .iloc makes the slice explicitly positional, independent of the index.
    batch_texts = texts.iloc[i:i+batch_size].tolist()
    results = hf_pipeline(batch_texts)
    for r in results:
        # With default settings each result is a single {'label', 'score'}
        # dict; when the pipeline is configured to return every class it is a
        # list of such dicts. Calling max() on the dict form would iterate its
        # string keys and raise TypeError, so handle both shapes.
        best = r if isinstance(r, dict) else max(r, key=lambda x: x['score'])
        hf_labels.append(best['label'])
        hf_scores.append(best['score'])

df['hf_sentiment_label'] = hf_labels
df['hf_sentiment_score'] = hf_scores

# Persist all sentiment columns without the DataFrame index.
df.to_csv("final_articles_sentiment.csv", index=False)

# Show the first rows as the cell output.
df.head()
