In [None]:
# --- INSTALL DEPENDENCIES ---
# Notebook shell magic: installs the libraries imported below (openpyxl is
# presumably pulled in as the engine pandas uses for the .xlsx input).
# FinBERT itself is not a pip package; its tokenizer and weights are fetched
# by the from_pretrained("ProsusAI/finbert") calls further down.
!pip install transformers torch pandas openpyxl tqdm

# --- IMPORT LIBRARIES ---
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.nn import functional as F
from tqdm import tqdm

# --- LOAD YOUR SCRAPED ARTICLES ---
scraped = pd.read_excel('/content/scraped_articles.xlsx')

# Fail fast on schema problems. 'Publish date' is not read until after the
# inference loop, so without this check a missing column would only raise
# once the whole (slow) FinBERT pass had already completed.
required_columns = ['Article_Text', 'Publish date']
missing_columns = [name for name in required_columns if name not in scraped.columns]
if missing_columns:
    raise KeyError(
        f"/content/scraped_articles.xlsx is missing required column(s): {missing_columns}"
    )

print(f"✅ Loaded {len(scraped)} articles")

# Identical texts always receive identical sentiment scores, so surface heavy
# duplication here instead of discovering it in the final output.
non_empty_count = int(scraped['Article_Text'].notna().sum())
distinct_count = int(scraped['Article_Text'].nunique(dropna=True))
if distinct_count < non_empty_count:
    print(
        f"⚠️ Only {distinct_count} distinct Article_Text value(s) among "
        f"{non_empty_count} non-empty articles - duplicated texts will get "
        f"identical scores; check the scraper if this is unexpected."
    )

display(scraped.head())

# --- LOAD FinBERT MODEL ---
# The tokenizer and the sequence-classification model are both loaded from the
# same checkpoint id, so it is spelled out once.
FINBERT_CHECKPOINT = "ProsusAI/finbert"

print("✅ Loading FinBERT model...")
tokenizer = AutoTokenizer.from_pretrained(FINBERT_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(FINBERT_CHECKPOINT)

# --- RUN FinBERT ON EACH ARTICLE ---
# One entry per row of scraped['Article_Text'], in row order. These four lists
# are attached to the DataFrame as columns afterwards.
sentiment_scores = []
positive_probs = []
neutral_probs = []
negative_probs = []

# Take the class positions from the checkpoint's own label map rather than
# hard-coding them, so that swapping in a checkpoint with a different label
# order does not silently mix up the columns. The fallbacks (0 / 1 / 2) are
# the positions this script originally assumed.
label_to_index = {
    str(label).lower(): int(index)
    for index, label in model.config.id2label.items()
}
positive_idx = label_to_index.get('positive', 0)
negative_idx = label_to_index.get('negative', 1)
neutral_idx = label_to_index.get('neutral', 2)

print("✅ Running FinBERT sentiment analysis on your articles...")
# Inference only: no_grad() keeps autograd from recording a graph for every
# forward pass, which the original then had to .detach() from.
with torch.no_grad():
    for text in tqdm(scraped['Article_Text']):
        # Missing or whitespace-only articles carry no signal: record zeros
        # for every column and move on.
        if pd.isnull(text) or not str(text).strip():
            sentiment_scores.append(0)
            positive_probs.append(0)
            neutral_probs.append(0)
            negative_probs.append(0)
            continue

        # str() guards against non-string cells (e.g. numbers read from Excel).
        # Anything beyond 512 tokens is truncated by the tokenizer.
        inputs = tokenizer(str(text), return_tensors="pt", truncation=True, max_length=512)
        outputs = model(**inputs)
        # Single-item batch -> take row 0 of the class probabilities
        probs = F.softmax(outputs.logits, dim=-1).numpy()[0]

        positive_probs.append(probs[positive_idx])  # Positive class
        negative_probs.append(probs[negative_idx])  # Negative class
        neutral_probs.append(probs[neutral_idx])    # Neutral class

        # Calculate sentiment_score: Positive - Negative
        sentiment_score = probs[positive_idx] - probs[negative_idx]
        sentiment_scores.append(sentiment_score)

# --- ADD RESULTS BACK TO DATAFRAME ---
# Each list holds one value per article in row order; the columns are added to
# `scraped` in the order listed here.
result_columns = {
    'positive_prob': positive_probs,
    'negative_prob': negative_probs,
    'neutral_prob': neutral_probs,
    'sentiment_score': sentiment_scores,
}
for column_name, column_values in result_columns.items():
    scraped[column_name] = column_values

# --- BUILD FINAL OUTPUT ---
# Two-column frame: the article date (taken from 'Publish date', parsed to
# datetime and exposed as 'date') alongside the per-article sentiment score.
final_sentiment = pd.DataFrame({
    'date': pd.to_datetime(scraped['Publish date']),
    'sentiment_score': scraped['sentiment_score'],
})

# --- SAVE FINAL FILE ---
output_path = '/content/final_sentiment_data.csv'
final_sentiment.to_csv(output_path, index=False)

print("\n✅ Final sentiment file created successfully!")
display(final_sentiment.head())

✅ Loaded 908 articles


Unnamed: 0,Unique ID (URI),URL,Publish date,Publish time,Sentiment,Source URI,Source title,Language,Article title,Article body (partial),Article_Text
0,8614999469,https://siouxcityjournal.com/news/nation-world...,2025-03-31,23:57:51,0.090196,siouxcityjournal.com,Sioux City Journal,eng,Democratic base's anger puts some party leader...,PHOENIX -- The Democratic base is angry. Not j...,The Social Security Administration has updated...
1,8615000847,https://wcfcourier.com/news/nation-world/gover...,2025-03-31,23:57:49,-0.105882,wcfcourier.com,Waterloo Cedar Falls Courier,eng,Democratic base's anger puts some party leader...,PHOENIX -- The Democratic base is angry. Not j...,The Social Security Administration has updated...
2,8614999307,https://magicvalley.com/news/nation-world/gove...,2025-03-31,23:53:41,-0.121569,magicvalley.com,Magic Valley,eng,Democratic base's anger puts some party leader...,PHOENIX -- The Democratic base is angry. Not j...,The Social Security Administration has updated...
3,8614997839,https://newsadvance.com/news/nation-world/gove...,2025-03-31,23:50:45,0.058824,newsadvance.com,NewsAdvance.com,eng,Democratic base's anger puts some party leader...,PHOENIX -- The Democratic base is angry. Not j...,The Social Security Administration has updated...
4,8614996738,https://www.nwitimes.com/news/nation-world/gov...,2025-03-31,23:49:38,0.058824,nwitimes.com,nwi.com,eng,Democratic base's anger puts some party leader...,PHOENIX -- The Democratic base is angry. Not j...,The Social Security Administration has updated...


✅ Loading FinBERT model...
✅ Running FinBERT sentiment analysis on your articles...


100%|██████████| 908/908 [19:56<00:00,  1.32s/it]


✅ Final sentiment file created successfully!





Unnamed: 0,date,sentiment_score
0,2025-03-31,-0.907437
1,2025-03-31,-0.907437
2,2025-03-31,-0.907437
3,2025-03-31,-0.907437
4,2025-03-31,-0.907437
