In [9]:
# 🗞️ Stock News Sentiment Analysis
# Phase 2 of Outlier Detection Project
# Author: Rehan Chaudhry

# =============================
# 📦 1. Install dependencies
# =============================
# Run this cell only once
!pip install newsapi-python transformers torch pandas seaborn matplotlib --quiet
import pandas as pd

# =============================
# 🔑 2. Load API key for NewsAPI
# =============================
from newsapi import NewsApiClient

NEWS_API_KEY = "a2b11161eb444bfe96b3b5defccff5cf"  # 🔐 Replace with your key
newsapi = NewsApiClient(api_key=NEWS_API_KEY)

# =============================
# 🔍 3. Define tickers and search terms
# =============================
tickers = {
    "TSLA": "Tesla",
    "MSFT": "Microsoft",
    "GOOGL": "Google",
    "AAPL": "Apple",
    "AMZN": "Amazon"
}

# =============================
# 📰 4. Fetch headlines
# =============================
from datetime import datetime, timedelta

def fetch_headlines(company_name, days=7):
    from_date = (datetime.now() - timedelta(days=days)).strftime("%Y-%m-%d")
    headlines = newsapi.get_everything(
        q=f"{company_name} stock",  # << make sure this is specific
        from_param=from_date,
        language="en",
        sort_by="relevancy",
        page_size=50
    )
    return [article["title"] for article in headlines["articles"]]

# Example: Fetch Tesla news
tesla_news = fetch_headlines("Tesla", days=30)
print(f"Sample Tesla headlines:\n{tesla_news[:5]}")

# =============================
# 🧠 5. Load FinBERT for sentiment
# =============================
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.nn.functional import softmax
import torch

tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")

def get_sentiment_score(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    outputs = model(**inputs)
    probs = softmax(outputs.logits, dim=1)
    labels = ["negative", "neutral", "positive"]
    return dict(zip(labels, probs[0].detach().numpy()))

# Example: Score 1 headline
print("\n🧪 Sentiment Example:")
print(tesla_news[0])
print(get_sentiment_score(tesla_news[0]))

# =============================
# 📊 6. Sentiment Summary Table
# =============================
def analyze_company_sentiment(name):
    titles = fetch_headlines(name)
    scores = [get_sentiment_score(t) for t in titles]
    df = pd.DataFrame(scores)
    df['title'] = titles
    df['company'] = name
    return df

all_sentiment = pd.concat([analyze_company_sentiment(name) for name in tickers.values()])
all_sentiment.reset_index(drop=True, inplace=True)
all_sentiment.to_csv("../data/processed/news_sentiment.csv", index=False)

print("\n✅ Sentiment analysis complete.")


Sample Tesla headlines:
['Elon Musk Has a New Plan to Win Back MAGA', 'The End of the Stock Market As We Know It', 'Stephen Miller Owns Stock in Notorious ICE Collaborator Palantir', 'Elon Musk loses $15 billion in net worth after Tesla stock sinks', "Vox populi, vox dei — Elon Musk loves polling people on X. Here's a list of polls he's done, and what happened after."]

🧪 Sentiment Example:
Elon Musk Has a New Plan to Win Back MAGA
{'negative': 0.19564614, 'neutral': 0.010223062, 'positive': 0.79413074}

✅ Sentiment analysis complete.
