In [None]:
import requests
import csv
from bs4 import BeautifulSoup

# Read stock symbols from stocks.txt, one ticker per line.
# Blank lines (e.g. a trailing newline at the end of the file) are skipped so
# they do not turn into a request for an empty ticker.
with open("stocks.txt", "r") as file:
    stock_symbols = [line.strip() for line in file if line.strip()]

# Define headers to mimic a real browser
headers = {
    "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/109.0"
}

# Seconds to wait for the server before giving up on a ticker. Without a
# timeout, requests.get() can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

# Excluded categories. These are compared against the *whole* <h3> text
# (exact match), not as substrings: a substring test would also drop genuine
# headlines that merely contain words such as "Life", "Finance" or "Sports".
# "News" is included because it shows up as a bare <h3> on the page and would
# otherwise be stored as a headline.
# NOTE(review): presumably these are the site's navigation/section headings —
# confirm against the current page markup.
exclude_keywords = ["News", "Life", "Entertainment", "Finance", "Sports", "New on Yahoo"]

# Prepare list for storing news: one [ticker, headline] row per kept headline
all_news = []

for ticker in stock_symbols:
    url = f"https://finance.yahoo.com/quote/{ticker}/news/"
    print(f"🔍 Fetching news for {ticker} from {url}")

    # A network failure for one ticker must not abort the whole run; otherwise
    # the headlines already collected would never reach the CSV below.
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print(f" Failed to fetch  {ticker}. Error:", exc)
        continue

    if response.status_code != 200:
        print(f" Failed to fetch  {ticker}. Status code:", response.status_code)
        continue

    soup = BeautifulSoup(response.content, "html.parser")

    # Every <h3> on the page is treated as a candidate headline
    for h in soup.find_all("h3"):
        headline_text = h.get_text(strip=True)

        # Drop empty headings and the excluded category headings
        if headline_text and headline_text not in exclude_keywords:
            all_news.append([ticker, headline_text])

# Save to CSV (overwrites any existing file of the same name)
filename = "news.csv"
with open(filename, "w", newline="", encoding="utf-8") as file:
    writer = csv.writer(file)
    writer.writerow(["Ticker", "Headline"])  # Write header
    writer.writerows(all_news)

print(f" All stock news saved in {filename}")


🔍 Fetching news for AAPL from https://finance.yahoo.com/quote/AAPL/news/
🔍 Fetching news for MSFT from https://finance.yahoo.com/quote/MSFT/news/
🔍 Fetching news for GOOGL from https://finance.yahoo.com/quote/GOOGL/news/
🔍 Fetching news for ^GSPC from https://finance.yahoo.com/quote/^GSPC/news/
✅ All stock news saved in news.csv


In [None]:
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Step 1: Read the scraped news from news.csv
df = pd.read_csv("news.csv")

# Step 2: Initialize sentiment analyzer
analyzer = SentimentIntensityAnalyzer()

# Step 3: Analyze sentiment and classify.
# Only the "Headline" column is needed, so iterate over it directly instead of
# materialising a full row per record with iterrows().
for raw_headline in df["Headline"]:
    # Missing values are detected with pd.isna() rather than by comparing the
    # stringified value against "nan".
    if pd.isna(raw_headline):
        continue

    headline = str(raw_headline)  # Convert to string to avoid errors

    # Skip headlines that are empty or whitespace-only
    if not headline.strip():
        continue

    sentiment_score = analyzer.polarity_scores(headline)["compound"]

    # Any non-zero compound score is treated as polar.
    # NOTE(review): the vaderSentiment README suggests +/-0.05 as typical
    # cutoffs for positive/negative — confirm whether a zero threshold is
    # intended here.
    if sentiment_score > 0:
        sentiment_label = "Positive"
    elif sentiment_score < 0:
        sentiment_label = "Negative"
    else:
        sentiment_label = "Neutral"

    # Print the results
    print(f" Headline: {headline}")
    print(f" Sentiment Score: {sentiment_score:.3f}")
    print(f"🔹 Sentiment: {sentiment_label}")
    print("-" * 60)  # Separator for readability


📰 Headline: News
📊 Sentiment Score: 0.000
🔹 Sentiment: Neutral
------------------------------------------------------------
📰 Headline: Why Mag 7 is a 'boy band' that needs to 'breakup'
📊 Sentiment Score: 0.000
🔹 Sentiment: Neutral
------------------------------------------------------------
📰 Headline: Magnificent Seven Stocks: Amazon, Apple, Meta, Nvidia, Tesla Sell Off
📊 Sentiment Score: 0.681
🔹 Sentiment: Positive
------------------------------------------------------------
📰 Headline: Feeling Ripped Off by $1,000 Phones? The Secondhand Market Is Taking Off
📊 Sentiment Score: 0.128
🔹 Sentiment: Positive
------------------------------------------------------------
📰 Headline: Powerbeats Pro 2 Review: Best Workout Earbuds Still Lag on Audio Quality
📊 Sentiment Score: 0.421
🔹 Sentiment: Positive
------------------------------------------------------------
📰 Headline: Apple Inc. (AAPL)  Stock Under Pressure – UBS Warns of Slowing Services Growth
📊 Sentiment Score: 0.000
🔹 Sentiment: Ne