In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
# Landing page that lists the latest Yahoo Finance news articles.
url = "https://finance.yahoo.com/news"

# Fetch the page. An explicit timeout is passed because requests waits
# indefinitely by default, which would hang the kernel on a stalled connection.
response = requests.get(url, timeout=10)

In [7]:
# Parse the fetched page into a table of (Headline, Link) rows, print a
# preview, and save the table to 'news_data.csv'. Only runs when the HTTP
# request succeeded (status code 200); otherwise the status code is reported.
if response.status_code == 200:
    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all the <div> containers that hold the article headlines and links.
    # NOTE(review): these class names look auto-generated (hashed suffixes) and
    # will presumably change when the site is redeployed — verify if no rows match.
    articles = soup.find_all('div', class_='content yf-1qcp8cc')

    # Collect one {"Headline", "Link"} record per usable article
    news_data = []
    for article in articles:
        # The headline lives in an <h3> inside the container
        headline_tag = article.find('h3', class_='clamp tw-line-clamp-3 sm:tw-line-clamp-2 yf-1qcp8cc')
        headline = headline_tag.text.strip() if headline_tag else ""
        if not headline:
            # Skip this article if no (non-empty) headline is found
            continue

        # The article URL lives in the href of an <a> inside the container
        link_tag = article.find('a', class_='subtle-link fin-size-small titles noUnderline yf-1e4diqp')
        if not (link_tag and 'href' in link_tag.attrs):
            # Actually skip the article here: falling through would either
            # raise NameError (first article) or silently reuse the link of
            # the previously processed article for this headline.
            print(f"No link found for headline: {headline}")
            continue

        link = link_tag['href']
        # Site-relative links are prefixed with the host; absolute ones are kept
        if not link.startswith('http'):
            link = "https://finance.yahoo.com" + link

        news_data.append({
            "Headline": headline,
            "Link": link
        })

    # Convert to a Pandas DataFrame; fixed columns keep the frame (and the
    # CSV header) well-formed even when no article matched.
    news_df = pd.DataFrame(news_data, columns=["Headline", "Link"])

    if news_df.empty:
        print("No articles matched the expected page structure.")

    # Display the first few rows
    print(news_df.head())

    # Save to a CSV file for later use
    news_df.to_csv('news_data.csv', index=False)

else:
    print(f"Failed to retrieve the page. Status code: {response.status_code}")

                                            Headline  \
0  Alex Jones-affiliated company challenges the O...   
1  Stellantis CEO says will adapt to U.S. market ...   
2  Thames Water Pushes Deadline in Hunt for £3.3 ...   
3  Nvidia Is Helping Google Design Quantum Comput...   
4  How Robert F. Kennedy Jr. could impact the hea...   

                                                Link  
0  https://finance.yahoo.com/news/alex-jones-affi...  
1  https://finance.yahoo.com/news/stellantis-ceo-...  
2  https://finance.yahoo.com/news/thames-water-pu...  
3  https://finance.yahoo.com/news/nvidia-helping-...  
4  https://finance.yahoo.com/news/how-robert-f-ke...  
