In [31]:
import random
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# --- Browser identity strings ---
# Each entry is a complete User-Agent header value; one is drawn at random
# every time the script runs and handed to the browser configuration.
_CHROME_122_UA = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/122.0.0.0 Safari/537.36"
)
_FIREFOX_122_UA = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:122.0) "
    "Gecko/20100101 Firefox/122.0"
)
USER_AGENTS = [_CHROME_122_UA, _FIREFOX_122_UA]

user_agent = random.choice(USER_AGENTS)

# --- Build the Chrome launch options ---
# Headless mode is currently NOT enabled (the browser window is shown);
# add "--headless" to the tuple below to run without a UI.
chrome_flags = (
    "--disable-gpu",
    "--no-sandbox",
    f"user-agent={user_agent}",  # spoof the randomly selected User-Agent
    "window-size=1920,1080",
)

options = Options()
for chrome_flag in chrome_flags:
    options.add_argument(chrome_flag)

# --- Start the browser session ---
driver = webdriver.Chrome(options=options)

# Main scrape flow: open the Yahoo Finance AAPL news page, dismiss the cookie
# banner if one shows up, scroll until the page stops growing, then print the
# title and link of every article container found in the rendered HTML.
# The browser is always shut down in the `finally` clause, even on failure.
try:
    url = "https://finance.yahoo.com/quote/AAPL/news/"
    driver.get(url)

    wait = WebDriverWait(driver, 10)

    # --- Accept cookie banner ---
    # Best-effort step: the banner is not always shown, so a timeout is an
    # expected outcome. Only Selenium errors are handled here so that
    # Ctrl-C / SystemExit are no longer swallowed by a bare `except:`.
    try:
        cookie_button = wait.until(EC.element_to_be_clickable((By.NAME, "agree")))
        cookie_button.click()
    except TimeoutException:
        # Must precede WebDriverException (its base class) to keep this message.
        print("⚠️ No cookie banner appeared")
    except WebDriverException as exc:
        # Banner was found but could not be clicked; carry on regardless.
        print(f"⚠️ Cookie banner could not be dismissed: {exc.msg}")
    else:
        print("✅ Cookie banner accepted")
        time.sleep(1)  # give the page a moment to settle after the click

    # --- Scroll to load more content ---
    scroll_pause = 2   # seconds to wait after each scroll before measuring
    max_scrolls = 100  # upper bound so an ever-growing page cannot loop forever
    last_height = driver.execute_script("return document.body.scrollHeight")

    for _ in range(max_scrolls):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(scroll_pause)

        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break  # Stop scrolling if no more content loads
        last_height = new_height

    print("✅ Finished scrolling")

    # --- Parse content ---
    soup = BeautifulSoup(driver.page_source, "html.parser")
    # A multi-word class_ string matches the exact value of the class attribute.
    # NOTE(review): the "yf-1y7058a" suffix looks auto-generated and may change
    # between site releases -- verify the selector if zero articles are found.
    articles = soup.find_all("div", class_="content yf-1y7058a")

    for article in articles:
        link = article.find("a", class_="subtle-link")
        if link:
            href = link.get("href")
            # Prefer the title attribute; fall back to the anchor's visible text.
            title = link.get("title") or link.get_text(strip=True)
            print(f"Title: {title}")
            print(f"Link: {href}")
            print("-" * 40)

finally:
    driver.quit()

# Only reached when the try block completed without raising, so `articles`
# is guaranteed to be defined here. Counts every matched container, including
# any that had no "subtle-link" anchor and therefore printed nothing.
print(f"Number of articles: {len(articles)}")

✅ Cookie banner accepted
✅ Finished scrolling
Title: Google stock sinks on report Apple plans to integrate AI search into Safari browser
Link: https://finance.yahoo.com/news/google-stock-sinks-on-report-apple-plans-to-integrate-ai-search-into-safari-browser-155545131.html
----------------------------------------
Title: Was Jim Cramer Right About Apple Inc. (AAPL)?
Link: https://finance.yahoo.com/news/jim-cramer-apple-inc-aapl-203343749.html
----------------------------------------
Title: Fed defies Trump as it declines to lower rates
Link: https://finance.yahoo.com/news/fed-defies-trump-declines-lower-200721666.html
----------------------------------------
Title: AI Is Hitting Search Traffic, Testimony Suggests. Google Stock Falls.
Link: https://finance.yahoo.com/m/516c513e-bcfe-3adc-ac94-5c8120f32c7a/ai-is-hitting-search-traffic-.html
----------------------------------------
Title: Alphabet Shares Tumble on Report Apple Could Drop Google for AI Search
Link: https://finance.yahoo.com/n