In [19]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import json
import time

from urllib.parse import urlsplit

# Landing page whose anchors are harvested for article URLs.
BBC_NEWS_URL = "https://www.bbc.com/news"
# Upper bound on how many harvested links are actually visited per run.
MAX_ARTICLES = 10
# Destination file for the scraped records.
OUTPUT_PATH = "bbc_news.json"


def is_article_link(link) -> bool:
    """Return True if *link* is an https://www.bbc.com/news URL that is not a live page.

    The URL is parsed instead of substring-matched so that:
      * slugs that merely contain the letters "live" (e.g. "liverpool",
        "delivery") are kept -- only a path segment equal to "live" is
        rejected;
      * sibling sections such as "/newsround" are not mistaken for "/news".

    ``None`` and the empty string (anchors without an href) yield False.
    """
    if not link:
        return False
    parts = urlsplit(link)
    if parts.scheme != "https" or parts.netloc != "www.bbc.com":
        return False
    segments = [segment for segment in parts.path.split("/") if segment]
    return bool(segments) and segments[0] == "news" and "live" not in segments


def collect_article_links(hrefs) -> list:
    """Return the accepted links from *hrefs*, de-duplicated in first-seen order.

    ``dict.fromkeys`` is used rather than a ``set`` so the result keeps the
    order in which the links were supplied; slicing the first N entries is
    then reproducible between runs.
    """
    return list(dict.fromkeys(href for href in hrefs if is_article_link(href)))


def first_match(driver, selector, read):
    """Apply *read* to the first element matching CSS *selector*.

    Best-effort: any Selenium error (element missing, stale, ...) yields "".
    Only ``WebDriverException`` is caught, so Ctrl-C and programming errors
    still propagate instead of being silently swallowed.
    """
    # Imported here so the pure helpers above stay usable without Selenium.
    from selenium.common.exceptions import WebDriverException

    try:
        return read(driver.find_element(By.CSS_SELECTOR, selector))
    except WebDriverException:
        return ""


def scrape_article(driver, wait, url) -> dict:
    """Load *url* and return its headline, first paragraph and first image.

    Raises whatever Selenium raises if the page never shows an <h1>
    (``wait.until`` times out) or the headline cannot be read; the optional
    summary and image fall back to "".
    """
    driver.get(url)
    wait.until(EC.presence_of_element_located((By.TAG_NAME, "h1")))
    time.sleep(2)  # same settle delay as before, after the <h1> is present

    headline = driver.find_element(By.TAG_NAME, "h1").text.strip()
    # First paragraph inside <article> serves as the summary.
    summary = first_match(driver, "article p", lambda el: el.text.strip())
    image = first_match(driver, "figure img", lambda el: el.get_attribute("src"))

    return {
        "Headline": headline,
        "URL": url,
        "Summary": summary,
        "Image": image,
    }


# True both for `python file.py` and inside a notebook cell, so the names
# assigned below remain module-level globals exactly as before; the guard
# only prevents a browser from being launched when the file is imported.
if __name__ == "__main__":
    # Set up Chrome options and start the browser
    options = Options()
    driver = webdriver.Chrome(options=options)
    news_data = []

    try:
        wait = WebDriverWait(driver, 10)

        # Step 1: Open the BBC News homepage
        driver.get(BBC_NEWS_URL)
        time.sleep(5)

        # Step 2: Extract unique article links from the homepage, in page order
        articles = driver.find_elements(By.CSS_SELECTOR, 'a[href*="/news/"]')
        article_links = collect_article_links(
            a.get_attribute("href") for a in articles
        )
        print(f"Found {len(article_links)} article links.")

        # Step 3: Visit each article and extract details
        for url in article_links[:MAX_ARTICLES]:
            try:
                news_data.append(scrape_article(driver, wait, url))
            except Exception as e:
                # Per-article boundary: report and move on to the next URL.
                print(f"Error processing {url}: {e}")
    finally:
        # Step 4: Close the browser even if harvesting or scraping failed
        driver.quit()

    # Step 5: Save to JSON
    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        json.dump(news_data, f, ensure_ascii=False, indent=4)

    print(f"News articles saved to '{OUTPUT_PATH}'")


Found 41 article links.
News articles saved to 'bbc_news.json'
