# Wikipedia Web Scraping using Selenium + BeautifulSoup

This notebook demonstrates a hybrid approach:
- Selenium for page loading
- BeautifulSoup for parsing
- Pandas for data storage

In [None]:

# Install required libraries (run once; %pip installs into the active kernel's environment)
# %pip install selenium beautifulsoup4 pandas


In [None]:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import pandas as pd
import time


In [None]:

# Open the Wikipedia page in a Selenium-controlled Chrome session
driver = webdriver.Chrome()
try:
    driver.get("https://en.wikipedia.org/wiki/List_of_largest_companies_by_revenue")
    driver.maximize_window()

    # Block for up to 10 seconds until at least one element with class
    # "wikitable" is present in the DOM. No additional fixed sleep is used:
    # the explicit wait returns as soon as the table is there.
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, "wikitable"))
    )
except Exception:
    # The cell that normally closes the browser is not reached by "Run All"
    # once this cell fails, so shut the session down here instead of leaking it.
    driver.quit()
    raise


In [None]:

# Snapshot the markup held by the browser and hand it to BeautifulSoup,
# using Python's built-in "html.parser" backend.
html = driver.page_source
soup = BeautifulSoup(markup=html, features="html.parser")


In [None]:

# Scrape table data
# Only the first <table> carrying the "wikitable" class is read.
table = soup.find("table", class_="wikitable")
if table is None:
    # Without this guard the failure surfaces later as an opaque
    # "'NoneType' object has no attribute 'find_all'".
    raise ValueError('No <table class="wikitable"> found in the page source')

rows = table.find_all("tr")

# One dict per data row; turned into a DataFrame in a later cell.
companies = []

# rows[1:] skips the first <tr>. Rows with fewer than four <td> cells
# (e.g. rows made up only of <th> header cells) are ignored.
for row in rows[1:]:
    cols = row.find_all("td")
    if len(cols) >= 4:
        # NOTE(review): this positional mapping assumes the first four <td>
        # cells are rank, company, revenue and country, in that order —
        # verify against the current layout of the live table.
        rank, company, revenue, country = (cell.text.strip() for cell in cols[:4])
        companies.append({
            "Rank": rank,
            "Company": company,
            "Revenue (USD million)": revenue,
            "Country": country
        })


In [None]:

# Close browser — the page source was already copied into `html` and parsed
# into `soup` by an earlier cell, so the live browser session is no longer needed.
driver.quit()


In [None]:

# Build a DataFrame from the scraped records (one row per dict in `companies`)
df = pd.DataFrame(data=companies)
# Preview the first five rows as the cell's rich output
df.head(n=5)


In [None]:

# Persist the scraped table to disk; index=False keeps the DataFrame's
# row index out of the CSV file.
df.to_csv(path_or_buf="wikipedia_companies.csv", index=False)
print("CSV file saved successfully")
