In [1]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup

# Function to get the WebDriver
def get_driver():
    """Create and return a Chrome WebDriver configured to run headless.

    The driver executable path is whatever ``ChromeDriverManager().install()``
    returns; it is handed to Selenium through a ``Service`` object.

    Returns:
        A ``webdriver.Chrome`` instance. The caller is responsible for
        calling ``quit()`` on it when finished.
    """
    chrome_opts = webdriver.ChromeOptions()
    # Command-line switches passed to Chrome: no visible window, GPU and
    # sandbox disabled.
    for flag in ('--headless', '--disable-gpu', '--no-sandbox'):
        chrome_opts.add_argument(flag)

    driver_service = Service(ChromeDriverManager().install())
    return webdriver.Chrome(service=driver_service, options=chrome_opts)

# Function to parse HTML and extract data
def parse_html(html):
    """Extract ticker symbols from the ``main-table`` table in *html*.

    Args:
        html: Page source as a string.

    Returns:
        A list of one-element lists, ``[[symbol], ...]``, in row order.
        Rows that have no symbol cell (or an empty one) are skipped.

    Raises:
        ValueError: If no ``<table id="main-table">`` is present.
    """
    soup = BeautifulSoup(html, 'html.parser')
    table = soup.find('table', {'id': 'main-table'})  # Adjust the selector to match the table ID

    if table is None:
        raise ValueError("Table not found on the page")

    # 'html.parser' does not synthesize a <tbody>, so fall back to the table
    # itself when the markup omits it instead of failing on None.
    body = table.find('tbody')
    if body is None:
        body = table

    data = []
    for row in body.find_all('tr'):
        # Match on the 'sym' class alone: the accompanying 'svelte-…' class
        # appears to be a build-generated suffix, so requiring the exact
        # attribute string would break whenever it changes.
        cell = row.find('td', class_='sym')
        if cell is None:
            # Header or filler rows carry no symbol cell.
            continue

        # The symbol is normally wrapped in a link; use the cell text if not.
        link = cell.find('a')
        source = cell if link is None else link
        symbol = source.text.strip()
        if symbol:
            data.append([symbol])

    return data

# Function to scrape symbols from a given URL
def scrape_symbols(url):
    """Load *url* in a fresh browser and return the symbols parsed from it.

    Args:
        url: Address of the page to load.

    Returns:
        The result of ``parse_html`` applied to the page source.

    Raises:
        ValueError: Propagated from ``parse_html`` when the expected table
            is missing. Errors raised by the driver also propagate.
    """
    driver = get_driver()
    try:
        driver.get(url)
        time.sleep(3)  # Allow time for the page to load
        html = driver.page_source
    finally:
        # Always shut the browser down, even when loading fails; otherwise
        # the Chrome process would be left running.
        driver.quit()

    # Parsing only needs the captured string, so it runs after the browser
    # has been released.
    return parse_html(html)

# Main function
def main():
    """Scrape the large-cap and mega-cap lists and display them as one table.

    The large-cap page is fetched first, then the mega-cap page; the
    resulting rows are concatenated with the mega-cap rows on top and shown
    as a single-column ``Symbol`` DataFrame via ``display``.
    """
    page_urls = (
        'https://stockanalysis.com/list/large-cap-stocks/',
        'https://stockanalysis.com/list/mega-cap-stocks/',
    )

    # Fetch in the listed order: large-cap, then mega-cap.
    large_rows, mega_rows = [scrape_symbols(page) for page in page_urls]

    # Mega-cap symbols go first in the combined table.
    symbol_table = pd.DataFrame(mega_rows + large_rows, columns=['Symbol'])
    display(symbol_table)
    
  
# Run the scrape and show the combined symbol table as this cell's output.
main()

Unnamed: 0,Symbol
0,AAPL
1,NVDA
2,MSFT
3,GOOGL
4,AMZN
...,...
847,LW
848,ARMK
849,MHK
850,HII
