# Nasdaq Scraping Code

#### Data Wrangling Project | Matt Lancaster, Advika Shah | 5/3/25

In [1]:
# Import all required packages
import pandas as pd
from selenium import webdriver  
from selenium.webdriver.chrome.service import Service  
from selenium.webdriver.common.by import By  
from selenium.webdriver.chrome.options import Options  
from webdriver_manager.chrome import ChromeDriverManager  
import time  
import random

In [2]:
# Set up ChromeDriver using webdriver_manager
# This cell opens a Chrome window on the first screener page, maximizes it, then pauses.
# NOTE(review): `chrome_options` and `service` are created here but are NOT passed to
# webdriver.Chrome() below (the arguments are commented out), so they have no effect
# on the browser this cell launches — confirm whether that is intentional.
chrome_options = Options()  # Initialize Chrome options (optional)
service = Service(ChromeDriverManager().install())  # Install and set up ChromeDriver as a service

# Create a ChromeDriver instance
# NOTE(review): the scraping cell further down calls webdriver.Chrome() again and
# rebinds `browser`; no visible code closes the window opened here.
browser = webdriver.Chrome()#(service=service, options=chrome_options)

# Open the website
url = 'https://www.nasdaq.com/market-activity/stocks/screener?page=1&rows_per_page=25'
browser.get(url)

# Maximize the browser window for better visibility
browser.maximize_window()

# Wait a random time between 3-7 seconds before starting
time.sleep(random.uniform(3, 7))

In [3]:
# Scroll helper: walks down the page in a random number of paced steps
def random_scroll(browser, total_wait_time=5):
    """Scroll `browser` down the current page in several randomly paced steps.

    The page height is read once via JavaScript, split into a random number of
    equal increments (between 3 and 7), and the window is scrolled by one
    increment at a time with a randomized pause after each. A final jump to
    the bottom of the document is always issued at the end.

    Parameters
    ----------
    browser : object exposing ``execute_script(script)``
        The driver whose page should be scrolled.
    total_wait_time : int or float, default 5
        Nominal total pause in seconds; each step sleeps between 0.5x and 1.5x
        of ``total_wait_time / number_of_steps``.

    Returns
    -------
    None
    """
    page_height = browser.execute_script("return document.body.scrollHeight")

    # Pick how many hops to make, then derive the per-hop distance and pause
    n_steps = random.randint(3, 7)
    pixels_per_step = page_height // n_steps
    pause_per_step = total_wait_time / n_steps
    shortest_pause = 0.5 * pause_per_step
    longest_pause = 1.5 * pause_per_step

    # The scroll command is the same for every hop, so build it once
    scroll_js = f"window.scrollBy(0, {pixels_per_step});"

    remaining = n_steps
    while remaining > 0:
        browser.execute_script(scroll_js)
        time.sleep(random.uniform(shortest_pause, longest_pause))
        remaining -= 1

    # Integer division can leave a remainder, so finish with a jump to the very bottom
    browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")

In [5]:
# scrape the page
# Walks every screener page in a dedicated Chrome session and collects the first
# six cells of each table row into the column lists below.

# Number of screener pages to visit and the rows requested per page
TOTAL_PAGES = 278
ROWS_PER_PAGE = 25
SCREENER_URL = "https://www.nasdaq.com/market-activity/stocks/screener?page={page}&rows_per_page={rows}"

# One list per table column; re-created on every run so re-executing the cell
# does not append duplicates
symbol = []
name = []
last_sale = []
net_change = []
pct_change = []
market_cap = []

browser = webdriver.Chrome()
try:
    browser.maximize_window()

    for page in range(1, TOTAL_PAGES + 1):
        url = SCREENER_URL.format(page=page, rows=ROWS_PER_PAGE)
        browser.get(url)
        # Random pause after each page load before reading the table
        time.sleep(random.uniform(1, 3))

        ul = browser.find_element(By.CLASS_NAME, "jupiter22-c-symbol-screener-table__body")
        rows = ul.find_elements(By.TAG_NAME, 'tr')

        # NOTE(review): the first <tr> is skipped, presumably a header/placeholder
        # row — confirm it is not dropping the first stock on each page.
        for row in rows[1:]:
            cells = row.find_elements(By.TAG_NAME, "td")
            # Rows without all six data cells cannot be indexed safely; skip them
            # instead of raising IndexError halfway through the appends.
            if len(cells) < 6:
                continue
            # Read the whole row first so the six lists always stay the same length
            values = [cell.text.strip() for cell in cells[:6]]
            symbol.append(values[0])
            name.append(values[1])
            last_sale.append(values[2])
            net_change.append(values[3])
            pct_change.append(values[4])
            market_cap.append(values[5])
finally:
    # Always release the Chrome/chromedriver processes, even if a page fails
    browser.quit()

In [6]:
# Assemble the scraped column lists into one table: each header is paired with
# the list that holds that column's values, in display order
nasdaq_df = pd.DataFrame(
    dict(
        zip(
            ('Symbol', 'Name', 'Last Sale', 'Net Change', 'Percent Change', 'Market Cap'),
            (symbol, name, last_sale, net_change, pct_change, market_cap),
        )
    )
)
display(nasdaq_df)

Unnamed: 0,Symbol,Name,Last Sale,Net Change,Percent Change,Market Cap
0,MSFT,Microsoft Corporation Common Stock,$425.40,30.14,7.625%,3162416042769
1,NVDA,NVIDIA Corporation Common Stock,$111.61,2.69,2.47%,2723284000000
2,AMZN,"Amazon.com, Inc. Common Stock",$190.20,5.78,3.134%,2015688122750
3,GOOG,Alphabet Inc. Class C Capital Stock,$162.79,1.90,1.181%,1975456650000
4,GOOGL,Alphabet Inc. Class A Common Stock,$161.30,2.50,1.574%,1957375500000
...,...,...,...,...,...,...
6556,XFLT^A,XAI Octagon Floating Rate & Alternative Income...,$25.17,0.01,0.04%,
6557,YCBD^A,"cbdMD, Inc. 8.0% Series A Cumulative Convertib...",$1.67,0.12,7.742%,
6558,YHNA,YHN Acquisition I Limited Ordinary Shares,$10.24,UNCH,--,
6559,YHNAR,YHN Acquisition I Limited Right,$0.1329,-0.0071,-5.071%,


In [7]:
# rename Symbol column to Ticker
nasdaq_df = nasdaq_df.rename(columns = {'Symbol': 'Ticker'})

# reformat "^" in the tickers to "-"
# '^' is a regex metacharacter (start-of-string anchor), so regex=False is passed
# explicitly: the caret is replaced as a literal character regardless of the
# pandas version's default for `regex`.
nasdaq_df['Ticker'] = nasdaq_df['Ticker'].str.replace('^', '-', regex=False)

display(nasdaq_df)

Unnamed: 0,Ticker,Name,Last Sale,Net Change,Percent Change,Market Cap
0,MSFT,Microsoft Corporation Common Stock,$425.40,30.14,7.625%,3162416042769
1,NVDA,NVIDIA Corporation Common Stock,$111.61,2.69,2.47%,2723284000000
2,AMZN,"Amazon.com, Inc. Common Stock",$190.20,5.78,3.134%,2015688122750
3,GOOG,Alphabet Inc. Class C Capital Stock,$162.79,1.90,1.181%,1975456650000
4,GOOGL,Alphabet Inc. Class A Common Stock,$161.30,2.50,1.574%,1957375500000
...,...,...,...,...,...,...
6556,XFLT-A,XAI Octagon Floating Rate & Alternative Income...,$25.17,0.01,0.04%,
6557,YCBD-A,"cbdMD, Inc. 8.0% Series A Cumulative Convertib...",$1.67,0.12,7.742%,
6558,YHNA,YHN Acquisition I Limited Ordinary Shares,$10.24,UNCH,--,
6559,YHNAR,YHN Acquisition I Limited Right,$0.1329,-0.0071,-5.071%,


In [8]:
# Write the cleaned table to nasdaq_final.csv in the working directory (UTF-8).
# No `index` argument is given, so pandas' default applies and the row index is
# written as the first column of the file.
nasdaq_df.to_csv(path_or_buf='nasdaq_final.csv', encoding='utf-8')