### Libraries and settings

In [1]:
import os
import io
import time
import requests
import logging
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [2]:
# Root logging configuration: records of level INFO and above are appended
# to nasdaq.log, formatted with "{"-style templates and timestamps that are
# precise to the minute.
_LOG_SETTINGS = {
    "filename": "nasdaq.log",
    "filemode": "a",  # append to the existing log instead of truncating it
    "encoding": "utf-8",
    "level": logging.INFO,
    "style": "{",
    "format": "{asctime} - {levelname} - {message}",
    "datefmt": "%Y-%m-%d %H:%M",
}
logging.basicConfig(**_LOG_SETTINGS)

# Logger named after the current module.
logger = logging.getLogger(__name__)

### Get all Nasdaq symbols

In [3]:
url = "https://www.nasdaqtrader.com/dynamic/symdir/nasdaqlisted.txt"


def parse_symbol_listing(raw_text):
    """Parse the pipe-delimited Nasdaq symbol directory into a DataFrame.

    Trailing blank lines and the final "File Creation Time" footer row are
    removed before parsing, so the footer never shows up as a data row.

    Only empty fields are treated as missing values. pandas' default NA
    markers are disabled because they include strings such as "NA", which
    would otherwise turn a ticker spelled "NA" into a float NaN.

    Args:
        raw_text: Full text of the symbol directory file.

    Returns:
        A DataFrame with one row per listed symbol.
    """
    lines = raw_text.splitlines()
    while lines and not lines[-1].strip():
        lines.pop()
    if lines and lines[-1].startswith("File Creation Time"):
        lines.pop()

    return pd.read_csv(
        io.StringIO("\n".join(lines)),  # read the in-memory text like a file
        sep="|",
        keep_default_na=False,
        na_values=[""],
    )


# Start from an empty list so the name always exists, even if the download
# or the parsing below fails.
symbol_names = []

try:
    # A timeout keeps the script from hanging on a stalled connection, and
    # raise_for_status() stops an HTTP error page from being parsed as data.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    data = response.text

    symbols = parse_symbol_listing(data)
    symbol_names = symbols["Symbol"].dropna().tolist()  # only keep symbol names
except Exception as e:
    # Top-level boundary of the script: report the failure, keep the
    # traceback in the log, and continue with an empty symbol list.
    print("Error getting request body: " + str(e))
    logging.getLogger(__name__).exception("Could not load the Nasdaq symbol list")

### Set up download directory

In [12]:
# Absolute path of the folder that will receive the downloaded files.
download_directory = os.path.abspath("./Data/Nasdaq/")

# Create the folder (including missing parents) on first use and report
# which of the two cases applied.
if os.path.exists(download_directory):
    print(f"Directory already exists: {download_directory}")
else:
    os.makedirs(download_directory)
    print(f"Directory created: {download_directory}")

### Set up WebDriver and Options

In [13]:
driver_path = './Drivers/chromedriver-win64/chromedriver.exe'

# Chrome preferences that control how downloads are handled.
download_prefs = {
    # Save downloaded files into the prepared download folder.
    "download.default_directory": os.path.abspath(download_directory),
    # Do not ask where to save each file.
    "download.prompt_for_download": False,
    # NOTE(review): previously described as "automatically overwrite
    # existing files" - confirm against Chrome's preference documentation.
    "download.directory_upgrade": True,
    # Keep safe browsing switched on.
    "safebrowsing.enabled": True,
}

options = Options()
options.add_experimental_option('prefs', download_prefs)
# Skip the search engine selection screen shown with Chrome driver v127.
options.add_argument('--disable-search-engine-choice-screen')
# Optional flags, currently disabled:
#options.add_argument('--headless')         # headless mode to reduce resource usage
#options.add_argument('--start-maximized')  # start maximised for headless

# Point Selenium at the local chromedriver executable and start the browser.
service = Service(executable_path=driver_path)
driver = webdriver.Chrome(service=service, options=options)

### Use Selenium to interact with Yahoo Finance

In [None]:
base_url ="https://finance.yahoo.com/quote/{}/history"


def _download_history_csv(driver, wait, symbol):
    """Open the history page for *symbol* and click through to the CSV download.

    Args:
        driver: Selenium WebDriver used to load and interact with the page.
        wait: WebDriverWait bound to *driver*; bounds each wait for an element.
        symbol: Ticker symbol inserted into the history page URL.

    Raises:
        TimeoutException: if one of the page controls does not become
            clickable before *wait* expires.
    """
    driver.get(base_url.format(symbol))

    # The GDPR consent dialog is not always shown; reject it when present.
    try:
        driver.find_element(By.CLASS_NAME, 'reject-all').click()
    except NoSuchElementException:
        print("GDPR rejection button not found, continuing without action.")

    # Date filter
    filter_btn = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[@data-ylk='elmt:menu;itc:1;elm:input;sec:qsp-historical;slk:date-select;subsec:calendar']")))
    filter_btn.click()

    # Max period
    max_filter_btn = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[@value='MAX']")))
    max_filter_btn.click()

    # Download CSV button
    download_btn = wait.until(EC.element_to_be_clickable((By.XPATH, "//a[@data-testid='download-link']")))
    download_btn.click()


# One wait object is enough for the whole run: wait up to 60 seconds.
wait = WebDriverWait(driver, 60)

try:
    for symbol in symbol_names:
        try:
            _download_history_csv(driver, wait, symbol)

            # The click only starts the download; completion is not verified.
            logger.info("File for symbol=%r was downloaded.", symbol)

            time.sleep(5) # sleep 5s before next symbol
        except TimeoutException as e:
            print(f"Timed out waiting for page elements:\n{e}")
            print(f"Symbol {symbol} was not found.")
            # Logged at WARNING level so the severity is in the level field,
            # not embedded in the message text.
            logger.warning("File for symbol=%r was NOT FOUND.", symbol)
        except Exception as e:
            print(f"An error occurred:\n{e}")
            # Record which symbol failed, together with the traceback.
            logger.exception("Unexpected error while processing symbol=%r", symbol)
finally:
    # Always close the browser, even when the run is interrupted or fails.
    driver.quit()