<center>

*******************************************************************************************
<font size="4">
Web Scraping &nbsp; &nbsp; <br>
Nasdaq Europe Stocks &nbsp; &nbsp; <br>
</font>

##### 14 January 2026

##### Juan Ignacio Mendoza Garay

*******************************************************************************************

</center>

INFORMATION:

* Description:

    Collect historical daily price data for selected stocks traded on Nasdaq Europe.
>
* Tested with:

    * Python 3.11
    * Selenium 4.2
    * Firefox 146
    * Windows 11
>
* Instructions:

    Edit the values indicated with an arrow like this: <---\
    Run the program, close your eyes and hope for the best.

*******************************************************************************************


In [1]:
import os
import csv
import time
import sqlite3

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

***
#### Set paths, URL and parameters

In [2]:
# Tunable settings. Every value meant to be edited is flagged with "<---".

# Output location.
dataset_name = "HEL_LargeCap_5Y"  # <--- name for the dataset to be created
data_folder = "stocks_data"       # <--- directory to save the files

# Filters.
sel_market = "HEL"          # <--- select market
sel_segment = "Large Cap"   # <--- select segment
sel_sector = None           # <--- select sector
sel_curr = None             # <--- select currency
sel_tspan = "5Y"            # <--- select time span

# Pacing and limits.
t_out = 3        # <--- time-out (seconds)
t_del = 2        # <--- delay time (seconds)
max_pages = 700  # <--- maximum number of pages for the list of stocks (safe stop)

>
#### Functions

In [3]:
def get_sh_root(root, host_attr, attr='XPATH'):
    """Return the first child element inside a shadow host's shadow root.

    Parameters
    ----------
    root
        Object exposing ``find_element`` in which the shadow host is searched.
    host_attr : str
        Locator value that identifies the shadow host within ``root``.
    attr : str, default 'XPATH'
        Name of the ``By`` attribute to use as locator strategy.

    Returns
    -------
    The first item of the host's ``shadowRoot.children``.

    Raises
    ------
    AttributeError
        If ``attr`` is not an attribute of ``By``.

    Notes
    -----
    Relies on the module-level ``driver`` to run the JavaScript snippet.
    """
    # Resolve the locator strategy by name; no need to eval() a code string.
    sh_host = root.find_element(getattr(By, attr), host_attr)
    sh_children = driver.execute_script('return arguments[0].shadowRoot.children', sh_host)
    return sh_children[0]

def dropdown_sel(root, dd_text, init_sel, sel_opt, n_opt):
    """Open a filter dropdown and click the requested options.

    Parameters
    ----------
    root
        Search context that contains the dropdown's shadow host.
    dd_text : str
        Suffix of the host's ``id`` (the host is matched on ``"-{dd_text}"``).
    init_sel : object or list
        Option text(s) that must not be clicked even when requested.
    sel_opt : object or list
        Option text(s) to click.
    n_opt : int
        Number of option buttons to inspect (positions 1..n_opt).

    Returns
    -------
    The dropdown's button element, which is clicked once to open the list.
    """
    skip_labels = init_sel if isinstance(init_sel, list) else [init_sel]
    wanted_labels = sel_opt if isinstance(sel_opt, list) else [sel_opt]

    sh_root_dd = get_sh_root(root, f'//*[contains(@id,"-{dd_text}")]')
    dd_box = sh_root_dd.find_element(By.XPATH, '/div/div/button')
    dd_box.click()

    for pos in range(1, n_opt + 1):
        btn_xpath = f'/div/div/div/button[{pos}]'
        label = sh_root_dd.find_element(By.XPATH, btn_xpath + '/span').text
        if label in wanted_labels and label not in skip_labels:
            sh_root_dd.find_element(By.XPATH, btn_xpath).click()
    return dd_box

>
#### Initialise web driver

In [4]:
# Folders derived from the configuration cell.
dataset_folder = '{}/{}'.format(data_folder, dataset_name)
stocks_folder = f'{dataset_folder}/stocks'

options = Options()

# Command-line switches, added in this order:
# headless mode, bypass OS security, overcome limited resource problems.
for cli_switch in ('--headless', '--no-sandbox', '--disable-dev-shm-usage'):
    options.add_argument(cli_switch)

# Download-related browser preferences.
download_prefs = {
    "browser.download.folderList": 2,  # downloads folder (1 = default, 2 = use custom)
    "browser.download.manager.showWhenStarting": False,  # display of download progress
    "browser.download.dir": os.path.abspath(stocks_folder),  # folder to save files
    "browser.download.useDownloadDir": True,
    # auto download specified MIME type
    "browser.helperApps.neverAsk.saveToDisk":
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
}
for pref_key, pref_val in download_prefs.items():
    options.set_preference(pref_key, pref_val)

options.add_argument("--start-maximized")

driver = webdriver.Firefox(options=options)

>
#### Download list of stocks

In [5]:
driver.get('https://www.nasdaq.com/european-market-activity/shares')

# ..................................................................................................
# cookies' settings:

# Best effort: each button is clicked if it becomes clickable within t_out seconds.
wait = WebDriverWait(driver, t_out)
for cl in [' cookie-setting-link', 'ot-pc-refuse-all-handler']:
    try:
        wait.until(EC.element_to_be_clickable((By.XPATH, f'//button[@class="{cl}"]'))).click()
    except Exception:
        # settings already saved (button not shown); `Exception` rather than a
        # bare `except` so that Ctrl-C / kernel interrupt is not swallowed.
        pass

# ..................................................................................................

# Scrape the screener's table page by page into <dataset_folder>/stocks_list.csv.
# An existing list is reused, so delete the file to force a new download.
os.makedirs(dataset_folder, exist_ok=True)
stocks_list_ffn = dataset_folder + '/stocks_list.csv'
if os.path.isfile(stocks_list_ffn):
    print('There is already a list of stocks in the data folder; no download has been done.')
else:
    # ..............................................................................................
    # get common roots:

    sh_host_1_xpath = '/html/body/div[2]/div/main/div[2]/article/div/div[2]/div[2]/div/div/div[2]/nasdaq-market-data-api-screener'
    sh_root_1 = get_sh_root(driver, sh_host_1_xpath)
    sh_host_1_1_xpath = '/div/div[1]/nasdaq-market-data-api-filters'
    sh_root_1_1 = get_sh_root(sh_root_1, sh_host_1_1_xpath)

    # ..............................................................................................
    # select options:

    dropdown_sel(sh_root_1_1, 'market', 'Nordic', sel_market, 5)
    dropdown_sel(sh_root_1_1, 'segment', 'Large Cap', sel_segment, 4)
    dd_box_sector = dropdown_sel(sh_root_1_1, 'sector', None, sel_sector, 11)
    dd_box_sector.click()
    sh_root_1_1.find_element(By.XPATH,'/div/div[2]/button[1]').click()

    # ..............................................................................................
    # get links to each stock's webpage:

    sh_host_1_2_xpath = '/div/div[2]/nasdaq-market-data-api-table-sort'
    sh_host_1_3_xpath = '/div/nasdaq-market-data-api-pagination'
    page = 0   # number of pages scraped so far
    c = 0      # number of links written so far
    safe_stop = False
    # Write to a temporary file and rename it only on success, so that an
    # interrupted run cannot leave a truncated list that would later be
    # mistaken for a complete one.
    tmp_list_ffn = stocks_list_ffn + '.tmp'
    with open(tmp_list_ffn, 'w', newline='', encoding='utf-8') as stocks_to_csv:
        wr = csv.writer(stocks_to_csv)
        wr.writerow(['NAME','CURRENCY','URL'])
        time.sleep(t_del)
        while True:
            sh_root_1_2 = get_sh_root(sh_root_1, sh_host_1_2_xpath)
            i = 2  # rows are read from position 2 onwards
            while True:
                try:
                    name_href = sh_root_1_2.find_element(By.XPATH,f'/div/div/div/div[{i}]/div[1]/a')
                    ccy = sh_root_1_2.find_element(By.XPATH,f'/div/div/div/div[{i}]/div[2]')
                    row = [name_href.text, ccy.text, name_href.get_attribute('href')]
                except Exception:
                    # No row at this position: the page is exhausted.
                    # (`Exception`, not bare `except`, so Ctrl-C still works.)
                    break
                wr.writerow(row)
                i += 1
                c += 1
            page += 1
            sh_root_1_2 = get_sh_root(sh_root_1, sh_host_1_2_xpath)
            sh_root_1_3 = get_sh_root(sh_root_1_2, sh_host_1_3_xpath)
            next_btn = sh_root_1_3.find_element(By.XPATH,'/div/div[2]/button[2]')
            last_page = bool(next_btn.get_attribute('disabled'))
            # Stop once max_pages pages have been scraped (previously one extra
            # page was fetched before the limit took effect).
            safe_stop = (not last_page) and page >= max_pages
            if last_page or safe_stop:
                break
            next_btn.click()
            time.sleep(t_del)
            sh_root_1 = get_sh_root(driver, sh_host_1_xpath)
    os.replace(tmp_list_ffn, stocks_list_ffn)
    print(f'{c} links downloaded and saved to {stocks_list_ffn}')
    if safe_stop: print(f'(safe-stop at {page} pages)')

37 links downloaded and saved to stocks_data/HEL_LargeCap_5Y/stocks_list.csv


>
#### Create database and table

In [6]:
# Open (or create) the dataset's SQLite database and make sure it holds the
# list of stocks. `stocks_list_df` is defined on both paths so that later
# cells do not depend on which branch ran.
connection = sqlite3.connect(f'{dataset_folder}/{dataset_name}.db')
list_table_name = 'stocks_list'
cursor = connection.cursor()
try:
    rc = cursor.execute(f'SELECT COUNT(*) FROM {list_table_name}')
except sqlite3.OperationalError:
    # The query failed (table not there yet): build it from the CSV list.
    stocks_list_df = pd.read_csv(stocks_list_ffn)
    stocks_list_df.to_sql(name=list_table_name, con=connection)
    print(f'Table \'{list_table_name}\' created.')
else:
    print(f'Table \'{list_table_name}\' already exists, and has {rc.fetchone()[0]} rows.')
    stocks_list_df = pd.read_sql_query(f'SELECT NAME, CURRENCY, URL FROM {list_table_name}',
                                       con=connection)

Table 'stocks_list' created.


>
#### Select stocks

In [7]:
# Select the stocks to download, optionally restricted to one currency.
if sel_curr is None:
    curr_sel_str = ''
    query_params = None
    sel_cond = '(all)'
else:
    # Bind the currency as a parameter: a double-quoted value in SQLite is
    # parsed as an identifier first, so e.g. "CURRENCY" would match every row.
    curr_sel_str = 'WHERE CURRENCY = ?'
    query_params = (sel_curr,)
    # Count in the database rather than via `stocks_list_df`, which is not
    # defined when the table already existed before this session.
    n_listed = connection.execute(f'SELECT COUNT(*) FROM {list_table_name}').fetchone()[0]
    sel_cond = f'of {n_listed}'
query = f"""
            SELECT NAME, CURRENCY, URL
            FROM {list_table_name}
            {curr_sel_str}
        """
sel_stocks_df = pd.read_sql_query(query, con=connection, params=query_params)
print(f'{len(sel_stocks_df)} stocks {sel_cond} selected')
sel_stocks_df.head()

37 stocks (all) selected


Unnamed: 0,NAME,CURRENCY,URL
0,Elisa Oyj,EUR,https://www.nasdaq.com/european-market-activit...
1,Fiskars Oyj Abp,EUR,https://www.nasdaq.com/european-market-activit...
2,Fortum Oyj,EUR,https://www.nasdaq.com/european-market-activit...
3,Hiab Oyj,EUR,https://www.nasdaq.com/european-market-activit...
4,Huhtamäki Oyj,EUR,https://www.nasdaq.com/european-market-activit...


>
#### Get selected stocks' data

In [8]:
verbose = True # <---

# For every selected stock: open its page, pick the time span, download the
# CSV export and store the selected columns in a table of its own.

sel_cols = ['Date','Average price'] # columns to put in table for each stock
os.makedirs(stocks_folder, exist_ok=True)
# NOTE: the download folder is fixed when the driver is created; setting the
# preference on `options` here would not affect the running browser.

# Iterate stocks' pages:
sh_host_2_stem = '/html/body/div[2]/div/main/div[2]/article/div/div[2]/div/div[2]/div[2]/div[2]/nasdaq-market-data-api-chart/div[1]/nasdaq-market-data-api-'
sh_host_2_1_xpath = sh_host_2_stem + 'tabs'
sh_host_2_2_xpath = sh_host_2_stem + 'download'
L = len(sel_stocks_df)
new_tab = 0
table_names = []
i_ts_btn = None  # position of the time-span button, resolved on the first page
for i, d in sel_stocks_df.iterrows():
    driver.get(d.URL)
    sh_host_2_1 = driver.find_element(By.XPATH,sh_host_2_1_xpath)
    if i_ts_btn is None:
        i_ts_btn = sh_host_2_1.text.split(' ').index(sel_tspan) + 1
    sh_root_2_1 = driver.execute_script('return arguments[0].shadowRoot.children', sh_host_2_1)[0]
    sh_root_2_1.find_element(By.XPATH,f'/div/button[{i_ts_btn}]').click()
    sh_root_2_2 = get_sh_root(driver, sh_host_2_2_xpath)

    # Remember what is already in the folder so that only a file produced by
    # THIS click is accepted; otherwise a failed download would silently store
    # the previous stock's data under this stock's name.
    files_before = set(os.listdir(stocks_folder))
    sh_root_2_2.find_element(By.XPATH,f'/div/button').click()
    time.sleep(t_del)
    csv_ffn = None
    deadline = time.time() + t_out  # extra grace period for slow downloads
    while True:
        listing = os.listdir(stocks_folder)
        # Firefox keeps a '*.part' file next to the target while downloading.
        still_downloading = any(f.endswith('.part') for f in listing)
        new_files = [stocks_folder + '/' + f for f in listing
                     if f not in files_before and not f.endswith('.part')]
        new_files = [p for p in new_files if os.path.getsize(p) > 0]
        if new_files and not still_downloading:
            csv_ffn = max(new_files, key=os.path.getctime)
            break
        if time.time() >= deadline:
            break
        time.sleep(0.1)

    if verbose: print(f'{i+1} of {L}: ',end='')
    if csv_ffn is None:
        print(f'no new file was downloaded for "{d.NAME}"; stock skipped')
        continue

    # The first line of the file declares the separator as "sep=<char>".
    separator = pd.read_csv(csv_ffn,nrows = 0).columns[0].split('=')[1]
    csv_data_src = pd.read_csv( csv_ffn, skiprows = 1, sep=separator, usecols=sel_cols,
                                 decimal=".", thousands="," )

    # If a table with the stock's name already exists, append the currency.
    # The name is bound as a parameter instead of being spliced into the SQL.
    name_taken = cursor.execute(
        "SELECT 1 FROM sqlite_master WHERE type IN ('table','view') AND name = ? COLLATE NOCASE",
        (d.NAME,)
    ).fetchone() is not None
    table_name = f'{d.NAME}_{d.CURRENCY}' if name_taken else d.NAME
    csv_data_src.to_sql(name=table_name, con=connection)
    table_names.append(table_name)
    new_tab += 1
    if verbose: print(f'table "{table_name}" created')
if verbose: print()
print(f'{new_tab} new tables for {L} selected stocks')

1 of 37: table "Elisa Oyj" created
2 of 37: table "Fiskars Oyj Abp" created
3 of 37: table "Fortum Oyj" created
4 of 37: table "Hiab Oyj" created
5 of 37: table "Huhtamäki Oyj" created
6 of 37: table "Kalmar Oyj B" created
7 of 37: table "Kemira Oyj" created
8 of 37: table "Kesko Oyj A" created
9 of 37: table "Kesko Oyj B" created
10 of 37: table "Kojamo Oyj" created
11 of 37: table "KONE Oyj" created
12 of 37: table "Konecranes Oyj" created
13 of 37: table "Mandatum" created
14 of 37: table "Metso Oyj" created
15 of 37: table "Metsä Board Oyj A" created
16 of 37: table "Metsä Board Oyj B" created
17 of 37: table "Neste Oyj" created
18 of 37: table "Nokia Oyj" created
19 of 37: table "Nokian Renkaat Oyj" created
20 of 37: table "Nordea Bank Abp" created
21 of 37: table "Orion Oyj A" created
22 of 37: table "Orion Oyj B" created
23 of 37: table "Outokumpu Oyj" created
24 of 37: table "Qt Group Oyj" created
25 of 37: table "Sampo Oyj A" created
26 of 37: table "Sanoma Oyj" created
27 of 

>
#### Collect stocks' data in one table

In [9]:
# Build the wide table "av_prices": one row per date of the first stock and
# one column of daily average prices per stock.
if not table_names:
    print('No stock tables available; table "av_prices" was not created.')
else:
    # Start from scratch so that the cell can be re-run on an existing database.
    cursor.execute('DROP TABLE IF EXISTS "av_prices"')

    # Create new table in database, with dates and one stock's daily average prices:
    table_name = table_names[0]
    query = f"""
                CREATE TABLE "av_prices" AS
                SELECT
                    Date, "Average price"
                    AS "{table_name}"
                FROM "{table_name}"
            """
    cursor.execute(query)

    # Put all remaining stocks' daily average prices into table:
    for table_name in table_names[1:]:
        cursor.execute(f'ALTER TABLE av_prices ADD COLUMN "{table_name}"')
        # Dates missing from this stock's table are left as NULL.
        query = f"""
                    UPDATE av_prices
                    SET "{table_name}" = (
                                            SELECT "Average price"
                                            FROM "{table_name}"
                                            WHERE Date = av_prices.Date
                                          );
                """
        cursor.execute(query)
    connection.commit()

>
#### Close web driver and database

In [10]:
# Release both resources; the database is closed even if quitting the
# browser raises.
try:
    driver.quit()
finally:
    connection.close()