# ETF Classification Tracker for etf.com

In [None]:
# ⚠️ Disclaimer:
# This script scrapes public ETF data from ETF.com.
# For academic/research use only. Please respect ETF.com's robots.txt and terms of service.

In [21]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd


def _value_once_loaded(label):
    """Build a wait condition that yields the value element next to ``label``.

    The returned callable looks up the first ``div`` sibling that follows
    ``label`` and returns it once its text is no longer the "loading..."
    placeholder. Until then it returns ``False`` so that ``WebDriverWait``
    keeps polling.
    """
    def _condition(_driver):
        # One lookup per poll: the element that is tested is the element returned.
        value_div = label.find_element(By.XPATH, "following-sibling::div[1]")
        if value_div.text.strip().lower() == "loading...":
            return False
        return value_div

    return _condition


def get_etf_methodology_fields(etf_symbol, timeout=15):
    """Scrape the index-methodology fields shown on an ETF's etf.com page.

    Opens ``https://www.etf.com/<etf_symbol lower-cased>`` in a fresh Chrome
    session and, for each field label, reads the text of the ``div`` that
    immediately follows the label's ``div``.

    Args:
        etf_symbol: Ticker symbol, e.g. ``"SPY"``. It is lower-cased for the
            URL and stored unchanged under the ``"Ticker"`` key.
        timeout: Seconds to wait for each label, and again for each value to
            finish loading. Defaults to 15.

    Returns:
        dict: ``"Ticker"`` plus one entry per field ("Index Selection
        Methodology", "Index Weighting Methodology", "Index Tracked"). A field
        whose label or value could not be read is recorded as ``"Not found"``.

    Raises:
        Exception: Whatever the browser start-up or ``driver.get`` raises is
            propagated. The browser is shut down first if it was started.
    """
    url = f"https://www.etf.com/{etf_symbol.lower()}"

    options = webdriver.ChromeOptions()
    # options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")

    fields = [
        "Index Selection Methodology",
        "Index Weighting Methodology",
        "Index Tracked"
    ]

    results = {"Ticker": etf_symbol}

    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    try:
        driver.get(url)

        for field in fields:
            try:
                # Wait for the label element to appear in the DOM.
                label = WebDriverWait(driver, timeout).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(text(), '{field}')]"))
                )

                # Wait until the adjacent value has replaced its "loading..." placeholder.
                value_div = WebDriverWait(driver, timeout).until(_value_once_loaded(label))

                results[field] = value_div.text.strip()
            except Exception:
                # Best effort: one missing or slow field must not abort the others.
                results[field] = "Not found"
    finally:
        # Always release the browser, even when navigation fails.
        driver.quit()

    return results


In [22]:
# Tickers to look up on etf.com
etf_list = ["QQQ", "ARKK", "JEPI", "SPY"]
# Scrape every ticker in list order, then tabulate one row per ETF
all_results = list(map(get_etf_methodology_fields, etf_list))
df = pd.DataFrame(data=all_results)
print(df)

  Ticker Index Selection Methodology Index Weighting Methodology  \
0    QQQ             NASDAQ - Listed                  Market Cap   
1   ARKK                 Proprietary                 Proprietary   
2   JEPI                 Proprietary                 Proprietary   
3    SPY                   Committee                  Market Cap   

         Index Tracked  
0     NASDAQ 100 Index  
1  No Underlying Index  
2  No Underlying Index  
3              S&P 500  


In [23]:
# Write the scraped table to an Excel workbook, leaving out the DataFrame index column
df.to_excel(excel_writer="etf_methodology_results.xlsx", index=False)
print("Saved as etf_methodology_results.xlsx")

Saved as etf_methodology_results.xlsx
