In [94]:
#!apt update
#!apt install chromium-chromedriver
! pip install selenium

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait

import pandas as pd



In [104]:

def extract_value_from_format(input):
    value = None    # TODO: maybe use NaN instead!?

    input = input.replace(",","") # erase every ",", they just show the thousends: 1,234. To spilt decimals, the "." is used
    if "%" in input:
        value = float(input.replace("%", ""))
    elif "M" in input:
        value = float(input.replace("M", "")) * 1000 * 1000
    elif "B" in input:
        value = float(input.replace("B", "")) * 1000 * 1000 * 1000
    elif input.replace('.','',1).isdigit():
        value = float(input)
    elif "N/A" in input:
        pass  # value = None
    else:
        print("input not preprocessed: ", input)

    return value

In [105]:
def get_key_from_metric_label(label):
    return label.replace(" ", "_")

def add_metrics_from_site(driver, metrics, metrics_by_label):
    
    if len(driver.find_elements(By.ID, 'Col1-0-KeyStatistics-Proxy')) == 0:
        print("ERROR: no key-statistics page found! Returns empty metrics")
        return metrics + [None] * (len(metrics_by_label) - 1) # return placeholders
    
    
    metrics_body = driver.find_element(By.ID, 'Col1-0-KeyStatistics-Proxy')
    
    for metric_label in metrics_by_label:
        if metric_label == Metrics.TICKER: # tickername only for dataframe-label reasons. Maybe change logic to more sothisticated list 
            continue
            
        market_cap_element = metrics_body.find_element(By.XPATH, "//tr[contains(., '"+metric_label+"')]")
        child_elements = market_cap_element.find_elements(By.XPATH, "./*")

        assert(len(child_elements) == 2)
        
        # extract values from format
        value = extract_value_from_format(child_elements[1].text) 
        metrics.append(value)
        
        # TODO!
        #if get_key_from_metric_label(child_elements[0].text) != metric_label:
        #    print("Caution: modified metric-name from website " + get_key_from_metric_label(child_elements[0].text) + " differs from the used column-key: "+ metric_label )
        
        
        #metrics[get_key_from_metric_label(child_elements[0].text)] = child_elements[1].text
        #metrics.append((child_elements[0].text, child_elements[1].text))  # TODO return metric_label value (but be aware of the contains-search!)

    return metrics
    

In [106]:
# supporting class with constants from the website (keys etc.)
class Metrics(object):
    
    # Valuation Measures
    MARKET_CAP = 'Market Cap'
    MARKET_CAP_KEY = 'Market_Cap'
    ENTERPRISE_VALUE = 'Enterprise Value'
    ENTERPRISE_VALUE_KEY = 'Enterprise_Value'
    
    
    # Profitability
    PM = "Profit Margin"
    OM = "Operating Margin"
    
    # Management Effectiveness
    ROA = "Return on Assets"
    ROE = "Return on Equity"
    
    # Balance Sheet
    TC = "Total Cash (mrq)"  # CAUTION! search only by contains, not equals!
    TCPS = "Total Cash Per Share (mrq)"
    TD = "Total Debt (mrq)"  # CAUTION! search only by contains, not equals!
    TDE = "Total Debt/Equity (mrq)"
    # ...
    
    # Cash Flow Statement
    OCF = "Operating Cash Flow"
    LFCF = "Levered Free Cash Flow"
    
    # Dividiens & Splits
    FADR = "Forward Annual Dividend Rate"
    FADY = "Forward Annual Dividend Yield"
    TADR = "Trailing Annual Dividend Rate"
    TADY = "Trailing Annual Dividend Yield"
    Y5ADY = "5 Year Average Dividend Yield"
    PR = "Payout Ratio"
    # ...
    
    base = []
    TICKER = "Ticker"
    
    
    def __init__(self):
        self.base = [self.TICKER]
    
    def basics(self, addBase=False):
        if addBase:
            return self.base + [self.MARKET_CAP, self.ENTERPRISE_VALUE]
        else:
            return [self.MARKET_CAP, self.ENTERPRISE_VALUE]
    
    def diviends(self, addBase=False):
        if addBase:
            return self.base + [self.FADR, self.FADY, self.TADR, self.TADY, self.Y5ADY, self.PR]
        else:
            return [self.FADR, self.FADY, self.TADR, self.TADY, self.Y5ADY, self.PR]
        
    
    def dividends_slides(self, addBase=False):
        if addBase:
            return self.base + [self.FADY, self.TADY, self.Y5ADY, self.PR, self.PM, self.ROE, self.TC]
        else:
            return [self.FADY, self.TADY, self.Y5ADY, self.PR, self.PM, self.ROE, self.TC]
    
    def stability_slides(self, addBase=False):
        if addBase:
            return self.base + [self.TDE, self.OCF, self.LFCF]
        else:
            return [self.TDE, self.OCF, self.LFCF]
    
    def from_slides(self):
        return self.dividends_slides(addBase=True) + self.stability_slides()
    
    

class Tickerinfo(object):  # for additional type security
    
    def __init__(self, name, url):
        self.name = name
        self.url = url
        

In [107]:
import time 

def get_nasdaq_100_tickers():
    # get nasdaq-100 tickers from
    # https://www.nasdaq.com/market-activity/quotes/nasdaq-ndx-index

    print("set up driver for nasdaq website")
    from selenium.webdriver.support.wait import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC

    options = webdriver.ChromeOptions()
    #options.add_argument('--headless') # for not displaying the graphical environment, shows virtualized browser without GUI
    options.add_argument('--no-sandbox') # so that it can access machine resources, blocking sandbox processes it can access whatever
    options.add_argument('--disable-dev-shm-usage')  # colab does not have enough memory
    # open it, go to a website, and get results
    driver = webdriver.Chrome(options=options)


    try:
        url = "https://www.nasdaq.com/market-activity/quotes/nasdaq-ndx-index"
        driver.get(url)

        time.sleep(5)
        # Accept cookies by clicking the button with the specified ID
        #iframe = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, 'onetrust-banner-sdk')))

        print("accept cookies")
        #print(driver.page_source)
        iframe = driver.find_element(By.ID, 'onetrust-banner-sdk')
        accept_cookies_button = iframe.find_element(By.ID, 'onetrust-accept-btn-handler')
        accept_cookies_button.click()


        print("get nasdaq-100 tickers")

        # call url with tickers:

        tab = driver.find_element(By.TAG_NAME, 'tbody')
        tickers = []
        assert(tab)
        elements = tab.find_elements(By.TAG_NAME, 'tr')

        print("amount of tickers: ", len(elements))
        #elements = elements[:5]  # TODO, take all tickers (only for testing)


        links = [e.find_element(By.TAG_NAME, 'a') for e in elements]

        for l in links:
            tickers.append(Tickerinfo(l.text, l.get_attribute("href")))

        print("getting tickers finished")
    finally:
        # Close the WebDriver
        driver.quit()

    print()
    print("finished scraping nasdaq website")
    
    return tickers


In [108]:
def get_trending_tickers_yahoo(driver):
    print("call trending tickers")
    # call url with tickers:
    driver.get("https://finance.yahoo.com/trending-tickers")
    
    tab = driver.find_element(By.TAG_NAME, 'tbody')
    tickers = []
    assert(tab)
    elements = tab.find_elements(By.TAG_NAME, 'tr')
    
    print("amount of tickers: ", len(elements))
    #elements = elements[:5]  # TODO, take all tickers (only for testing)
    
    
    links = [e.find_element(By.TAG_NAME, 'a') for e in elements]

    for l in links:
        tickers.append(Tickerinfo(l.text, l.get_attribute("href")))
    
    print("getting tickers finished")
    return tickers

In [109]:
print("set up driver for yahoo finance website")

options = webdriver.ChromeOptions()
options.add_argument('--headless') # for not displaying the graphical environment, shows virtualized browser without GUI
options.add_argument('--no-sandbox') # so that it can access machine resources, blocking sandbox processes it can access whatever
options.add_argument('--disable-dev-shm-usage')  # colab does not have enough memory
# open it, go to a website, and get results
driver = webdriver.Chrome(options=options)

url = "https://finance.yahoo.com/"
driver.get(url)

try:
    # Accept cookies by clicking the button with the specified ID
    print("accept cookies")
    iframe = driver.find_element(By.CLASS_NAME, 'con-wizard')
    accept_cookies_button = iframe.find_element(By.CLASS_NAME, 'accept-all')
    accept_cookies_button.click()
    
    #tickers = get_trending_tickers_yahoo(driver)
    tickers = get_nasdaq_100_tickers()
    
    print("get metrics for "+str(len(tickers))+" tickers")
    required_metrics = Metrics().from_slides()
    
    df_metrics = pd.DataFrame(columns=[get_key_from_metric_label(label) for label in required_metrics])
    
    # call metric-webpage for each ticker and scrape values
    for ticker in tickers:
        #if not ticker.name == "ES=F":
        #    continue
            
        tickername = ticker.name
        print()
        print("ticker: ", tickername)
        url = "https://finance.yahoo.com/quote/"+tickername+"/key-statistics?p="+tickername
        driver.get(url)

        metrics = [tickername]
         # get metric values from website
        print("start scraping metrics for ticker from: ", driver.current_url)
        metrics = add_metrics_from_site(driver, metrics, required_metrics)

         # add metrics as new last row to df
        df_metrics.loc[len(df_metrics)] = metrics

    #print(df_metrics)

finally:
    # Close the WebDriver
    driver.quit()
    
print()
print("finished scraping")

set up driver for yahoo finance website
accept cookies
set up driver for nasdaq website
accept cookies
get nasdaq-100 tickers
amount of tickers:  78
getting tickers finished

finished scraping nasdaq website
get metrics for 78 tickers

ticker:  AAPL
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/AAPL/key-statistics?p=AAPL

ticker:  ABNB
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/ABNB/key-statistics?p=ABNB


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  ADBE
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/ADBE/key-statistics?p=ADBE


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  ADI
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/ADI/key-statistics?p=ADI

ticker:  ADP
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/ADP/key-statistics?p=ADP

ticker:  AEP
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/AEP/key-statistics?p=AEP

ticker:  ALGN
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/ALGN/key-statistics?p=ALGN


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  AMGN
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/AMGN/key-statistics?p=AMGN

ticker:  ANSS
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/ANSS/key-statistics?p=ANSS


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  ASML
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/ASML/key-statistics?p=ASML

ticker:  AVGO
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/AVGO/key-statistics?p=AVGO

ticker:  AZN
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/AZN/key-statistics?p=AZN

ticker:  BIIB
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/BIIB/key-statistics?p=BIIB


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  BKNG
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/BKNG/key-statistics?p=BKNG


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  BKR
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/BKR/key-statistics?p=BKR

ticker:  CDNS
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/CDNS/key-statistics?p=CDNS


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  CEG
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/CEG/key-statistics?p=CEG


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  CHTR
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/CHTR/key-statistics?p=CHTR


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  CMCSA
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/CMCSA/key-statistics?p=CMCSA

ticker:  COST
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/COST/key-statistics?p=COST

ticker:  CPRT
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/CPRT/key-statistics?p=CPRT


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  CRWD
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/CRWD/key-statistics?p=CRWD


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  CSCO
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/CSCO/key-statistics?p=CSCO

ticker:  CSGP
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/CSGP/key-statistics?p=CSGP


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  CSX
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/CSX/key-statistics?p=CSX

ticker:  CTAS
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/CTAS/key-statistics?p=CTAS

ticker:  CTSH
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/CTSH/key-statistics?p=CTSH

ticker:  DDOG
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/DDOG/key-statistics?p=DDOG


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  DLTR
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/DLTR/key-statistics?p=DLTR


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  EA
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/EA/key-statistics?p=EA


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  EXC
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/EXC/key-statistics?p=EXC

ticker:  FAST
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/FAST/key-statistics?p=FAST

ticker:  GEHC
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/GEHC/key-statistics?p=GEHC


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  GFS
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/GFS/key-statistics?p=GFS


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  GILD
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/GILD/key-statistics?p=GILD

ticker:  GOOGL
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/GOOGL/key-statistics?p=GOOGL


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  IDXX
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/IDXX/key-statistics?p=IDXX


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  ILMN
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/ILMN/key-statistics?p=ILMN


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  INTU
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/INTU/key-statistics?p=INTU


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  ISRG
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/ISRG/key-statistics?p=ISRG


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  KDP
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/KDP/key-statistics?p=KDP

ticker:  KHC
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/KHC/key-statistics?p=KHC

ticker:  KLAC
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/KLAC/key-statistics?p=KLAC

ticker:  LCID
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/LCID/key-statistics?p=LCID


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  LRCX
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/LRCX/key-statistics?p=LRCX

ticker:  LULU
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/LULU/key-statistics?p=LULU


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  MAR
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/MAR/key-statistics?p=MAR


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  MCHP
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/MCHP/key-statistics?p=MCHP

ticker:  MDLZ
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/MDLZ/key-statistics?p=MDLZ

ticker:  MELI
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/MELI/key-statistics?p=MELI


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  META
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/META/key-statistics?p=META


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  MNST
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/MNST/key-statistics?p=MNST


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  MRVL
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/MRVL/key-statistics?p=MRVL

ticker:  NFLX
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/NFLX/key-statistics?p=NFLX


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  NXPI
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/NXPI/key-statistics?p=NXPI

ticker:  ODFL
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/ODFL/key-statistics?p=ODFL

ticker:  ON
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/ON/key-statistics?p=ON


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  ORLY
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/ORLY/key-statistics?p=ORLY


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  PANW
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/PANW/key-statistics?p=PANW


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  PAYX
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/PAYX/key-statistics?p=PAYX

ticker:  PCAR
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/PCAR/key-statistics?p=PCAR

ticker:  PDD
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/PDD/key-statistics?p=PDD


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  PEP
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/PEP/key-statistics?p=PEP

ticker:  REGN
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/REGN/key-statistics?p=REGN


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  SBUX
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/SBUX/key-statistics?p=SBUX


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  SGEN
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/SGEN/key-statistics?p=SGEN


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  SIRI
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/SIRI/key-statistics?p=SIRI


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  SNPS
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/SNPS/key-statistics?p=SNPS


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  TEAM
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/TEAM/key-statistics?p=TEAM


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  TMUS
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/TMUS/key-statistics?p=TMUS


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  TTD
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/TTD/key-statistics?p=TTD


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  VRSK
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/VRSK/key-statistics?p=VRSK


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  VRTX
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/VRTX/key-statistics?p=VRTX


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  WBD
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/WBD/key-statistics?p=WBD


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  WDAY
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/WDAY/key-statistics?p=WDAY


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  XEL
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/XEL/key-statistics?p=XEL

ticker:  ZM
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/ZM/key-statistics?p=ZM


  df_metrics.loc[len(df_metrics)] = metrics



ticker:  ZS
start scraping metrics for ticker from:  https://finance.yahoo.com/quote/ZS/key-statistics?p=ZS


  df_metrics.loc[len(df_metrics)] = metrics



finished scraping


In [110]:
df_metrics

Unnamed: 0,Ticker,Forward_Annual_Dividend_Yield,Trailing_Annual_Dividend_Yield,5_Year_Average_Dividend_Yield,Payout_Ratio,Profit_Margin,Return_on_Equity,Total_Cash_(mrq),Total_Debt/Equity_(mrq),Operating_Cash_Flow,Levered_Free_Cash_Flow
0,AAPL,0.51,0.49,0.82,15.33,25.31,171.95,6.155000e+10,199.42,1.105400e+11,8.218000e+10
1,ABNB,,0.00,,0.00,56.87,74.47,1.096000e+10,25.31,4.280000e+09,2.880000e+09
2,ADBE,,0.00,0.12,0.00,27.11,33.97,7.520000e+09,25.96,8.030000e+09,7.320000e+09
3,ADI,1.88,1.88,1.83,50.99,26.94,9.20,9.580600e+08,20.96,4.820000e+09,3.310000e+09
4,ADP,2.44,2.17,1.98,59.38,19.08,115.46,1.460000e+09,108.45,3.820000e+09,3.540000e+09
...,...,...,...,...,...,...,...,...,...,...,...
73,WBD,,0.00,,0.00,-11.48,-9.91,2.470000e+09,97.11,6.740000e+09,2.150000e+10
74,WDAY,,0.00,,0.00,0.96,1.11,6.880000e+09,49.45,1.850000e+09,1.700000e+09
75,XEL,3.42,3.37,2.74,64.79,11.75,10.33,6.350000e+08,157.06,5.120000e+09,-1.030000e+09
76,ZM,,0.00,,0.00,5.21,3.56,6.490000e+09,1.07,1.460000e+09,1.620000e+09


In [3]:
import os

base = '/'.join(os.getcwd().split('/')[:-1])
datasetpath = os.path.join(base, 'Dataset')
df_metrics.to_csv(os.path.join(datasetpath, 'data.csv'))

'/home/jorge/Desktop/Data science/4/WA/Final/StockAnalysis-tool/Dataset'

In [74]:
# get nasdaq-100 tickers from
# https://www.nasdaq.com/market-activity/quotes/nasdaq-ndx-index

from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

options = webdriver.ChromeOptions()
#options.add_argument('--headless') # for not displaying the graphical environment, shows virtualized browser without GUI
options.add_argument('--no-sandbox') # so that it can access machine resources, blocking sandbox processes it can access whatever
options.add_argument('--disable-dev-shm-usage')  # colab does not have enough memory
# open it, go to a website, and get results
driver = webdriver.Chrome(options=options)


try:
    url = "https://www.nasdaq.com/market-activity/quotes/nasdaq-ndx-index"
    driver.get(url)
    
    # Accept cookies by clicking the button with the specified ID
    iframe = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, 'onetrust-banner-sdk')))
    
    print("accept cookies")
    iframe = driver.find_element(By.ID, 'onetrust-banner-sdk')
    accept_cookies_button = iframe.find_element(By.ID, 'onetrust-accept-btn-handler')
    accept_cookies_button.click()
    
    
    print("call nasdaq-100 tickers")

    

    # call url with tickers:
    
    tab = driver.find_element(By.TAG_NAME, 'tbody')
    tickers = []
    assert(tab)
    elements = tab.find_elements(By.TAG_NAME, 'tr')
    
    print("amount of tickers: ", len(elements))
    #elements = elements[:5]  # TODO, take all tickers (only for testing)
    
    
    links = [e.find_element(By.TAG_NAME, 'a') for e in elements]

    for l in links:
        tickers.append(Tickerinfo(l.text, l.get_attribute("href")))
    
    print("getting tickers finished")
finally:
    # Close the WebDriver
    driver.quit()
    
print()
print("finished scraping")


accept cookies
<html lang="en" dir="ltr" prefix="content: http://purl.org/rss/1.0/modules/content/  dc: http://purl.org/dc/terms/  foaf: http://xmlns.com/foaf/0.1/  og: http://ogp.me/ns#  rdfs: http://www.w3.org/2000/01/rdf-schema#  schema: http://schema.org/  sioc: http://rdfs.org/sioc/ns#  sioct: http://rdfs.org/sioc/types#  skos: http://www.w3.org/2004/02/skos/core#  xsd: http://www.w3.org/2001/XMLSchema# " class=" js"><head>
  <meta charset="utf-8">
<script async="" type="text/javascript" src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js" data-ad-client="ca-pub-1861191755926748"></script><script id="#livevalidationscript" src="https://img04.en25.com/i/livevalidation_standalone.compressed.js"></script><script src="https://www.googletagmanager.com/gtm.js?id=GTM-TB9QCTB&amp;gtm_auth=AbgJsDZWx9ClJu0CVnQWXA&amp;gtm_preview=env-10&amp;gtm_cookies_win=x" async=""></script><script async="" src="//c.amazon-adsystem.com/aax2/apstag.js"></script><script async="" type="text/j

getting tickers finished
[<__main__.Tickerinfo object at 0x00000256DF53ADD0>, <__main__.Tickerinfo object at 0x00000256DF216500>, <__main__.Tickerinfo object at 0x00000256DF47AEF0>, <__main__.Tickerinfo object at 0x00000256E4121420>, <__main__.Tickerinfo object at 0x00000256DF47BEE0>, <__main__.Tickerinfo object at 0x00000256E3FCF400>, <__main__.Tickerinfo object at 0x00000256E3FCD4B0>, <__main__.Tickerinfo object at 0x00000256E00DB8E0>, <__main__.Tickerinfo object at 0x00000256E3FCF340>, <__main__.Tickerinfo object at 0x00000256E3FCD420>, <__main__.Tickerinfo object at 0x00000256DF43EA10>, <__main__.Tickerinfo object at 0x00000256DF43E8C0>, <__main__.Tickerinfo object at 0x00000256DF43EB00>, <__main__.Tickerinfo object at 0x00000256DF43E4A0>, <__main__.Tickerinfo object at 0x00000256DF43FB80>, <__main__.Tickerinfo object at 0x00000256DF43F310>, <__main__.Tickerinfo object at 0x00000256DF43EF80>, <__main__.Tickerinfo object at 0x00000256DF43DD20>, <__main__.Tickerinfo object at 0x00000

In [77]:
print(len(tickers))
for t in tickers:
    print(t.name)

83
ABNB
ADBE
ADI
ADSK
AEP
ALGN
AMAT
AMD
AMGN
AMZN
ANSS
AVGO
AZN
BIIB
BKNG
BKR
CEG
CHTR
CMCSA
CPRT
CRWD
CSCO
CSGP
CSX
CTAS
CTSH
DDOG
DLTR
DXCM
EA
EBAY
ENPH
EXC
FANG
FAST
FTNT
GFS
GILD
HON
IDXX
ILMN
INTU
JD
KDP
KHC
KLAC
LRCX
LULU
MAR
MCHP
MDLZ
MELI
MNST
MRNA
MRVL
NVDA
NXPI
ODFL
ON
ORLY
PANW
PAYX
PCAR
PEP
PYPL
QCOM
ROST
SBUX
SGEN
SIRI
SNPS
TEAM
TMUS
TTD
TXN
VRSK
VRTX
WBA
WBD
WDAY
XEL
ZM
ZS
