In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common import TimeoutException
import datetime
import pandas as pd

In [3]:
def getTreemapAndListData():

    #GET WEB CONTENT-----------------------------------------------------
    #avoid launching browser UI
    options = Options()
    options.add_argument('--headless=new')

    driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))

    sectors = ['Technology', 'Financial Services', 'Healthcare', 'Consumer Cyclical', 'Industrials', 'Communication Services', 'Consumer Defensive', 'Energy', 'Basic Materials', 'Real Estate', 'Utilities']
    tree_name_all = []
    tree_weight_all = []
    tree_sector_all = []
    company_name_all = []
    company_sector_all = []

    for sector in sectors:

        new_sector = sector.replace(' ', '-')
        url = 'https://finance.yahoo.com/sectors/' + new_sector
        print(url)
        driver.get(url)

        #in case a cookie consent modal appears
        try:
            consent_overlay = WebDriverWait(driver, 1).until(EC.presence_of_element_located((By.CSS_SELECTOR, '.consent-overlay')))
            accept_all_button = consent_overlay.find_element(By.CSS_SELECTOR, '.accept-all')
        except:
            pass
        

        #EXTRACT TREEMAP DATA----------------------------------------------------
        industries = driver.find_elements(By.XPATH, "//td[@class='svelte-vatrz8']")

        #convert result to text
        industries_text = [elem.text for elem in industries]

        #store the industry names and market weights
        name = []
        weight = []

        for i, elem in enumerate(industries_text):
            if (i%2)==0: #even indices contain the names
                if 'All' in elem:
                    continue
                name.append(elem)
            else: #odd indices contain the weights
                if '100' in elem:
                    continue
                industry_weight = elem
                industry_weight = float(industry_weight.split('%')[0]) #remove % symbol and convert string to float
                weight.append(industry_weight)

        tree_name_all.extend(name)
        tree_weight_all.extend(weight)
        tree_sector_all.extend([sector]*len(name))

    
        #EXTRACT COMPANY LIST---------------------------------------------------
        companies = driver.find_elements(By.XPATH, "//span[@class='svelte-1rvxuc5 longName']")
        companies = companies[0:10]

        #convert result to text
        companies_text = [elem.text for elem in companies]
        company_name_all.extend(companies_text)
        company_sector_all.extend([sector]*len(companies_text))

    market_weights = pd.DataFrame({'Sector':tree_sector_all,
                                   'Name':tree_name_all,
                                   'Market Weight':tree_weight_all}) 
    companies = pd.DataFrame({'Sector':company_sector_all,
                             'Company':company_name_all})
    
    #companies.to_csv('companies.csv', index=False)

    now = datetime.now()
    time_updated = now.strftime('%d/%m/%Y %H:%M')

    driver.close()

    #return(market_weights, companies, time_updated)

In [4]:
getTreemapAndListData()

https://finance.yahoo.com/sectors/Technology
https://finance.yahoo.com/sectors/Financial-Services
https://finance.yahoo.com/sectors/Healthcare
https://finance.yahoo.com/sectors/Consumer-Cyclical
https://finance.yahoo.com/sectors/Industrials
https://finance.yahoo.com/sectors/Communication-Services
https://finance.yahoo.com/sectors/Consumer-Defensive
https://finance.yahoo.com/sectors/Energy
https://finance.yahoo.com/sectors/Basic-Materials
https://finance.yahoo.com/sectors/Real-Estate
https://finance.yahoo.com/sectors/Utilities


In [2]:

def getStocksDataOld():

    driver = webdriver.Chrome()

    sectors = ['Technology', 'Financial Services', 'Healthcare', 'Consumer Cyclical', 'Industrials', 'Communication Services', 'Consumer Defensive', 'Energy', 'Basic Materials', 'Real Estate', 'Utilities']

    price_all = []
    change_all = []
    volume_all = []
    sector_all = []

    for sector in sectors:

        new_sector = sector.replace(' ', '-')

        url = 'https://finance.yahoo.com/screener/predefined/sec-ind_sec-largest-equities_' + new_sector
        driver.get(url)

        #extract data
        price = driver.find_elements(By.XPATH, "//td[@aria-label='Price (Intraday)']")
        change = driver.find_elements(By.XPATH, "//td[@aria-label='% Change']")
        volume = driver.find_elements(By.XPATH, "//td[@aria-label='Volume']")

        #convert to text
        price_text = [elem.text for elem in price]
        change_text = [elem.text for elem in change]
        volume_text = [elem.text for elem in volume]

        #format text
        change_text = [float(elem.split('%')[0]) for elem in change_text]
        volume_text = [float(elem.split('M')[0]) if 'M' in elem else (float(elem.replace(',', ''))/1000000) for elem in volume_text]

        #store values
        price_all.extend(price_text)
        change_all.extend(change_text)
        volume_all.extend(volume_text)
        sector_all.extend([sector]*len(price_text))

    stocksData = pd.DataFrame({'Sector': sector_all,
                             'Price': price_all,
                             'Percent Change': change_all,
                             'Volume': volume_all})
    
    driver.close()
    
    return stocksData


In [1]:
import yahoo_fin.stock_info as si
import yfinance as yf
import pandas as pd

In [3]:
tickers = si.tickers_nasdaq()
len(tickers)

4925

In [5]:
aacg = yf.Ticker("AACG")

In [8]:
info = aacg.info
info.keys()

dict_keys(['address1', 'address2', 'city', 'zip', 'country', 'phone', 'fax', 'website', 'industry', 'industryKey', 'industryDisp', 'sector', 'sectorKey', 'sectorDisp', 'longBusinessSummary', 'fullTimeEmployees', 'companyOfficers', 'compensationAsOfEpochDate', 'maxAge', 'priceHint', 'previousClose', 'open', 'dayLow', 'dayHigh', 'regularMarketPreviousClose', 'regularMarketOpen', 'regularMarketDayLow', 'regularMarketDayHigh', 'exDividendDate', 'beta', 'forwardPE', 'volume', 'regularMarketVolume', 'averageVolume', 'averageVolume10days', 'averageDailyVolume10Day', 'bid', 'ask', 'bidSize', 'askSize', 'marketCap', 'fiftyTwoWeekLow', 'fiftyTwoWeekHigh', 'priceToSalesTrailing12Months', 'fiftyDayAverage', 'twoHundredDayAverage', 'currency', 'enterpriseValue', 'profitMargins', 'floatShares', 'sharesOutstanding', 'sharesShort', 'sharesShortPriorMonth', 'sharesShortPreviousMonthDate', 'dateShortInterest', 'sharesPercentSharesOut', 'heldPercentInstitutions', 'shortRatio', 'shortPercentOfFloat', 'imp

In [26]:
tickers[0]

'AACG'

In [32]:
si.get_quote_data('aacg')

AssertionError: Invalid response from server.  Check if ticker is
                              valid.

In [25]:
# USING yahoo_fin

for ticker in tickers:
    table = si.get_quote_data(ticker)

AssertionError: Invalid response from server.  Check if ticker is
                              valid.

In [4]:
# USING yfinance
# ten minutes >:(

sectors, prices, volumes, fifty_day_averages = ([None]*len(tickers) for i in range(4))

for i, ticker in enumerate(tickers):
    ticker_obj = yf.Ticker(ticker)

    try:
        sector = ticker_obj.info['sector']
    except:
        sector = -1
    sectors[i] = sector

    try:
        price = ticker_obj.info['currentPrice']
    except:
        price = str(-1)
    prices[i] = price

    try:
        volume = ticker_obj.info['volume']
    except:
        volume = str(-1)
    volumes[i] = volume

    try:
        fifty_day_average = ticker_obj.info['fiftyDayAverage']
    except: fifty_day_average = str(-1)
    fifty_day_averages[i] = fifty_day_average
        
    
data_dict = {'Ticker': tickers,
             'Sector': sectors,
             'Price': prices,
             'Volume': volumes,
             'Fifty Day Average': fifty_day_averages}

data = pd.DataFrame(data_dict)
data.to_csv('data/stock_data.csv', index=False)



404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/ZAZZT?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=ZAZZT&crumb=Us02swnnvHJ
404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/ZBZZT?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=ZBZZT&crumb=Us02swnnvHJ
404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/ZCZZT?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=ZCZZT&crumb=Us02swnnvHJ
404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/ZXYZ.A?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=fina

In [31]:
aacg = yf.Ticker(ticker[0])
aacg.info

{'address1': '5301 Stevens Creek Boulevard',
 'city': 'Santa Clara',
 'state': 'CA',
 'zip': '95051',
 'country': 'United States',
 'phone': '800 227 9770',
 'fax': '866 497 1134',
 'website': 'https://www.agilent.com',
 'industry': 'Diagnostics & Research',
 'sector': 'Healthcare',
 'longBusinessSummary': "Agilent Technologies, Inc. provides application focused solutions to the life sciences, diagnostics, and applied chemical markets worldwide. The company operates in three segments: Life Sciences and Applied Markets, Diagnostics and Genomics, and Agilent CrossLab. The Life Sciences and Applied Markets segment offers liquid chromatography systems and components; liquid chromatography mass spectrometry systems; gas chromatography systems and components; gas chromatography mass spectrometry systems; inductively coupled plasma mass spectrometry instruments; atomic absorption instruments; microwave plasma-atomic emission spectrometry instruments; inductively coupled plasma optical emissio