<h1>Yahoo Finance Ticker Statistics Scraper</h1>
<h3>This script takes a list of tickers and scrapes each ticker's key statistics from its Yahoo Finance statistics page.</h3>

In [40]:
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service as ChromeService
import pandas as pd
from datetime import date


In [33]:
# Symbols to scrape. The order of this list is the order in which the
# pages are visited and in which the ticker columns are later combined.
tickers = [
    'CRWD', 'TEAM', 'ZS', 'NET', 'SNOW',
    'HUBS', 'U', 'DDOG', 'DOCN', 'ZM',
    'SEMR', 'MDB', 'SHOP',
]

# Filled by the scraping cell: maps '<TICKER>_stats' to that ticker's
# one-column DataFrame of statistic values.
hashtable = dict()

<h2>Retrieving the columns and renaming accordingly</h2>

In [34]:
# Build the "Metrics" column: the label found in the first <td> of every
# row of every statistics table on one reference page (TSLA). The per-ticker
# value columns scraped later are lined up against these labels by position.
driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))
try:
    driver.maximize_window()
    driver.get('https://finance.yahoo.com/quote/TSLA/key-statistics?p=TSLA')

    dfs = []
    tables = driver.find_elements(by='xpath', value='//*[@id="Col1-0-KeyStatistics-Proxy"]/section/div[2]//table')
    for table in tables:
        spec_name = []
        for row in table.find_elements(by='xpath', value=".//tr"):
            box = row.find_elements(by='xpath', value='.//td')
            if not box:
                # Row without <td> cells (e.g. a header-only row): there is
                # no label to read, and box[0] would raise IndexError.
                continue
            spec_name.append(box[0].get_attribute('textContent'))
        dfs.append(pd.DataFrame({"Metrics": spec_name}))
finally:
    # Always close the browser, even if the page or a lookup failed;
    # otherwise every failed run leaves a Chrome process behind.
    driver.quit()

if not dfs:
    raise RuntimeError(
        'No statistics tables were found on the reference page; '
        'the page layout or the XPath may have changed.'
    )

# Concatenate every table that was found (not a fixed dfs[0]..dfs[9]) so this
# cell agrees with the per-ticker scrape, which also keeps all tables.
# ignore_index=True gives a clean 0..n-1 index in one step.
all_stats_df = pd.concat(dfs, ignore_index=True)

# Strip a single trailing digit from each label. Assign the result back
# instead of calling an inplace method on the selected column, which is not
# guaranteed to modify the parent frame (and never does under Copy-on-Write).
all_stats_df['Metrics'] = all_stats_df['Metrics'].replace(regex={r'[0-9]$': ''})
all_stats_df

Unnamed: 0,Metrics
0,Market Cap (intraday)
1,Enterprise Value
2,Trailing P/E
3,Forward P/E
4,PEG Ratio (5 yr expected)
5,Price/Sales (ttm)
6,Price/Book (mrq)
7,Enterprise Value/Revenue
8,Enterprise Value/EBITDA
9,Beta (5Y Monthly)


<h2>Scraping all the ticker stats and storing the dataframes into a hashtable</h2>

In [36]:
# Visit each ticker's key-statistics page and collect the value found in the
# second <td> of every table row. The result for each ticker is stored in
# `hashtable` under '<TICKER>_stats' as a one-column DataFrame (column name =
# ticker symbol) with a fresh 0..n-1 index.
driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))
try:
    driver.maximize_window()
    for ticker in tickers:
        driver.get(f'https://finance.yahoo.com/quote/{ticker}/key-statistics?p={ticker}')

        # One flat list per ticker, filled table by table in page order.
        # This is the same row order as concatenating per-table frames, but
        # avoids repeatedly concatenating onto an initially empty DataFrame.
        spec_item = []
        tables = driver.find_elements(by='xpath', value='//*[@id="Col1-0-KeyStatistics-Proxy"]/section/div[2]//table')
        for table in tables:
            for row in table.find_elements(by='xpath', value=".//tr"):
                box = row.find_elements(by='xpath', value='.//td')
                if not box:
                    # Row without <td> cells: nothing to record.
                    continue
                # A row that has a label cell but no value cell still gets a
                # placeholder, so values stay aligned with the labels by position.
                spec_item.append(box[1].get_attribute('textContent') if len(box) > 1 else None)

        hashtable[f'{ticker}_stats'] = pd.DataFrame({ticker: spec_item})
finally:
    # Always close the browser, even if one of the pages failed part-way.
    driver.quit()

# Show the last ticker's frame as a quick sanity check.
hashtable[f'{tickers[-1]}_stats']

Unnamed: 0,SHOP
0,38.19B
1,32.43B
2,
3,1.43k
4,
5,7.64
6,4.38
7,6.48
8,-16.81
9,1.80


<h2>Combining all the ticker stats</h2>

In [37]:
# Put the metric labels and every ticker's value column side by side.
# Starting from only the 'Metrics' column makes this cell idempotent:
# re-running it rebuilds the table instead of appending a second copy of
# every ticker column onto the already-combined frame.
# A single concat call also replaces one concat per ticker.
all_stats_df = pd.concat(
    [all_stats_df[['Metrics']]] + [hashtable[f'{ticker}_stats'] for ticker in tickers],
    axis=1,
)

all_stats_df

Unnamed: 0,Metrics,CRWD,TEAM,ZS,NET,SNOW,HUBS,U,DDOG,DOCN,ZM,SEMR,MDB,SHOP
0,Market Cap (intraday),40.25B,60.02B,20.59B,19.23B,54.88B,14.89B,12.17B,30.68B,3.72B,24.05B,1.60B,16.82B,38.19B
1,Enterprise Value,38.70B,59.88B,19.93B,19.15B,51.17B,14.39B,12.23B,29.78B,4.02B,18.62B,1.42B,16.21B,32.43B
2,Trailing P/E,,,,,,,,9.69k,,24.94,,,
3,Forward P/E,149.25,142.86,136.99,625.00,1.11k,96.15,,92.59,34.72,21.93,,,1.43k
4,PEG Ratio (5 yr expected),3.68,4.69,2.73,,5.55,2.52,,1.58,,11.09,,,
5,Price/Sales (ttm),21.67,21.28,20.91,23.19,32.58,9.64,9.73,23.30,8.30,5.76,7.07,15.24,7.64
6,Price/Book (mrq),32.84,178.85,38.73,33.12,10.11,17.08,5.33,25.25,77.13,4.01,7.34,25.17,4.38
7,Enterprise Value/Revenue,21.10,21.36,20.55,23.57,31.24,9.43,10.03,21.81,8.17,4.34,6.36,14.98,6.48
8,Enterprise Value/EBITDA,-995.97,-134.62,-74.06,-119.54,-76.57,-608.11,-23.71,469.06,62.70,20.40,-281.86,-50.53,-16.81
9,Beta (5Y Monthly),1.28,1.01,1.03,1.22,,1.60,,1.15,,-0.61,,0.95,1.80


<h2>Exporting to a dated csv file</h2>

In [45]:
# Name the export after today's date in ISO format (YYYY-MM-DD),
# e.g. 'saas_2022-09-05.csv', then write the combined table to it
# using to_csv with its default options.
current_datetime = date.today().isoformat()
csv_name = f'saas_{current_datetime}.csv'
all_stats_df.to_csv(csv_name)

'2022-09-05'