In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options

## YF Financial Scraper
You can also use this function to scrape the balance sheet and the cash flow statement. Some parameters are hidden behind buttons that need to be clicked before scraping; `depth` is the number of click passes to perform.

In [8]:
# Browser options shared by the scraper functions below: run Chrome headless.
options = Options()
options.add_argument("--headless")

# Start the chromedriver service once; the scrapers pass this same `service`
# object to every webdriver.Chrome(...) they create.
path = "chromedriver.exe"
service = webdriver.chrome.service.Service(path)
service.start()

In [11]:
def get_financial_statement(ticker,type_of_statement="income_statement",depth=1):
    """
    Scrape one financial statement for a ticker from Yahoo Finance.

    Relies on the module-level ``service`` and ``options`` objects to create
    the Chrome driver. The browser window is closed before returning, also
    when scraping fails part-way.

    Parameters
    ----------
    ticker : str
        Ticker symbol inserted into the Yahoo Finance quote URL.
    type_of_statement : str
        One of "income_statement", "balance_sheet" or "cashflow_statement".
        The default is "income_statement".
    depth : int
        Number of passes made over the page's buttons. Each pass clicks every
        button that was not seen in an earlier pass (except the skipped
        navigation/toggle buttons), so rows revealed by one pass can be
        expanded by the next. The default is 1.

    Returns
    -------
    df : dataframe
        One row per statement line item and one column per header entry after
        the first. Values are numeric; cells that cannot be parsed as numbers
        (for example "--") become NaN.

    Raises
    ------
    ValueError
        If type_of_statement is not one of the supported names.
    """
    url_sections = {
        "income_statement": "financials",
        "balance_sheet": "balance-sheet",
        "cashflow_statement": "cash-flow",
    }
    if type_of_statement not in url_sections:
        raise ValueError(
            "type_of_statement must be one of {}, got {!r}".format(
                sorted(url_sections), type_of_statement
            )
        )
    url = "https://finance.yahoo.com/quote/{}/{}?p={}".format(
        ticker, url_sections[type_of_statement], ticker
    )

    # Buttons with these accessible names are never clicked.
    skipped_names = ["","Follow","Quarterly","Annual","prev","next"]

    driver = webdriver.Chrome(service=service, options = options)
    try:
        driver.get(url)
        driver.implicitly_wait(0.2)

        ### clicking dropdown buttons before scraping
        clicked_buttons = []
        for _ in range(depth):
            buttons = driver.find_elements(By.XPATH,  '//section[@class="main svelte-e2c64s"]//button')
            # Only handle buttons that an earlier pass has not already seen.
            buttons = [b for b in buttons if b not in clicked_buttons]
            for button in buttons:
                if button.accessible_name not in skipped_names:
                    # this way of clicking may be required for some of the wrapped buttons
                    driver.execute_script("arguments[0].click();", button)
            clicked_buttons += buttons

        table = driver.find_element(By.XPATH,  '//div[@class="tableContainer svelte-1pgoo1f"]')
        # Read the text while the window is still open.
        text_blob = table.text.split("\n")
    finally:
        # Always release the browser window, even if a lookup above raised.
        driver.close()

    # The first line is the header; its first token is dropped and the rest
    # become the column names (presumably the first token titles the label
    # column -- confirm against the live page).
    heading = text_blob[0].split()
    columns = heading[1:]
    column_count = len(columns)

    # Every following group of (column_count + 1) lines is read as one row:
    # a label line followed by one value line per column.
    income_st_dir = {}
    last_key = None
    for count, row in enumerate(text_blob[1:], start=1):
        if count%(column_count+1) == 1:
            income_st_dir[row] = []
            last_key = row
        else:
            income_st_dir[last_key].append(row)
    df = pd.DataFrame(income_st_dir).T
    df.columns = columns

    for col in df.columns:
        # Strip thousands separators and whitespace only. The minus sign has
        # to survive so negative amounts keep their sign; placeholders such
        # as "--" are turned into NaN by errors='coerce'.
        df[col] = df[col].str.replace(r'[,\s]', '', regex=True)
        df[col] = pd.to_numeric(df[col], errors='coerce')

    return df

## Key Statistics Scraper

In [7]:
def get_key_stat(ticker):
    """
    Scrape the key-statistics table for a ticker from Yahoo Finance.

    Relies on the module-level ``service`` and ``options`` objects to create
    the Chrome driver. The browser window is closed before returning, also
    when scraping fails part-way.

    Parameters
    ----------
    ticker : str
        Ticker symbol inserted into the Yahoo Finance key-statistics URL.

    Returns
    -------
    df : dataframe
        One row per metric and one column per header token. The letters
        T/B/M/K in a value are rewritten to exponent notation before numeric
        conversion; anything that still cannot be parsed becomes NaN.
    """
    #getting key statistics data from yahoo finance for the given ticker
    url = 'https://finance.yahoo.com/quote/{}/key-statistics'.format(ticker)

    driver = webdriver.Chrome(service=service, options = options)
    try:
        driver.get(url)
        driver.implicitly_wait(0.2)

        table = driver.find_element(By.XPATH,  '//div[@class="table-container svelte-104jbnt"]')
        # Read the text while the window is still open.
        text_blob = table.text.split("\n")
    finally:
        # Always release the browser window, even if the lookup above raised.
        driver.close()

    financial_metrics = {}
    header = text_blob[0].split()

    for item in text_blob[1:]:
        parts = item.split()
        # The last len(header) tokens are the values; whatever precedes them
        # is the (possibly multi-word) metric name.
        key_end = len(parts) - len(header)
        key = ' '.join(parts[:key_end])
        values = parts[key_end:]
        financial_metrics[key] = dict(zip(header, values))

    df = pd.DataFrame(financial_metrics).T
    # Turn magnitude suffixes into exponents so to_numeric can parse them,
    # e.g. "2.61T" -> "2.61E+12".
    df = df.replace({'T': 'E+12', 'B': 'E+09', 'M': 'E+06', 'K': 'E+03'}, regex=True)
    df = df.apply(pd.to_numeric, errors='coerce')

    return df