In [3]:
from IPython.display import HTML
from IPython.display import Image

# Usage hint for the ticker input; as the cell's last expression it is rendered as the cell output.
HTML('''Enter stock ticker below and press ENTER.''')

In [3]:
from datetime import datetime
import lxml
from lxml import html
import requests
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
# Format floats with the '{:.0f}' spec (no decimal places) when pandas displays them.
pd.options.display.float_format = '{:.0f}'.format
from ipywidgets import interact
import ipywidgets as widgets
from IPython.display import clear_output
import os

# Set pandas' "display.precision" option to 2.
# NOTE(review): float_format above already fixes how floats are rendered in plain
# DataFrame output, so this option may only affect other renderers — confirm.
pd.set_option("display.precision", 2)

def get_page(url, timeout=30):
    """Fetch ``url`` with browser-like request headers.

    Parameters
    ----------
    url : str
        Address of the page to download.
    timeout : float or tuple, optional
        Passed straight to ``requests.get``; bounds how long the call may
        wait on the server so a stalled connection cannot hang the notebook.

    Returns
    -------
    requests.Response
        The response object returned by ``requests.get``. The status code is
        not checked here.
    """
    # Set up the request headers that we're going to use, to simulate
    # a request by the Chrome browser.
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
        # Only advertise encodings that requests always decodes; 'br' (Brotli)
        # is decoded only when an optional Brotli package is installed, and an
        # undecoded body cannot be parsed as HTML.
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'en-US,en;q=0.9',
        'Cache-Control': 'max-age=0',
        'Pragma': 'no-cache',
        # The HTTP header name is spelled "Referer" (single "r" in the middle).
        'Referer': 'https://google.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'
    }

    return requests.get(url, headers=headers, timeout=timeout)

def parse_rows(table_rows):
    """Convert scraped table-row elements into a DataFrame of cell texts.

    Parameters
    ----------
    table_rows : iterable
        Elements exposing an lxml-style ``xpath`` method; each one is a table
        row whose direct ``div`` children are its cells.

    Returns
    -------
    pandas.DataFrame
        One row per retained table row, one column per cell position. A cell
        that does not yield exactly one text node is stored as NaN.
    """
    parsed_rows = []

    for table_row in table_rows:
        parsed_row = []
        none_count = 0

        for cell in table_row.xpath("./div"):
            try:
                # Unpacking into a 1-tuple raises ValueError unless the query
                # returned exactly one text node.
                (text,) = cell.xpath('.//span/text()[1]')
            except ValueError:
                # np.nan is the supported spelling; the np.NaN alias was
                # removed in NumPy 2.0.
                parsed_row.append(np.nan)
                none_count += 1
            else:
                parsed_row.append(text)

        # Rows with four or more empty cells are dropped.
        if none_count < 4:
            parsed_rows.append(parsed_row)

    return pd.DataFrame(parsed_rows)

def clean_data(df):
    """Reshape a parsed statement table and convert its figures to floats.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame as produced by ``parse_rows``: column 0 holds the row labels and
        the remaining columns hold the cell texts.

    Returns
    -------
    pandas.DataFrame
        Transposed frame whose first column is renamed to ``'Date'`` and whose
        other columns are ``float64``, with thousands separators removed.

    Raises
    ------
    ValueError
        If a non-date cell still cannot be converted to a float after the
        commas are stripped.
    """
    df = df.set_index(0)  # Set the index to the first column: 'Period Ending'.
    df = df.transpose()   # Transpose, so that the header contains the account names

    # Rename the "Breakdown" column to "Date"
    cols = list(df.columns)
    cols[0] = 'Date'

    # Build the result column by column instead of assigning through
    # ``df.iloc[:, i] = ...``: that form writes into the existing object-dtype
    # column on current pandas, so the float64 conversion would not stick.
    # Working by position also stays safe if two accounts share a name.
    converted = [df.iloc[:, 0]]
    for column_index in range(1, df.shape[1]):
        raw = df.iloc[:, column_index]
        without_separators = raw.str.replace(',', '', regex=False)  # Remove the thousands separator
        converted.append(without_separators.astype(np.float64))

    result = pd.concat(converted, axis=1)
    # Plain attribute assignment replaces ``set_axis(..., inplace=False)``,
    # whose ``inplace`` keyword no longer exists in pandas 2.x.
    result.columns = cols
    return result

def scrape_table(url):
    """Download a statement page and return its table as a cleaned DataFrame.

    Parameters
    ----------
    url : str
        Address of the page to scrape.

    Returns
    -------
    pandas.DataFrame
        Result of running ``parse_rows`` and then ``clean_data`` on the
        ``D(tbr)`` row elements found in the page.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    AssertionError
        If the page contains no ``D(tbr)`` row elements.
    """
    # Fetch the page that we're going to parse
    page = get_page(url)

    # Fail early with the HTTP status instead of trying to parse an error page.
    page.raise_for_status()

    # Parse the page with LXML, so that we can start doing some XPATH queries
    # to extract the data that we want
    tree = html.fromstring(page.content)

    # Fetch all div elements which have class 'D(tbr)'
    table_rows = tree.xpath("//div[contains(@class, 'D(tbr)')]")

    # Ensure that some table rows are found; if none are found, then it's possible
    # that Yahoo Finance has changed their page layout, or have detected
    # that you're scraping the page.
    assert len(table_rows) > 0, (
        "No table rows found at " + str(url) + "; the page layout may have "
        "changed or the request may have been blocked."
    )

    return clean_data(parse_rows(table_rows))

# symbol_input = input()

# Text box whose value is read as the ticker symbol, and the Output widget that
# captures whatever the callbacks below print or display.
input_text = widgets.Text()
output = widgets.Output()

# Define function to bind value of the input
@output.capture(clear_output=True,wait=True)
def display_buttons(sender):
    """Scrape the three statements for the entered ticker and show the buttons.

    Reads the ticker from ``input_text``, scrapes the balance sheet, income
    statement and cash-flow pages, and displays one button per statement.
    Clicking a statement button shows that table together with a
    'Download to Excel' button that writes all three tables to one workbook.

    Parameters
    ----------
    sender : object
        The widget that triggered the callback; not used.
    """
    # Ignore surrounding whitespace and refuse to build URLs from an empty ticker.
    symbol = input_text.value.strip()
    if not symbol:
        print("Please enter a stock ticker.")
        return

    quote_url = 'https://finance.yahoo.com/quote/' + symbol

    df_balance_sheet = scrape_table(quote_url + '/balance-sheet?p=' + symbol)
    df_balance_sheet = df_balance_sheet.style.set_caption("Balance Sheet")

    df_income_statement = scrape_table(quote_url + '/financials?p=' + symbol)
    df_income_statement = df_income_statement.style.set_caption("Income Statement")

    df_cash_flow = scrape_table(quote_url + '/cash-flow?p=' + symbol)
    df_cash_flow = df_cash_flow.style.set_caption("Cash Flow")

    def _make_statement_button(description, styled_table):
        # Build a button that shows one statement followed by the download button.
        button = widgets.Button(description=description)

        @output.capture(clear_output=False,wait=True)
        def show_statement(clicked):
            display(styled_table)
            # btn4 is assigned further down; it is only looked up at click time.
            display(btn4)

        button.on_click(show_statement)
        return button

    ###################################################
    btn1 = _make_statement_button('Balance Sheet', df_balance_sheet)
    btn2 = _make_statement_button('Income Statement', df_income_statement)
    btn3 = _make_statement_button('Cash Flow', df_cash_flow)

    input_widgets = widgets.HBox([btn1,btn2,btn3])
    display(input_widgets)

    ###################################################
    btn4 = widgets.Button(description='Download to Excel')

    @output.capture(clear_output=False,wait=True)
    def btn_eventhandler4(x):
        date = datetime.today().strftime('%Y-%m-%d')
        home_dir = os.path.expanduser("~")
        output_dir = os.path.join(home_dir, "Downloads")
        if not os.path.isdir(output_dir):
            print("Downloads folder not found, downloading to tmp directory")
            output_dir = os.path.join(home_dir, "tmp")
            # The fallback folder may not exist yet; create it so the write succeeds.
            os.makedirs(output_dir, exist_ok=True)

        file_path = os.path.join(output_dir, symbol + ' Finances-Scrape-' + date + '.xlsx')

        # The context manager saves and closes the workbook, including when a
        # sheet fails to write; ExcelWriter.save() is not available in pandas 2.x.
        with pd.ExcelWriter(file_path) as writer:
            df_cash_flow.to_excel(writer, sheet_name='Cash Flow Statement')
            df_balance_sheet.to_excel(writer, sheet_name='Balance Sheet')
            df_income_statement.to_excel(writer, sheet_name='Income Statement')

        btn4.style.button_color = 'lightgreen'
        btn4.description = 'Downloaded!'

    btn4.on_click(btn_eventhandler4)


#display(btn4)

# Register display_buttons as the text box's submit callback.
# NOTE(review): Text.on_submit is reported as deprecated in newer ipywidgets
# releases — confirm against the installed version before upgrading.
input_text.on_submit(display_buttons)

# Show the input box, then leave the Output widget as the cell's last
# expression so that it is rendered as the cell result.
display(input_text)
output



#balance_sheet_dropdown = widgets.Dropdown(options = ['Total Assets','Total Liabilities Net Minority Interest','Total Equity Gross Minority Interest','Total Capitalization','Common Stock Equity','Capital Lease Obligations','Net Tangible Assets','Working Capital','Invested Capital','Tangible Book Value','Total Debt','Net Debt','Share Issued','Ordinary Shares Number'])
#display(dropdown_year)

#df_balance_sheet.plot(x='Date',y='Total Assets')
#plot.show()

#df_income_statement
#df_cash_flow

#date = datetime.today().strftime('%Y-%m-%d')
#writer = pd.ExcelWriter('Financial Statement Scrape' + date + '.xlsx')
#df_balance_sheet.to_excel(writer)
#writer.save()

#https://www.mattbutton.com/2019/01/24/how-to-scrape-yahoo-finance-and-extract-fundamental-stock-market-data-using-python-lxml-and-pandas20

Text(value='')

Output()