In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import requests
import logging
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains


# Lower the root logger threshold to INFO so the logging.info(...) progress
# messages emitted while scraping are not filtered out.
logging.getLogger().setLevel(logging.INFO) 

In [None]:
class ScrapeTrendingView:
    """Scrape TradingView financial tables for one symbol into DataFrames.

    Creating an instance starts Chrome through Selenium, scrapes the annual
    income statement and the statistics/ratios page of ``company_url`` and
    stores the results in ``self.income_statement`` and ``self.statistics``.
    The browser is shut down before the constructor returns, also when one of
    the scrape steps raises.

    NOTE(review): the driver calls use the Selenium 3 style API
    (``find_element_by_xpath``, positional chromedriver path); later
    Selenium 4 releases dropped them -- confirm the pinned Selenium version.
    NOTE(review): the class names inside the XPath expressions look
    auto-generated by the site and will probably change over time -- verify
    them first when a scrape finds no elements.
    """

    # Raw string: the Windows backslashes must not be read as escape sequences.
    DEFAULT_CHROMEDRIVER_PATH = r"G:\My Drive\Investing\Programming\chromedriver.exe"
    SYMBOLS_URL = "https://www.tradingview.com/symbols/"
    TABLE_ROWS_XPATH = "//div[@class='container-YOfamMRP']/div"
    ARROW_XPATH = "//span[@class='arrow-_PBNXQ7k']"
    CHANGED_ARROW_XPATH = "//span[@class='arrow-_PBNXQ7k hasChanges-_PBNXQ7k']"
    # Safety limit for the "click the next expand arrow" loops.
    MAX_EXPAND_CLICKS = 20
    # Unit suffixes used by the scraped values and their numeric factor.
    UNIT_MULTIPLIERS = {'T': 1000000000000, 'B': 1000000000, 'M': 1000000, 'K': 1000}
    # Fixed conversion rates into USD, keyed by the currency code of the table.
    USD_RATES = {'KRW': 0.000700680009950}

    def __init__(self, company_url='NASDAQ-AAPL', chromedriver_path=DEFAULT_CHROMEDRIVER_PATH):
        """Start Chrome, scrape income statement and statistics, quit Chrome.

        Args:
            company_url: Symbol part of the TradingView URL in
                ``EXCHANGE-TICKER`` form, e.g. ``'NASDAQ-AAPL'`` or
                ``'KRX-005930'``.
            chromedriver_path: Location of the chromedriver executable.
        """
        # Initialize and set the chrome-webdriver options.
        chrome_options = Options()
        chrome_options.add_argument("--start-maximized")

        self.driver = webdriver.Chrome(chromedriver_path, options=chrome_options)
        self.driver.implicitly_wait(5)

        try:
            self.scrapeIncomeStatement(company_url=company_url)
            self.scrapeStatistics(company_url=company_url)
        finally:
            # Always release the browser and the chromedriver process, also
            # when a scrape step failed half-way.
            self.driver.quit()

    def close_cookies_popup(self):
        """Click the accept-all button of the cookie banner.

        The lookup is not guarded: when the button cannot be found the
        exception raised by the driver propagates to the caller.
        """
        cookie_button_xpath = "//button[@class='acceptAll-WvyPjcpY button-OvB35Th_ size-xsmall-OvB35Th_ color-brand-OvB35Th_ variant-primary-OvB35Th_']"
        self.driver.find_element_by_xpath(cookie_button_xpath).click()

    def switch_annual_data(self):
        """Click the button with id ``FY`` to switch the table to annual data."""
        annual_button_xpath = "//button[@id='FY']"
        self.driver.find_element_by_xpath(annual_button_xpath).click()

    def _expand_collapsed_rows(self, arrow_xpath, label):
        """Click expand arrows matching ``arrow_xpath`` until none is left.

        Stops at the first failing lookup/click or after
        ``MAX_EXPAND_CLICKS`` clicks, whichever comes first.

        Args:
            arrow_xpath: XPath of a still collapsed arrow element.
            label: Text used in the progress log messages.

        Returns:
            The number of arrows that were clicked.
        """
        logging.info(f'Start Expanding {label}')
        for clicks_done in range(self.MAX_EXPAND_CLICKS):
            try:
                self.driver.find_element_by_xpath(arrow_xpath).click()
            except Exception as exc:
                # No (clickable) arrow left. ``Exception`` rather than a bare
                # ``except`` so that Ctrl-C still interrupts the scrape.
                logging.info(f'End Expanding {label}: {clicks_done} clicked ({type(exc).__name__})')
                return clicks_done
        logging.info(f'Break expanding {label} after {self.MAX_EXPAND_CLICKS} clicks')
        return self.MAX_EXPAND_CLICKS

    def _table_row_lines(self, label):
        """Return the text lines of every table row element on the current page.

        Raises:
            IndexError: when the page contains no table row at all.
        """
        table_rows = self.driver.find_elements_by_xpath(self.TABLE_ROWS_XPATH)
        logging.info(f'{label}: {len(table_rows)} table rows found')
        if not table_rows:
            raise IndexError(f'{label}: no table rows found with xpath {self.TABLE_ROWS_XPATH}')
        return [row.text.splitlines() for row in table_rows]

    @staticmethod
    def _clean(text):
        """Remove the U+202A / U+202C directional marks wrapped around a cell."""
        return text.replace('\u202a', '').replace('\u202c', '')

    @classmethod
    def _income_row_cells(cls, lines, number_of_periods):
        """Pick label and values from one income-statement row, dropping YoY cells.

        Args:
            lines: Text lines of the row element.
            number_of_periods: Line count of the header row; rows of the same
                length carry no year-over-year growth cells.
        """
        if len(lines) == number_of_periods:
            picked = lines                      # row without YoY growth
        elif 'YoY growth' in lines:
            picked = lines[0::2]                # quarterly layout
        else:
            picked = lines[:1] + lines[1::2]    # annual layout: label, then every 2nd cell
        return [cls._clean(cell) for cell in picked]

    def scraped_data_to_dataframe(self, output):
        """Convert scraped rows into a DataFrame.

        Args:
            output: List of rows. The first row is the header
                (``'Currency: XXX'`` followed by the column labels); every
                other row is a label followed by its values.

        Returns:
            DataFrame indexed by the row labels with the header labels as
            columns. As a side effect ``self.currency`` is set from the header.
        """
        header = output[0]
        data_rows = output[1:]
        self.currency = header[0].replace('Currency: ', '')

        row_labels = [row[0] for row in data_rows]
        # Apply the necessary corrections to turn the text cells into numbers.
        values = self.fixDataValues(input_data=[row[1:] for row in data_rows])
        return pd.DataFrame(values, columns=header[1:], index=row_labels)

    def scrapeIncomeStatement(self, company_url):
        """Scrape the annual income statement into ``self.income_statement``.

        Args:
            company_url: Symbol part of the TradingView URL, e.g. ``'NASDAQ-AAPL'``.
        """
        url = self.SYMBOLS_URL + company_url + "/financials-income-statement/?selected="

        self.driver.get(url)
        self.switch_annual_data()
        self.close_cookies_popup()

        # Expand the collapsed income-statement rows.
        self._expand_collapsed_rows(self.ARROW_XPATH, 'Income Statement Rows')

        # Scrape the data.
        row_lines = self._table_row_lines('Income statement')
        number_of_periods = len(row_lines[0])
        output = [self._income_row_cells(lines, number_of_periods) for lines in row_lines if lines]

        self.income_statement = self.scraped_data_to_dataframe(output=output)

    def scrapeBalanceSheet(self, company_url='KRX-005930'):
        """Open the balance-sheet page and expand its collapsed rows.

        Only navigation and expansion happen here; no table is extracted.
        The opened URL is kept in ``self.base_url``.

        Args:
            company_url: Symbol part of the TradingView URL; the default keeps
                the previously hard-coded symbol.
        """
        logging.info('Start Balance Sheet Scrape')
        self.base_url = self.SYMBOLS_URL + company_url + "/financials-balance-sheet/?selected="
        self.driver.get(self.base_url)

        self.close_cookies_popup()

        self._expand_collapsed_rows(self.CHANGED_ARROW_XPATH, 'Balance Sheet Rows Level-1')
        self._expand_collapsed_rows(self.ARROW_XPATH, 'Balance Sheet Rows Level-2')

    def scrapeCashFlow(self, company_url):
        """Open the cash-flow page and expand its collapsed rows.

        Only navigation and expansion happen here; no table is extracted.

        Args:
            company_url: Symbol part of the TradingView URL, e.g. ``'NASDAQ-AAPL'``.
        """
        logging.info('Start CashFlow Scrape')
        url = self.SYMBOLS_URL + company_url + "/financials-cash-flow/?selected="
        # Navigate to the URL built above (there is no ``self.url`` attribute).
        self.driver.get(url)

        self.close_cookies_popup()

        self._expand_collapsed_rows(self.CHANGED_ARROW_XPATH, 'CashFlow Rows Level-1')
        self._expand_collapsed_rows(self.ARROW_XPATH, 'CashFlow Rows Level-2')

    def scrapeStatistics(self, company_url):
        """Scrape the annual statistics and ratios into ``self.statistics``.

        The browser is left open; the constructor is responsible for quitting it.

        Args:
            company_url: Symbol part of the TradingView URL, e.g. ``'NASDAQ-AAPL'``.
        """
        logging.info('Start Statistics Scrape')
        url = self.SYMBOLS_URL + company_url + "/financials-statistics-and-ratios/?selected="
        self.driver.get(url)
        self.switch_annual_data()

        # Rows with a single line are section captions (Key stats,
        # Profitability ratios, ...) and carry no data; empty rows are skipped too.
        output = [
            [self._clean(cell) for cell in lines]
            for lines in self._table_row_lines('Statistics')
            if len(lines) > 1
        ]

        self.statistics = self.scraped_data_to_dataframe(output=output)

    @classmethod
    def _parse_value(cls, item):
        """Turn one scraped cell into a number where possible.

        * U+2212 minus signs are replaced by an ASCII ``-``.
        * A cell containing an em dash means "no value" and becomes ``None``.
        * A trailing ``T``/``B``/``M``/``K`` scales the number accordingly.
        * Plain integers and decimals become ``int`` / ``float``.
        * Anything else is returned as text (with the minus sign normalised).
        """
        if not isinstance(item, str):
            return item

        text = item.replace('−', '-')
        if '—' in text:
            return None

        stripped = text.strip()
        multiplier = cls.UNIT_MULTIPLIERS.get(stripped[-1:])
        if multiplier is not None:
            try:
                return float(stripped[:-1]) * multiplier
            except ValueError:
                return text

        for convert in (int, float):
            try:
                return convert(stripped)
            except ValueError:
                continue
        return text

    def _usd_rate(self):
        """Return the factor converting ``self.currency`` into USD, or ``None``.

        ``None`` means "do not convert": the table is already in USD or no
        rate is known for its currency (a warning is logged in that case).
        """
        if self.currency == 'USD':
            return None
        rate = self.USD_RATES.get(self.currency)
        if rate is None:
            logging.warning(f'No USD rate for currency {self.currency!r}, values are left unconverted')
            return None
        self.multiplier = rate  # kept as attribute for backward compatibility
        return rate

    def fixDataValues(self, input_data):
        """Convert scraped text cells to numbers and, if needed, to USD.

        Reads ``self.currency`` to decide about the currency conversion.

        NOTE(review): the conversion rate is applied to every numeric cell,
        so ratios, percentages and share counts of a non-USD table are scaled
        as well -- confirm whether that is intended.

        Args:
            input_data: List of rows, each a list of scraped cell texts.

        Returns:
            A new list of rows with ``int``/``float``/``None`` where the cell
            could be interpreted and the original text otherwise.
        """
        rate = self._usd_rate()
        output = []

        for row in input_data:
            output_row = []
            for item in row:
                value = self._parse_value(item)
                if rate is not None and isinstance(value, (int, float)):
                    value = value * rate
                output_row.append(value)
            output.append(output_row)

        return output
        

# Build the scraper. The constructor itself starts the browser and runs the
# income-statement and statistics scrapes, so this line does all the work.
run_scraper = ScrapeTrendingView()
# run_scraper.driver.close()




In [None]:
# Display the scraped statistics/ratios table.
run_scraper.statistics

In [None]:
# Plot total revenue against net income, one point per scraped column.
# An explicit Axes is used so both series are guaranteed to land on the same
# chart and the chart can be labelled.
fig, ax = plt.subplots(figsize=(20, 8))

run_scraper.income_statement.loc['Total revenue'].plot(ax=ax)
run_scraper.income_statement.loc['Net income'].plot(ax=ax)

ax.set_title('Total revenue vs. net income')
ax.set_xlabel('Period')
ax.set_ylabel('Amount')
# If the columns ever arrive newest-first, call ax.invert_xaxis() here.
ax.grid(True)
ax.legend()
plt.show()

In [None]:
# Interactive line chart of total revenue and net income over the scraped columns.
income_statement = run_scraper.income_statement
revenue_and_net_income = [
    income_statement.loc['Total revenue'],
    income_statement.loc['Net income'],
]
fig = px.line(
    x=income_statement.columns,
    y=revenue_and_net_income,
    title='Total Revenue',
)
fig.show()

In [None]:
# Display the income statement transposed: the scraped column headers become
# the row index and every statement line becomes a column.
run_scraper.income_statement.transpose()

In [26]:
# Transposed so that every income-statement line is a column that plotly can
# address by name and the scraped column headers form the x-axis.
df_income_statement = run_scraper.income_statement.transpose()


def plot_income_statement_rows(frame, rows, title):
    """Draw, show and return one plotly line chart.

    Args:
        frame: Transposed income statement (one column per statement line).
        rows: Names of the statement lines to draw.
        title: Chart title.
    """
    fig = px.line(frame, x=frame.index, y=rows, title=title, markers=True)
    fig.show()
    return fig


fig_revenue = plot_income_statement_rows(
    df_income_statement,
    ['Total revenue', 'Cost of goods sold', 'Gross profit'],
    'Total Revenue - Cost Of Goods Sold = Gross Profit',
)

fig_operating_income = plot_income_statement_rows(
    df_income_statement,
    ['Operating expenses (excl. COGS)', 'Operating income', 'Gross profit'],
    'Gross Profit - Operating Expenses = Operating Income',
)

fig_pretax_income = plot_income_statement_rows(
    df_income_statement,
    ['Operating income', 'Non-operating income, total', 'Pretax income'],
    'Operating Income + Non Operating Income = Pretax Income',
)

fig_net_income = plot_income_statement_rows(
    df_income_statement,
    ['Pretax income', 'Taxes', 'Net income'],
    'Pretax Income - Taxes = Net Income',
)

fig_eps = plot_income_statement_rows(
    df_income_statement,
    ['Basic earnings per share (Basic EPS)', 'Diluted earnings per share (Diluted EPS)'],
    'Earnings Per Share',
)

# This chart shows share counts, so it gets its own title instead of the
# one copied from the EPS chart.
fig_shares = plot_income_statement_rows(
    df_income_statement,
    ['Average basic shares outstanding', 'Diluted shares outstanding'],
    'Shares Outstanding',
)


# Index(['Total revenue', 'Cost of goods sold', 'Deprecation and amortization',
#        'Depreciation', 'Amortization of intangibles',
#        'Amortization of deferred charges', 'Other cost of goods sold',
#        'Gross profit', 'Operating expenses (excl. COGS)',
#        'Selling/general/admin expenses, total', 'Research & development',
#        'Selling/general/admin expenses, other',
#        'Other operating expenses, total', 'Operating income',
#        'Non-operating income, total',
#        'Interest expense, net of interest capitalized',
#        'Interest expense on debt', 'Interest capitalized',
#        'Non-operating income, excl. interest expenses',
#        'Non-operating interest income', 'Pretax equity in earnings',
#        'Miscellaneous non-operating expense', 'Unusual income/expense',
#        'Impairments', 'Restructuring charge', 'Legal claim expense',
#        'Unrealized gain/loss', 'Other exceptional charges', 'Pretax income',
#        'Equity in earnings', 'Taxes', 'Income tax, current',
#        'Income tax, current - domestic', 'Income Tax, current - foreign',
#        'Income tax, deferred', 'Income tax, deferred - domestic',
#        'Income tax, deferred - foreign', 'Income Tax Credits',
#        'Non-controlling/minority interest', 'After tax other income/expense',
#        'Net income before discontinued operations', 'Discontinued operations',
#        'Net income', 'Dilution adjustment', 'Preferred dividends',
#        'Diluted net income available to common stockholders',
#        'Basic earnings per share (Basic EPS)',
#        'Diluted earnings per share (Diluted EPS)',
#        'Average basic shares outstanding', 'Diluted shares outstanding',
#        'EBITDA', 'EBIT', 'Total operating expenses'],
#       dtype='object')


In [None]:
# Transposed so that every statistic is a column that plotly can address by
# name and the scraped column headers form the x-axis.
df_statistics = run_scraper.statistics.transpose()


def plot_statistics_rows(frame, rows, title):
    """Draw, show and return one plotly line chart.

    Args:
        frame: Transposed statistics table (one column per statistic).
        rows: Names of the statistics to draw.
        title: Chart title.
    """
    fig = px.line(frame, x=frame.index, y=rows, title=title, markers=True)
    fig.show()
    return fig


fig_shares_outstanding = plot_statistics_rows(
    df_statistics,
    ['Total common shares outstanding', 'Float shares outstanding'],
    'Number of Shares',
)

fig_enterprice_values = plot_statistics_rows(
    df_statistics,
    ['Enterprise value'],
    'Enterprise value',
)

fig_numer_of_employees_shareholders = plot_statistics_rows(
    df_statistics,
    ['Number of employees', 'Number of shareholders'],
    'Number of employees/shareholders',
)

fig_price_ratios = plot_statistics_rows(
    df_statistics,
    ['Price to earnings ratio', 'Price to sales ratio', 'Price to cash flow ratio',
     'Enterprise value to EBITDA ratio', 'Price to book ratio'],
    'Price Ratios',
)

fig_return_ratios = plot_statistics_rows(
    df_statistics,
    ['Return on assets %', 'Return on equity %', 'Return on invested capital %'],
    'Return Ratios',
)

fig_margins = plot_statistics_rows(
    df_statistics,
    ['Gross margin %', 'EBITDA margin %', 'Net margin %', 'Operating margin %'],
    'Margins',
)

# Title spelling corrected ("Debt"); the variable name is kept as it was so
# that later cells referring to it keep working.
fig_dept_ratios = plot_statistics_rows(
    df_statistics,
    ['Debt to assets ratio', 'Debt to equity ratio', 'Long term debt to total assets ratio'],
    'Debt Ratios',
)

fig_liquidity_ratios = plot_statistics_rows(
    df_statistics,
    ['Quick ratio', 'Current ratio', 'Inventory turnover', 'Asset turnover'],
    'Liquidity Ratios',
)