<a href="https://colab.research.google.com/github/sergio-cabrales/python/blob/main/Fundamental_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Analyzing Financial Statements for Insight into Company's Valuation for Possible Investment
from datetime import datetime
import lxml
from lxml import html
import requests
import numpy as np
import pandas as pd

symbol = 'EC.NZ'
url = 'https://finance.yahoo.com/quote/EC/balance-sheet?p=EC'
# NOTE(review): `symbol` is not used to build `url` (the URL hard-codes "EC");
# confirm which ticker is intended before deriving one from the other.

# Request headers that mimic a Chrome browser. Scraped sites often respond
# differently to clients that do not look like a regular browser.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
    # 'br' is intentionally not advertised: the response body can only be
    # decoded transparently when an optional Brotli decoder is installed.
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-US,en;q=0.9',
    'Cache-Control': 'max-age=0',
    'Pragma': 'no-cache',
    # The HTTP header name is spelled "Referer" (single "r" in the middle).
    'Referer': 'https://google.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'
}

# Fetch the page. `headers` must be passed by keyword: the second positional
# argument of requests.get() is `params`, which would append the dict to the
# query string instead of sending it as HTTP headers.
# The timeout keeps the cell from hanging forever on an unresponsive server.
page = requests.get(url, headers=headers, timeout=30)

# Fail here, with the HTTP status in the error, rather than later while
# parsing an error page.
page.raise_for_status()

# Parse the raw response bytes with lxml so the page can be queried via XPath.
tree = html.fromstring(page.content)

# Smoke test: display the text of the page's <h1> element(s) to confirm that
# the expected page was fetched.
tree.xpath("//h1/text()")

['Ecopetrol S.A. (EC)']

In [4]:
# Select every <div> whose class attribute contains 'D(tbr)'; each match is
# treated as one row of the statement table.
table_rows = tree.xpath("//div[contains(@class, 'D(tbr)')]")

# Ensure that some table rows are found; if none are found, then it's possible
# that Yahoo Finance has changed their page layout, or has detected
# that you're scraping the page.
assert len(table_rows) > 0, (
    "No table rows matched the XPath query; the page layout may have changed "
    "or the request may have been blocked."
)

# A row is discarded once this many of its cells could not be read.
MISSING_CELLS_DROP_THRESHOLD = 4

parsed_rows = []

for table_row in table_rows:
    parsed_row = []
    none_count = 0

    # Each direct child <div> of the row is one cell.
    for cell in table_row.xpath("./div"):
        try:
            # Tuple unpacking requires exactly one text node; zero matches or
            # more than one match raises ValueError and counts as missing.
            (text,) = cell.xpath('.//span/text()[1]')
        except ValueError:
            # np.nan (lowercase) is the supported spelling; the np.NaN alias
            # was removed in NumPy 2.0.
            parsed_row.append(np.nan)
            none_count += 1
        else:
            parsed_row.append(text)

    if none_count < MISSING_CELLS_DROP_THRESHOLD:
        parsed_rows.append(parsed_row)

# One DataFrame row per kept table row; columns are positional (0, 1, 2, ...).
df = pd.DataFrame(parsed_rows)
df

Unnamed: 0,0,1,2,3,4
0,Breakdown,12/31/2020,12/31/2019,12/31/2018,12/31/2017
1,Total Assets,139417000000,133890296000,124643498000,117847412000
2,Total Liabilities Net Minority Interest,83598000000,75658668000,67535718000,69631713000
3,Total Equity Gross Minority Interest,55819000000,58231628000,57107780000,48215699000
4,Total Capitalization,93903000000,87640756000,89180632000,84838284000
5,Common Stock Equity,52095000000,54413790000,55137914000,46434953000
6,Net Tangible Assets,51540000000,52770770000,53567245000,44894805000
7,Working Capital,4552000000,1622162000,9205990000,6377405000
8,Invested Capital,98826000000,92652929000,93200559000,89982788000
9,Tangible Book Value,51540000000,52770770000,53567245000,44894805000
