In [1]:
import requests
import pandas as pd 
from bs4 import BeautifulSoup

In [2]:
# Ticker whose company-highlight page will be fetched.
symbol = "WHABT"
# Percent-encode '&' so it stays part of the symbol value in the query string;
# str.replace leaves the symbol untouched when it contains no '&'.
symbol_url = symbol.replace('&', '%26')
URL = f"https://www.set.or.th/set/companyhighlight.do?symbol={symbol_url}&ssoPageId=5&language=en&country=US"

In [3]:
# Download the company-highlight page. requests applies no timeout by default,
# so pass one explicitly to keep the cell from hanging if the server never answers.
web = requests.get(URL, timeout=30)
# Last expression of the cell: shows whether the request came back without an HTTP error.
web.ok

True

In [4]:
# Parse the response body and pick the table whose class attribute is exactly
# "table table-hover table-info".
soup = BeautifulSoup(web.text, "lxml")
table = soup.find("table", attrs={"class":"table table-hover table-info"})
# find() returns None when nothing matches; stop here with a clear message
# instead of letting a later cell fail with an AttributeError on None.
if table is None:
    raise ValueError(f"No company-highlight table found in the response from {web.url}")

In [5]:
# The table contains more than one <thead>; the first one holds the
# reporting-period column labels.
header = table.find_all("thead")[0]

In [6]:
# Display the raw <thead> markup to inspect how the column labels are laid out.
header

<thead>
<tr align="center" valign="middle">
<th height="30"><strong>Period <br/> as of</strong></th>
<th><strong>Y/E '18<br/>31/03/2018</strong></th>
<th><strong>Y/E '19<br/>31/03/2019</strong></th>
<th><strong>Y/E '20<br/>31/03/2020</strong></th>
<th><strong>Y/E '21<br/>31/03/2021</strong></th>
<th><strong>Q2 '22<br/>30/09/2021</strong></th>
</tr>
</thead>

In [7]:
# Print every <th> cell of the header, one per line: the first cell is the
# "Period as of" label, the remaining ones are the period columns.
for head in header.find_all("th"):
    print(head)

<th height="30"><strong>Period <br/> as of</strong></th>
<th><strong>Y/E '18<br/>31/03/2018</strong></th>
<th><strong>Y/E '19<br/>31/03/2019</strong></th>
<th><strong>Y/E '20<br/>31/03/2020</strong></th>
<th><strong>Y/E '21<br/>31/03/2021</strong></th>
<th><strong>Q2 '22<br/>30/09/2021</strong></th>


In [5]:
# Collect the reporting-period labels (e.g. "Y/E '19 31/12/2019") from the first <thead>.
header = table.find_all("thead")[0]
date = []
# Skip only the leading "Period as of" label cell. Slicing off the last cell as
# well would drop the most recent period whenever every column is populated,
# leaving fewer labels than values per row.
for head in header.find_all("th")[1:]:
    # strip=True trims each text fragment and ignores whitespace-only ones, so
    # blank padding cells come out as '' and are filtered out just below.
    text = head.get_text(" ", strip=True)
    if text != '':
        date.append(text)
date

["Y/E '19 31/12/2019", "Y/E '20 31/12/2020", "Q3 '21 30/09/2021"]

In [6]:
# Build the "FinancialData" section of `data`:
#   data["FinancialData"]["date"] -> period labels collected from the header
#   data["FinancialData"]["info"] -> {metric name: [one value per period]}
data = {}
table_name = "FinancialData"
data[table_name] = {}
data[table_name]["date"] = date
data[table_name]["info"] = {}

# The <td> cells are walked as one flat sequence: a left-aligned cell starts a
# new metric and the cells that follow are that metric's values.
for row in table.tbody.find_all("td"):
    # Cells from "Last Price(Baht)" onward belong to the statistics section,
    # which is parsed separately.
    if row.text == "Last Price(Baht)":
        break

    attrs = row.attrs
    if attrs.get('style') == 'text-align:left;' and attrs.get('colspan') is not None:
        # Left-aligned cell spanning several columns: skipped
        # (presumably a section heading rather than a metric).
        pass
    elif attrs.get('style') == 'text-align:left;':
        # Left-aligned single cell: a metric name; start its list of values.
        name = row.text
        data[table_name]["info"][name] = []
    else:
        info = row.get_text(strip=True)
        if info != '':
            # Drop thousands separators so "152,176.21" parses as a float.
            info = info.replace(',', '')
            try:
                info = float(info)
            except ValueError:
                # Not numeric: keep the cleaned text as-is.
                pass
            data[table_name]["info"][name].append(info)

In [7]:
# Inspect the parsed structure: period labels under "date", per-metric value lists under "info".
data

{'FinancialData': {'date': ["Y/E '19 31/12/2019",
   "Y/E '20 31/12/2020",
   "Q3 '21 30/09/2021"],
  'info': {'Assets': [152176.21, 144979.3, 191619.26],
   'Liabilities': [113468.38, 107063.34, 94747.46],
   'Equity': [38707.83, 37823.44, 96818.87],
   'Paid-up Capital': [90000.0, 90000.0, 120000.0],
   'Revenue': [583208.75, 432848.96, 356406.09],
   'Profit (Loss) from Other Activities': [927.85, 108.31, 641.4],
   'Net Profit': [10895.81, 8791.07, 9120.62],
   'EPS (Baht)': [1.21, 0.98, 0.79],
   'ROA(%)': [9.59, 8.09, 9.54],
   'ROE(%)': [28.15, 22.97, 17.45],
   'Net Profit Margin(%)': [1.87, 2.03, 2.56]}}}

In [8]:
# Show the financial figures as a frame: built with periods as the index and
# metrics as columns, then transposed so each metric becomes a row.
table_name = "FinancialData"
print(f"{symbol} | {table_name}")
pd.DataFrame(
    data=data[table_name]["info"],
    index=data[table_name]["date"],
).transpose()

OR | FinancialData


Unnamed: 0,Y/E '19 31/12/2019,Y/E '20 31/12/2020,Q3 '21 30/09/2021
Assets,152176.21,144979.3,191619.26
Liabilities,113468.38,107063.34,94747.46
Equity,38707.83,37823.44,96818.87
Paid-up Capital,90000.0,90000.0,120000.0
Revenue,583208.75,432848.96,356406.09
Profit (Loss) from Other Activities,927.85,108.31,641.4
Net Profit,10895.81,8791.07,9120.62
EPS (Baht),1.21,0.98,0.79
ROA(%),9.59,8.09,9.54
ROE(%),28.15,22.97,17.45


In [9]:
# The second <thead> carries the "as of" dates for the statistics rows.
header = table.find_all("thead")[1]
# Take every <th> after the leading label cell and keep only the non-empty texts.
date = [
    label
    for label in (cell.get_text(" ") for cell in header.find_all("th")[1:])
    if label != ''
]
date

['30/12/2021', '03/02/2022']

In [10]:
# Find the position, within the flat list of <td> cells, of the
# "Last Price(Baht)" label: the statistics rows start there.
for i, row in enumerate(table.tbody.find_all("td")):
    if row.text == "Last Price(Baht)":
        print(i, row)
        index = i
        break
else:
    # The loop finished without a match. Raising here prevents `index` from
    # being undefined, or silently left over from an earlier run of this cell.
    raise ValueError('No "Last Price(Baht)" cell found in the table body')

59 <td height="15" style="text-align:left;">Last Price(Baht)</td>


In [11]:
# Build the "Statistics" section of `data` from the cells starting at the
# "Last Price(Baht)" label:
#   data["Statistics"]["date"] -> the "as of" dates collected from the second <thead>
#   data["Statistics"]["info"] -> {statistic name: [one value per date]}
rows = table.tbody.find_all("td")[index:]

table_name = "Statistics"
data[table_name] = {}
data[table_name]["date"] = date
data[table_name]["info"] = {}

for row in rows:
    attrs = row.attrs
    if attrs.get('style') == 'text-align:left;' and attrs.get('colspan') is not None:
        # Left-aligned cell spanning several columns: skipped (presumably a
        # section heading). Any colspan is accepted rather than only '5', so the
        # check does not depend on how many period columns the table has and
        # matches the rule used when parsing the FinancialData rows.
        pass
    elif attrs.get('style') == 'text-align:left;':
        # Left-aligned single cell: a statistic name; start its list of values.
        name = row.text
        data[table_name]["info"][name] = []
    else:
        info = row.get_text(strip=True)
        if info != '':
            # Drop thousands separators so "324,000.00" parses as a float.
            info = info.replace(',', '')
            try:
                info = float(info)
            except ValueError:
                # Not numeric (e.g. a date such as "30/09/2021"): keep the text.
                pass
            data[table_name]["info"][name].append(info)

In [12]:
# Inspect the section parsed most recently (table_name is "Statistics" at this point).
data[table_name]

{'date': ['30/12/2021', '03/02/2022'],
 'info': {'Last Price(Baht)': [27.0, 24.5],
  'Market Cap.': [324000.0, 294000.0],
  'F/S Period (As of date)': ['30/09/2021', '30/09/2021'],
  'P/E': [26.9, 24.41],
  'P/BV': [3.35, 3.04],
  'Book Value per share (Baht)': [8.07, 8.07],
  'Dvd. Yield(%)': [0.36, 0.39]}}

In [13]:
# Show the statistics as a frame: built with dates as the index and statistics
# as columns, then transposed so each statistic becomes a row.
table_name = "Statistics"
print(f"{symbol} | {table_name}")
pd.DataFrame(
    data=data[table_name]["info"],
    index=data[table_name]["date"],
).transpose()

OR | Statistics


Unnamed: 0,30/12/2021,03/02/2022
Last Price(Baht),27.0,24.5
Market Cap.,324000.0,294000.0
F/S Period (As of date),30/09/2021,30/09/2021
P/E,26.9,24.41
P/BV,3.35,3.04
Book Value per share (Baht),8.07,8.07
Dvd. Yield(%),0.36,0.39
