In [1]:
import os
import pandas as pd
from urllib import request
from bs4 import BeautifulSoup
from time import sleep, strftime
# Base address of the SET quote pages; the scraper appends "<SYMBOL>/price".
url = "https://www.set.or.th/en/market/product/stock/quote/"

# The notebook is run from a "Daily" folder; the symbol list and the scraped
# result both live in the sibling "Data" folder.
current_dir = '../Daily'
input_file = 'name-ttl.csv'
data_dir = os.path.join(os.path.dirname(current_dir), 'Data')
input_dir = data_dir
output_dir = data_dir

file_in = os.path.join(input_dir, input_file)
file_out = os.path.join(output_dir, "price-hilo.csv")
print(file_in, file_out)

..\Data\name-ttl.csv ..\Data\price-hilo.csv


In [2]:
def _clean_price(text, placeholder=None):
    """Normalise one price string scraped from a quote page.

    Surrounding whitespace and commas are removed so the value can later be
    cast to float. When ``placeholder`` is given, a bare ``'-'`` is replaced
    by it; otherwise ``'-'`` is returned unchanged.
    """
    value = text.strip().replace(',', '')
    if placeholder is not None and value == '-':
        return placeholder
    return value


def get_stock_data(input_df, delay=6):
    """Scrape today's and the yearly low/high price for every listed symbol.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Frame whose first column holds the stock symbols. The frame is only
        read; it is never modified.
    delay : int or float, optional
        Seconds to pause after each request so the server is not overloaded.
        The default of 6 equals the two pauses (1 s + 5 s) used previously.

    Returns
    -------
    pandas.DataFrame
        One row per symbol with the string columns ``name``, ``today_low``,
        ``today_high``, ``year_low`` and ``year_high``. Today's values shown
        as ``'-'`` on the page are stored as ``'0.00'``.

    Raises
    ------
    IndexError
        If a quote page does not contain the expected price elements.
    """
    output_columns = ['name', 'today_low', 'today_high', 'year_low', 'year_high']

    # Use the frame supplied by the caller instead of re-reading the CSV, and
    # address the symbol column by position so the caller's frame is not
    # relabelled as a side effect.
    symbols = input_df.iloc[:, 0]

    data = []
    for raw_name in symbols:
        name = str(raw_name).upper()

        # Close the HTTP response as soon as the page has been parsed.
        with request.urlopen(url + name + '/price') as response:
            html_page = BeautifulSoup(response, 'html.parser')

        # In each result list, index 0 is today's value and index 1 the
        # yearly value.
        values_low = html_page.find_all('span', class_='title-font-family fs-16px fw-bolder me-auto lh-1')
        values_high = html_page.find_all('span', class_='title-font-family fs-16px fw-bolder lh-1')
        if len(values_low) < 2 or len(values_high) < 2:
            # Same exception type as the bare indexing raised before, but the
            # message now names the symbol that failed.
            raise IndexError(
                f"Expected at least two low and two high price elements for {name!r}; "
                f"found {len(values_low)} and {len(values_high)}"
            )

        today_low = _clean_price(values_low[0].text, placeholder='0.00')
        today_high = _clean_price(values_high[0].text, placeholder='0.00')
        year_low = _clean_price(values_low[1].text)
        year_high = _clean_price(values_high[1].text)

        record = [name, today_low, today_high, year_low, year_high]
        data.append(record)
        print(record)
        sleep(delay)   # to avoid overloading the server and getting blocked

    return pd.DataFrame(data, columns=output_columns)

In [3]:
# Remember the wall-clock time (12-hour clock) at which the scrape begins.
start_time = strftime("%I:%M %p")
start_banner = ('Start at: ', start_time)
print(*start_banner)

Start at:  04:21 PM


In [4]:
# header=None makes pandas treat the first line of the file as data, so the
# first symbol is not consumed as a column name.
df = pd.read_csv(
    file_in,
    header=None,
)

# Fetch the prices for every symbol; each scraped row is printed as it arrives.
output_df = get_stock_data(input_df=df)

['ACE', '1.52', '1.56', '1.40', '2.68']
['ADVANC', '215.00', '217.00', '194.00', '231.00']
['AEONTS', '152.00', '154.00', '145.00', '208.00']
['AH', '29.00', '30.50', '25.50', '39.25']
['AIE', '1.36', '1.42', '1.16', '3.12']
['AIMIRT', '10.40', '10.60', '10.00', '12.50']
['AIT', '3.76', '3.82', '3.60', '6.85']
['AJ', '6.30', '6.60', '6.10', '13.60']
['AMATA', '22.20', '22.90', '19.40', '27.25']
['ANAN', '0.83', '0.85', '0.70', '1.49']
['AOT', '60.50', '61.25', '58.00', '75.50']
['AP', '10.90', '11.20', '10.10', '12.80']
['ASIAN', '6.80', '7.00', '5.95', '14.00']
['ASK', '19.50', '20.00', '19.20', '34.50']
['ASP', '2.80', '2.84', '2.68', '3.24']
['ASW', '8.20', '8.25', '7.65', '9.10']
['AWC', '3.84', '3.92', '3.40', '6.20']
['BA', '14.70', '15.00', '11.80', '17.60']
['BAM', '7.90', '8.15', '7.75', '16.50']
['BANPU', '6.40', '6.60', '6.25', '12.70']
['BAY', '26.75', '27.50', '26.75', '34.00']
['BBL', '143.50', '145.50', '144.00', '175.00']
['BCH', '22.30', '23.00', '16.60', '23.00']
['BC

['THG', '49.75', '52.25', '49.75', '72.00']
['TIDLOR', '21.30', '22.00', '18.60', '28.00']
['TIPH', '29.25', '30.50', '28.50', '53.75']
['TIPCO', '9.35', '9.40', '8.75', '12.60']
['TISCO', '97.25', '98.00', '89.50', '103.50']
['TK', '5.50', '5.55', '4.92', '8.90']
['TKN', '10.40', '10.80', '8.65', '14.40']
['TKS', '6.75', '6.95', '5.00', '13.90']
['TMT', '6.10', '6.15', '6.00', '8.45']
['TMW', '49.00', '50.50', '35.00', '53.25']
['TOA', '22.30', '22.80', '19.20', '35.00']
['TOP', '52.00', '53.00', '42.25', '60.25']
['TPIPL', '1.41', '1.43', '1.32', '1.87']
['TPIPP', '3.38', '3.42', '3.22', '3.54']
['TQM', '28.75', '29.50', '24.70', '44.00']
['TR', '0.00', '0.00', '41.25', '48.00']
['TRUE', '5.30', '5.50', '5.00', '9.00']
['TSE', '2.02', '2.12', '1.64', '2.64']
['TSTH', '0.81', '0.83', '0.77', '1.22']
['TTA', '6.25', '6.45', '4.74', '8.30']
['TTB', '1.76', '1.81', '1.29', '1.80']
['TTLPF', '17.90', '18.10', '17.90', '23.20']
['TTW', '9.30', '9.45', '8.15', '9.75']
['TU', '15.30', '15.70

In [5]:
# The scraper returns every price as text; cast the four price columns to
# float so the numeric comparisons in the following cells work.
for price_column in ('today_low', 'today_high', 'year_low', 'year_high'):
    output_df[price_column] = output_df[price_column].astype('float')

In [6]:
# Remember the wall-clock time (12-hour clock) at which the scrape finished.
end_time = strftime("%I:%M %p")
end_banner = ('End at: ', end_time)
print(*end_banner)

End at:  04:48 PM


In [7]:
# Stocks whose low for today is at or below their yearly low.
# NOTE(review): get_stock_data stores '0.00' when the page shows '-' for
# today's low, so such rows also pass this filter - confirm whether they
# should be excluded.
output_df[output_df['today_low'] <= output_df['year_low']]

Unnamed: 0,name,today_low,today_high,year_low,year_high
20,BAY,26.75,27.5,26.75,34.0
21,BBL,143.5,145.5,144.0,175.0
29,BEC,4.74,4.88,4.78,10.8
30,BEM,7.35,7.5,7.4,10.1
36,BLA,19.1,19.5,19.1,32.25
49,CPF,17.6,18.2,17.9,24.2
54,CRC,34.0,35.25,35.0,47.5
61,EA,38.0,39.75,39.0,91.25
63,ECL,1.39,1.41,1.39,2.16
82,HMPRO,10.6,10.8,10.7,15.1


In [8]:
# Number of stocks whose low for today is at or below their yearly low.
# NOTE(review): rows where today's low is the '0.00' placeholder written by
# get_stock_data are counted as well - confirm whether that is intended.
len(output_df[output_df['today_low'] <= output_df['year_low']])

26

In [9]:
# Stocks whose high for today is at or above their yearly high.
output_df[output_df['today_high'] >= output_df['year_high']]

Unnamed: 0,name,today_low,today_high,year_low,year_high
22,BCH,22.3,23.0,16.6,23.0
210,TTB,1.76,1.81,1.29,1.8


In [10]:
# Number of stocks whose high for today is at or above their yearly high.
len(output_df[output_df['today_high'] >= output_df['year_high']])

2

In [11]:
# Persist the scraped prices; the row index is written as the first,
# unnamed column of the CSV.
output_df.to_csv(path_or_buf=file_out)