In [27]:
import json
from tqdm.auto import tqdm
import requests
from bs4 import BeautifulSoup
import csv

In [32]:
def download_soups(urls: list):
    """
    Download and parse the HTML page behind each URL.

    Each URL is fetched with ``requests.get`` and, when the server answers
    with HTTP 200, parsed with BeautifulSoup's ``html.parser``.

    Pages that do not return HTTP 200 are reported by printing their URL and
    are skipped, so the returned list can be shorter than ``urls``.

    Args:
        urls: URLs to download, in the order they should be fetched.

    Returns:
        A list of ``BeautifulSoup`` objects, one per successfully
        downloaded page, in the same relative order as ``urls``.
    """
    soups = []
    for url in tqdm(urls):
        response = requests.get(url)
        if response.status_code != 200:
            # Report the failing URL and move on. Appending here would either
            # hit an unbound variable (first URL) or duplicate the previous
            # page's soup (later URLs).
            print(url)
            continue
        soups.append(BeautifulSoup(response.text, 'html.parser'))
    return soups


def get_doctor_details(doctor_soup: BeautifulSoup):
    """
    Extract a doctor's summary profile from one doctor card.

    See https://www.siphhospital.com/th/medical-services/find-doctor as an example.
    Each doctor's information can be found in a gray box; ``doctor_soup`` is
    the parsed element for one such box.

    Args:
        doctor_soup: Parsed HTML of a single doctor card.

    Returns:
        A dict with the keys ``"name"``, ``"image_src"``, ``"url"`` and
        ``"table_check"``. ``"table_check"`` is the schedule text with all
        runs of whitespace collapsed to single spaces, or ``""`` when the
        card has no ``ul.text-table-day`` element.

    Raises:
        AttributeError: If the name element is missing from the card.
        TypeError: If the image or profile-link element is missing.
    """
    name = doctor_soup.find('div', class_='card-text-name-doctor').get_text(strip=True)
    image_src = doctor_soup.find('img', class_='circular--square')['src']
    url = doctor_soup.find('a', class_='card-btn-view-data-doctor')['href']

    # The schedule list is optional: ``find`` returns None when it is absent,
    # and calling ``get_text`` on None raises AttributeError.
    try:
        tablecheck = doctor_soup.find('ul', class_='text-table-day').get_text()
    except AttributeError:
        tablecheck = ""
    else:
        # Collapse newlines and indentation inside the list into single spaces.
        tablecheck = " ".join(tablecheck.strip().split())

    return {
        "name": name,
        "image_src": image_src,
        "url": url,
        "table_check": tablecheck,
    }


def get_profile_details(doctor_url: str):
    """
    Fetch a doctor's profile page and extract additional details.

    The page at ``doctor_url`` is downloaded and parsed, then the text of the
    qualification and expertise sections is extracted. A section that is not
    present on the page yields an empty string.

    Args:
        doctor_url: URL of the doctor's profile page (the ``"url"`` value
            produced by ``get_doctor_details``).

    Returns:
        A ``(qualification, expertise)`` tuple of strings.
    """
    response = requests.get(doctor_url)
    details_soup = BeautifulSoup(response.text, 'html.parser')

    # ``find`` returns None for a missing section, so ``get_text`` raises
    # AttributeError; treat that as "no information available".
    try:
        qualification = details_soup.find(
            "div", class_="layout-column-one layout-column-editable"
        ).get_text(strip=True)
    except AttributeError:
        qualification = ""

    try:
        expertise = details_soup.find(
            "div", class_="doctor-qualification-content"
        ).get_text(strip=True)
    except AttributeError:
        expertise = ""

    return qualification, expertise



if __name__ == "__main__":
    # Scrape every page of the hospital's "find doctor" listing and append one
    # CSV row per doctor card to export_data.csv.
    # Page 1 has no query string; pages 2..69 use ?page=N.
    urls = ["https://www.siphhospital.com/th/medical-services/find-doctor"] + [
        f"https://www.siphhospital.com/th/medical-services/find-doctor?page={i}"
        for i in range(2, 70)
    ]
    soups = download_soups(urls)

    # Open the output once for the whole run; the context manager guarantees
    # the file is closed even if parsing or writing fails part-way through.
    with open('export_data.csv', 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        # In append mode the stream starts at end-of-file, so position 0 means
        # the file is new/empty. Only then is a header needed; this keeps
        # repeated runs from writing a second header into the middle of the data.
        header_needed = file.tell() == 0

        for soup in tqdm(soups):
            # Each doctor's profile card lives in its own gray box.
            for div in soup.find_all("div", class_='box-bg-gray text-center'):
                doctor_detail = get_doctor_details(div)
                # Per-doctor enrichment is currently disabled. To enable it,
                # call get_profile_details(doctor_detail["url"]) here and
                # store the results under "qualification" and "expertise".

                if header_needed:
                    # Column names come from the keys of the first record.
                    writer.writerow(list(doctor_detail.keys()))
                    header_needed = False

                writer.writerow(list(doctor_detail.values()))

  0%|          | 0/69 [00:00<?, ?it/s]

  0%|          | 0/69 [00:00<?, ?it/s]

In [34]:
import pandas as pd

# Load the CSV file written by the scraping cell into a DataFrame.
data = pd.read_csv('export_data.csv')

In [35]:
# Display the loaded DataFrame as the cell's output.
data

Unnamed: 0,name,image_src,url,table_check
0,รศ. พญ.กติกา นวพันธุ์,https://siph-space.sgp1.digitaloceanspaces.com...,https://www.siphhospital.com/th/medical-servic...,"เสาร์ 14:00 - 17:00 น. สัปดาห์ที่ 1,2,3,4,5"
1,รศ. พญ.กนกรัตน์ พรพาณิชย์,https://siph-space.sgp1.digitaloceanspaces.com...,https://www.siphhospital.com/th/medical-servic...,"พุธ 08:00 - 11:15 น. สัปดาห์ที่ 1,2,3,4,5"
2,อ. พญ.กนกรัตน์ สุวรรณสิทธิ์,https://siph-space.sgp1.digitaloceanspaces.com...,https://www.siphhospital.com/th/medical-servic...,"พฤหัสบดี 13:30 - 17:00 น. สัปดาห์ที่ 1,2,3,4,5..."
3,ทพญ.กนกอร ตันติพาณิชย์กูล,https://www.siphhospital.com/assets/img/no_pro...,https://www.siphhospital.com/th/medical-servic...,"พุธ 16:30 - 19:30 น. สัปดาห์ที่ 3,4"
4,ผศ. นพ.กมล เผือกเพ็ชร,https://siph-space.sgp1.digitaloceanspaces.com...,https://www.siphhospital.com/th/medical-servic...,"พุธ 16:00 - 18:00 น. สัปดาห์ที่ 1,3,5 อาทิตย์ ..."
...,...,...,...,...
616,อ. พญ.อรสุดา เลิศบรรณพงษ์,https://siph-space.sgp1.digitaloceanspaces.com...,https://www.siphhospital.com/th/medical-servic...,
617,รศ. พญ.อริศรา สุวรรณกูล,https://siph-space.sgp1.digitaloceanspaces.com...,https://www.siphhospital.com/th/medical-servic...,"พฤหัสบดี 09:30 - 11:00 น. สัปดาห์ที่ 1,2,3,4,5..."
618,ผศ. พญ.อวยพร เค้าสมบัติวัฒนา,https://siph-space.sgp1.digitaloceanspaces.com...,https://www.siphhospital.com/th/medical-servic...,"พฤหัสบดี 16:00 - 19:00 น. สัปดาห์ที่ 1,2,3,4,5"
619,รศ. นพ.อัคคพงษ์ นิติสิงห์,https://siph-space.sgp1.digitaloceanspaces.com...,https://www.siphhospital.com/th/medical-servic...,"จันทร์ 16:00 - 18:00 น. สัปดาห์ที่ 1,2,3,4,5 พ..."
