In [None]:
# Initialization
from selenium import webdriver
from selenium.webdriver.common.by import By as Enum_By
import time

# Constants
# Address of the search page that the checks and the query run load;
# written as host + path so each part is easy to spot.
URL = (
    'https://web.pcc.gov.tw'
    '/prkms/tender/common/bulletion/indexBulletion'
)

# Handshake
def handshake() -> None:
    """Smoke-test the search page before it is driven programmatically.

    Opens a fresh Chrome session, loads ``URL`` and checks that the page
    title and the form controls used for querying (keyword box, status
    checkbox, year radio button, query link) are present.  The browser
    session is always closed, including when one of the checks fails.

    Raises:
        AssertionError: If the page title or one of the inspected elements
            does not have the expected tag name / attributes.
        Exception: Whatever ``find_element`` raises when an element cannot
            be located at all (Selenium documents this as
            ``NoSuchElementException``).
    """
    drv = webdriver.Chrome()
    try:
        # Test 1: Does the target website exist?
        drv.get(URL)
        assert '政府電子採購網-全文檢索' in drv.title, \
            f'unexpected page title: {drv.title!r}'

        # Test 2: Does the search box exist?
        search = drv.find_element(Enum_By.ID, 'dep')
        assert (
            search.tag_name == 'input'
            and search.get_attribute('name') == 'querySentence'
            and search.get_attribute('type') == 'text'
        ), 'element #dep is not the expected querySentence text input'

        # Test 3: Do the tender status checkboxes exist?
        tender_cat = drv.find_element(Enum_By.ID, 'scop1')
        assert (
            tender_cat.tag_name == 'input'
            and tender_cat.get_attribute('name') == 'tenderStatusType'
            and tender_cat.get_attribute('type') == 'checkbox'
        ), 'element #scop1 is not the expected tenderStatusType checkbox'

        # Test 4: Do the time range options exist?
        # NOTE(review): the year 113 is hard-coded; this check needs updating
        # if the page stops offering that option.
        # Named `year_radio` (not `time`) so the imported `time` module is not
        # shadowed inside this function.
        year_radio = drv.find_element(Enum_By.ID, 'level_113')
        assert (
            year_radio.tag_name == 'input'
            and year_radio.get_attribute('value') == '113'
            and year_radio.get_attribute('type') == 'radio'
        ), 'element #level_113 is not the expected radio button'

        # Test 5: Does the query button exist?
        # Only the lookup matters here; the returned element is not needed.
        drv.find_element(Enum_By.CSS_SELECTOR, 'tbody > tr > td > div > a[onclick][title=查詢]')
    finally:
        # Release the browser even when a check above raised; otherwise every
        # failed handshake would leave a Chrome session running.
        drv.quit()

# Run the checks right away: an exception raised here means the page did not
# match what handshake() looks for.
handshake()

In [None]:
def query_keyword(drv: webdriver.Chrome, q: str) -> None:
    """Replace the content of the keyword box (element id ``dep``) with ``q``.

    Args:
        drv: Browser session that already has the search page loaded.
        q: Search phrase to type into the box.

    Raises:
        AssertionError: If the located element is not the ``querySentence``
            text input.
    """
    box = drv.find_element(Enum_By.ID, 'dep')
    # One tuple comparison covers tag name, name and type together.
    assert (
        box.tag_name,
        box.get_attribute('name'),
        box.get_attribute('type'),
    ) == ('input', 'querySentence', 'text')
    box.clear()
    box.send_keys(q)

def select_tender_status(drv: webdriver.Chrome) -> None:
    """Ensure the ``scop1`` (招標) and ``scop2`` (決標) checkboxes are ticked.

    Each checkbox is validated first; when it is not yet selected, its
    adjacent ``<label>`` is clicked and the selection is verified.

    Args:
        drv: Browser session that already has the search page loaded.

    Raises:
        AssertionError: If a checkbox does not have the expected tag, type
            or value, or is still unselected after its label was clicked.
    """
    for box_id, expected_value in (('scop1', '招標'), ('scop2', '決標')):
        box = drv.find_element(Enum_By.ID, box_id)
        assert (
            box.tag_name,
            box.get_attribute('type'),
            box.get_attribute('value'),
        ) == ('input', 'checkbox', expected_value)

        if box.is_selected():
            continue

        # The click goes to the label that follows the input, not to the
        # input element itself.
        drv.find_element(Enum_By.CSS_SELECTOR, f'#{box_id} + label[for={box_id}]').click()
        assert box.is_selected()

def select_year(drv: webdriver.Chrome, year: int) -> None:
    """Pick the radio button ``level_<year>`` by clicking its label.

    Args:
        drv: Browser session that already has the search page loaded.
        year: Number appended to ``level_`` to form the radio button id.

    Raises:
        AssertionError: If the radio button is not selected after the click.
    """
    radio_id = f'level_{year}'
    label_selector = '#{0} + label[for={0}]'.format(radio_id)

    # Click first, then look the radio button up and confirm it took effect.
    drv.find_element(Enum_By.CSS_SELECTOR, label_selector).click()
    assert drv.find_element(Enum_By.ID, radio_id).is_selected()

def start_query(drv: webdriver.Chrome) -> None:
    """Submit the search form by clicking the query link (``title=查詢``).

    Args:
        drv: Browser session that already has the search page loaded and
            the form filled in.

    Returns:
        None. The function only performs the click; it does not wait for or
        inspect what the page does afterwards.
    """
    # The link is located structurally: an <a> with an onclick handler and
    # the title 查詢 inside a table cell's <div>.
    query = drv.find_element(Enum_By.CSS_SELECTOR, 'tbody > tr > td > div > a[onclick][title=查詢]')
    query.click()

# Demo run: fill in the search form for '台灣電力' and submit it.
driver = webdriver.Chrome()
try:
    driver.get(URL)
    query_keyword(driver, '台灣電力')
    select_tender_status(driver)
    select_year(driver, 100)
    start_query(driver)
    # Short fixed pause after submitting; presumably gives the page a moment
    # to react before the window is closed -- TODO confirm it is sufficient.
    time.sleep(0.5)
finally:
    # Close the browser even if one of the steps above raised, so a failed
    # run does not leave a Chrome session behind.
    driver.quit()

In [None]:
from dataclasses import asdict
import pandas as pd
from taipowerCrawler import handshake, get_records

# Verify the public tender website is reachable before fetching anything.
# Note: this is the handshake imported from taipowerCrawler in this cell,
# which replaces any handshake defined earlier in the notebook.
handshake()

# Fetch the tender records for the keyword and tabulate them, one row per
# record (each record is converted to a dict of its dataclass fields).
records = get_records('台灣電力')
df = pd.DataFrame(list(map(asdict, records)))
display(df)