In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import os
import time

In [4]:
# Set up Selenium WebDriver
options = webdriver.ChromeOptions()
# options.add_argument("--headless")  # Uncomment to run in background
driver = webdriver.Chrome(options=options)

In [5]:
# Open the website
url = "https://www.etenders.gov.za/Home/opportunities?id=2"
driver.get(url)

In [6]:
# Increase wait time
wait = WebDriverWait(driver, 20)

In [10]:
next_button = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[contains(text(),'Next')]")))
next_button.click()
print("Clicked on 'Next' button successfully.")

Clicked on 'Next' button successfully.


In [14]:
close_button = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[contains(text(),'Close')]")))
close_button.click()
print("Clicked on 'Close' button successfully.")

TimeoutException: Message: 


In [16]:
# Wait until the table is present
wait.until(EC.presence_of_element_located((By.XPATH, "//table")))

<selenium.webdriver.remote.webelement.WebElement (session="ee90e14d6d9ed0f0e313529825887604", element="f.C15B3608994EBE17BE11204929875D42.d.77C6DCC6D2811A62FC514832F0380FAF.e.77")>

In [18]:
table = driver.find_element(By.XPATH, "//table")
driver.execute_script("arguments[0].scrollIntoView();", table)
time.sleep(10)  # Wait for data to load

In [20]:
# Define the start and end pages
start_page = 800  # Starting page
end_page = 1100    # Last page

In [28]:
# Navigate to the starting page
for _ in range(start_page - 1):
    try:
        # Re-locate the "Next" button to avoid stale element reference
        next_button = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.LINK_TEXT, "Next"))
        )
        if "disabled" in next_button.get_attribute("class"):
            print("Next button disabled before reaching start page.")
            break
        driver.execute_script("arguments[0].scrollIntoView();", next_button)
        time.sleep(1)

        # Re-locate before clicking to ensure fresh reference
        next_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.LINK_TEXT, "Next"))
        )
        next_button.click()
        time.sleep(2)
    except Exception as e:
        print("Error navigating to start page:", e)
        break
time.sleep(5)

Error navigating to start page: Message: stale element reference: stale element not found in the current frame
  (Session info: chrome=133.0.6943.142); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#stale-element-reference-exception
Stacktrace:
0   chromedriver                        0x0000000102ebc2d4 cxxbridge1$str$ptr + 2739836
1   chromedriver                        0x0000000102eb4934 cxxbridge1$str$ptr + 2708700
2   chromedriver                        0x0000000102a15f90 cxxbridge1$string$len + 93360
3   chromedriver                        0x0000000102a1bc9c cxxbridge1$string$len + 117180
4   chromedriver                        0x0000000102a1dff8 cxxbridge1$string$len + 126232
5   chromedriver                        0x0000000102a9ec60 cxxbridge1$string$len + 653696
6   chromedriver                        0x0000000102a9de80 cxxbridge1$string$len + 650144
7   chromedriver                        0x0000000102a51060

In [32]:
# Store data
page_number = start_page
output_dir = "tenders_html"
os.makedirs(output_dir, exist_ok=True)

In [34]:
while page_number <= end_page:  
    try:
        # Re-fetch rows to prevent stale references
        rows = WebDriverWait(driver, 20).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, "table.display tbody tr"))
        )

        for i in range(len(rows)):
            try:
                rows = driver.find_elements(By.CSS_SELECTOR, "table.display tbody tr")
                plus_button = WebDriverWait(driver, 10).until(
                    lambda drv: rows[i].find_element(By.CSS_SELECTOR, "td.details-control")
                )
                driver.execute_script("arguments[0].scrollIntoView();", plus_button)

                if plus_button.is_displayed() and plus_button.is_enabled():
                    plus_button.click()
                    WebDriverWait(driver, 20).until(
                        EC.presence_of_element_located((By.XPATH, "//table"))
                    )

            except Exception as e:
                print(f"Error extracting row {i}: {e}")
        
        # Save page content
        filename = f"page_{page_number}.html"
        with open(os.path.join(output_dir, filename), "w", encoding="utf-8") as f:
            f.write(driver.page_source)
        print(f"Saved: {filename}")

        # Handle pagination
        try:
            next_button = WebDriverWait(driver, 20).until(
                EC.element_to_be_clickable((By.LINK_TEXT, "Next"))
            )
            if "disabled" in next_button.get_attribute("class"):
                print("Next button disabled. Exiting pagination.")
                break
            driver.execute_script("arguments[0].scrollIntoView();", next_button)
            next_button.click()
            page_number += 1
            time.sleep(3)  # Allow page load time
        except Exception as e:
            print(f"Pagination error: {e}")
            break

    except Exception as e:
        print(f"Page load error: {e}")
        break


Error extracting row 1: Message: 
Stacktrace:
0   chromedriver                        0x0000000102ebc2d4 cxxbridge1$str$ptr + 2739836
1   chromedriver                        0x0000000102eb4934 cxxbridge1$str$ptr + 2708700
2   chromedriver                        0x0000000102a15f90 cxxbridge1$string$len + 93360
3   chromedriver                        0x0000000102a5cde4 cxxbridge1$string$len + 383748
4   chromedriver                        0x0000000102a5257c cxxbridge1$string$len + 340636
5   chromedriver                        0x0000000102a9de80 cxxbridge1$string$len + 650144
6   chromedriver                        0x0000000102a51060 cxxbridge1$string$len + 335232
7   chromedriver                        0x0000000102e84c38 cxxbridge1$str$ptr + 2512864
8   chromedriver                        0x0000000102e87f58 cxxbridge1$str$ptr + 2525952
9   chromedriver                        0x0000000102e6a578 cxxbridge1$str$ptr + 2404640
10  chromedriver                        0x0000000102e88818 cxxbri

KeyboardInterrupt: 

In [None]:
# Close browser
driver.quit()