In [41]:
import logging
import os
import time
import urllib.request
from logging.handlers import RotatingFileHandler
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.alert import Alert
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common.exceptions import (ElementClickInterceptedException, StaleElementReferenceException)

In [91]:
# Constants
MAX_ATTEMPTS = 3
WAIT_TIMEOUT = 5

In [40]:
destination = ["username", "tax_name", "tax_year", "tax_month"]
destination = "/".join(destination)
print(destination)

user_folder = os.path.join("C:/Users/Se_sA/Downloads", destination)
os.makedirs(user_folder, exist_ok=True)
print(user_folder)

filename = f"test_file_name"
downloaded_location = os.path.join(user_folder, filename)

print(downloaded_location)

# urllib.request.urlretrieve("https://efiling.rd.go.th/rd-cit-edge-printform-service/common/download/complete-form/P530009902879/2567-25670209-25LFGLTVNIOJRG6ZDM.pdf/TAX_FORM_P530009902879.pdf", downloaded_location)


username/tax_name/tax_year/tax_month
C:/Users/Se_sA/Downloads\username/tax_name/tax_year/tax_month
C:/Users/Se_sA/Downloads\username/tax_name/tax_year/tax_month\test_file_name


In [42]:
def setup_debug_logging():
    # Create logger
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Create console handler and set level to INFO
    # console_handler = logging.StreamHandler()
    # console_handler.setLevel(logging.INFO)

    # Create file handler and set level to DEBUG
    file_handler = logging.FileHandler('activitylog.txt', mode='w')  # Change mode to 'w'
    file_handler.setLevel(logging.INFO)

    # Create formatter
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

    # Add formatter to console handler and file handler
    # console_handler.setFormatter(formatter)
    file_handler.setFormatter(formatter)

    # Add console handler and file handler to logger
    # logger.addHandler(console_handler)
    logger.addHandler(file_handler)


In [93]:
# Setup debug log file
setup_debug_logging()

def retry_function(func, *args, **kwargs):
    """Retry function with a maximum number of attempts."""
    for _ in range(MAX_ATTEMPTS):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            logging.warning(f"Encountered exception: {e}, retrying...")
    raise Exception("Function failed after multiple attempts")
    
def find_element_with_retry(driver, locator):
    """Find element matching the locator with a retry mechanism."""
    return retry_function(WebDriverWait(driver, WAIT_TIMEOUT).until, EC.visibility_of_element_located(locator))

def find_all_elements_with_retry(driver, locator):
    """Find all elements matching the locator with a retry mechanism."""
    return retry_function(WebDriverWait(driver, WAIT_TIMEOUT).until, EC.visibility_of_all_elements_located(locator))

def find_clickable_with_retry(driver, locator):
    """Find a clickable element matching the locator with a retry mechanism."""
    return retry_function(WebDriverWait(driver, WAIT_TIMEOUT).until, EC.element_to_be_clickable(locator))

def click_element_with_retry(driver, element, fallback_locator=None):
    """
    Click on an element with a retry mechanism.

    Args:
        driver: Selenium WebDriver instance.
        element: Element to click.
        fallback_locator: Locator to use as a fallback if the original element is stale.

    Returns:
        None
    """
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            logging.info(f'Attempt {attempt}: Clicking on {element}')
            element.click()
            logging.info(f'Click successful on attempt {attempt}')
            return
        except ElementClickInterceptedException as intercepted_e:
            logging.warning(f"Attempt {attempt}: Click failed due to intercepted element: {intercepted_e}")
            logging.warning(f"Attempt {attempt}: Trying clicking button with JavaScript on element: {element}")
            try:
                driver.execute_script("arguments[0].click();", element)
                logging.info(f"Attempt {attempt}: Success clicking button with JavaScript")
                return
            except Exception as js_e:
                logging.warning(f"Attempt {attempt}: Failed clicking button with JavaScript: {e}")
                continue  # Retry finding elements
        except StaleElementReferenceException as stale_e:
            logging.warning(f"Click failed due to stale element: {stale_e}")
            if fallback_locator is None:
                logging.warning(f"No fallback locator set, skip trying")
                continue
            try:
                logging.warning(f"Attempt {attempt}: Trying locating an element")
                element = WebDriverWait(driver, WAIT_TIMEOUT).until(EC.element_to_be_clickable(fallback_locator))
                logging.info("Trying clicking a newly located element")
                click_element_with_retry(driver, element)
                logging.info(f"Attempt {attempt}: Success waiting for element to be clickable and clicking")
                return
            except Exception as click_e:
                logging.warning(f"Attempt {attempt}: Cannot click on an element: {click_e}")
                continue  # Retry finding elements
        except NoSuchElementException as no_ele_e:
            logging.warning(f"Click failed due to no element: {no_ele_e}")
            logging.info("Trying to locate element after element presented")
            element = WebDriverWait(driver, WAIT_TIMEOUT).until(EC.visibility_of_element_located(fallback_locator))
            try:
                logging.info("Trying clicking a newly located element")
                click_element_with_retry(driver, element)
                logging.info(f"Attempt {attempt}: Success waiting for element to be clickable and clicking")
            except Exception as click_e:
                logging.warning(f"Attempt {attempt}: Cannot click on an element: {click_e}")
                continue  # Retry finding elements
        except Exception as e:
            logging.error(f"Attempt {attempt}: Waiting for element to be clickable and clicking failed: {e}")
            move_element_to_viewport(driver, element)
            continue  # Retry finding elements
    raise RuntimeError(f"Failed to click element after {MAX_ATTEMPTS} attempts")

def is_element_in_viewport(driver, element):
    """
    Check if an element is within the viewport.

    Args:
        driver: Selenium WebDriver instance.
        element: WebElement.

    Returns:
        bool: True if element is in the viewport, False otherwise.
    """
    for _ in range(MAX_ATTEMPTS):  # Retry up to 3 times
        try:
            logging.info("Try moving to element with Selenium")
            ActionChains(driver).move_to_element(element).perform()
            logging.info("Successful moving to element")
            return element.is_displayed()
        except Exception as e:
            logging.warning("Cannot move to element by Selenium, try using JavaScript...")
            try:
                logging.info("Try moving to element with JavaScript")
                return driver.execute_script("""
                    var elem = arguments[0];
                    var bounding = elem.getBoundingClientRect();
                    return (
                        bounding.top >= 0 &&
                        bounding.left >= 0 &&
                        bounding.bottom <= (window.innerHeight || document.documentElement.clientHeight) &&
                        bounding.right <= (window.innerWidth || document.documentElement.clientWidth)
                    );
                """, element)
            except Exception as js_e:
                logging.warning(f"Encountered exception: {js_e}, retrying...")
                continue  # Retry checking element visibility
    logging.error("Failed to check element visibility after multiple attempts")
    return False  # Return False if unable to determine visibility

def move_element_to_viewport(driver, element):
    """
    Move the element to the viewport if it's not already there.

    Args:
        driver: Selenium WebDriver instance.
        element: WebElement.

    Returns:
        None
    """
    if not is_element_in_viewport(driver, element):
        try:
            logging.info("Try moving to element with Selenium")
            ActionChains(driver).move_to_element(element).perform()
            logging.info("Successful moving to element to viewport")
            return True
        except Exception as sel_e:
            logging.error("Cannot scroll to the element using Selenium", sel_e)
            try:
                logging.info("Try moving to element with JavaScript")
                driver.execute_script("arguments[0].scrollIntoView(true);", element)
                logging.info("Successful moving to element to viewport")
            except Exception as js_e:
                logging.error("Cannot scroll to the element using JavaScript", js_e)

def login(username, password, login_url):
    """
    Login to the website.

    Args:
        username: Username for login.
        password: Password for login.
        login_url: URL for login page.

    Returns:
        WebDriver instance after successful login.
    """
    logging.info("Logging in...")
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    driver.get(login_url)
    try:
        username_field = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, 'username')))
        username_field.send_keys(username)
        password_field = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, 'passwordField')))
        password_field.send_keys(password)
        password_field.send_keys(Keys.RETURN)
        WebDriverWait(driver, 10).until(EC.title_is('ยื่นแบบ'))
        logging.info("Login successful")
        return driver
    except Exception as e:
        logging.error("An error occurred during login: %s", e)
        return None

def navigate_to_pdf_page(driver):
    logging.info("Navigating to all tax form page...")
    driver.get('https://efiling.rd.go.th/rd-efiling-web/form-status')

def open_filter_panel(driver):
    logging.info("Opening filter panel...")
    try:
        filter_button = find_element_with_retry(driver, (By.XPATH, "//div[@class='collapsed' and @aria-expanded='true']"))
        click_element_with_retry(driver, filter_button)
    except Exception as e:
        logging.error(f"Failed to open filter panel: {e}")

def select_dropdown_item(driver, form, select_item):
    logging.info(f"Selecting '{select_item}' from dropdown menu...")
    try:
        dropdown_button = find_element_with_retry(driver, (By.CSS_SELECTOR, f"ng-select[formcontrolname='{form}']"))
        click_element_with_retry(driver, dropdown_button)
    except Exception as e:
        logging.error(f"Failed to open dropdown menu: {e}")

    try:
        select_item_button = find_element_with_retry(driver, (By.XPATH, f"//span[@class='ng-option-label ng-star-inserted' and contains(text(), '{select_item}')]"))
        click_element_with_retry(driver, select_item_button)
    except Exception as e:
        logging.error(f"Failed to select item from dropdown: {e}")

def input_item(driver, form, input_item):
    logging.info(f"Inputting '{input_item}' into form...")
    try:
        input_element = find_element_with_retry(driver, (By.XPATH, f"//input[@formcontrolname='{form}']"))
        input_element.send_keys(input_item)
    except Exception as e:
        logging.error(f"Failed to input item: {e}")

def fill_form(driver, filter_form):
    logging.info("Filling filter form...")
    for filter in filter_form:
        form_type = filter['type']
        form = filter['form']
        input_item = filter['item']

        if input_item is None:
            continue

        if form_type == 'dropdown':
            select_dropdown_item(driver, form, input_item)
        elif form_type == 'input':
            input_item(driver, form, input_item)
        else:
            continue
    
    # Click search button
    try:
        search_button = find_element_with_retry(driver, (By.XPATH, "//button[@type='submit']"))
        click_element_with_retry(driver, search_button)
    except Exception as e:
        logging.error(f"Failed to click search button: {e}")

def get_file_name(driver, filter_form):
    """
    Constructing a file name with the URL.

    Args:
        driver: Selenium WebDriver instance.
        filter_form: Dictionary containing filter information.

    Returns:
        str: File name.
    """
    logging.info("Finding and downloading PDF...")
    tax_name = filter_form[0]['item']
    tax_year = filter_form[1]['item']
    tax_month = filter_form[2]['item']

    # Dictionary mapping Thai month abbreviations to English month names
    thai_to_eng_month = {
        "ม.ค.": "1",
        "ก.พ.": "2",
        "มี.ค.": "3",
        "เม.ย.": "4",
        "พ.ค.": "5",
        "มิ.ย.": "6",
        "ก.ค.": "7",
        "ส.ค.": "8",
        "ก.ย.": "9",
        "ต.ค.": "10",
        "พ.ย.": "11",
        "ธ.ค.": "12"
    }

    # Convert variables to strings before joining
    tax_name = str(tax_name)
    tax_year = str(tax_year)
    tax_month = str(tax_month)

    # Check if the provided tax_month is in the list of prohibited names
    if tax_month in thai_to_eng_month:
        logging.info("Converting Thai's month into Eng")
        tax_month = thai_to_eng_month[tax_month]
    else:
        logging.warning("Provided tax month is not a valid Thai month abbreviation.")

    try:
        current_url = driver.current_url
        url_extr = current_url.split('/')[-1]  # Extract filename from URL
    except Exception as e:
        logging.warning("Cannot extract name from URL or URL not found")
        url_extr = ""

    logging.info("Creating file name")
    filename = f"{tax_year}_{tax_month}_{tax_name}_{url_extr}"

    return filename

def download_pdf(driver, download_directory, filename=None):
    """
    Download PDF file into the designated folder.

    Args:
        driver: Selenium WebDriver instance.
        download_directory: Directory to save the downloaded PDF file.
        filename: Name of the downloaded file.

    Returns:
        None
    """
    attempt = 0
    while attempt < MAX_ATTEMPTS:
        logging.info(f"Attemping download PDF on {attempt + 1} attempt")
        current_url = driver.current_url

        if filename is None:
            filename = "download_file.pdf"

        try:
            logging.info(f"Creating destination directory at {download_directory}")
            os.makedirs(download_directory, exist_ok=True)
        except Exception as e:
            logging.warning(f"Failed to create destination directory (Attempt {attempt + 1}): {e}")
            attempt += 1
            continue  # Try another attempt

        try:
            logging.info("Join file name with OS path")
            saved_directory = os.path.join(download_directory, filename)
        except Exception as e:
            logging.warning("Error, filename incorrect")
            attempt += 1
            continue  # Try another attempt

        try:
            logging.info("Retrieve PDF URL for downloading")
            urllib.request.urlretrieve(current_url, saved_directory)
            logging.info(f"PDF downloaded successfully to: {saved_directory}")
            return  # Exit the function after successful download
        except Exception as e:
            logging.warning(f"Failed to download PDF: {e}")
            attempt += 1
            continue  # Try another attempt

        attempt += 1

    logging.error("Failed to download PDF after multiple attempts")

def find_and_download_pdf(driver, filter_form, username, download_directory):
    """Find and download PDF."""
    logging.info("Finding and downloading PDF...")
    tax_name = filter_form[0]['item']
    tax_year = filter_form[1]['item']
    tax_month = filter_form[2]['item']

    try:
        logging.info("Formatting destination path")
        destination = [username, tax_year, tax_month]
        destination = "/".join(destination)
    except Exception as e:
        logging.error("Cannot formatting directory path:", e)
        break
    
    try:
        download_directory = os.path.join(download_directory, destination)
    except Exception as e:
        logging.error("Cannot joi destination folder:", e)
        break
    
    last_clicked_index = 0
            
    while True:
        
        try:
            button_elements = find_all_elements_with_retry(driver, (By.XPATH, '//button[@aria-controls="dropdown-basic" and @id="button-basic"]'))
            time.sleep(2)
        except Exception as e:
            logging.error("Failed to find dropdown button: %s", e)
            break

        if last_clicked_index + 1 > len(button_elements):
            break

        try:
            click_element_with_retry(driver, button_elements[last_clicked_index], fallback_locator=(By.XPATH, '//button[@aria-controls="dropdown-basic" and @id="button-basic"]'))
        except Exception as e:
            logging.error("Failed to click on dropdown menu", e)
            break
        
        try:
            dropdown_menu = find_clickable_with_retry(driver, (By.XPATH, '//a[@class="dropdown-item" and contains(text(), "พิมพ์ภาพแบบ/ภาพใบเสร็จ")]'))
        except Exception as e:
            logging.error("Failed to find dropdown item: %s", e)
            break

        try:
            click_element_with_retry(driver, dropdown_menu)
        except Exception as e:
            logging.error("Failed to click on dropdown item", e)
            break

        try:
            download_buttons = find_all_elements_with_retry(driver, (By.XPATH, '//button[contains(text(), "ดาวน์โหลด")]'))
        except Exception as e:
            logging.error("Failed to find download buttons: %s", e)
            break

        for download_button in download_buttons:

            try:
                click_element_with_retry(driver, download_button)
            except Exception as e:
                logging.error("Failed to click on download button", e)
                break

            try:
                logging.info("Switching to new tab")
                tabs = driver.window_handles
                driver.switch_to.window(tabs[-1])
            except Exception as e:
                logging.error("Error switching to new tab: %s", e)
                break
                
            try:
                logging.info("Joining destination to filename")
                filename = os.path.join(download_directory, get_file_name(driver, filter_form))
            except Exception as e:
            
            try:
                download_pdf(driver, download_directory, filename=filename)
            except Exception as e:
                logging.error("Error downloading PDF: %s", e)
                break

            try:
                driver.close()
            except Exception as e:
                logging.error("Error closing tab: %s", e)
                break

            try:
                driver.switch_to.window(tabs[0])
            except Exception as e:
                logging.error("Error switching to original tab: %s", e)
                break

        try:
            close_button = find_clickable_with_retry(driver, (By.XPATH, '//button[contains(@class, "btn button-box button-box-close-modal") and contains(text(), "ปิด")]'))
        except Exception as e:
            logging.error("Failed to find close button: %s", e)
            break

        try:
            click_element_with_retry(driver, close_button)
        except Exception as e:
            logging.error("Failed to click on close button")
            break

        logging.info(f"Current button click counting: {last_clicked_index}")
        last_clicked_index += 1   

def switch_to_next_page(driver):
    """Switch to the next page in the same URL."""
    logging.info("Switching to next page...")
    try:
        next_page_button = find_clickable_with_retry(driver, (By.XPATH, '//li[@title="หน้าถัดไป"]'))   
    except Exception as e:
        logging.error("Failed to switch to next page: %s", e)
        return False

    try:
        if "disabled" not in next_page_button.get_attribute("class"):
            logging.info(f'Next page class containing: {next_page_button.get_attribute("class")}')
            click_element_with_retry(driver, next_page_button)
            return True
        else:
            logging.info("No more pages to switch to")
            return False
    except Exception as e:
        logging.error("Failed to click next page: %s", e)
        return False

def logout(driver):
    """Logout from the site."""
    logging.info("Logging out...")
    driver.quit()

IndentationError: expected an indented block after 'except' statement on line 418 (2829277638.py, line 420)

In [89]:
# Main controller
def login_and_download_all_pdfs(username, password, login_url, filter_form, download_directory):
    driver = login(username, password, login_url)
    try:
        navigate_to_pdf_page(driver)
        
        # Open filter panel
        open_filter_panel(driver)

        # Fill filter form
        fill_form(driver, filter_form)

        # Wait for page to load
        time.sleep(2)
            
        # Download pdfs from every items shown in the page
        while True:
            find_and_download_pdf(driver, filter_form, username, download_directory)
            if (not switch_to_next_page(driver)):
                break

        time.sleep(5)

    finally:
        logout(driver)

In [90]:
# List of accounts with their respective credentials and specific page URLs
accounts = [
    {'username': '0105563074322', 'password': 'ASsumpt6666'},
    # {'username': '0115563009598', 'password': 'ASsumpt6666'},
    # Add more accounts as needed
]

DEFAULT_DOWNLOAD_DIRECTORY = "C:/Users/Se_sA/Downloads/EFillingController"

login_url = "https://efiling.rd.go.th/rd-efiling-web/login"

filter_form = [
    {'form': 'taxForm', 'item': 'ภ.ง.ด.1', 'type': 'dropdown'},
    {'form': 'taxYear', 'item': '2566', 'type': 'dropdown'},
    {'form': 'taxMonth', 'item': None, 'type': 'dropdown'},
    {'form': 'nid', 'item': None, 'type': 'input'},
    {'form': 'fullName', 'item': None, 'type': 'input'},
    {'form': 'refNo', 'item': None, 'type': 'input'},
    {'form': 'taxformStatus', 'item': None, 'type': 'dropdown'},
]

for account in accounts:
    login_and_download_all_pdfs(account['username'], account['password'], login_url, filter_form, DEFAULT_DOWNLOAD_DIRECTORY)

--- Logging error ---
Traceback (most recent call last):
  File "C:\Users\Se_sA\AppData\Local\Temp\ipykernel_25248\190936215.py", line 152, in click_element_with_retry
    element.click()
  File "C:\Users\Se_sA\anaconda3\envs\dl_env\lib\site-packages\selenium\webdriver\remote\webelement.py", line 94, in click
    self._execute(Command.CLICK_ELEMENT)
  File "C:\Users\Se_sA\anaconda3\envs\dl_env\lib\site-packages\selenium\webdriver\remote\webelement.py", line 395, in _execute
    return self._parent.execute(command, params)
  File "C:\Users\Se_sA\anaconda3\envs\dl_env\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 347, in execute
    self.error_handler.check_response(response)
  File "C:\Users\Se_sA\anaconda3\envs\dl_env\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 229, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.NoSuchElementException: Message: no such element: No node with given id found
  (S