In [1]:
import os
import time
from typing import Dict, List, Tuple

from selenium import webdriver
from selenium.common.exceptions import ElementClickInterceptedException, WebDriverException
from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException, TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

from classes.string_modify import StringModification

# Requires python-dotenv: pip install python-dotenv
from dotenv import load_dotenv
# Raw string: the backslashes of the Windows path must be taken literally.
# In a normal literal "\C", "\e" and "\." are invalid escape sequences, which
# Python reports with a warning and plans to reject in a future version.
load_dotenv(dotenv_path=r"D:\Comparison_scrape\env_variables\.env")

# get Store list
from utils.stores import STORES

# Target site and login details, read from the process environment.
# Each value is None when the corresponding variable is not set.
url = os.environ.get("URL")
email = os.environ.get("EMAIL")
password = os.environ.get("PASSWORD")
location = os.environ.get("LOCATION")

In [2]:
def crawl_credentials(driver, timeout: float = 15) -> None:
    """Fill in the login form, then submit the location code on the next form.

    Uses the module-level ``email``, ``password`` and ``location`` values
    (read from the EMAIL, PASSWORD and LOCATION environment variables).

    Args:
        driver: Selenium WebDriver that is already showing the login page.
        timeout: Maximum number of seconds to wait for each form control to
            become clickable before giving up.

    Raises:
        RuntimeError: If any of the three values is ``None``; nothing is typed
            or clicked in that case.
        TimeoutException: If a form control does not become clickable in time.
    """
    # Fail early with a readable message instead of passing None to send_keys.
    missing = [
        name
        for name, value in (("EMAIL", email), ("PASSWORD", password), ("LOCATION", location))
        if value is None
    ]
    if missing:
        raise RuntimeError(f"Missing environment variable(s): {', '.join(missing)}")

    wait = WebDriverWait(driver, timeout)

    def ready(xpath):
        # Explicit wait: a control that has not rendered yet is retried until
        # the timeout instead of failing on the first lookup.
        return wait.until(EC.element_to_be_clickable((By.XPATH, xpath)))

    # Step 1: e-mail / password login form.
    ready("//input[@type='email']").send_keys(email)
    ready("//input[@type='password']").send_keys(password)
    ready("//button[@class='btn login-btn']").click()

    # Step 2: the location code is typed into a password-type input.
    # NOTE(review): this XPath is identical to the login password field; it is
    # assumed the login form is gone by the time this lookup runs - confirm.
    ready("//input[@type='password']").send_keys(location)
    ready("//input[@type='submit']").click()

def crawl_click_date(driver) -> None:
    """Set the date filter to "this year" and open the totals link.

    Clicks, in order: the filter dropdown input, the "year" cell, the
    "this year" entry and, after a fixed 8 second pause, the link with id
    ``totallinkneg60``. Every click waits (up to 50 seconds) for its target
    to be clickable first.

    Args:
        driver: Selenium WebDriver positioned on the page holding the filter.

    Raises:
        Exception: If a control never becomes clickable or a click is
            intercepted by another element; the Selenium error is chained as
            ``__cause__``.
    """
    wait = WebDriverWait(driver, 50)

    def click_when_ready(xpath):
        # "Present in the DOM" is not enough for click(); wait until the
        # element is displayed and enabled.
        wait.until(EC.element_to_be_clickable((By.XPATH, xpath))).click()

    try:
        click_when_ready("//input[@class='dropdownInput selector-filter-input']")
        click_when_ready("//td[@data-filter='year']")
        click_when_ready("//div[@title='this year']")

        # Fixed pause kept from the original flow.
        # NOTE(review): presumably gives the page time to refresh after the
        # filter change before the totals link is clicked - confirm.
        time.sleep(8)
        click_when_ready("//a[@id='totallinkneg60']")
    except (ElementClickInterceptedException, TimeoutException) as e:
        raise Exception(
            "Clickable button not found"
        ) from e
    
def check_inclusion_of_element(driver) -> Dict:
    """Map each listed store name found on the page to its link target.

    Looks at every ``<a class='dottedlink'>`` element and keeps those whose
    text appears in ``STORES``.

    Args:
        driver: Selenium WebDriver showing the page that lists the stores.

    Returns:
        Dict of anchor text -> value of the anchor's ``href`` attribute.
    """
    anchors = driver.find_elements(By.XPATH, "//a[@class='dottedlink']")
    listed = (anchor for anchor in anchors if anchor.text in STORES)

    links_by_store = {}
    for anchor in listed:
        href = anchor.get_attribute("href")
        links_by_store[anchor.text] = href
    return links_by_store

def is_stale(element, driver):
    """Tell whether ``element`` can still be handed to the browser.

    Runs a trivial script that takes the element as its argument; the script's
    result is ignored, only whether the call succeeds matters.

    Args:
        element: WebElement obtained earlier from ``driver``.
        driver: Selenium WebDriver used to run the probe script.

    Returns:
        True if the call raises StaleElementReferenceException or any other
        WebDriverException, False if it completes.
    """
    try:
        driver.execute_script("return arguments[0] !== document.activeElement", element)
    except (StaleElementReferenceException, WebDriverException):
        return True
    else:
        return False


def get_table_details(driver, links: dict) -> Tuple[List[List[str]], List[str]]:
    """Scrape every page of the paginated table behind each link.

    For each URL in ``links.values()`` the page is opened and, page by page,
    the text of every ``<tr>`` whose id contains ``row`` is collected together
    with the text of the validator element
    ``(//span[@class='content']/span)[2]``. Paging for a link stops when the
    validator is missing or blank; otherwise "Next" is clicked and the
    function waits until the first collected row has gone stale.

    Args:
        driver: Logged-in Selenium WebDriver.
        links: Mapping whose values are the URLs to visit (keys are unused).

    Returns:
        ``(result, target)``: ``result`` holds one list of row texts per
        scraped page; ``target`` holds, for the same page, the validator text
        or ``"No validator element"`` when that element never appeared.
        Both lists always have the same length.

    Note:
        NoSuchElementException, StaleElementReferenceException and
        TimeoutException raised while working on a link are printed and the
        function moves on to the next link, keeping what was collected so far.
    """
    result: List[List[str]] = []
    target: List[str] = []
    if not links:
        return result, target

    wait = WebDriverWait(driver, 15)

    for link in links.values():
        driver.get(link)
        time.sleep(2)
        try:
            while True:
                web_elements = wait.until(
                    EC.presence_of_all_elements_located((By.XPATH, "//tr[contains(@id, 'row')]"))
                )

                try:
                    # Read the text once, now: after "Next" is clicked the
                    # element may be stale or already describe the next page.
                    validator_text = wait.until(
                        EC.presence_of_element_located((By.XPATH, "(//span[@class='content']/span)[2]"))
                    ).text
                    has_validator = True
                except TimeoutException:
                    validator_text = "No validator element"
                    has_validator = False

                # Compute both values before appending either one, so a
                # failure while reading row text cannot leave the two output
                # lists with different lengths.
                data_text = [data.text for data in web_elements]
                target.append(validator_text)
                result.append(data_text)

                # A blank validator marks the last page; a missing one is
                # treated the same way so the loop cannot run without an
                # end condition.
                if not has_validator or not validator_text.strip():
                    break

                driver.find_element(By.XPATH, "//a[@aria-label='Next']").click()

                # Wait for the next page to load before proceeding
                wait.until(lambda drv: is_stale(web_elements[0], drv))

        except (NoSuchElementException, StaleElementReferenceException, TimeoutException) as err:
            # Best effort per link: report and continue with the next store
            # instead of discarding everything scraped so far.
            print(f"Exception found: {err}")

    return result, target

In [None]:
def main(url):
    """Run the full scrape in a fresh Chrome session.

    Opens ``url``, logs in, applies the date filter, collects the store links
    and scrapes each store's table.

    Args:
        url: Address of the login page to open first.

    Returns:
        Whatever ``get_table_details`` returns for the collected store links.
    """
    driver = webdriver.Chrome()
    try:
        driver.maximize_window()
        driver.get(url)

        crawl_credentials(driver)
        crawl_click_date(driver)
        stores_with_links = check_inclusion_of_element(driver)
        return get_table_details(driver, stores_with_links)
    finally:
        # Always close the browser, including when a step above raises;
        # otherwise the Chrome process is left running.
        driver.quit()

# Run the scrape. main() hands back the pair produced by get_table_details:
# the row texts collected per page and the validator text recorded per page.
final_data, next_labels = main(url)

In [4]:
# Post-process the scraped rows with the project's StringModification helper.
# NOTE(review): modify() is assumed to return an iterable of strings, since
# each item is later written out with "\n" appended - confirm.
datas = StringModification(final_data)
result = datas.modify()

In [None]:
# Write one processed entry per line. The encoding is explicit so that
# non-ASCII characters in the scraped text cannot fail under the platform's
# default codec (e.g. cp1252 on Windows).
with open("store_details.txt", "w", encoding="utf-8") as file:
    file.writelines(item + "\n" for item in result)

In [5]:
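# Disabled debug check: print every entry that splits into more than 17 whitespace-separated tokens.
# for element in result:
#     x = element.split()
#     if len(x) > 17:
#         print(x)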

In [7]:
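# Earlier draft of get_table_details, kept commented out for reference only; the active definition is in the cell above main().
# def get_table_details(driver, links: dict) -> List[List[str]]: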
#     result = []
#     target = []

#     for link in links.values():
#         driver.get(link)
#         wait = WebDriverWait(driver, 10)
#         try:
#             web_elements = driver.find_elements(By.XPATH, "//tr[contains(@id, 'row')]")
#             my_elements = driver.find_element(By.XPATH, "(//span[@class='content']/span)[2]")
#             data_text = [data.text for data in web_elements]
#             result.append(data_text)
#             target.append(my_elements.text)
            
#             while True:
#                 next_button = driver.find_element(By.XPATH, "//a[@aria-label='Next']")
#                 next_button.click()
#                 web_elements2 = wait.until(EC.presence_of_all_elements_located((By.XPATH, "//tr[contains(@id, 'row')]")))
#                 data_text = [data.text for data in web_elements2]
#                 result.append(data_text)

#                 my_elements = driver.find_element(By.XPATH, "(//span[@class='content']/span)[2]")

#                 target.append(my_elements.text)

#                 if my_elements.text.strip() == "":
#                     break
            
#         except NoSuchElementException as e:
#             print(f"Exception found: {e}")
#             pass
#         except StaleElementReferenceException as err:
#             print(f"Exception found: {err}")
#             pass
        
#     return result, target