# In [None]:
import os
import tempfile
import time
import csv
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, ElementClickInterceptedException, NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options

# --- Chrome configuration ---
chrome_options = Options()

# Chrome gets a freshly created temporary directory for its user data.
temp_user_data_dir = tempfile.mkdtemp()

# Flags are applied in the order listed.
for chrome_flag in (
    "--headless",
    "--no-sandbox",
    "--disable-dev-shm-usage",
    "--window-size=1920,1080",
    f"--user-data-dir={temp_user_data_dir}",
):
    chrome_options.add_argument(chrome_flag)

# --- Start the browser ---
print(f"Using temporary user data dir: {temp_user_data_dir}")
driver = webdriver.Chrome(options=chrome_options)
print("WebDriver initialized.")

# --- Scrape target and result accumulators ---
url = "http://www.scstrade.com/MarketStatistics/MS_HistoricalPrices.aspx"
all_data_list = []  # rows collected across every result page
header = []  # column titles taken from the results grid

try:
    # Drive the historical-prices page end to end: fill in the search form,
    # submit it, then walk every result page and collect the rows into
    # all_data_list as dicts keyed by the grid's header text. Failures are
    # reported by the except clause (page source + screenshot); the browser is
    # always shut down and the temporary profile removed in the finally clause.
    print(f"Navigating to {url}")
    driver.get(url)
    print("Page loaded.")

    # --- Search parameters ---
    company_search_term = "ENGRO"
    # Text the autocomplete entry must contain in order to be selected.
    company_to_select_text = "EFERT - Engro Fertilizers Ltd."
    start_date = "01/07/2024"  # DD/MM/YYYY
    end_date = "01/10/2025"  # DD/MM/YYYY

    wait = WebDriverWait(driver, 15)  # routine element waits
    long_wait = WebDriverWait(driver, 60)  # first result load after submitting

    print("Locating input fields...")
    company_name_input = wait.until(EC.presence_of_element_located((By.ID, "tags")))
    start_date_input = driver.find_element(By.ID, "date1")
    end_date_input = driver.find_element(By.ID, "date2")
    view_price_button_locator = (By.ID, "btn1")
    view_price_button_element = driver.find_element(*view_price_button_locator)
    print("Input fields located.")

    # --- Autocomplete selection ---
    print(f"Entering Company Search Term: {company_search_term}")
    company_name_input.clear()
    company_name_input.send_keys(company_search_term)

    autocomplete_dropdown_locator = (By.ID, "ui-id-1")
    company_option_xpath = f"//ul[@id='ui-id-1']/li[contains(., '{company_to_select_text}')]"
    company_option_locator = (By.XPATH, company_option_xpath)

    try:
        print(f"Waiting for autocomplete option: '{company_to_select_text}'")
        option_element = wait.until(EC.element_to_be_clickable(company_option_locator))
        print("Autocomplete option found and clickable. Clicking it...")
        option_element.click()
        print(f"Clicked '{company_to_select_text}' from autocomplete.")
        time.sleep(0.5)  # brief pause after picking the option
    except TimeoutException:
        print(f"FATAL: Could not find or click '{company_to_select_text}' in autocomplete.")
        raise

    # --- Date range ---
    # The values are handed to the script as arguments instead of being
    # spliced into the JavaScript source, so quotes or backslashes in a date
    # string cannot break (or alter) the script.
    set_value_script = "arguments[0].value = arguments[1];"

    print(f"Entering Start Date: {start_date}")
    driver.execute_script(set_value_script, start_date_input, start_date)

    print(f"Entering End Date: {end_date}")
    driver.execute_script(set_value_script, end_date_input, end_date)

    # --- Submit the form ---
    print("Waiting for 'View Price' button to be clickable...")
    # Click the element the wait returns rather than the reference looked up
    # before the form was filled in.
    view_price_button_element = wait.until(EC.element_to_be_clickable(view_price_button_locator))

    print("Attempting standard click on 'View Price' button...")
    try:
        view_price_button_element.click()
        print("Standard click successful.")
        print("Pausing briefly for request initiation...")
        time.sleep(2)
    except Exception as e:
        print(f"FATAL: Error during standard click: {e}")
        raise

    # --- Locators for the results grid ---
    results_table_locator = (By.ID, "list")
    data_rows_locator = (By.CSS_SELECTOR, "#list > tbody > tr.jqgrow")
    no_records_locator = (By.XPATH, "//div[@id='pager_right']//div[contains(text(), 'No records to view')]")
    loading_indicator_locator = (By.ID, "load_list")
    header_row_locator = (By.CSS_SELECTOR, "#gview_list .ui-jqgrid-htable tr.ui-jqgrid-labels")
    # DIVs inside the TH elements; these hold the column title text.
    header_cell_text_locator = (By.CSS_SELECTOR, "th div.ui-th-div")
    # First header title DIV, used as the visibility signal for the header.
    first_header_cell_text_locator = (By.CSS_SELECTOR, "#gview_list .ui-jqgrid-htable th:first-child div.ui-th-div")
    next_button_locator = (By.ID, "next_pager")

    # --- Wait for the first page of results ---
    # Only the loading-indicator wait is guarded here, so the "initial page"
    # timeout message is not printed for timeouts raised further down.
    print("Waiting for first page: loading indicator to disappear... (Timeout: 60s)")
    try:
        long_wait.until(EC.invisibility_of_element_located(loading_indicator_locator))
    except TimeoutException:
        print("FATAL: Timed out waiting for the initial page results to load.")
        raise
    print("Loading indicator disappeared for first page.")

    # find_elements returns an empty list (instead of raising) when the
    # "No records to view" marker is absent.
    if driver.find_elements(*no_records_locator):
        print("Initial search returned 'No records to view'. Exiting.")
    else:
        print("Initial data potentially found. Proceeding to scrape.")

        # --- Header extraction ---
        try:
            print("Waiting for header row to be visible...")
            wait.until(EC.visibility_of_element_located(first_header_cell_text_locator))
            print("Header row visible. Extracting header...")
            header_row_element = driver.find_element(*header_row_locator)
            header_cells = header_row_element.find_elements(*header_cell_text_locator)
            # Read each cell's text once, then drop the blank titles.
            header_texts = [cell.text.strip() for cell in header_cells]
            header = [text for text in header_texts if text]
            print("\nHeader:", header)
            if not header:
                # Scraping continues; rows are then only reported, not stored.
                print("ERROR: Header extraction resulted in an empty list. Check selectors/timing.")
            elif len(header) != 7:  # 7 columns are expected
                print(f"WARN: Expected 7 header columns, but found {len(header)}. Header: {header}")
        except TimeoutException:
            print("FATAL: Timed out waiting for header to become visible.")
            raise
        except Exception as head_ex:
            print(f"FATAL: Error during header extraction: {head_ex}")
            raise

        # --- Pagination loop ---
        page_num = 1
        while True:
            print(f"--- Scraping Page {page_num} ---")
            time.sleep(0.5)
            current_rows = driver.find_elements(*data_rows_locator)
            print(f"Found {len(current_rows)} rows on page {page_num}.")

            if not current_rows and page_num > 1:
                print("No more rows found, might be end of pagination.")
                break

            for row in current_rows:
                cols = row.find_elements(By.TAG_NAME, "td")
                data = [col.text.strip() for col in cols]
                if not data:
                    continue
                if not header:
                    print(f"WARN: Cannot create dictionary, header extraction failed. Row data: {data}")
                elif len(data) == len(header):
                    all_data_list.append(dict(zip(header, data)))
                else:
                    print(f"WARN: Row data length mismatch on page {page_num}. Header len: {len(header)}, Row len: {len(data)}. Data: {data}")

            # --- Move to the next page, if there is one ---
            try:
                next_button = driver.find_element(*next_button_locator)
                # get_attribute may return None when the attribute is absent.
                button_classes = next_button.get_attribute("class") or ""
                if "ui-state-disabled" in button_classes or "ui-disabled" in button_classes:
                    print("Next button is disabled. Reached the last page.")
                    break

                print("Clicking Next Page button...")
                # Best-effort scroll so the button is in view; a failure here
                # is reported but does not stop the click attempt.
                try:
                    driver.execute_script("arguments[0].scrollIntoView(true);", next_button)
                    time.sleep(0.2)
                except Exception as scroll_err:
                    print(f"Scroll before click failed (ignored): {scroll_err}")

                next_button.click()
                print("Waiting for next page to load...")
                wait.until(EC.invisibility_of_element_located(loading_indicator_locator))
                print("Next page loaded.")
                page_num += 1
                time.sleep(1)
            except NoSuchElementException:
                print("Could not find the Next button. Assuming end of pagination.")
                break
            except Exception as page_e:
                # Keep whatever was collected so far and stop paging.
                print(f"Error during pagination click/wait: {page_e}")
                break
        # --- End pagination loop ---

except Exception as e:
    # Top-level boundary: report the failure and save diagnostics. The error
    # is not re-raised, so the code after this statement still runs.
    print("\n--- An error occurred ---")
    print(f"Error type: {type(e).__name__}")
    print(f"Error message: {e}")
    try:
        screenshot_path = "error_screenshot.png"
        if 'driver' in locals() and driver:
            try:
                source_path = "error_page_source.html"
                with open(source_path, "w", encoding="utf-8") as f:
                    f.write(driver.page_source)
                print(f"Page source saved to {source_path}")
            except Exception as source_err:
                print(f"Could not save page source: {source_err}")
            driver.save_screenshot(screenshot_path)
            print(f"Screenshot saved to {screenshot_path}")
        else:
            print("Driver not available to save screenshot/source.")
    except Exception as screen_err:
        print(f"Could not save screenshot/source: {screen_err}")

finally:
    print("\nClosing browser...")
    if 'driver' in locals() and driver:
        driver.quit()
        print("Browser closed.")
    # Delete the temporary profile directory so repeated runs do not pile up
    # directories on disk. This happens after quit() so the browser has been
    # asked to shut down before its profile is removed.
    try:
        if 'temp_user_data_dir' in locals() and os.path.exists(temp_user_data_dir):
            import shutil
            print(f"Attempting to remove temporary user data dir: {temp_user_data_dir}")
            shutil.rmtree(temp_user_data_dir)
            print("Temporary user data dir removed.")
    except OSError as cleanup_error:
        print(f"Error cleaning up temp directory: {cleanup_error}")

# --- Post-processing: write the scraped rows to a CSV file ---
# Each row is a dict keyed by the header text, so a file is written only when
# both the rows and the header are available. The three branches below cover
# every combination of the two, so no catch-all branch is needed.
if not all_data_list:
    print("\nNo data was scraped. CSV file not created.")
elif not header:
    print("\nWARNING: Data was scraped but header extraction failed. Cannot save CSV correctly.")
else:
    # File name is built from the selected company text and the date range,
    # with spaces, dots and slashes replaced or removed.
    output_filename = f"{company_to_select_text.replace(' ', '_').replace('.', '')}_history_{start_date.replace('/', '-')}_to_{end_date.replace('/', '-')}.csv"
    print(f"\nSaving {len(all_data_list)} records to {output_filename}...")
    try:
        # newline='' lets the csv module control line endings itself.
        with open(output_filename, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=header)
            writer.writeheader()
            writer.writerows(all_data_list)
        print("Data saved successfully.")
    except (OSError, csv.Error, ValueError) as csv_err:
        # OSError: file cannot be created or written; csv.Error / ValueError:
        # the csv writer rejected a row or a value could not be encoded.
        print(f"Error saving data to CSV: {csv_err}")