In [1]:
import tkinter as tk
from tkinter import filedialog, messagebox
from tkinter.ttk import Progressbar
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from PIL import Image
import pandas as pd
import time
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
import google.generativeai as genai
from requests.exceptions import HTTPError
import requests
import os
# Chrome setup with blocking unwanted URLs
def setup_driver():
    chrome_options = Options()
    #chrome_options.add_argument("--headless")  # Headless mode
    chrome_options.set_capability('goog:loggingPrefs', {'performance': 'ALL'})
    driver = webdriver.Chrome(options=chrome_options)
    driver.execute_cdp_cmd('Network.setBlockedURLs', {'urls': ['*clientlib-devtool.js']})  # Block dev tools
    return driver

# CAPTCHA handling function with retry functionality
def handle_captcha(driver):
    try:
        driver.implicitly_wait(7)
        time.sleep(5)
        screenshot = driver.get_screenshot_as_png()
        with open("full_page_screenshot.png", "wb") as file:
            file.write(screenshot)

        image = Image.open("full_page_screenshot.png")
        width, height = image.size
        left, top = width * 0.50, height * 0.20
        right, bottom = left + width * 0.20, top + height * 0.28
        padding = 10
        region_image = image.crop((left - padding, top - padding, right + padding, bottom + padding))
        region_image.save('captcha_regionS.png')

        genai.configure(api_key="AIzaSyCp-6vZIB6rf1WYWMx8vO5mNAAnc90Yj_s")
        def solve_captcha(image_path):
            model = genai.GenerativeModel("gemini-1.5-flash")
            image = Image.open(image_path)
            response = model.generate_content(["Perform the math operation in the picture and give output of the math operation only", image])
            return response.text.strip()

        captcha_solution = solve_captcha("captcha_regionS.png")
        captcha_input = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//*[@id='customCaptchaInput']")))
        captcha_input.clear()
        captcha_input.send_keys(captcha_solution)        
        submit_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, "check")))
        submit_button.click()
        print(f"CAPTCHA solved: {captcha_solution}")
    except Exception as e:
        print(f"Error handling CAPTCHA: {e}")

# Function to check if CAPTCHA is present
def check_for_captcha(driver):
    try:
        captcha_element = driver.find_element(By.XPATH, "//*[@id='customCaptchaInput']")
        if captcha_element.is_displayed():
            print("CAPTCHA detected. Solving CAPTCHA...")
            handle_captcha(driver)
            return True
        return False
    except:
        return False


# Function to scrape director details
def scrape_director_details(driver, uid):
    try:
        if check_for_captcha(driver):
            handle_captcha(driver)
        time.sleep(3)
        table = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "content")))
        rows = driver.find_elements(By.XPATH, "//tbody[@id='content']/tr")

        director_data = []
        for row in rows:
            columns = row.find_elements(By.TAG_NAME, "td")
            director_data.append([col.text for col in columns])

        df = pd.DataFrame(director_data, columns=["Sr. No", "DIN/PAN", "Name", "Designation", "Date of Appointment", "Cessation Date", "Signatory"])
        df['CIN'] = uid
        return df

    except Exception as e:
        print(f"Error scraping director details for {uid}: {e}")
        return pd.DataFrame()

# Function to handle retrying clicks
def retry_click(driver, by_locator, max_attempts=3):
    for attempt in range(max_attempts):
        try:
            element = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(by_locator))
            element.click()
            return True
        except Exception as e:
            print(f"Attempt {attempt + 1}: Error clicking element: {e}")
    return False

def search_master_data(driver, uid):
    try:
        driver.get("https://www.mca.gov.in/content/mca/global/en/mca/master-data/MDS.html")
        search_box = WebDriverWait(driver,12).until(EC.presence_of_element_located((By.XPATH, "//*[@id='masterdata-search-box']")))
        search_box.clear()
        search_box.send_keys(uid)
        search_box.send_keys(Keys.RETURN)
        if check_for_captcha(driver):
            handle_captcha(driver)

        # Retry clicking the company name if necessary
        company_name_locator = (By.XPATH, "//*[@id='fohomepage-037f88dfd8']/div/div/div[3]/div/div[1]/div[1]/div/table[1]/tbody/tr/td[2]")
        if retry_click(driver, company_name_locator):
            if check_for_captcha(driver):
                handle_captcha(driver)
            retry_click(driver, company_name_locator)

        # Click on the "Directory Data" button
        button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, "//*[@id='formId']/button[3]"))
        )
        
        # Verify that it's the correct button
        import logging

        logging.basicConfig(level=logging.DEBUG)
        logger = logging.getLogger(__name__)
        if "tablinks directorData" in button.get_attribute("class") and "Director/Signatory details" in button.text:
            logger.debug("Correct button found, attempting to click")
            button.click()
            logger.info("Successfully clicked the Director/Signatory details button")
        else:
            logger.warning("Button found, but it doesn't match expected attributes")
            # Attempt to find the button using other attributes
            button = driver.find_element(By.XPATH, "//button[contains(@class, 'tablinks directorData') and contains(text(), 'Director/Signatory details')]")
            button.click()
            logger.info("Clicked alternative button matching description")

        # Scrape the director details
        df = scrape_director_details(driver, uid)
        return df

    except Exception as e:
        print(f"Error processing UID {uid}: {e}")
        return pd.DataFrame()

# Function to handle website blocking with exponential backoff
def wait_if_blocked(attempt):
    wait_time = min(3600, 10 * (2 ** attempt))  # Max wait time of 1 hour
    print(f"Blocked. Waiting for {wait_time} seconds...")
    time.sleep(wait_time)

def process_in_parallel(cins, progress_bar, status_label, batch_size=5):
    all_data = pd.DataFrame()
    total_cins = len(cins)
    attempt = 0
    
    for i in range(0, total_cins, batch_size):
        batch = cins[i:i + batch_size]
        print(f"Processing batch {i // batch_size + 1} with {len(batch)} CINs.")
        with ThreadPoolExecutor(max_workers=batch_size) as executor:
            futures = {executor.submit(search_master_data, setup_driver(), uid): uid for uid in batch}
            for future in as_completed(futures):
                uid = futures[future]
                try:
                    df = future.result()
                    if not df.empty:
                        all_data = pd.concat([all_data, df], ignore_index=True)
                except HTTPError as e:
                    if e.response.status_code == 404:
                        print(f"Website blocked for UID {uid}.")
                        wait_if_blocked(attempt)
                        attempt += 1
                        continue
                    else:
                        print(f"HTTP error for UID {uid}: {e}")
                except Exception as e:
                    print(f"Error processing future for UID {uid}: {e}")
        progress = min(100, ((i + len(batch)) / total_cins) * 100)
        progress_bar['value'] = progress
        status_label.config(text=f"Processed {i + len(batch)} of {total_cins} CINs")
        window.update_idletasks()
    return all_data

def read_cin_from_excel(file_path):
    df = pd.read_excel(file_path)
    return df['CIN'].head(10).tolist()

def process_excel(input_file, output_file, progress_bar, status_label):
    cin_values = read_cin_from_excel(input_file)
    all_data = process_in_parallel(cin_values, progress_bar, status_label)
    all_data.to_excel(output_file, index=False)
    messagebox.showinfo("Success", f"All data saved to {output_file}")
    window.quit()  # Close the window after successful data scraping

def start_scraping_thread(input_file, output_file, progress_bar, status_label):
    thread = threading.Thread(target=process_excel, args=(input_file, output_file, progress_bar, status_label))
    thread.start()

def select_input_file():
    file_path = filedialog.askopenfilename(filetypes=[("Excel files", "*.xlsx")])
    input_entry.delete(0, tk.END)
    input_entry.insert(0, file_path)

def select_output_folder():
    folder_path = filedialog.askdirectory()
    output_entry.delete(0, tk.END)
    output_entry.insert(0, folder_path)

def on_submit():
    input_file = input_entry.get()
    output_folder = output_entry.get()
    
    if not input_file or not output_folder:
        messagebox.showerror("Error", "Please select both input file and output folder.")
        return
    
    output_file = f"{output_folder}/output_directors.xlsx"
    start_scraping_thread(input_file, output_file, progress_bar, status_label)

window = tk.Tk()
window.title("CIN Data Scraper")
window.geometry("400x300")

input_label = tk.Label(window, text="Select Input Excel File:")
input_label.pack(pady=5)
input_entry = tk.Entry(window, width=40)
input_entry.pack(pady=5)
input_button = tk.Button(window, text="Browse", command=select_input_file)
input_button.pack(pady=5)

output_label = tk.Label(window, text="Select Output Folder:")
output_label.pack(pady=5)
output_entry = tk.Entry(window, width=40)
output_entry.pack(pady=5)
output_button = tk.Button(window, text="Browse", command=select_output_folder)
output_button.pack(pady=5)

submit_button = tk.Button(window, text="Submit", command=on_submit)
submit_button.pack(pady=10)

progress_bar = Progressbar(window, length=300, mode='determinate')
progress_bar.pack(pady=10)

status_label = tk.Label(window, text="")
status_label.pack(pady=5)
window.mainloop()



Processing batch 1 with 5 CINs.
CAPTCHA detected. Solving CAPTCHA...
CAPTCHA detected. Solving CAPTCHA...
CAPTCHA detected. Solving CAPTCHA...
CAPTCHA detected. Solving CAPTCHA...
CAPTCHA detected. Solving CAPTCHA...
CAPTCHA solved: 167
CAPTCHA solved: I cannot see or analyze any image. Please provide an image.
CAPTCHA solved: 82
CAPTCHA solved: 29
CAPTCHA solved: 111
CAPTCHA solved: I cannot see or analyze any image. Please provide an image.
CAPTCHA solved: 167
CAPTCHA solved: 82
Attempt 1: Error clicking element: Message: 
Stacktrace:
0   chromedriver                        0x0000000105173ed4 cxxbridge1$str$ptr + 1906348
1   chromedriver                        0x000000010516c344 cxxbridge1$str$ptr + 1874716
2   chromedriver                        0x0000000104d80264 cxxbridge1$string$len + 89492
3   chromedriver                        0x0000000104dc4514 cxxbridge1$string$len + 368708
4   chromedriver                        0x0000000104dfe7d4 cxxbridge1$string$len + 606980
5   chromedr

DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:56898/session/c1c2a6c6f3d8335868f8b4ccad5449a6/execute/sync {'script': '/* getAttribute */return (function(){return (function(){var d=this||self;function f(a,b){function c(...', 'args': [{'element-6066-11e4-a52e-4f735466cecf': 'f.8DA19CBAD9802663E073C5CFC7A3F4A2.d.1E1C63E7CCA39C96E1137FBF3AE8A02F.e.142'}, 'class']}
DEBUG:urllib3.connectionpool:http://localhost:56898 "POST /session/c1c2a6c6f3d8335868f8b4ccad5449a6/execute/sync HTTP/1.1" 200 33
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":"tablinks directorData"} | headers=HTTPHeaderDict({'Content-Length': '33', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:GET http://localhost:56898/session/c1c2a6c6f3d8335868f8b4ccad5449a6/element/f.8DA19CBAD9802663E073C5CFC7A3F4A2.d.1

Attempt 3: Error clicking element: Message: 
Stacktrace:
0   chromedriver                        0x0000000100603ed4 cxxbridge1$str$ptr + 1906348
1   chromedriver                        0x00000001005fc344 cxxbridge1$str$ptr + 1874716
2   chromedriver                        0x0000000100210264 cxxbridge1$string$len + 89492
3   chromedriver                        0x0000000100254514 cxxbridge1$string$len + 368708
4   chromedriver                        0x000000010028e7d4 cxxbridge1$string$len + 606980
5   chromedriver                        0x0000000100249134 cxxbridge1$string$len + 322660
6   chromedriver                        0x0000000100249d84 cxxbridge1$string$len + 325812
7   chromedriver                        0x00000001005cbf90 cxxbridge1$str$ptr + 1677160
8   chromedriver                        0x00000001005d08fc cxxbridge1$str$ptr + 1695956
9   chromedriver                        0x00000001005b14b8 cxxbridge1$str$ptr + 1567888
10  chromedriver                        0x00000001005d

DEBUG:urllib3.connectionpool:http://localhost:56930 "POST /session/75c07b20e5e96354f207bea552115e68/element HTTP/1.1" 404 1784
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=404 | data={"value":{"error":"no such element","message":"no such element: Unable to locate element: {\"method\":\"xpath\",\"selector\":\"//*[@id='fohomepage-037f88dfd8']/div/div/div[3]/div/div[1]/div[1]/div/table[1]/tbody/tr/td[2]\"}\n  (Session info: chrome=129.0.6668.58)","stacktrace":"0   chromedriver                        0x0000000102e67ed4 cxxbridge1$str$ptr + 1906348\n1   chromedriver                        0x0000000102e60344 cxxbridge1$str$ptr + 1874716\n2   chromedriver                        0x0000000102a74264 cxxbridge1$string$len + 89492\n3   chromedriver                        0x0000000102ab8514 cxxbridge1$string$len + 368708\n4   chromedriver                        0x0000000102af27d4 cxxbridge1$string$len + 606980\n5   chromedriver                        0x0000000102aad134 

Attempt 2: Error clicking element: Message: 
Stacktrace:
0   chromedriver                        0x0000000102e67ed4 cxxbridge1$str$ptr + 1906348
1   chromedriver                        0x0000000102e60344 cxxbridge1$str$ptr + 1874716
2   chromedriver                        0x0000000102a74264 cxxbridge1$string$len + 89492
3   chromedriver                        0x0000000102ab8514 cxxbridge1$string$len + 368708
4   chromedriver                        0x0000000102af27d4 cxxbridge1$string$len + 606980
5   chromedriver                        0x0000000102aad134 cxxbridge1$string$len + 322660
6   chromedriver                        0x0000000102aadd84 cxxbridge1$string$len + 325812
7   chromedriver                        0x0000000102e2ff90 cxxbridge1$str$ptr + 1677160
8   chromedriver                        0x0000000102e348fc cxxbridge1$str$ptr + 1695956
9   chromedriver                        0x0000000102e154b8 cxxbridge1$str$ptr + 1567888
10  chromedriver                        0x0000000102e3

DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:56898/session/c1c2a6c6f3d8335868f8b4ccad5449a6/element {'using': 'css selector', 'value': '[id="content"]'}
DEBUG:urllib3.connectionpool:http://localhost:56898 "POST /session/c1c2a6c6f3d8335868f8b4ccad5449a6/element HTTP/1.1" 200 127
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":{"element-6066-11e4-a52e-4f735466cecf":"f.8DA19CBAD9802663E073C5CFC7A3F4A2.d.1E1C63E7CCA39C96E1137FBF3AE8A02F.e.144"}} | headers=HTTPHeaderDict({'Content-Length': '127', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:56898/session/c1c2a6c6f3d8335868f8b4ccad5449a6/elements {'using': 'xpath', 'value': "//tbody[@id='content']/tr"}
DEBUG:urllib3.connectionpool:http://localhost:56898 "POST /session/c1c2a6c6f3d8335868f8b4ccad5449

Attempt 3: Error clicking element: Message: 
Stacktrace:
0   chromedriver                        0x0000000102e67ed4 cxxbridge1$str$ptr + 1906348
1   chromedriver                        0x0000000102e60344 cxxbridge1$str$ptr + 1874716
2   chromedriver                        0x0000000102a74264 cxxbridge1$string$len + 89492
3   chromedriver                        0x0000000102ab8514 cxxbridge1$string$len + 368708
4   chromedriver                        0x0000000102af27d4 cxxbridge1$string$len + 606980
5   chromedriver                        0x0000000102aad134 cxxbridge1$string$len + 322660
6   chromedriver                        0x0000000102aadd84 cxxbridge1$string$len + 325812
7   chromedriver                        0x0000000102e2ff90 cxxbridge1$str$ptr + 1677160
8   chromedriver                        0x0000000102e348fc cxxbridge1$str$ptr + 1695956
9   chromedriver                        0x0000000102e154b8 cxxbridge1$str$ptr + 1567888
10  chromedriver                        0x0000000102e3

DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:56930/session/75c07b20e5e96354f207bea552115e68/element {'using': 'css selector', 'value': '[id="content"]'}
DEBUG:urllib3.connectionpool:http://localhost:56930 "POST /session/75c07b20e5e96354f207bea552115e68/element HTTP/1.1" 200 127
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":{"element-6066-11e4-a52e-4f735466cecf":"f.900AD01960417AD06A1A534FA1C8CEA8.d.B600619EDB1F64952E20A27F21B14C19.e.174"}} | headers=HTTPHeaderDict({'Content-Length': '127', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:56930/session/75c07b20e5e96354f207bea552115e68/elements {'using': 'xpath', 'value': "//tbody[@id='content']/tr"}
DEBUG:urllib3.connectionpool:http://localhost:56930 "POST /session/75c07b20e5e96354f207bea552115e

Processing batch 2 with 5 CINs.


DEBUG:urllib3.connectionpool:http://localhost:57764 "POST /session HTTP/1.1" 200 890
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":{"capabilities":{"acceptInsecureCerts":false,"browserName":"chrome","browserVersion":"129.0.6668.58","chrome":{"chromedriverVersion":"129.0.6668.58 (81a06fb873a9b386848719cf9f93e59579fb5d4b-refs/branch-heads/6668@{#1318})","userDataDir":"/var/folders/x4/f0yhttf57678gq_cpd4czrfm0000gn/T/.org.chromium.Chromium.gXVypf"},"fedcm:accounts":true,"goog:chromeOptions":{"debuggerAddress":"localhost:57771"},"networkConnectionEnabled":false,"pageLoadStrategy":"normal","platformName":"mac","proxy":{},"setWindowRect":true,"strictFileInteractability":false,"timeouts":{"implicit":0,"pageLoad":300000,"script":30000},"unhandledPromptBehavior":"dismiss and notify","webauthn:extension:credBlob":true,"webauthn:extension:largeBlob":true,"webauthn:extension:minPinLength":true,"webauthn:extension:prf":true,"webauthn:virtualAuthentica

CAPTCHA detected. Solving CAPTCHA...


DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:57764/session/dea03db32adbbf8704f2dedc14d77d02/element {'using': 'xpath', 'value': "//*[@id='masterdata-search-box']"}
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:57811/session/1cefbd5b971fb3ae20c2d0c48f2a73d6/element {'using': 'xpath', 'value': "//*[@id='masterdata-search-box']"}
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:57779/session/9a2515b0512f71833a6c6b36a55e96e4/element {'using': 'xpath', 'value': "//*[@id='masterdata-search-box']"}
DEBUG:urllib3.connectionpool:http://localhost:57811 "POST /session/1cefbd5b971fb3ae20c2d0c48f2a73d6/element HTTP/1.1" 404 1723
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=404 | data={"value":{"error":"no such element","message":"no such element: Unable to locate element: {\"method\":\"xpath\",\"selector\":\"//*[@id='masterdata-search-box']\"}\n  (Session info: chrome=129.0.6668.58)","stacktrace

CAPTCHA detected. Solving CAPTCHA...


DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:57811/session/1cefbd5b971fb3ae20c2d0c48f2a73d6/element {'using': 'xpath', 'value': "//*[@id='masterdata-search-box']"}
DEBUG:urllib3.connectionpool:http://localhost:57811 "POST /session/1cefbd5b971fb3ae20c2d0c48f2a73d6/element HTTP/1.1" 404 1723
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=404 | data={"value":{"error":"no such element","message":"no such element: Unable to locate element: {\"method\":\"xpath\",\"selector\":\"//*[@id='masterdata-search-box']\"}\n  (Session info: chrome=129.0.6668.58)","stacktrace":"0   chromedriver                        0x0000000100d5bed4 cxxbridge1$str$ptr + 1906348\n1   chromedriver                        0x0000000100d54344 cxxbridge1$str$ptr + 1874716\n2   chromedriver                        0x0000000100968264 cxxbridge1$string$len + 89492\n3   chromedriver                        0x00000001009ac514 cxxbridge1$string$len + 368708\n4   chromedriver    

Error processing UID U85499AP2024OPC115576: Message: 
Stacktrace:
0   chromedriver                        0x0000000102977ed4 cxxbridge1$str$ptr + 1906348
1   chromedriver                        0x0000000102970344 cxxbridge1$str$ptr + 1874716
2   chromedriver                        0x0000000102584264 cxxbridge1$string$len + 89492
3   chromedriver                        0x00000001025c8514 cxxbridge1$string$len + 368708
4   chromedriver                        0x00000001026027d4 cxxbridge1$string$len + 606980
5   chromedriver                        0x00000001025bd134 cxxbridge1$string$len + 322660
6   chromedriver                        0x00000001025bdd84 cxxbridge1$string$len + 325812
7   chromedriver                        0x000000010293ff90 cxxbridge1$str$ptr + 1677160
8   chromedriver                        0x00000001029448fc cxxbridge1$str$ptr + 1695956
9   chromedriver                        0x00000001029254b8 cxxbridge1$str$ptr + 1567888
10  chromedriver                        0x000

DEBUG:urllib3.connectionpool:http://localhost:57795 "POST /session/4006d5ba4d75c016cdf69b25579a8fb6/element/f.5C9B06A48379D96CABD4BDA104025ACB.d.0E850E3AFE3308D7524CE0457AD2B381.e.79/value HTTP/1.1" 200 14
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":null} | headers=HTTPHeaderDict({'Content-Length': '14', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:57795/session/4006d5ba4d75c016cdf69b25579a8fb6/element {'using': 'css selector', 'value': '[id="check"]'}
DEBUG:urllib3.connectionpool:http://localhost:57795 "POST /session/4006d5ba4d75c016cdf69b25579a8fb6/element HTTP/1.1" 200 127
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":{"element-6066-11e4-a52e-4f735466cecf":"f.5C9B06A48379D96CABD4BDA104025ACB.d.0E850E3AFE3308D7524CE

Error processing UID U62091PN2024PTC233256: Message: 
Stacktrace:
0   chromedriver                        0x000000010330fed4 cxxbridge1$str$ptr + 1906348
1   chromedriver                        0x0000000103308344 cxxbridge1$str$ptr + 1874716
2   chromedriver                        0x0000000102f1c264 cxxbridge1$string$len + 89492
3   chromedriver                        0x0000000102f60514 cxxbridge1$string$len + 368708
4   chromedriver                        0x0000000102f9a7d4 cxxbridge1$string$len + 606980
5   chromedriver                        0x0000000102f55134 cxxbridge1$string$len + 322660
6   chromedriver                        0x0000000102f55d84 cxxbridge1$string$len + 325812
7   chromedriver                        0x00000001032d7f90 cxxbridge1$str$ptr + 1677160
8   chromedriver                        0x00000001032dc8fc cxxbridge1$str$ptr + 1695956
9   chromedriver                        0x00000001032bd4b8 cxxbridge1$str$ptr + 1567888
10  chromedriver                        0x000

DEBUG:urllib3.connectionpool:http://localhost:57795 "POST /session/4006d5ba4d75c016cdf69b25579a8fb6/element/f.5C9B06A48379D96CABD4BDA104025ACB.d.0E850E3AFE3308D7524CE0457AD2B381.e.104/click HTTP/1.1" 200 14
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":null} | headers=HTTPHeaderDict({'Content-Length': '14', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:57795/session/4006d5ba4d75c016cdf69b25579a8fb6/timeouts {'implicit': 7000}
DEBUG:urllib3.connectionpool:http://localhost:57795 "POST /session/4006d5ba4d75c016cdf69b25579a8fb6/timeouts HTTP/1.1" 200 14
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":null} | headers=HTTPHeaderDict({'Content-Length': '14', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-

CAPTCHA solved: 77


DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:57811/session/1cefbd5b971fb3ae20c2d0c48f2a73d6/element {'using': 'xpath', 'value': "//*[@id='masterdata-search-box']"}
DEBUG:urllib3.connectionpool:http://localhost:57811 "POST /session/1cefbd5b971fb3ae20c2d0c48f2a73d6/element HTTP/1.1" 404 1723
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=404 | data={"value":{"error":"no such element","message":"no such element: Unable to locate element: {\"method\":\"xpath\",\"selector\":\"//*[@id='masterdata-search-box']\"}\n  (Session info: chrome=129.0.6668.58)","stacktrace":"0   chromedriver                        0x0000000100d5bed4 cxxbridge1$str$ptr + 1906348\n1   chromedriver                        0x0000000100d54344 cxxbridge1$str$ptr + 1874716\n2   chromedriver                        0x0000000100968264 cxxbridge1$string$len + 89492\n3   chromedriver                        0x00000001009ac514 cxxbridge1$string$len + 368708\n4   chromedriver    

Error processing UID U41001KA2024PTC191608: Message: 
Stacktrace:
0   chromedriver                        0x0000000100d5bed4 cxxbridge1$str$ptr + 1906348
1   chromedriver                        0x0000000100d54344 cxxbridge1$str$ptr + 1874716
2   chromedriver                        0x0000000100968264 cxxbridge1$string$len + 89492
3   chromedriver                        0x00000001009ac514 cxxbridge1$string$len + 368708
4   chromedriver                        0x00000001009e67d4 cxxbridge1$string$len + 606980
5   chromedriver                        0x00000001009a1134 cxxbridge1$string$len + 322660
6   chromedriver                        0x00000001009a1d84 cxxbridge1$string$len + 325812
7   chromedriver                        0x0000000100d23f90 cxxbridge1$str$ptr + 1677160
8   chromedriver                        0x0000000100d288fc cxxbridge1$str$ptr + 1695956
9   chromedriver                        0x0000000100d094b8 cxxbridge1$str$ptr + 1567888
10  chromedriver                        0x000

DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:57829/session/092f9a0466cc6ce488a761c40eeee4c2/element {'using': 'xpath', 'value': "//*[@id='customCaptchaInput']"}
DEBUG:urllib3.connectionpool:http://localhost:57829 "POST /session/092f9a0466cc6ce488a761c40eeee4c2/element HTTP/1.1" 200 126
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":{"element-6066-11e4-a52e-4f735466cecf":"f.251101C0A7C6DC22833AF1E000F28BFB.d.B1174B24ADED0786CA9FF0F0EA54D52C.e.89"}} | headers=HTTPHeaderDict({'Content-Length': '126', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:57829/session/092f9a0466cc6ce488a761c40eeee4c2/execute/sync {'script': '/* isDisplayed */return (function(){return (function(){var g=this||self;\nfunction aa(a){var b=typeof...', 'args': [{'element-6066-

CAPTCHA solved: 121


DEBUG:selenium.webdriver.remote.remote_connection:GET http://localhost:57795/session/4006d5ba4d75c016cdf69b25579a8fb6/screenshot {}
DEBUG:urllib3.connectionpool:http://localhost:57795 "GET /session/4006d5ba4d75c016cdf69b25579a8fb6/screenshot HTTP/1.1" 200 218728
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":"iVBORw0KGgoAAAANSUhEUgAACWAAAAUCCAIAAAABs18cAAABK2lDQ1BTa2lhAAAokX2QMUvDUBSFv1cKomYRFR0cMnbRppWmDdahqbXo2Cqk3dI0FLFNQxrRvas/wtlNcBGhs4uT4CTi4i4IrpXXDClIPNPHuQfuPRdSmwBpDQZeGDTqpmq12urCBwLBTLYz8kmWgJ/XKPuy/U8uSYtdd+QAX0AYWK02iC6w1ov4SnIn4mvJl6EfgriRHJw0qiDugUxvjjtz7PiBzL8B5UH/wonvRnG90yZgAVvUGTKkRx+XLE3OOcMmi0YNgxK71KhQoUCFHHlKGOgU0KhiUqRKkUN0SuTJcTBjA13+M1o5fof9yXQ6fYy94wnc6bD0EHuZPVhR4Ok59uIf+3Zgz6w0kHJN+F4H5RZWP2F5DGzIcUJX9U9XlSM8HHZQyaORQ/8FDJRN2vTWQQEAACAASURBVHic7N11XBRbGwfws0XD0qGkSAgSooIIYneiYrfe67W763rNa/e1Xuva3d0tJqIoEoKIdHftvn8cnLvuLrgsLb/vhz/GmdmZ4+7OnpnznPMcFs/AgwAAAAAAAAAAAAAAAABAzcCu7AIAAAAAAAAAAAAAAAAAQMVBgBAAAAAAAAAAAA

Error handling CAPTCHA: Message: 

CAPTCHA detected. Solving CAPTCHA...


DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:57829/session/092f9a0466cc6ce488a761c40eeee4c2/element {'using': 'xpath', 'value': "//*[@id='customCaptchaInput']"}
DEBUG:urllib3.connectionpool:http://localhost:57829 "POST /session/092f9a0466cc6ce488a761c40eeee4c2/element HTTP/1.1" 200 126
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":{"element-6066-11e4-a52e-4f735466cecf":"f.251101C0A7C6DC22833AF1E000F28BFB.d.B1174B24ADED0786CA9FF0F0EA54D52C.e.89"}} | headers=HTTPHeaderDict({'Content-Length': '126', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:57829/session/092f9a0466cc6ce488a761c40eeee4c2/execute/sync {'script': '/* isDisplayed */return (function(){return (function(){var g=this||self;\nfunction aa(a){var b=typeof...', 'args': [{'element-6066-

Error handling CAPTCHA: Message: 

CAPTCHA detected. Solving CAPTCHA...


DEBUG:selenium.webdriver.remote.remote_connection:GET http://localhost:57795/session/4006d5ba4d75c016cdf69b25579a8fb6/screenshot {}
DEBUG:urllib3.connectionpool:http://localhost:57795 "GET /session/4006d5ba4d75c016cdf69b25579a8fb6/screenshot HTTP/1.1" 200 340636
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":"iVBORw0KGgoAAAANSUhEUgAACWAAAAUCCAIAAAABs18cAAABK2lDQ1BTa2lhAAAokX2QMUvDUBSFv1cKomYRFR0cMnbRppWmDdahqbXo2Cqk3dI0FLFNQxrRvas/wtlNcBGhs4uT4CTi4i4IrpXXDClIPNPHuQfuPRdSmwBpDQZeGDTqpmq12urCBwLBTLYz8kmWgJ/XKPuy/U8uSYtdd+QAX0AYWK02iC6w1ov4SnIn4mvJl6EfgriRHJw0qiDugUxvjjtz7PiBzL8B5UH/wonvRnG90yZgAVvUGTKkRx+XLE3OOcMmi0YNgxK71KhQoUCFHHlKGOgU0KhiUqRKkUN0SuTJcTBjA13+M1o5fof9yXQ6fYy94wnc6bD0EHuZPVhR4Ok59uIf+3Zgz6w0kHJN+F4H5RZWP2F5DGzIcUJX9U9XlSM8HHZQyaORQ/8FDJRN2vTWQQEAACAASURBVHic7L3bbxtnlq9dVSweJB4sURIl0k5kO7YcS2Orh3R3kp7ZnQxmJoMA7A8bmA+5+Abou9Fg/iVd9s1g983ewBamgSCNSRo9OaBDpuVE7kRyy3GikJIoyrJEShTJKn4Xy1p+XScWz5T8e2CkqeJbb71VpCh1PfqtJS/9279LYIjJZD

CAPTCHA solved: 77


DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:57829/session/092f9a0466cc6ce488a761c40eeee4c2/element {'using': 'xpath', 'value': "//*[@id='customCaptchaInput']"}
DEBUG:urllib3.connectionpool:http://localhost:57829 "POST /session/092f9a0466cc6ce488a761c40eeee4c2/element HTTP/1.1" 200 126
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":{"element-6066-11e4-a52e-4f735466cecf":"f.251101C0A7C6DC22833AF1E000F28BFB.d.B1174B24ADED0786CA9FF0F0EA54D52C.e.89"}} | headers=HTTPHeaderDict({'Content-Length': '126', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:57829/session/092f9a0466cc6ce488a761c40eeee4c2/execute/sync {'script': '/* isDisplayed */return (function(){return (function(){var g=this||self;\nfunction aa(a){var b=typeof...', 'args': [{'element-6066-

CAPTCHA solved: 94


DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:57795/session/4006d5ba4d75c016cdf69b25579a8fb6/element {'using': 'xpath', 'value': "//*[@id='customCaptchaInput']"}
DEBUG:urllib3.connectionpool:http://localhost:57795 "POST /session/4006d5ba4d75c016cdf69b25579a8fb6/element HTTP/1.1" 200 126
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":{"element-6066-11e4-a52e-4f735466cecf":"f.5C9B06A48379D96CABD4BDA104025ACB.d.0E850E3AFE3308D7524CE0457AD2B381.e.79"}} | headers=HTTPHeaderDict({'Content-Length': '126', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:57795/session/4006d5ba4d75c016cdf69b25579a8fb6/execute/sync {'script': '/* isDisplayed */return (function(){return (function(){var g=this||self;\nfunction aa(a){var b=typeof...', 'args': [{'element-6066-

CAPTCHA solved: 77


DEBUG:urllib3.connectionpool:http://localhost:57795 "POST /session/4006d5ba4d75c016cdf69b25579a8fb6/element/f.5C9B06A48379D96CABD4BDA104025ACB.d.0E850E3AFE3308D7524CE0457AD2B381.e.107/click HTTP/1.1" 400 2317
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=400 | data={"value":{"error":"element click intercepted","message":"element click intercepted: Element \u003Ctd class=\"companyname\">...\u003C/td> is not clickable at point (388, 618). Other element would receive the click: \u003Cdiv id=\"captchaModal\" class=\"modal\" style=\"font-size: 16px;\">...\u003C/div>\n  (Session info: chrome=129.0.6668.58)","stacktrace":"0   chromedriver                        0x000000010471bed4 cxxbridge1$str$ptr + 1906348\n1   chromedriver                        0x0000000104714344 cxxbridge1$str$ptr + 1874716\n2   chromedriver                        0x0000000104328264 cxxbridge1$string$len + 89492\n3   chromedriver                        0x0000000104371cdc cxxbridge1$string$len 

Attempt 1: Error clicking element: Message: element click intercepted: Element <td class="companyname">...</td> is not clickable at point (388, 618). Other element would receive the click: <div id="captchaModal" class="modal" style="font-size: 16px;">...</div>
  (Session info: chrome=129.0.6668.58)
Stacktrace:
0   chromedriver                        0x000000010471bed4 cxxbridge1$str$ptr + 1906348
1   chromedriver                        0x0000000104714344 cxxbridge1$str$ptr + 1874716
2   chromedriver                        0x0000000104328264 cxxbridge1$string$len + 89492
3   chromedriver                        0x0000000104371cdc cxxbridge1$string$len + 391180
4   chromedriver                        0x0000000104370330 cxxbridge1$string$len + 384608
5   chromedriver                        0x000000010436e544 cxxbridge1$string$len + 376948
6   chromedriver                        0x000000010436db60 cxxbridge1$string$len + 374416
7   chromedriver                        0x000000010436303c cxxb

DEBUG:urllib3.connectionpool:http://localhost:57795 "POST /session/4006d5ba4d75c016cdf69b25579a8fb6/element/f.5C9B06A48379D96CABD4BDA104025ACB.d.0E850E3AFE3308D7524CE0457AD2B381.e.107/click HTTP/1.1" 400 2317
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=400 | data={"value":{"error":"element click intercepted","message":"element click intercepted: Element \u003Ctd class=\"companyname\">...\u003C/td> is not clickable at point (388, 618). Other element would receive the click: \u003Cdiv id=\"captchaModal\" class=\"modal\" style=\"font-size: 16px;\">...\u003C/div>\n  (Session info: chrome=129.0.6668.58)","stacktrace":"0   chromedriver                        0x000000010471bed4 cxxbridge1$str$ptr + 1906348\n1   chromedriver                        0x0000000104714344 cxxbridge1$str$ptr + 1874716\n2   chromedriver                        0x0000000104328264 cxxbridge1$string$len + 89492\n3   chromedriver                        0x0000000104371cdc cxxbridge1$string$len 

Attempt 2: Error clicking element: Message: element click intercepted: Element <td class="companyname">...</td> is not clickable at point (388, 618). Other element would receive the click: <div id="captchaModal" class="modal" style="font-size: 16px;">...</div>
  (Session info: chrome=129.0.6668.58)
Stacktrace:
0   chromedriver                        0x000000010471bed4 cxxbridge1$str$ptr + 1906348
1   chromedriver                        0x0000000104714344 cxxbridge1$str$ptr + 1874716
2   chromedriver                        0x0000000104328264 cxxbridge1$string$len + 89492
3   chromedriver                        0x0000000104371cdc cxxbridge1$string$len + 391180
4   chromedriver                        0x0000000104370330 cxxbridge1$string$len + 384608
5   chromedriver                        0x000000010436e544 cxxbridge1$string$len + 376948
6   chromedriver                        0x000000010436db60 cxxbridge1$string$len + 374416
7   chromedriver                        0x000000010436303c cxxb

DEBUG:urllib3.connectionpool:http://localhost:57795 "POST /session/4006d5ba4d75c016cdf69b25579a8fb6/element/f.5C9B06A48379D96CABD4BDA104025ACB.d.0E850E3AFE3308D7524CE0457AD2B381.e.107/click HTTP/1.1" 400 2317
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=400 | data={"value":{"error":"element click intercepted","message":"element click intercepted: Element \u003Ctd class=\"companyname\">...\u003C/td> is not clickable at point (388, 618). Other element would receive the click: \u003Cdiv id=\"captchaModal\" class=\"modal\" style=\"font-size: 16px;\">...\u003C/div>\n  (Session info: chrome=129.0.6668.58)","stacktrace":"0   chromedriver                        0x000000010471bed4 cxxbridge1$str$ptr + 1906348\n1   chromedriver                        0x0000000104714344 cxxbridge1$str$ptr + 1874716\n2   chromedriver                        0x0000000104328264 cxxbridge1$string$len + 89492\n3   chromedriver                        0x0000000104371cdc cxxbridge1$string$len 

Attempt 3: Error clicking element: Message: element click intercepted: Element <td class="companyname">...</td> is not clickable at point (388, 618). Other element would receive the click: <div id="captchaModal" class="modal" style="font-size: 16px;">...</div>
  (Session info: chrome=129.0.6668.58)
Stacktrace:
0   chromedriver                        0x000000010471bed4 cxxbridge1$str$ptr + 1906348
1   chromedriver                        0x0000000104714344 cxxbridge1$str$ptr + 1874716
2   chromedriver                        0x0000000104328264 cxxbridge1$string$len + 89492
3   chromedriver                        0x0000000104371cdc cxxbridge1$string$len + 391180
4   chromedriver                        0x0000000104370330 cxxbridge1$string$len + 384608
5   chromedriver                        0x000000010436e544 cxxbridge1$string$len + 376948
6   chromedriver                        0x000000010436db60 cxxbridge1$string$len + 374416
7   chromedriver                        0x000000010436303c cxxb

DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:57829/session/092f9a0466cc6ce488a761c40eeee4c2/element {'using': 'xpath', 'value': "//*[@id='customCaptchaInput']"}
DEBUG:urllib3.connectionpool:http://localhost:57829 "POST /session/092f9a0466cc6ce488a761c40eeee4c2/element HTTP/1.1" 200 127
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":{"element-6066-11e4-a52e-4f735466cecf":"f.251101C0A7C6DC22833AF1E000F28BFB.d.0EA1BEAC60AA0A844FFEC20A33579446.e.157"}} | headers=HTTPHeaderDict({'Content-Length': '127', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:57829/session/092f9a0466cc6ce488a761c40eeee4c2/execute/sync {'script': '/* isDisplayed */return (function(){return (function(){var g=this||self;\nfunction aa(a){var b=typeof...', 'args': [{'element-6066

Error handling CAPTCHA: Message: 



DEBUG:urllib3.connectionpool:http://localhost:57795 "POST /session/4006d5ba4d75c016cdf69b25579a8fb6/element HTTP/1.1" 404 1718
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=404 | data={"value":{"error":"no such element","message":"no such element: Unable to locate element: {\"method\":\"xpath\",\"selector\":\"//*[@id='formId']/button[3]\"}\n  (Session info: chrome=129.0.6668.58)","stacktrace":"0   chromedriver                        0x000000010471bed4 cxxbridge1$str$ptr + 1906348\n1   chromedriver                        0x0000000104714344 cxxbridge1$str$ptr + 1874716\n2   chromedriver                        0x0000000104328264 cxxbridge1$string$len + 89492\n3   chromedriver                        0x000000010436c514 cxxbridge1$string$len + 368708\n4   chromedriver                        0x00000001043a67d4 cxxbridge1$string$len + 606980\n5   chromedriver                        0x0000000104361134 cxxbridge1$string$len + 322660\n6   chromedriver                  

Error processing UID U13996DL2024PTC434969: Message: 
Stacktrace:
0   chromedriver                        0x000000010471bed4 cxxbridge1$str$ptr + 1906348
1   chromedriver                        0x0000000104714344 cxxbridge1$str$ptr + 1874716
2   chromedriver                        0x0000000104328264 cxxbridge1$string$len + 89492
3   chromedriver                        0x000000010436c514 cxxbridge1$string$len + 368708
4   chromedriver                        0x00000001043a67d4 cxxbridge1$string$len + 606980
5   chromedriver                        0x0000000104361134 cxxbridge1$string$len + 322660
6   chromedriver                        0x0000000104361d84 cxxbridge1$string$len + 325812
7   chromedriver                        0x00000001046e3f90 cxxbridge1$str$ptr + 1677160
8   chromedriver                        0x00000001046e88fc cxxbridge1$str$ptr + 1695956
9   chromedriver                        0x00000001046c94b8 cxxbridge1$str$ptr + 1567888
10  chromedriver                        0x000

DEBUG:urllib3.connectionpool:http://localhost:57829 "POST /session/092f9a0466cc6ce488a761c40eeee4c2/element HTTP/1.1" 404 1784
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=404 | data={"value":{"error":"no such element","message":"no such element: Unable to locate element: {\"method\":\"xpath\",\"selector\":\"//*[@id='fohomepage-037f88dfd8']/div/div/div[3]/div/div[1]/div[1]/div/table[1]/tbody/tr/td[2]\"}\n  (Session info: chrome=129.0.6668.58)","stacktrace":"0   chromedriver                        0x0000000104f97ed4 cxxbridge1$str$ptr + 1906348\n1   chromedriver                        0x0000000104f90344 cxxbridge1$str$ptr + 1874716\n2   chromedriver                        0x0000000104ba4264 cxxbridge1$string$len + 89492\n3   chromedriver                        0x0000000104be8514 cxxbridge1$string$len + 368708\n4   chromedriver                        0x0000000104c227d4 cxxbridge1$string$len + 606980\n5   chromedriver                        0x0000000104bdd134 

Attempt 1: Error clicking element: Message: 
Stacktrace:
0   chromedriver                        0x0000000104f97ed4 cxxbridge1$str$ptr + 1906348
1   chromedriver                        0x0000000104f90344 cxxbridge1$str$ptr + 1874716
2   chromedriver                        0x0000000104ba4264 cxxbridge1$string$len + 89492
3   chromedriver                        0x0000000104be8514 cxxbridge1$string$len + 368708
4   chromedriver                        0x0000000104c227d4 cxxbridge1$string$len + 606980
5   chromedriver                        0x0000000104bdd134 cxxbridge1$string$len + 322660
6   chromedriver                        0x0000000104bddd84 cxxbridge1$string$len + 325812
7   chromedriver                        0x0000000104f5ff90 cxxbridge1$str$ptr + 1677160
8   chromedriver                        0x0000000104f648fc cxxbridge1$str$ptr + 1695956
9   chromedriver                        0x0000000104f454b8 cxxbridge1$str$ptr + 1567888
10  chromedriver                        0x0000000104f6

DEBUG:urllib3.connectionpool:http://localhost:57829 "POST /session/092f9a0466cc6ce488a761c40eeee4c2/element HTTP/1.1" 404 1784
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=404 | data={"value":{"error":"no such element","message":"no such element: Unable to locate element: {\"method\":\"xpath\",\"selector\":\"//*[@id='fohomepage-037f88dfd8']/div/div/div[3]/div/div[1]/div[1]/div/table[1]/tbody/tr/td[2]\"}\n  (Session info: chrome=129.0.6668.58)","stacktrace":"0   chromedriver                        0x0000000104f97ed4 cxxbridge1$str$ptr + 1906348\n1   chromedriver                        0x0000000104f90344 cxxbridge1$str$ptr + 1874716\n2   chromedriver                        0x0000000104ba4264 cxxbridge1$string$len + 89492\n3   chromedriver                        0x0000000104be8514 cxxbridge1$string$len + 368708\n4   chromedriver                        0x0000000104c227d4 cxxbridge1$string$len + 606980\n5   chromedriver                        0x0000000104bdd134 

Attempt 2: Error clicking element: Message: 
Stacktrace:
0   chromedriver                        0x0000000104f97ed4 cxxbridge1$str$ptr + 1906348
1   chromedriver                        0x0000000104f90344 cxxbridge1$str$ptr + 1874716
2   chromedriver                        0x0000000104ba4264 cxxbridge1$string$len + 89492
3   chromedriver                        0x0000000104be8514 cxxbridge1$string$len + 368708
4   chromedriver                        0x0000000104c227d4 cxxbridge1$string$len + 606980
5   chromedriver                        0x0000000104bdd134 cxxbridge1$string$len + 322660
6   chromedriver                        0x0000000104bddd84 cxxbridge1$string$len + 325812
7   chromedriver                        0x0000000104f5ff90 cxxbridge1$str$ptr + 1677160
8   chromedriver                        0x0000000104f648fc cxxbridge1$str$ptr + 1695956
9   chromedriver                        0x0000000104f454b8 cxxbridge1$str$ptr + 1567888
10  chromedriver                        0x0000000104f6

DEBUG:urllib3.connectionpool:http://localhost:57829 "POST /session/092f9a0466cc6ce488a761c40eeee4c2/element HTTP/1.1" 404 1784
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=404 | data={"value":{"error":"no such element","message":"no such element: Unable to locate element: {\"method\":\"xpath\",\"selector\":\"//*[@id='fohomepage-037f88dfd8']/div/div/div[3]/div/div[1]/div[1]/div/table[1]/tbody/tr/td[2]\"}\n  (Session info: chrome=129.0.6668.58)","stacktrace":"0   chromedriver                        0x0000000104f97ed4 cxxbridge1$str$ptr + 1906348\n1   chromedriver                        0x0000000104f90344 cxxbridge1$str$ptr + 1874716\n2   chromedriver                        0x0000000104ba4264 cxxbridge1$string$len + 89492\n3   chromedriver                        0x0000000104be8514 cxxbridge1$string$len + 368708\n4   chromedriver                        0x0000000104c227d4 cxxbridge1$string$len + 606980\n5   chromedriver                        0x0000000104bdd134 

Attempt 3: Error clicking element: Message: 
Stacktrace:
0   chromedriver                        0x0000000104f97ed4 cxxbridge1$str$ptr + 1906348
1   chromedriver                        0x0000000104f90344 cxxbridge1$str$ptr + 1874716
2   chromedriver                        0x0000000104ba4264 cxxbridge1$string$len + 89492
3   chromedriver                        0x0000000104be8514 cxxbridge1$string$len + 368708
4   chromedriver                        0x0000000104c227d4 cxxbridge1$string$len + 606980
5   chromedriver                        0x0000000104bdd134 cxxbridge1$string$len + 322660
6   chromedriver                        0x0000000104bddd84 cxxbridge1$string$len + 325812
7   chromedriver                        0x0000000104f5ff90 cxxbridge1$str$ptr + 1677160
8   chromedriver                        0x0000000104f648fc cxxbridge1$str$ptr + 1695956
9   chromedriver                        0x0000000104f454b8 cxxbridge1$str$ptr + 1567888
10  chromedriver                        0x0000000104f6

DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:57829/session/092f9a0466cc6ce488a761c40eeee4c2/element {'using': 'css selector', 'value': '[id="content"]'}
DEBUG:urllib3.connectionpool:http://localhost:57829 "POST /session/092f9a0466cc6ce488a761c40eeee4c2/element HTTP/1.1" 200 127
DEBUG:selenium.webdriver.remote.remote_connection:Remote response: status=200 | data={"value":{"element-6066-11e4-a52e-4f735466cecf":"f.251101C0A7C6DC22833AF1E000F28BFB.d.0EA1BEAC60AA0A844FFEC20A33579446.e.172"}} | headers=HTTPHeaderDict({'Content-Length': '127', 'Content-Type': 'application/json; charset=utf-8', 'cache-control': 'no-cache'})
DEBUG:selenium.webdriver.remote.remote_connection:Finished Request
DEBUG:selenium.webdriver.remote.remote_connection:POST http://localhost:57829/session/092f9a0466cc6ce488a761c40eeee4c2/elements {'using': 'xpath', 'value': "//tbody[@id='content']/tr"}
DEBUG:urllib3.connectionpool:http://localhost:57829 "POST /session/092f9a0466cc6ce488a761c40eeee4

: 

In [None]:
<button class="tablinks directorData" onclick="tabAction(event, 'details')" style="font-size: 14px;"> Director/Signatory details </button>

In [None]:
<tr style="font-size: 16px;"><td style="font-size: 14px;">1</td><td style="font-size: 14px;"><span class="redirect-Text" onclick="directorClickHandler(&quot;02926317&quot;,&quot;DIN&quot;)" style="font-size: 14px;">02926317</span></td><td style="font-size: 14px;">RAMESH YUGRAJ KAVEDIYA</td><td style="font-size: 14px;">Director</td><td style="font-size: 14px;">02/08/2024</td><td style="font-size: 14px;">-</td><td style="font-size: 14px;">Yes</td></tr>

In [None]:
//*[@id="content"]/tr[1]

In [None]:
//*[@id="content"]

In [None]:
<td class="companyname"> ZEROCLUTTER LIFESTYLE PRIVATE LIMITED </td>
//*[@id="fohomepage-037f88dfd8"]/div/div/div[3]/div/div[1]/div[1]/div/table[1]/tbody/tr/td[2]

In [1]:
import tkinter as tk
from tkinter import filedialog, messagebox
from tkinter.ttk import Progressbar
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from PIL import Image
import pandas as pd
import time
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from requests.exceptions import HTTPError
import os
import google.generativeai as genai

# Chrome setup with blocking unwanted URLs
def setup_driver():
    chrome_options = Options()
    chrome_options.add_argument("--headless")  # Headless mode (uncomment for headless mode)
    chrome_options.set_capability('goog:loggingPrefs', {'performance': 'ALL'})
    driver = webdriver.Chrome(options=chrome_options)
    driver.execute_cdp_cmd('Network.setBlockedURLs', {'urls': ['*clientlib-devtool.js']})  # Block dev tools
    return driver

# Function to check if CAPTCHA is present
def check_for_captcha(driver):
    try:
        captcha_element = driver.find_element(By.XPATH, "//*[@id='customCaptchaInput']")
        if captcha_element.is_displayed():
            print("CAPTCHA detected.")
            return True
        return False
    except:
        return False

# CAPTCHA handling function with retry functionality
def handle_captcha(driver):
    try:
        driver.implicitly_wait(7)
        time.sleep(5)
        screenshot = driver.get_screenshot_as_png()
        with open("full_page_screenshot.png", "wb") as file:
            file.write(screenshot)

        image = Image.open("full_page_screenshot.png")
        width, height = image.size
        left, top = width * 0.50, height * 0.20
        right, bottom = left + width * 0.20, top + height * 0.28
        padding = 10
        region_image = image.crop((left - padding, top - padding, right + padding, bottom + padding))
        region_image.save('captcha_regionS.png')

        genai.configure(api_key="AIzaSyCp-6vZIB6rf1WYWMx8vO5mNAAnc90Yj_s")
        def solve_captcha(image_path):
            model = genai.GenerativeModel("gemini-1.5-flash")
            image = Image.open(image_path)
            response = model.generate_content(["Perform the math operation in the picture and give output of the math operation only", image])
            return response.text.strip()

        captcha_solution = solve_captcha("captcha_regionS.png")
        captcha_input = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//*[@id='customCaptchaInput']")))
        captcha_input.clear()
        captcha_input.send_keys(captcha_solution)        
        submit_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, "check")))
        submit_button.click()
        print(f"CAPTCHA solved: {captcha_solution}")
    except Exception as e:
        print(f"Error handling CAPTCHA: {e}")

# Function to scrape director details
def scrape_director_details(driver, uid):
    try:
        if check_for_captcha(driver):
            handle_captcha(driver)
        time.sleep(3)
        table = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "content")))
        rows = driver.find_elements(By.XPATH, "//tbody[@id='content']/tr")

        director_data = []
        for row in rows:
            columns = row.find_elements(By.TAG_NAME, "td")
            director_data.append([col.text for col in columns])

        df = pd.DataFrame(director_data, columns=["Sr. No", "DIN/PAN", "Name", "Designation", "Date of Appointment", "Cessation Date", "Signatory"])
        df['CIN'] = uid
        return df

    except Exception as e:
        print(f"Error scraping director details for {uid}: {e}")
        return pd.DataFrame()

# Function to retry clicks
def retry_click(driver, by_locator, max_attempts=3):
    for attempt in range(max_attempts):
        try:
            element = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(by_locator))
            element.click()
            return True
        except Exception as e:
            print(f"Attempt {attempt + 1}: Error clicking element: {e}")
    return False

def search_master_data(uid):
    driver = setup_driver()
    try:
        driver.get("https://www.mca.gov.in/content/mca/global/en/mca/master-data/MDS.html")
        driver.implicitly_wait(5)
        search_box = WebDriverWait(driver, 12).until(EC.presence_of_element_located((By.XPATH, "//*[@id='masterdata-search-box']")))
        search_box.clear()
        search_box.send_keys(uid)
        search_box.send_keys(Keys.RETURN)
        if check_for_captcha(driver):
            handle_captcha(driver)

        # Retry clicking the company name if necessary
        company_name_locator = (By.XPATH, "//*[@id='fohomepage-037f88dfd8']/div/div/div[3]/div/div[1]/div[1]/div/table[1]/tbody/tr/td[2]")
        if retry_click(driver, company_name_locator):
            if check_for_captcha(driver):
                handle_captcha(driver)
            retry_click(driver, company_name_locator)

        # Click on the "Directory Data" button
        button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, "//*[@id='formId']/button[3]"))
        )
        button.click()

        # Scrape the director details
        df = scrape_director_details(driver, uid)
        return df

    except Exception as e:
        print(f"Error processing UID {uid}: {e}")
        return pd.DataFrame()

    finally:
        driver.quit()  # Ensure the driver is closed after each thread execution

# Function to handle retrying clicks
def wait_if_blocked(attempt):
    wait_time = min(3600, 10 * (2 ** attempt))  # Max wait time of 1 hour
    print(f"Blocked. Waiting for {wait_time} seconds...")
    time.sleep(wait_time)

def process_in_parallel(cins, progress_bar, status_label, batch_size=5):
    all_data = pd.DataFrame()
    total_cins = len(cins)
    attempt = 0
    
    for i in range(0, total_cins, batch_size):
        batch = cins[i:i + batch_size]
        print(f"Processing batch {i // batch_size + 1} with {len(batch)} CINs.")
        with ThreadPoolExecutor(max_workers=batch_size) as executor:
            futures = {executor.submit(search_master_data, uid): uid for uid in batch}
            for future in as_completed(futures):
                uid = futures[future]
                try:
                    df = future.result()
                    if not df.empty:
                        all_data = pd.concat([all_data, df], ignore_index=True)
                except HTTPError as e:
                    if e.response.status_code == 404:
                        print(f"Website blocked for UID {uid}.")
                        wait_if_blocked(attempt)
                        attempt += 1
                        continue
                    else:
                        print(f"HTTP error for UID {uid}: {e}")
                except Exception as e:
                    print(f"Error processing future for UID {uid}: {e}")
        progress = min(100, ((i + len(batch)) / total_cins) * 100)
        progress_bar['value'] = progress
        status_label.config(text=f"Processed {i + len(batch)} of {total_cins} CINs")
        window.update_idletasks()
    return all_data

def read_cin_from_excel(file_path):
    df = pd.read_excel(file_path)
    return df['CIN'].head(10).tolist()

def process_excel(input_file, output_file, progress_bar, status_label):
    cin_values = read_cin_from_excel(input_file)
    all_data = process_in_parallel(cin_values, progress_bar, status_label)
    all_data.to_excel(output_file, index=False)
    messagebox.showinfo("Success", f"All data saved to {output_file}")
    window.quit()  # Close the window after successful data scraping

def start_scraping_thread(input_file, output_file, progress_bar, status_label):
    thread = threading.Thread(target=process_excel, args=(input_file, output_file, progress_bar, status_label))
    thread.start()

def select_input_file():
    file_path = filedialog.askopenfilename(filetypes=[("Excel files", "*.xlsx")])
    input_entry.delete(0, tk.END)
    input_entry.insert(0, file_path)

def select_output_folder():
    folder_path = filedialog.askdirectory()
    output_entry.delete(0, tk.END)
    output_entry.insert(0, folder_path)

def on_submit():
    input_file = input_entry.get()
    output_folder = output_entry.get()
    
    if not input_file or not output_folder:
        messagebox.showerror("Error", "Please select both input file and output folder.")
        return
    
    output_file = f"{output_folder}/output_directors.xlsx"
    start_scraping_thread(input_file, output_file, progress_bar, status_label)

window = tk.Tk()
window.title("CIN Data Scraper")
window.geometry("400x300")

input_label = tk.Label(window, text="Select Input Excel File:")
input_label.pack(pady=5)
input_entry = tk.Entry(window, width=40)
input_entry.pack(pady=5)
input_button = tk.Button(window, text="Browse", command=select_input_file)
input_button.pack(pady=5)

output_label = tk.Label(window, text="Select Output Folder:")
output_label.pack(pady=5)
output_entry = tk.Entry(window, width=40)
output_entry.pack(pady=5)
output_button = tk.Button(window, text="Browse", command=select_output_folder)
output_button.pack(pady=5)

submit_button = tk.Button(window, text="Submit", command=on_submit)
submit_button.pack(pady=10)

progress_bar = Progressbar(window, length=300, mode='determinate')
progress_bar.pack(pady=10)

status_label = tk.Label(window, text="Ready")
status_label.pack(pady=10)

window.mainloop()



Processing batch 1 with 5 CINs.
CAPTCHA detected.
CAPTCHA detected.
CAPTCHA detected.CAPTCHA detected.

CAPTCHA detected.
CAPTCHA solved: 88
CAPTCHA detected.
CAPTCHA solved: 127
CAPTCHA detected.
CAPTCHA solved: 33
Attempt 1: Error clicking element: Message: element click intercepted: Element <td class="companyname">...</td> is not clickable at point (396, 614). Other element would receive the click: <div id="captchaModal" class="modal" style="font-size: 16px;">...</div>
  (Session info: chrome=129.0.6668.58)
Stacktrace:
0   chromedriver                        0x0000000102b87ed4 cxxbridge1$str$ptr + 1906348
1   chromedriver                        0x0000000102b80344 cxxbridge1$str$ptr + 1874716
2   chromedriver                        0x0000000102794264 cxxbridge1$string$len + 89492
3   chromedriver                        0x00000001027ddcdc cxxbridge1$string$len + 391180
4   chromedriver                        0x00000001027dc330 cxxbridge1$string$len + 384608
5   chromedriver           