In [8]:
import csv
import logging
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementClickInterceptedException
from urllib.parse import urlparse
from concurrent.futures import ThreadPoolExecutor, as_completed

logging.basicConfig(level=logging.INFO)

def is_valid_url(url):
    """Check if the URL is valid."""
    try:
        result = urlparse(url)
        return all([result.scheme, result.netloc])
    except ValueError:
        return False

def configure_browser():
    options = Options()
    options.add_argument("--headless")  # Enable headless mode
    options.add_argument("--disable-gpu")  # Disable GPU hardware acceleration
    options.add_argument("--window-size=1920x1080")  # Set window size
    options.add_argument("--disable-extensions")  # Disable extensions
    options.add_argument("--no-sandbox")  # Disable the sandbox for Chrome's renderer
    options.add_argument("--disable-images")  # Disable images
    options.page_load_strategy = 'eager'  # Use 'eager' page load strategy
    return webdriver.Chrome(options=options)

def find_google_oidc_on(website_url):
    logging.info(f"Processing website: {website_url}")
    driver = configure_browser()

    if not is_valid_url(website_url):
        logging.warning(f"Invalid URL: {website_url}")
        return website_url, False, "N/A"

    driver.get(website_url)
    logging.info("Website loaded")

    # Accept cookies
    cookie_buttons = [
        "Accept all & visit the site", "Accept", "Agree", "Close", "Ignore", 
        "Agree & Continue", "Agree & Close", "AGREE & CLOSE", "CONSENT", 
        "Consent", "ACCEPT","AGREE","Accept all cookies","Accept All Cookies","Allow","ALLOW","OK","Ok","I accept","I Accept","I ACCEPT"
    ]
    for button_text in cookie_buttons:
        try:
            accept_button = WebDriverWait(driver, 5).until(
                EC.element_to_be_clickable((By.XPATH, "//button[contains(., '" + button_text + "')] | //a[contains(., '" + button_text + "')]"))
            )
            accept_button.click()
            break
        except (NoSuchElementException, TimeoutException, ElementClickInterceptedException):
            continue


    # Look for login section
    login_section = ["Login", "Log in", "Sign in", "Sign In", "Log In", "LOGON", "SIGN IN", "LOGIN", "LOG IN", "Login/Register", "Account", "My Account"]
    login_found = False
    for login_pat in login_section:
        try:
            login_button = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.XPATH, f"//*[contains(text(), '{login_pat}') and (self::a or self::button)]"))
            )
            login_button.click()
            login_found = True
            logging.info(f"Login section found and clicked: {login_pat}")
            break
        except TimeoutException:
            logging.info(f"Login section not found: {login_pat}")
            continue
            
    if not login_found:
        logging.info("No login section found with standard patterns. Proceeding without login click.")


    # Google OIDC patterns and checks
    google_oidc_patterns = [
        "Sign in with Google", "Continue with Google","Sign In with Google", "Login with Google", 
        "Sign in using Google", "Log In with Google", "Login using Google",
        "LOG IN using Google", "LOG IN with Google", "LOG IN WITH GOOGLE"
    ]

    oidc_method = "N/A"
    redirect_or_element_found = False
    text_pattern_found = False

    try:
        # Wait for redirection or specific elements
        WebDriverWait(driver, 10).until(
          #  lambda x: any(pattern in driver.page_source for pattern in google_oidc_patterns) or driver.current_url != website_url
            lambda x: driver.current_url != website_url or any(pattern in driver.page_source for pattern in google_oidc_patterns)
        )
        oidc_method = "Detected" if any(pattern in driver.page_source for pattern in google_oidc_patterns) else "N/A"
    except TimeoutException:
        logging.info("No OIDC redirect or element found.")
        redirect_or_element_found = True
    except TimeoutException:
        pass

    if redirect_or_element_found:
        if len(driver.window_handles) > 1:
            oidc_method = "Pop-up"
        elif driver.current_url != website_url:
            oidc_method = "Redirect URL"
        else:
            oidc_method = "Direct Link/Button"

    has_google_oidc = oidc_method != "N/A"
    driver.quit()
    logging.info(f"Finished processing {website_url} : Google OIDC - {has_google_oidc}, Method - {oidc_method}")
    return website_url, has_google_oidc, oidc_method

    # Print the results
    for website, uses_google_oidc, method in results:
        print(f"{website} : Google OIDC - {uses_google_oidc}, Methods - {method}")
    write_results_to_csv(results, output_csv)
    
def process_websites(websites, max_threads=8):
    results = []
    with ThreadPoolExecutor(max_workers=max_threads) as executor:
        futures = {executor.submit(find_google_oidc_on, website): website for website in websites}

        for future in as_completed(futures):
            result = future.result()
            results.append(result)
    return results

def read_websites_from_csv(input_csv):
    websites = []
    with open(input_csv, 'r', newline='', encoding='utf-8') as file:
        csv_reader = csv.reader(file)
        for row in csv_reader:
            websites.append(row[0])
    return websites

def write_results_to_csv(results, output_csv):
    with open(output_csv, 'w', newline='', encoding='utf-8') as file:
        csv_writer = csv.writer(file)
        csv_writer.writerow(['Website', 'Google OIDC Present', 'Method'])  # Header
        for website, uses_google_oidc, method in results:
            csv_writer.writerow([website, uses_google_oidc, method])

def main():
    input_csv = 'try7.csv'  # Replace with your input CSV file name
    output_csv = 'outpu-ds7.csv'  # Replace with your output CSV file name

    websites_to_test = read_websites_from_csv(input_csv)
    results = process_websites(websites_to_test, max_threads=8)

    
    # Print the results
    for website, uses_google_oidc, method in results:
        print(f"{website} : Google OIDC - {uses_google_oidc}, Methodssss - {method}")
    write_results_to_csv(results, output_csv)
    
if __name__ == "__main__":
    main()


INFO:root:Processing website: ﻿https://www.nytimes.com/
INFO:root:Processing website: https://www.shopify.com/
INFO:root:Processing website: https://www.w3schools.com/
INFO:root:Processing website: https://medium.com/
INFO:root:Processing website: https://www.udemy.com/
INFO:root:Processing website: https://www.researchgate.net/
INFO:root:Processing website: https://vimeo.com/
INFO:root:Processing website: https://www.grammarly.com/
INFO:root:Website loaded
INFO:root:Website loaded
INFO:root:Website loaded
INFO:root:Website loaded
INFO:root:Processing website: https://www.theguardian.com/
INFO:root:Website loaded
INFO:root:Website loaded
INFO:root:Website loaded
INFO:root:Website loaded
INFO:root:Login section not found: Login
INFO:root:Login section found and clicked: Log in
INFO:root:Finished processing https://www.grammarly.com/: Google OIDC - False, Method - N/A
INFO:root:Processing website: https://pixabay.com/re
INFO:root:Website loaded
INFO:root:Login section not found: Login
IN

INFO:root:Website loaded
INFO:root:Login section not found: SIGN IN
INFO:root:Login section not found: Login
INFO:root:Login section not found: SIGN IN
INFO:root:Login section not found: Sign In
INFO:root:No OIDC redirect or element found.
INFO:root:Login section not found: Login
INFO:root:Finished processing https://www.canva.com/: Google OIDC - True, Method - Direct Link/Button
INFO:root:Processing website: https://www.mobile.de/
INFO:root:Login section not found: LOGIN
INFO:root:Website loaded
INFO:root:Login section not found: Log in
INFO:root:Login section not found: LOGIN
INFO:root:Login section not found: Log In
INFO:root:Login section not found: Log in
INFO:root:Login section not found: LOG IN
INFO:root:Login section not found: Sign in
INFO:root:Login section found and clicked: Login
INFO:root:Login section not found: LOGON
INFO:root:Login section not found: LOG IN
INFO:root:Login section not found: Sign in
INFO:root:Finished processing https://cloudinary.com/: Google OIDC - Tr

INFO:root:Login section not found: Account
INFO:root:Login section not found: LOGIN
INFO:root:Login section not found: Account
INFO:root:Finished processing https://disqus.com/: Google OIDC - True, Method - Direct Link/Button
INFO:root:Login section not found: LOGON
INFO:root:Processing website: https://www.abc.net.au/
INFO:root:Login section not found: Login
INFO:root:Website loaded
INFO:root:Login section not found: LOG IN
INFO:root:Login section not found: My Account
INFO:root:No login section found with standard patterns. Proceeding without login click.
INFO:root:Login section not found: SIGN IN
INFO:root:Login section not found: My Account
INFO:root:No login section found with standard patterns. Proceeding without login click.
INFO:root:Login section not found: Log in
INFO:root:Login section not found: Login/Register
INFO:root:Login section not found: LOGIN
INFO:root:Login section not found: Login
INFO:root:No OIDC redirect or element found.
INFO:root:Login section not found: Sign

INFO:root:No login section found with standard patterns. Proceeding without login click.
INFO:root:Login section not found: LOGON
INFO:root:No OIDC redirect or element found.
INFO:root:Login section not found: SIGN IN
INFO:root:Finished processing https://www.mirror.co.uk/: Google OIDC - True, Method - Direct Link/Button
INFO:root:Login section not found: LOGIN
INFO:root:Login section not found: LOG IN
INFO:root:Login section not found: Login/Register
INFO:root:Login section not found: Account
INFO:root:Login section not found: Login
INFO:root:Login section not found: My Account
INFO:root:No login section found with standard patterns. Proceeding without login click.
INFO:root:Login section not found: Log in
INFO:root:Finished processing https://www.skyscanner.net/: Google OIDC - False, Method - N/A
INFO:root:Login section not found: Sign in
INFO:root:Login section not found: Sign In
INFO:root:Login section not found: Log In
INFO:root:Login section not found: LOGON
INFO:root:Login secti

ElementClickInterceptedException: Message: element click intercepted: Element <a href="https://profile.w3schools.com/log-in?redirect_url=https%3A%2F%2Fmy-learning.w3schools.com" class="user-anonymous tnb-login-btn w3-bar-item w3-btn bar-item-hover w3-right ws-light-green ga-top ga-top-login" title="Login to your account" aria-label="Login to your account">...</a> is not clickable at point (1813, 28). Other element would receive the click: <div class="sn-v-cell">...</div>
  (Session info: headless chrome=119.0.6045.159)
Stacktrace:
	GetHandleVerifier [0x00007FF73C5482B2+55298]
	(No symbol) [0x00007FF73C4B5E02]
	(No symbol) [0x00007FF73C3705AB]
	(No symbol) [0x00007FF73C3B7A77]
	(No symbol) [0x00007FF73C3B5E39]
	(No symbol) [0x00007FF73C3B3C08]
	(No symbol) [0x00007FF73C3B2C8A]
	(No symbol) [0x00007FF73C3A87BF]
	(No symbol) [0x00007FF73C3D20AA]
	(No symbol) [0x00007FF73C3A80CF]
	(No symbol) [0x00007FF73C3D22C0]
	(No symbol) [0x00007FF73C3EAAA4]
	(No symbol) [0x00007FF73C3D1E83]
	(No symbol) [0x00007FF73C3A670A]
	(No symbol) [0x00007FF73C3A7964]
	GetHandleVerifier [0x00007FF73C8C0AAB+3694587]
	GetHandleVerifier [0x00007FF73C91728E+4048862]
	GetHandleVerifier [0x00007FF73C90F173+4015811]
	GetHandleVerifier [0x00007FF73C5E47D6+695590]
	(No symbol) [0x00007FF73C4C0CE8]
	(No symbol) [0x00007FF73C4BCF34]
	(No symbol) [0x00007FF73C4BD062]
	(No symbol) [0x00007FF73C4AD3A3]
	BaseThreadInitThunk [0x00007FFC6590257D+29]
	RtlUserThreadStart [0x00007FFC66A2AA58+40]


# New Trial

In [18]:
import csv
import logging
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementClickInterceptedException
from urllib.parse import urlparse
from concurrent.futures import ThreadPoolExecutor, as_completed

logging.basicConfig(level=logging.INFO)

def is_valid_url(url):
    """Check if the URL is valid."""
    try:
        result = urlparse(url)
        return all([result.scheme, result.netloc])
    except ValueError:
        return False

def configure_browser():
    options = Options()
    options.add_argument("--headless")  # Enable headless mode
    options.add_argument("--disable-gpu")  # Disable GPU hardware acceleration
    options.add_argument("--window-size=1920x1080")  # Set window size
    options.add_argument("--disable-extensions")  # Disable extensions
    options.add_argument("--no-sandbox")  # Disable the sandbox for Chrome's renderer
    options.add_argument("--disable-images")  # Disable images
    options.page_load_strategy = 'eager'  # Use 'eager' page load strategy
    return webdriver.Chrome(options=options)

def find_google_oidc_on(website_url):
    logging.info(f"Processing website: {website_url}")
    driver = configure_browser()

    if not is_valid_url(website_url):
        logging.warning(f"Invalid URL: {website_url}")
        return website_url, False, "N/A"

    driver.get(website_url)
    logging.info("Website loaded")

    # Accept cookies
    cookie_buttons = [
        "Accept all & visit the site", "Accept", "Agree", "Close", "Ignore", 
        "Agree & Continue", "Agree & Close", "AGREE & CLOSE", "CONSENT", 
        "Consent", "ACCEPT", "AGREE", "Accept all cookies", "Accept All Cookies", "Allow", "ALLOW", "OK", "Ok", "I accept", "I Accept", "I ACCEPT"
    ]
    for button_text in cookie_buttons:
        try:
            accept_button = WebDriverWait(driver, 5).until(
                EC.element_to_be_clickable((By.XPATH, "//button[contains(., '" + button_text + "')] | //a[contains(., '" + button_text + "')]"))
            )
            accept_button.click()
            break
        except (NoSuchElementException, TimeoutException, ElementClickInterceptedException):
            continue

    # Look for login section
    login_section = ["Login", "Log in", "Sign in", "Sign In", "Log In", "LOGON", "SIGN IN", "LOGIN", "LOG IN", "Login/Register", "Account", "My Account"]
    login_found = False
    for login_pat in login_section:
        try:
            try:
                login_button = WebDriverWait(driver, 5).until(
                    EC.presence_of_element_located((By.PARTIAL_LINK_TEXT, login_pat))
                )
            except TimeoutException:
                login_button = WebDriverWait(driver, 5).until(
                    EC.presence_of_element_located((By.XPATH, f"//*[contains(text(), '{login_pat}') and (self::a or self::button)]"))
                )
            
            driver.execute_script("arguments[0].scrollIntoView(true);", login_button)
            try:
                login_button.click()
            except ElementClickInterceptedException:
                driver.execute_script("arguments[0].click();", login_button)
            login_found = True
            logging.info(f"Login section found and clicked: {login_pat}")
            break
        except (TimeoutException, NoSuchElementException):
            logging.info(f"Login section not found: {login_pat}")
            continue

    if not login_found:
        logging.info("No login section found with standard patterns. Proceeding without login click.")

    # Google OIDC patterns and checks
    google_oidc_patterns = [
        "Sign in with Google", "Continue with Google", "Sign In with Google", "Login with Google", 
        "Sign in using Google", "Log In with Google", "Login using Google",
        "LOG IN using Google", "LOG IN with Google", "LOG IN WITH GOOGLE"
    ]

    has_google_oidc = check_for_google_oidc_patterns(driver)
    oidc_method = "Google OIDC" if has_google_oidc else "N/A"

    driver.quit()
    logging.info(f"Finished processing {website_url}: Google OIDC - {has_google_oidc}, Method - {oidc_method}")
    return website_url, has_google_oidc, oidc_method

def check_for_google_oidc_patterns(driver):
    try:
        WebDriverWait(driver, 10).until(
            lambda x: any(pattern in driver.page_source for pattern in google_oidc_patterns)
        )
        return any(pattern in driver.page_source for pattern in google_oidc_patterns)
    except TimeoutException:
        return False

def process_websites(websites, max_threads=15):
    results = []
    with ThreadPoolExecutor(max_workers=max_threads) as executor:
        futures = {executor.submit(find_google_oidc_on, website): website for website in websites}

        for future in as_completed(futures):
            result = future.result()
            results.append(result)
    return results

def read_websites_from_csv(input_csv):
    websites = []
    with open(input_csv, 'r', newline='', encoding='utf-8') as file:
        csv_reader = csv.reader(file)
        for row in csv_reader:
            websites.append(row[0])
    return websites

def write_results_to_csv(results, output_csv):
    with open(output_csv, 'w', newline='', encoding='utf-8') as file:
        csv_writer = csv.writer(file)
        csv_writer.writerow(['Website', 'Google OIDC Present', 'Method'])
        for website, uses_google_oidc, method in results:
            csv_writer.writerow([website, uses_google_oidc, method])

def main():
    input_csv = 'try7.csv'  # Replace with your input CSV file name
    output_csv = 'output-ds7.csv'  # Replace with your output CSV file name

    websites_to_test = read_websites_from_csv(input_csv)
    results = process_websites(websites_to_test, max_threads=15)

    for website, uses_google_oidc, method in results:
        print(f"{website}: Google OIDC - {uses_google_oidc}, Method - {method}")
    write_results_to_csv(results, output_csv)

if __name__ == "__main__":
    main()


INFO:root:Processing website: ﻿https://www.nytimes.com/
INFO:root:Processing website: https://www.shopify.com/
INFO:root:Processing website: https://www.w3schools.com/
INFO:root:Processing website: https://medium.com/
INFO:root:Processing website: https://www.udemy.com/
INFO:root:Processing website: https://www.researchgate.net/
INFO:root:Processing website: https://vimeo.com/
INFO:root:Processing website: https://www.grammarly.com/
INFO:root:Processing website: https://www.theguardian.com/
INFO:root:Processing website: https://pixabay.com/re
INFO:root:Processing website: https://www.chess.com/
INFO:root:Processing website: https://indianexpress.com/
INFO:root:Processing website: https://prezi.com/
INFO:root:Processing website: https://yahoo.com
INFO:root:Processing website: https://www.tiktok.com/
INFO:root:Website loaded
INFO:root:Website loaded
INFO:root:Website loaded
INFO:root:Website loaded
INFO:root:Website loaded
INFO:root:Website loaded
INFO:root:Website loaded
INFO:root:Websi

INFO:root:Login section not found: Sign In
INFO:root:Login section not found: Login
INFO:root:Login section not found: SIGN IN
INFO:root:Website loaded
INFO:root:Login section not found: My Account
INFO:root:No login section found with standard patterns. Proceeding without login click.
INFO:root:Processing website: https://www.bhphotovideo.com/
INFO:root:Login section not found: Log In
INFO:root:Login section found and clicked: Sign In
INFO:root:Processing website: https://www.blizzard.com/en-gb/
INFO:root:Login section not found: Log in
INFO:root:Login section not found: Log in
INFO:root:Login section not found: LOGIN
INFO:root:Login section not found: Sign in
INFO:root:Login section not found: LOGON
INFO:root:Login section not found: LOGIN
INFO:root:Login section not found: LOG IN
INFO:root:Login section not found: Sign in
INFO:root:Login section not found: SIGN IN
INFO:root:Login section not found: Sign In
INFO:root:Login section not found: Sign In
INFO:root:Login section not found:

INFO:root:Login section not found: SIGN IN
INFO:root:Login section not found: Sign In
INFO:root:Login section not found: LOG IN
INFO:root:Login section not found: LOGIN
INFO:root:Login section not found: Sign In
INFO:root:Login section not found: Log In
INFO:root:Login section not found: LOGIN
INFO:root:Login section not found: Login/Register
INFO:root:Login section not found: Log In
INFO:root:Login section not found: LOGON
INFO:root:Login section not found: Log In
INFO:root:Login section not found: LOG IN
INFO:root:Login section not found: Account
INFO:root:Login section not found: LOGON
INFO:root:Login section not found: SIGN IN
INFO:root:Login section not found: LOGON
INFO:root:Login section not found: My Account
INFO:root:No login section found with standard patterns. Proceeding without login click.
INFO:root:Login section not found: Login/Register
INFO:root:Login section not found: LOGIN
INFO:root:Login section not found: SIGN IN
INFO:root:Login section not found: SIGN IN
INFO:roo

NameError: name 'google_oidc_patterns' is not defined