In [None]:
import csv
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementClickInterceptedException
from urllib.parse import urlparse
from concurrent.futures import ThreadPoolExecutor, as_completed

def validity_check(url):
    """Report whether *url* carries both a scheme and a network location.

    Returns:
        True when ``urlparse`` yields a non-empty scheme and netloc;
        False otherwise, including when ``urlparse`` raises ``ValueError``.
    """
    try:
        parts = urlparse(url)
    except ValueError:
        return False
    return bool(parts.scheme) and bool(parts.netloc)
    
def configure_browser():
    """Build a headless Chrome WebDriver tuned for fast page probing.

    Returns:
        A new ``webdriver.Chrome`` instance. The caller owns it and is
        responsible for calling ``quit()``.
    """
    options = Options()
    options.add_argument("--headless")  # Enable headless mode
    options.add_argument("--disable-gpu")  # Disable GPU hardware acceleration
    # Chrome expects "width,height" here; the "1920x1080" spelling is not
    # parsed, which leaves the headless window at its default size.
    options.add_argument("--window-size=1920,1080")  # Set window size
    options.add_argument("--disable-extensions")  # Disable extensions
    options.add_argument("--no-sandbox")  # Disable the sandbox for Chrome's renderer
    options.add_argument("--disable-images")  # Disable images
    # NOTE(review): "--disable-images" does not appear to be a recognised
    # Chrome switch; the Blink setting below is the commonly used way to
    # skip image loading. Both are passed so neither behaviour is lost.
    options.add_argument("--blink-settings=imagesEnabled=false")
    options.page_load_strategy = 'eager'  # Use 'eager' page load strategy
    return webdriver.Chrome(options=options)

def check_oidc(website_url):
    """Probe one website for a Google sign-in option using headless Chrome.

    The URL is validated, the page is loaded, a cookie banner is dismissed
    when one of the known button labels matches, the first login link found
    is clicked, and the resulting page is inspected for a navigation, a
    Google-related ``href``, or a known "... with Google" phrase.

    Args:
        website_url: Absolute URL of the site to test.

    Returns:
        A ``(website_url, has_google_oidc, oidc_method)`` tuple where
        ``oidc_method`` is ``"Pop-up"``, ``"Redirect URL"``,
        ``"Direct Link/Button"``, ``"Text Pattern"`` or ``"N/A"``.
    """
    # Validate before launching Chrome so an invalid row never starts (and
    # then leaks) a browser process.
    if not validity_check(website_url):
        print(f"Invalid URL: {website_url}")
        return website_url, False, "N/A"

    driver = configure_browser()
    try:
        driver.get(website_url)
        # The browser normalises the address (trailing slash, host
        # redirects), so later navigation is measured against the URL that
        # was actually loaded rather than the raw input string.
        landing_url = driver.current_url

        # Accept cookies
        cookie_buttons = [
            "Accept all & visit the site", "Accept", "Agree", "Close", "Ignore",  "Accept all",
            "Agree & Continue", "Agree & Close", "AGREE & CLOSE", "CONSENT",
            "Consent", "ACCEPT","AGREE","Accept all cookies","Accept All Cookies","Allow","ALLOW","OK","Ok","I accept","I Accept","I ACCEPT"
        ]
        # XPath 1.0 has no lower-case(); translate() needs the whole alphabet
        # for the comparison against the lower-cased label to be
        # case-insensitive.
        upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        lower = "abcdefghijklmnopqrstuvwxyz"
        lowered_text = f"translate(text(),'{upper}','{lower}')"
        # Labels that differ only by case produce the same query, so each
        # distinct lower-cased label is tried once, in the original order.
        for needle in dict.fromkeys(text.lower() for text in cookie_buttons):
            xpath = (
                f"//button[contains({lowered_text},'{needle}')]"
                f" | //a[contains({lowered_text},'{needle}')]"
            )
            try:
                accept_button = WebDriverWait(driver, 5).until(
                    EC.element_to_be_clickable((By.XPATH, xpath))
                )
                accept_button.click()
                break
            except (NoSuchElementException, TimeoutException, ElementClickInterceptedException):
                # Best effort: no banner, or it could not be clicked.
                pass

        # Look for login section
        login_section = ["Login", "Log in", "Sign in", "Sign In", "Log In", "LOGON", "SIGN IN", "LOGIN", "LOG IN","Login/Register"]
        for login_pat in login_section:
            try:
                login_button = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.PARTIAL_LINK_TEXT, login_pat))
                )
                ActionChains(driver).move_to_element(login_button).perform()
                try:
                    login_button.click()
                except ElementClickInterceptedException:
                    # An overlay swallowed the native click; click via JS.
                    driver.execute_script("arguments[0].click();", login_button)
                break
            except TimeoutException:
                pass

        # Google OIDC patterns and checks
        google_oidc_patterns = [
            "Sign in with Google", "Continue with Google","Sign In with Google", "Login with Google",
            "Sign in using Google", "Log In with Google", "Login using Google",
            "LOG IN using Google", "LOG IN with Google", "LOG IN WITH GOOGLE"
        ]

        oidc_method = "N/A"
        redirect_or_element_found = False

        try:
            # Wait for a navigation away from the landing page or for an
            # element whose href mentions Google.
            WebDriverWait(driver, 10).until(
                lambda _drv: driver.current_url != landing_url or driver.find_element(By.XPATH, "//a[contains(@href,'google')] | //button[contains(@href,'google')] | //div[contains(@href,'google')]")
            )
            redirect_or_element_found = True
        except TimeoutException:
            pass

        # Check for text patterns
        text_pattern_found = any(pattern in driver.page_source for pattern in google_oidc_patterns)

        if redirect_or_element_found:
            if len(driver.window_handles) > 1:
                oidc_method = "Pop-up"
            elif driver.current_url != landing_url:
                # NOTE(review): any navigation after the login click lands
                # here, including the site's own login page; confirm whether
                # the destination should also be checked for a Google host.
                oidc_method = "Redirect URL"
            else:
                oidc_method = "Direct Link/Button"
        elif text_pattern_found:
            oidc_method = "Text Pattern"
    finally:
        # Always release the browser, even when a Selenium call raised.
        driver.quit()

    has_google_oidc = oidc_method != "N/A"

    # Print the result for the current website
    print(f"{website_url} : Google OIDC - {has_google_oidc}, Method - {oidc_method}")
    return website_url, has_google_oidc, oidc_method

def process_websites(websites, max_threads=5):
    """Run ``check_oidc`` over *websites* on a thread pool.

    Args:
        websites: Iterable of URL strings to test.
        max_threads: Number of worker threads in the pool.

    Returns:
        A list of ``(website, has_google_oidc, method)`` tuples in the order
        the checks finished (not input order). A site whose check raised is
        recorded as ``(website, False, "Error")`` instead of aborting the run.
    """
    results = []
    with ThreadPoolExecutor(max_workers=max_threads) as executor:
        futures = {executor.submit(check_oidc, website): website for website in websites}

        for future in as_completed(futures):
            website = futures[future]
            try:
                results.append(future.result())
            except Exception as exc:
                # Batch boundary: result() re-raises whatever the worker
                # raised, and one failing site must not discard the rest.
                print(f"{website} : check failed - {type(exc).__name__}: {exc}")
                results.append((website, False, "Error"))
    return results

def read_websites_from_csv(input_csv):
    """Load the values in the first column of a CSV file.

    Args:
        input_csv: Path of the CSV file to read.

    Returns:
        The first-column values in file order, with surrounding whitespace
        removed. Blank lines and rows whose first cell is empty are skipped.
        A header row, if the file has one, is returned like any other row.
    """
    websites = []
    # 'utf-8-sig' drops a leading byte-order mark that would otherwise be
    # glued onto the first value; files without a BOM read the same as UTF-8.
    with open(input_csv, 'r', newline='', encoding='utf-8-sig') as file:
        for row in csv.reader(file):
            if not row:
                continue  # blank line: csv.reader yields an empty list
            url = row[0].strip()  # Assuming URLs are in the first column
            if url:
                websites.append(url)
    return websites

def write_results_to_csv(results, output_csv):
    """Write the result tuples to *output_csv* beneath a fixed header row.

    Args:
        results: Iterable of ``(website, uses_google_oidc, method)`` triples.
        output_csv: Path of the file to create or overwrite.
    """
    header = ['Website', 'Google OIDC Present', 'Method']
    with open(output_csv, 'w', newline='', encoding='utf-8') as sink:
        writer = csv.writer(sink)
        writer.writerow(header)
        # Unpack each entry so anything that is not a triple is still rejected.
        writer.writerows([site, found, how] for site, found, how in results)

def main():
    """Read the site list, probe every site, then print and save the results."""
    source_path = 'try7.csv'  # Replace with your input CSV file name
    target_path = 'try7_Results.csv'  # Replace with your desired output CSV file name

    outcomes = process_websites(read_websites_from_csv(source_path), max_threads=10)

    # Echo every result before persisting them.
    for site, found, how in outcomes:
        print(f"{site} : Google OIDC - {found}, Method - {how}")
    write_results_to_csv(outcomes, target_path)


if __name__ == "__main__":
    main()

# All Complete dataset

In [None]:
import csv
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementClickInterceptedException
from urllib.parse import urlparse
from concurrent.futures import ThreadPoolExecutor, as_completed

def validity_check(url):
    """Report whether *url* carries both a scheme and a network location.

    Returns:
        True when ``urlparse`` yields a non-empty scheme and netloc;
        False otherwise, including when ``urlparse`` raises ``ValueError``.
    """
    try:
        parts = urlparse(url)
    except ValueError:
        return False
    return bool(parts.scheme) and bool(parts.netloc)
    
def configure_browser():
    """Build a headless Chrome WebDriver tuned for fast page probing.

    Returns:
        A new ``webdriver.Chrome`` instance. The caller owns it and is
        responsible for calling ``quit()``.
    """
    options = Options()
    options.add_argument("--headless")  # Enable headless mode
    options.add_argument("--disable-gpu")  # Disable GPU hardware acceleration
    # Chrome expects "width,height" here; the "1920x1080" spelling is not
    # parsed, which leaves the headless window at its default size.
    options.add_argument("--window-size=1920,1080")  # Set window size
    options.add_argument("--disable-extensions")  # Disable extensions
    options.add_argument("--no-sandbox")  # Disable the sandbox for Chrome's renderer
    options.add_argument("--disable-images")  # Disable images
    # NOTE(review): "--disable-images" does not appear to be a recognised
    # Chrome switch; the Blink setting below is the commonly used way to
    # skip image loading. Both are passed so neither behaviour is lost.
    options.add_argument("--blink-settings=imagesEnabled=false")
    options.page_load_strategy = 'eager'  # Use 'eager' page load strategy
    return webdriver.Chrome(options=options)

def check_oidc(website_url):
    """Probe one website for a Google sign-in option using headless Chrome.

    The URL is validated, the page is loaded, a cookie banner is dismissed
    when one of the known button labels matches, the first login link found
    is clicked, and the resulting page is inspected for a navigation, a
    Google-related ``href``, or a known "... with Google" phrase.

    Args:
        website_url: Absolute URL of the site to test.

    Returns:
        A ``(website_url, has_google_oidc, oidc_method)`` tuple where
        ``oidc_method`` is ``"Pop-up"``, ``"Redirect URL"``,
        ``"Direct Link/Button"``, ``"Text Pattern"`` or ``"N/A"``.
    """
    # Validate before launching Chrome so an invalid row never starts (and
    # then leaks) a browser process.
    if not validity_check(website_url):
        print(f"Invalid URL: {website_url}")
        return website_url, False, "N/A"

    driver = configure_browser()
    try:
        driver.get(website_url)
        # The browser normalises the address (trailing slash, host
        # redirects), so later navigation is measured against the URL that
        # was actually loaded rather than the raw input string.
        landing_url = driver.current_url

        # Accept cookies
        cookie_buttons = [
            "Accept all & visit the site", "Accept", "Agree", "Close", "Ignore",  "Accept all",
            "Agree & Continue", "Agree & Close", "AGREE & CLOSE", "CONSENT",
            "Consent", "ACCEPT","AGREE","Accept all cookies","Accept All Cookies","Allow","ALLOW","OK","Ok","I accept","I Accept","I ACCEPT"
        ]
        # XPath 1.0 has no lower-case(); translate() needs the whole alphabet
        # for the comparison against the lower-cased label to be
        # case-insensitive.
        upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        lower = "abcdefghijklmnopqrstuvwxyz"
        lowered_text = f"translate(text(),'{upper}','{lower}')"
        # Labels that differ only by case produce the same query, so each
        # distinct lower-cased label is tried once, in the original order.
        for needle in dict.fromkeys(text.lower() for text in cookie_buttons):
            xpath = (
                f"//button[contains({lowered_text},'{needle}')]"
                f" | //a[contains({lowered_text},'{needle}')]"
            )
            try:
                accept_button = WebDriverWait(driver, 5).until(
                    EC.element_to_be_clickable((By.XPATH, xpath))
                )
                accept_button.click()
                break
            except (NoSuchElementException, TimeoutException, ElementClickInterceptedException):
                # Best effort: no banner, or it could not be clicked.
                pass

        # Look for login section
        login_section = ["Login", "Log in", "Sign in", "Sign In", "Log In", "LOGON", "SIGN IN", "LOGIN", "LOG IN","Login/Register"]
        for login_pat in login_section:
            try:
                login_button = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.PARTIAL_LINK_TEXT, login_pat))
                )
                ActionChains(driver).move_to_element(login_button).perform()
                try:
                    login_button.click()
                except ElementClickInterceptedException:
                    # An overlay swallowed the native click; click via JS.
                    driver.execute_script("arguments[0].click();", login_button)
                break
            except TimeoutException:
                pass

        # Google OIDC patterns and checks
        google_oidc_patterns = [
            "Sign in with Google", "Continue with Google","Sign In with Google", "Login with Google",
            "Sign in using Google", "Log In with Google", "Login using Google",
            "LOG IN using Google", "LOG IN with Google", "LOG IN WITH GOOGLE"
        ]

        oidc_method = "N/A"
        redirect_or_element_found = False

        try:
            # Wait for a navigation away from the landing page or for an
            # element whose href mentions Google.
            WebDriverWait(driver, 10).until(
                lambda _drv: driver.current_url != landing_url or driver.find_element(By.XPATH, "//a[contains(@href,'google')] | //button[contains(@href,'google')] | //div[contains(@href,'google')]")
            )
            redirect_or_element_found = True
        except TimeoutException:
            pass

        # Check for text patterns
        text_pattern_found = any(pattern in driver.page_source for pattern in google_oidc_patterns)

        if redirect_or_element_found:
            if len(driver.window_handles) > 1:
                oidc_method = "Pop-up"
            elif driver.current_url != landing_url:
                # NOTE(review): any navigation after the login click lands
                # here, including the site's own login page; confirm whether
                # the destination should also be checked for a Google host.
                oidc_method = "Redirect URL"
            else:
                oidc_method = "Direct Link/Button"
        elif text_pattern_found:
            oidc_method = "Text Pattern"
    finally:
        # Always release the browser, even when a Selenium call raised.
        driver.quit()

    has_google_oidc = oidc_method != "N/A"
    # Print the result for the current website
    print(f"{website_url} : Google OIDC - {has_google_oidc}, Method - {oidc_method}")
    return website_url, has_google_oidc, oidc_method

def process_websites(websites, max_threads=10):
    """Run ``check_oidc`` over *websites* on a thread pool.

    Args:
        websites: Iterable of URL strings to test.
        max_threads: Number of worker threads in the pool.

    Returns:
        A list of ``(website, has_google_oidc, method)`` tuples in the order
        the checks finished (not input order). A site whose check raised is
        recorded as ``(website, False, "Error")`` instead of aborting the run.
    """
    results = []
    with ThreadPoolExecutor(max_workers=max_threads) as executor:
        futures = {executor.submit(check_oidc, website): website for website in websites}

        for future in as_completed(futures):
            website = futures[future]
            try:
                results.append(future.result())
            except Exception as exc:
                # Batch boundary: result() re-raises whatever the worker
                # raised, and one failing site must not discard the rest.
                print(f"{website} : check failed - {type(exc).__name__}: {exc}")
                results.append((website, False, "Error"))
    return results

def read_websites_from_csv(input_csv):
    """Load the values in the first column of a CSV file.

    Args:
        input_csv: Path of the CSV file to read.

    Returns:
        The first-column values in file order, with surrounding whitespace
        removed. Blank lines and rows whose first cell is empty are skipped.
        A header row, if the file has one, is returned like any other row.
    """
    websites = []
    # 'utf-8-sig' drops a leading byte-order mark that would otherwise be
    # glued onto the first value; files without a BOM read the same as UTF-8.
    with open(input_csv, 'r', newline='', encoding='utf-8-sig') as file:
        for row in csv.reader(file):
            if not row:
                continue  # blank line: csv.reader yields an empty list
            url = row[0].strip()  # Assuming URLs are in the first column
            if url:
                websites.append(url)
    return websites

def write_results_to_csv(results, output_csv):
    """Write the result tuples to *output_csv* beneath a fixed header row.

    Args:
        results: Iterable of ``(website, uses_google_oidc, method)`` triples.
        output_csv: Path of the file to create or overwrite.
    """
    header = ['Website', 'Google OIDC Present', 'Method']
    with open(output_csv, 'w', newline='', encoding='utf-8') as sink:
        writer = csv.writer(sink)
        writer.writerow(header)
        # Unpack each entry so anything that is not a triple is still rejected.
        writer.writerows([site, found, how] for site, found, how in results)

def main():
    """Read the site list, probe every site, then print and save the results."""
    source_path = 'dataset.csv'  # Replace with your input CSV file name
    target_path = 'dataset_Results.csv'  # Replace with your desired output CSV file name

    outcomes = process_websites(read_websites_from_csv(source_path), max_threads=10)

    # Echo every result before persisting them.
    for site, found, how in outcomes:
        print(f"{site} : Google OIDC - {found}, Method - {how}")
    write_results_to_csv(outcomes, target_path)


if __name__ == "__main__":
    main()

In [None]:
import csv
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementClickInterceptedException
from urllib.parse import urlparse
from concurrent.futures import ThreadPoolExecutor, as_completed

def validity_check(url):
    """Report whether *url* carries both a scheme and a network location.

    Returns:
        True when ``urlparse`` yields a non-empty scheme and netloc;
        False otherwise, including when ``urlparse`` raises ``ValueError``.
    """
    try:
        parts = urlparse(url)
    except ValueError:
        return False
    return bool(parts.scheme) and bool(parts.netloc)
    
def configure_browser():
    """Build a headless Chrome WebDriver tuned for fast page probing.

    Returns:
        A new ``webdriver.Chrome`` instance. The caller owns it and is
        responsible for calling ``quit()``.
    """
    options = Options()
    options.add_argument("--headless")  # Enable headless mode
    options.add_argument("--disable-gpu")  # Disable GPU hardware acceleration
    # Chrome expects "width,height" here; the "1920x1080" spelling is not
    # parsed, which leaves the headless window at its default size.
    options.add_argument("--window-size=1920,1080")  # Set window size
    options.add_argument("--disable-extensions")  # Disable extensions
    options.add_argument("--no-sandbox")  # Disable the sandbox for Chrome's renderer
    options.add_argument("--disable-images")  # Disable images
    # NOTE(review): "--disable-images" does not appear to be a recognised
    # Chrome switch; the Blink setting below is the commonly used way to
    # skip image loading. Both are passed so neither behaviour is lost.
    options.add_argument("--blink-settings=imagesEnabled=false")
    options.page_load_strategy = 'eager'  # Use 'eager' page load strategy
    return webdriver.Chrome(options=options)

def check_oidc(website_url):
    """Probe one website for a Google sign-in option using headless Chrome.

    The URL is validated, the page is loaded, a cookie banner is dismissed
    when one of the known button labels matches, the first login link found
    is clicked, and the resulting page is inspected for a navigation, a
    Google-related ``href``, or a known "... with Google" phrase.

    Args:
        website_url: Absolute URL of the site to test.

    Returns:
        A ``(website_url, has_google_oidc, oidc_method)`` tuple where
        ``oidc_method`` is ``"Pop-up"``, ``"Redirect URL"``,
        ``"Direct Link/Button"``, ``"Text Pattern"`` or ``"N/A"``.
    """
    # Validate before launching Chrome so an invalid row never starts (and
    # then leaks) a browser process.
    if not validity_check(website_url):
        print(f"Invalid URL: {website_url}")
        return website_url, False, "N/A"

    driver = configure_browser()
    try:
        driver.get(website_url)
        # The browser normalises the address (trailing slash, host
        # redirects), so later navigation is measured against the URL that
        # was actually loaded rather than the raw input string.
        landing_url = driver.current_url

        # Accept cookies
        cookie_buttons = [
            "Accept all & visit the site", "Accept", "Agree", "Close", "Ignore",  "Accept all",
            "Agree & Continue", "Agree & Close", "AGREE & CLOSE", "CONSENT",
            "Consent", "ACCEPT","AGREE","Accept all cookies","Accept All Cookies","Allow","ALLOW","OK","Ok","I accept","I Accept","I ACCEPT"
        ]
        # XPath 1.0 has no lower-case(); translate() needs the whole alphabet
        # for the comparison against the lower-cased label to be
        # case-insensitive.
        upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        lower = "abcdefghijklmnopqrstuvwxyz"
        lowered_text = f"translate(text(),'{upper}','{lower}')"
        # Labels that differ only by case produce the same query, so each
        # distinct lower-cased label is tried once, in the original order.
        for needle in dict.fromkeys(text.lower() for text in cookie_buttons):
            xpath = (
                f"//button[contains({lowered_text},'{needle}')]"
                f" | //a[contains({lowered_text},'{needle}')]"
            )
            try:
                accept_button = WebDriverWait(driver, 5).until(
                    EC.element_to_be_clickable((By.XPATH, xpath))
                )
                accept_button.click()
                break
            except (NoSuchElementException, TimeoutException, ElementClickInterceptedException):
                # Best effort: no banner, or it could not be clicked.
                pass

        # Look for login section
        login_section = ["Login", "Log in", "Sign in", "Sign In", "Log In", "LOGON", "SIGN IN", "LOGIN", "LOG IN","Login/Register"]
        for login_pat in login_section:
            try:
                login_button = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.PARTIAL_LINK_TEXT, login_pat))
                )
                ActionChains(driver).move_to_element(login_button).perform()
                try:
                    login_button.click()
                except ElementClickInterceptedException:
                    # An overlay swallowed the native click; click via JS.
                    driver.execute_script("arguments[0].click();", login_button)
                break
            except TimeoutException:
                pass

        # Google OIDC patterns and checks
        google_oidc_patterns = [
            "Sign in with Google", "Continue with Google","Sign In with Google", "Login with Google",
            "Sign in using Google", "Log In with Google", "Login using Google",
            "LOG IN using Google", "LOG IN with Google", "LOG IN WITH GOOGLE"
        ]

        oidc_method = "N/A"
        redirect_or_element_found = False

        try:
            # Wait for a navigation away from the landing page or for an
            # element whose href mentions Google.
            WebDriverWait(driver, 10).until(
                lambda _drv: driver.current_url != landing_url or driver.find_element(By.XPATH, "//a[contains(@href,'google')] | //button[contains(@href,'google')] | //div[contains(@href,'google')]")
            )
            redirect_or_element_found = True
        except TimeoutException:
            pass

        # Check for text patterns
        text_pattern_found = any(pattern in driver.page_source for pattern in google_oidc_patterns)

        if redirect_or_element_found:
            if len(driver.window_handles) > 1:
                oidc_method = "Pop-up"
            elif driver.current_url != landing_url:
                # NOTE(review): any navigation after the login click lands
                # here, including the site's own login page; confirm whether
                # the destination should also be checked for a Google host.
                oidc_method = "Redirect URL"
            else:
                oidc_method = "Direct Link/Button"
        elif text_pattern_found:
            oidc_method = "Text Pattern"
    finally:
        # Always release the browser, even when a Selenium call raised.
        driver.quit()

    has_google_oidc = oidc_method != "N/A"
    # Print the result for the current website
    print(f"{website_url} : Google OIDC - {has_google_oidc}, Method - {oidc_method}")
    return website_url, has_google_oidc, oidc_method

def process_websites(websites, max_threads=5):
    """Run ``check_oidc`` over *websites* on a thread pool.

    Args:
        websites: Iterable of URL strings to test.
        max_threads: Number of worker threads in the pool.

    Returns:
        A list of ``(website, has_google_oidc, method)`` tuples in the order
        the checks finished (not input order). A site whose check raised is
        recorded as ``(website, False, "Error")`` instead of aborting the run.
    """
    results = []
    with ThreadPoolExecutor(max_workers=max_threads) as executor:
        futures = {executor.submit(check_oidc, website): website for website in websites}

        for future in as_completed(futures):
            website = futures[future]
            try:
                results.append(future.result())
            except Exception as exc:
                # Batch boundary: result() re-raises whatever the worker
                # raised, and one failing site must not discard the rest.
                print(f"{website} : check failed - {type(exc).__name__}: {exc}")
                results.append((website, False, "Error"))
    return results

def read_websites_from_csv(input_csv):
    """Load the values in the first column of a CSV file.

    Args:
        input_csv: Path of the CSV file to read.

    Returns:
        The first-column values in file order, with surrounding whitespace
        removed. Blank lines and rows whose first cell is empty are skipped.
        A header row, if the file has one, is returned like any other row.
    """
    websites = []
    # 'utf-8-sig' drops a leading byte-order mark that would otherwise be
    # glued onto the first value; files without a BOM read the same as UTF-8.
    with open(input_csv, 'r', newline='', encoding='utf-8-sig') as file:
        for row in csv.reader(file):
            if not row:
                continue  # blank line: csv.reader yields an empty list
            url = row[0].strip()  # Assuming URLs are in the first column
            if url:
                websites.append(url)
    return websites

def write_results_to_csv(results, output_csv):
    """Write the result tuples to *output_csv* beneath a fixed header row.

    Args:
        results: Iterable of ``(website, uses_google_oidc, method)`` triples.
        output_csv: Path of the file to create or overwrite.
    """
    header = ['Website', 'Google OIDC Present', 'Method']
    with open(output_csv, 'w', newline='', encoding='utf-8') as sink:
        writer = csv.writer(sink)
        writer.writerow(header)
        # Unpack each entry so anything that is not a triple is still rejected.
        writer.writerows([site, found, how] for site, found, how in results)

def main():
    """Read the site list, probe every site, then print and save the results."""
    source_path = 'dataset.csv'  # Replace with your input CSV file name
    target_path = 'dataset_Results.csv'  # Replace with your desired output CSV file name

    outcomes = process_websites(read_websites_from_csv(source_path), max_threads=5)

    # Echo every result before persisting them.
    for site, found, how in outcomes:
        print(f"{site} : Google OIDC - {found}, Method - {how}")
    write_results_to_csv(outcomes, target_path)


if __name__ == "__main__":
    main()

Invalid URL: ﻿Websites
https://facebook.com : Google OIDC - True, Method - Redirect URL
https://google.com : Google OIDC - True, Method - Pop-up
https://youtube.com : Google OIDC - True, Method - Redirect URL
https://baidu.com : Google OIDC - True, Method - Redirect URL
https://bilibili.com : Google OIDC - True, Method - Redirect URL
https://twitter.com : Google OIDC - True, Method - Redirect URL
https://qq.com : Google OIDC - True, Method - Redirect URL
https://amazon.com : Google OIDC - True, Method - Redirect URL
https://zhihu.com : Google OIDC - True, Method - Redirect URL
https://linkedin.com : Google OIDC - True, Method - Redirect URL
https://wikipedia.org : Google OIDC - True, Method - Redirect URL
https://instagram.com : Google OIDC - True, Method - Redirect URL
https://reddit.com : Google OIDC - True, Method - Pop-up
https://bing.com : Google OIDC - True, Method - Redirect URL
https://yahoo.com : Google OIDC - True, Method - Redirect URL
https://openai.com : Google OIDC - True

https://udemy.com : Google OIDC - True, Method - Redirect URL
https://gitee.com : Google OIDC - True, Method - Redirect URL
https://amazon.it : Google OIDC - True, Method - Redirect URL
https://missav.com : Google OIDC - True, Method - Redirect URL
https://mega.nz : Google OIDC - True, Method - Redirect URL
https://speedtest.net : Google OIDC - True, Method - Redirect URL
https://quizlet.com : Google OIDC - True, Method - Redirect URL
https://intuit.com : Google OIDC - True, Method - Redirect URL
https://researchgate.net : Google OIDC - True, Method - Redirect URL
https://wetransfer.com : Google OIDC - True, Method - Redirect URL
https://archive.org : Google OIDC - True, Method - Redirect URL
https://chase.com : Google OIDC - True, Method - Redirect URL
https://vimeo.com : Google OIDC - True, Method - Redirect URL
https://ozon.ru : Google OIDC - True, Method - Redirect URL
https://binance.com : Google OIDC - True, Method - Redirect URL
https://youku.com : Google OIDC - True, Method - R

https://dell.com : Google OIDC - True, Method - Redirect URL
https://www.gov.uk : Google OIDC - True, Method - Redirect URL
https://hotstar.com : Google OIDC - True, Method - Redirect URL
https://360.com : Google OIDC - True, Method - Redirect URL
https://nga.cn : Google OIDC - True, Method - Redirect URL
https://wellsfargo.com : Google OIDC - True, Method - Redirect URL
https://wikimedia.org : Google OIDC - True, Method - Redirect URL
https://cupfox.app : Google OIDC - True, Method - Redirect URL
https://quark.cn : Google OIDC - True, Method - Redirect URL
https://googleadservices.com : Google OIDC - True, Method - Redirect URL
https://similarweb.com : Google OIDC - True, Method - Redirect URL
https://xueqiu.com : Google OIDC - True, Method - Redirect URL
https://blog.jp : Google OIDC - True, Method - Redirect URL
https://wix.com : Google OIDC - True, Method - Redirect URL
https://myanimelist.net : Google OIDC - True, Method - Redirect URL
https://chegg.com : Google OIDC - True, Metho