In [4]:
!pip install playwright

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple


In [5]:
# Cell 1: Imports and synchronous DNS lookup function (Playwright sync API)
# Note: Install dependencies first:
# pip install playwright
# playwright install

from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
import time
import os
from pathlib import Path

def _find_domain_input_sync(page):
    """
    Try several locators to find the domain input field reliably.
    Returns a locator or raises RuntimeError if not found.
    """
    # try common placeholder / label / name
    candidates = [
        lambda: page.get_by_placeholder('Name'),
        lambda: page.get_by_placeholder('Enter domain'),
        lambda: page.get_by_label('Domain name'),
        lambda: page.locator('input[name="domain"]'),
        lambda: page.locator('input[id*="domain"]'),
        lambda: page.locator('input[type="text"]'),
        lambda: page.locator('xpath=//input[contains(@placeholder,"Name") or contains(@placeholder,"domain") or contains(@aria-label,"domain")]'),
    ]
    for get in candidates:
        try:
            loc = get()
            # ensure visible / actionable
            if loc and loc.count() and loc.is_visible():
                return loc
        except Exception:
            continue
    # fallback: return first input element
    try:
        fallback = page.locator('input').first
        if fallback and fallback.count():
            return fallback
    except Exception:
        pass
    raise RuntimeError("Could not locate domain input field on the page")

def _click_dig_button_sync(page):
    """
    Try several ways to click the Dig/Submit button.
    Returns True if clicked, False otherwise.
    """
    tries = [
        lambda: page.get_by_role('button', name='Dig'),
        lambda: page.get_by_role('button', name='DIG'),
        lambda: page.get_by_text('Dig'),
        lambda: page.get_by_text('DIG'),
        lambda: page.locator('button[type="submit"]'),
        lambda: page.locator('button:has-text("Dig")'),
    ]
    for get in tries:
        try:
            btn = get()
            if btn and getattr(btn, "count", lambda: 1)() and btn.is_visible():
                btn.click()
                return True
        except Exception:
            continue
    return False

def perform_dns_lookup(fqdn, headless=True, screenshot_dir='.', timeout_ms=30000):
    """
    Perform a DNS lookup for fqdn using Google Admin Toolbox Dig (sync).

    Args:
      fqdn (str): Fully qualified domain name to query.
      headless (bool): Whether to run browser headless.
      screenshot_dir (str): Where to write screenshots.
      timeout_ms (int): Navigation / wait timeout in milliseconds.

    Returns:
      dict: {'fqdn', 'raw_output', 'screenshot'}
    """
    screenshot_dir = Path(screenshot_dir)
    screenshot_dir.mkdir(parents=True, exist_ok=True)
    screenshot_path = screenshot_dir / f"{fqdn.replace('.', '_')}_dns_results.png"
    raw_output = None

    with sync_playwright() as playwright:
        browser = playwright.chromium.launch(headless=headless)
        try:
            page = browser.new_page()
            # Navigate to the tool (use the hash for ANY default)
            page.goto('https://toolbox.googleapps.com/apps/dig/#ANY/', timeout=timeout_ms)
            page.wait_for_load_state('networkidle', timeout=timeout_ms)

            # Find domain input and fill value
            try:
                domain_input = _find_domain_input_sync(page)
                # ensure it's empty then fill
                try:
                    domain_input.fill('')  # clear if supported
                except Exception:
                    pass
                domain_input.fill(fqdn)
            except Exception as e:
                # best-effort: try evaluating JS to set value
                page.evaluate("() => { const i = document.querySelector('input'); if(i) i.value = ''; }")
                page.evaluate(f"() => {{ const i = document.querySelector('input'); if(i) i.value = '{fqdn}'; }}")

            # Click dig/submit
            clicked = _click_dig_button_sync(page)
            if not clicked:
                # fallback: press Enter in the input
                try:
                    domain_input.press('Enter')
                except Exception:
                    pass

            # Wait for results: look for a <pre> or .results area
            try:
                page.wait_for_selector('pre', timeout=timeout_ms)
            except PlaywrightTimeoutError:
                # sometimes results appear inside an element with class or id; do a small wait
                time.sleep(2)

            # Try clicking "Raw View" if present
            try:
                raw_view = page.get_by_text('Raw View')
                if raw_view and raw_view.is_visible():
                    raw_view.click()
                    time.sleep(0.8)
            except Exception:
                pass

            # Extract raw output from <pre> or fallback to a results container
            try:
                locator = page.locator('pre')
                if locator.count():
                    raw_output = locator.first.text_content(timeout=3000)
            except Exception:
                raw_output = None

            if not raw_output:
                # fallback selectors
                for sel in ['.results', '.dig-output', '#raw', '.output', 'textarea']:
                    try:
                        loc = page.locator(sel)
                        if loc.count():
                            raw_output = loc.first.text_content(timeout=2000)
                            if raw_output:
                                break
                    except Exception:
                        continue

            # Take screenshot
            try:
                page.screenshot(path=str(screenshot_path), full_page=True)
            except Exception:
                # try smaller screenshot
                page.screenshot(path=str(screenshot_path))

        finally:
            browser.close()

    return {
        'fqdn': fqdn,
        'raw_output': raw_output,
        'screenshot': str(screenshot_path)
    }

In [6]:
# Cell 2: Async version and concurrent runner (Playwright async API)
# Note: Install dependencies first:
# pip install playwright
# playwright install

import asyncio
from playwright.async_api import async_playwright, TimeoutError as AsyncTimeoutError

async def _find_domain_input_async(page):
    candidates = [
        lambda: page.get_by_placeholder('Name'),
        lambda: page.get_by_placeholder('Enter domain'),
        lambda: page.get_by_label('Domain name'),
        lambda: page.locator('input[name="domain"]'),
        lambda: page.locator('input[id*="domain"]'),
        lambda: page.locator('input[type="text"]'),
        lambda: page.locator('xpath=//input[contains(@placeholder,"Name") or contains(@placeholder,"domain") or contains(@aria-label,"domain")]'),
    ]
    for get in candidates:
        try:
            loc = get()
            if loc and (await loc.count()) and await loc.is_visible():
                return loc
        except Exception:
            continue
    # fallback
    try:
        fallback = page.locator('input').first
        if await fallback.count():
            return fallback
    except Exception:
        pass
    raise RuntimeError("Could not locate domain input field (async)")

async def _click_dig_button_async(page):
    tries = [
        lambda: page.get_by_role('button', name='Dig'),
        lambda: page.get_by_text('Dig'),
        lambda: page.locator('button[type="submit"]'),
        lambda: page.locator('button:has-text("Dig")'),
    ]
    for get in tries:
        try:
            btn = get()
            if btn and (await getattr(btn, "count")()) and await btn.is_visible():
                await btn.click()
                return True
        except Exception:
            continue
    return False

async def perform_dns_lookup_async(fqdn, headless=True, screenshot_dir='.', timeout_ms=30000):
    """
    Async DNS lookup using Playwright async API.
    """
    screenshot_dir = Path(screenshot_dir)
    screenshot_dir.mkdir(parents=True, exist_ok=True)
    screenshot_path = screenshot_dir / f"{fqdn.replace('.', '_')}_dns_results.png"
    raw_output = None

    async with async_playwright() as playwright:
        browser = await playwright.chromium.launch(headless=headless)
        page = await browser.new_page()
        try:
            await page.goto('https://toolbox.googleapps.com/apps/dig/#ANY/', timeout=timeout_ms)
            await page.wait_for_load_state('networkidle', timeout=timeout_ms)

            try:
                domain_input = await _find_domain_input_async(page)
                try:
                    await domain_input.fill('')
                except Exception:
                    pass
                await domain_input.fill(fqdn)
            except Exception:
                # fallback JS set
                await page.evaluate("() => { const i = document.querySelector('input'); if(i) i.value = ''; }")
                await page.evaluate(f"() => {{ const i = document.querySelector('input'); if(i) i.value = '{fqdn}'; }}")

            clicked = await _click_dig_button_async(page)
            if not clicked:
                try:
                    await domain_input.press('Enter')
                except Exception:
                    pass

            # Wait for results
            try:
                await page.wait_for_selector('pre', timeout=timeout_ms)
            except AsyncTimeoutError:
                await asyncio.sleep(1.5)

            # Click raw view
            try:
                raw_view = page.get_by_text('Raw View')
                if await raw_view.is_visible():
                    await raw_view.click()
                    await asyncio.sleep(0.6)
            except Exception:
                pass

            # Extract raw output
            try:
                locator = page.locator('pre')
                if await locator.count():
                    raw_output = await locator.first.text_content(timeout=2000)
            except Exception:
                raw_output = None

            if not raw_output:
                for sel in ['.results', '.dig-output', '#raw', '.output', 'textarea']:
                    try:
                        loc = page.locator(sel)
                        if await loc.count():
                            raw_output = await loc.first.text_content(timeout=1000)
                            if raw_output:
                                break
                    except Exception:
                        continue

            # screenshot
            try:
                await page.screenshot(path=str(screenshot_path), full_page=True)
            except Exception:
                await page.screenshot(path=str(screenshot_path))

        finally:
            await browser.close()

    return {'fqdn': fqdn, 'raw_output': raw_output, 'screenshot': str(screenshot_path)}

async def perform_many_async(domains, headless=True, screenshot_dir='.', concurrency=4):
    """
    Run multiple lookups concurrently (bounded concurrency).
    """
    sem = asyncio.Semaphore(concurrency)
    results = []

    async def worker(domain):
        async with sem:
            return await perform_dns_lookup_async(domain, headless=headless, screenshot_dir=screenshot_dir)

    tasks = [asyncio.create_task(worker(d)) for d in domains]
    for t in asyncio.as_completed(tasks):
        try:
            res = await t
            results.append(res)
        except Exception as e:
            results.append({'fqdn': None, 'error': str(e)})
    return results

In [7]:
# Cell 3: Usage examples (run these cells manually in your notebook)
# Sync example:
if __name__ == "__main__" and False:  # set to True to run as script from a notebook cell
    # Example: single synchronous lookup
    result = perform_dns_lookup('hsbc.co.uk', headless=True, screenshot_dir='.')
    print(f"DNS Results for {result['fqdn']}:")
    print(result['raw_output'] or "<no raw output captured>")
    print("Screenshot saved to:", result['screenshot'])

# Async example (recommended for multiple domains):
async def _example_async_run():
    domains = ['www.apple.com', 'www.google.com', 'www.github.com']
    results = await perform_many_async(domains, headless=True, screenshot_dir='.')
    for r in results:
        print("\n" + "="*60)
        print("Results for:", r.get('fqdn'))
        print("="*60)
        print(r.get('raw_output') or "<no raw output captured>")
        print("Screenshot:", r.get('screenshot'))

# To run the async example in a notebook cell, uncomment and run:
# import asyncio
# asyncio.run(_example_async_run())

# Quick helper for interactive notebook usage:
def lookup_sync_interactive(domain, headless=True, screenshot_dir='.'):
    """Call sync function from a notebook cell and print results nicely."""
    res = perform_dns_lookup(domain, headless=headless, screenshot_dir=screenshot_dir)
    print(f"=== Results for {res['fqdn']} ===")
    print(res['raw_output'] or "<no raw output captured>")
    print("Screenshot:", res['screenshot'])
    return res



In [8]:
# Example: to invoke from notebook, run:
lookup_sync_interactive('hsbc.co.uk', headless=False, screenshot_dir='playwright_screens')

Error: It looks like you are using Playwright Sync API inside the asyncio loop.
Please use the Async API instead.

In [9]:
# Cell 1: Imports and synchronous DNS lookup function (Playwright sync API)
# Requirements:
# pip install playwright
# playwright install

from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
import time
import os
from pathlib import Path

def _find_domain_input_sync(page):
    """
    Try several locators to find the domain input field reliably.
    Returns a locator or raises RuntimeError if not found.
    """
    candidates = [
        lambda: page.get_by_placeholder('Name'),
        lambda: page.get_by_placeholder('Enter domain'),
        lambda: page.get_by_label('Domain name'),
        lambda: page.locator('input[name="domain"]'),
        lambda: page.locator('input[id*="domain"]'),
        lambda: page.locator('input[type="text"]'),
        lambda: page.locator('xpath=//input[contains(@placeholder,"Name") or contains(@placeholder,"domain") or contains(@aria-label,"domain")]'),
    ]
    for get in candidates:
        try:
            loc = get()
            # ensure visible / actionable
            if loc and getattr(loc, "count", lambda: 1)() and loc.is_visible():
                return loc
        except Exception:
            continue
    # fallback: return first input element
    try:
        fallback = page.locator('input').first
        if fallback and getattr(fallback, "count", lambda: 1)():
            return fallback
    except Exception:
        pass
    raise RuntimeError("Could not locate domain input field on the page")

def _click_dig_button_sync(page):
    """
    Try several ways to click the Dig/Submit button.
    Returns True if clicked, False otherwise.
    """
    tries = [
        lambda: page.get_by_role('button', name='Dig'),
        lambda: page.get_by_role('button', name='DIG'),
        lambda: page.get_by_text('Dig'),
        lambda: page.get_by_text('DIG'),
        lambda: page.locator('button[type="submit"]'),
        lambda: page.locator('button:has-text("Dig")'),
    ]
    for get in tries:
        try:
            btn = get()
            if btn and getattr(btn, "count", lambda: 1)() and btn.is_visible():
                btn.click()
                return True
        except Exception:
            continue
    return False

def perform_dns_lookup(fqdn, headless=True, screenshot_dir='.', timeout_ms=30000):
    """
    Perform a DNS lookup for fqdn using Google Admin Toolbox Dig (sync).

    Args:
      fqdn (str): Fully qualified domain name to query.
      headless (bool): Whether to run browser headless.
      screenshot_dir (str): Where to write screenshots.
      timeout_ms (int): Navigation / wait timeout in milliseconds.

    Returns:
      dict: {'fqdn', 'raw_output', 'screenshot'}
    """
    screenshot_dir = Path(screenshot_dir)
    screenshot_dir.mkdir(parents=True, exist_ok=True)
    screenshot_path = screenshot_dir / f"{fqdn.replace('.', '_')}_dns_results.png"
    raw_output = None

    with sync_playwright() as playwright:
        browser = playwright.chromium.launch(headless=headless)
        try:
            page = browser.new_page()
            # Navigate to the tool (use the hash for ANY default)
            page.goto('https://toolbox.googleapps.com/apps/dig/#ANY/', timeout=timeout_ms)
            page.wait_for_load_state('networkidle', timeout=timeout_ms)

            # Find domain input and fill value
            try:
                domain_input = _find_domain_input_sync(page)
                # ensure it's empty then fill
                try:
                    domain_input.fill('')  # clear if supported
                except Exception:
                    pass
                domain_input.fill(fqdn)
            except Exception:
                # best-effort: try evaluating JS to set value
                page.evaluate("() => { const i = document.querySelector('input'); if(i) i.value = ''; }")
                page.evaluate(f"() => {{ const i = document.querySelector('input'); if(i) i.value = '{fqdn}'; }}")

            # Click dig/submit
            clicked = _click_dig_button_sync(page)
            if not clicked:
                # fallback: press Enter in the input
                try:
                    domain_input.press('Enter')
                except Exception:
                    pass

            # Wait for results: look for a <pre> or .results area
            try:
                page.wait_for_selector('pre', timeout=timeout_ms)
            except PlaywrightTimeoutError:
                # sometimes results appear inside an element with class or id; do a small wait
                time.sleep(2)

            # Try clicking "Raw View" if present
            try:
                raw_view = page.get_by_text('Raw View')
                if raw_view and raw_view.is_visible():
                    raw_view.click()
                    time.sleep(0.8)
            except Exception:
                pass

            # Extract raw output from <pre> or fallback to a results container
            try:
                locator = page.locator('pre')
                if getattr(locator, "count", lambda: 0)():
                    raw_output = locator.first.text_content(timeout=3000)
            except Exception:
                raw_output = None

            if not raw_output:
                # fallback selectors
                for sel in ['.results', '.dig-output', '#raw', '.output', 'textarea']:
                    try:
                        loc = page.locator(sel)
                        if getattr(loc, "count", lambda: 0)():
                            raw_output = loc.first.text_content(timeout=2000)
                            if raw_output:
                                break
                    except Exception:
                        continue

            # Take screenshot
            try:
                page.screenshot(path=str(screenshot_path), full_page=True)
            except Exception:
                # try smaller screenshot
                page.screenshot(path=str(screenshot_path))

        finally:
            browser.close()

    return {
        'fqdn': fqdn,
        'raw_output': raw_output,
        'screenshot': str(screenshot_path)
    }

In [10]:
# Cell 2: Async version and concurrent runner (Playwright async API)
# Requirements:
# pip install playwright
# playwright install

import asyncio
from playwright.async_api import async_playwright, TimeoutError as AsyncTimeoutError
from pathlib import Path

async def _find_domain_input_async(page):
    candidates = [
        lambda: page.get_by_placeholder('Name'),
        lambda: page.get_by_placeholder('Enter domain'),
        lambda: page.get_by_label('Domain name'),
        lambda: page.locator('input[name="domain"]'),
        lambda: page.locator('input[id*="domain"]'),
        lambda: page.locator('input[type="text"]'),
        lambda: page.locator('xpath=//input[contains(@placeholder,"Name") or contains(@placeholder,"domain") or contains(@aria-label,"domain")]'),
    ]
    for get in candidates:
        try:
            loc = get()
            if loc and (await getattr(loc, "count")()) and await loc.is_visible():
                return loc
        except Exception:
            continue
    # fallback
    try:
        fallback = page.locator('input').first
        if await getattr(fallback, "count")():
            return fallback
    except Exception:
        pass
    raise RuntimeError("Could not locate domain input field (async)")

async def _click_dig_button_async(page):
    tries = [
        lambda: page.get_by_role('button', name='Dig'),
        lambda: page.get_by_text('Dig'),
        lambda: page.locator('button[type="submit"]'),
        lambda: page.locator('button:has-text("Dig")'),
    ]
    for get in tries:
        try:
            btn = get()
            if btn and (await getattr(btn, "count")()) and await btn.is_visible():
                await btn.click()
                return True
        except Exception:
            continue
    return False

async def perform_dns_lookup_async(fqdn, headless=True, screenshot_dir='.', timeout_ms=30000):
    """
    Async DNS lookup using Playwright async API.
    """
    screenshot_dir = Path(screenshot_dir)
    screenshot_dir.mkdir(parents=True, exist_ok=True)
    screenshot_path = screenshot_dir / f"{fqdn.replace('.', '_')}_dns_results.png"
    raw_output = None

    async with async_playwright() as playwright:
        browser = await playwright.chromium.launch(headless=headless)
        page = await browser.new_page()
        try:
            await page.goto('https://toolbox.googleapps.com/apps/dig/#ANY/', timeout=timeout_ms)
            await page.wait_for_load_state('networkidle', timeout=timeout_ms)

            try:
                domain_input = await _find_domain_input_async(page)
                try:
                    await domain_input.fill('')
                except Exception:
                    pass
                await domain_input.fill(fqdn)
            except Exception:
                # fallback JS set
                await page.evaluate("() => { const i = document.querySelector('input'); if(i) i.value = ''; }")
                await page.evaluate(f"() => {{ const i = document.querySelector('input'); if(i) i.value = '{fqdn}'; }}")

            clicked = await _click_dig_button_async(page)
            if not clicked:
                try:
                    await domain_input.press('Enter')
                except Exception:
                    pass

            # Wait for results
            try:
                await page.wait_for_selector('pre', timeout=timeout_ms)
            except AsyncTimeoutError:
                await asyncio.sleep(1.5)

            # Click raw view
            try:
                raw_view = page.get_by_text('Raw View')
                if await raw_view.is_visible():
                    await raw_view.click()
                    await asyncio.sleep(0.6)
            except Exception:
                pass

            # Extract raw output
            try:
                locator = page.locator('pre')
                if await getattr(locator, "count")():
                    raw_output = await locator.first.text_content(timeout=2000)
            except Exception:
                raw_output = None

            if not raw_output:
                for sel in ['.results', '.dig-output', '#raw', '.output', 'textarea']:
                    try:
                        loc = page.locator(sel)
                        if await getattr(loc, "count")():
                            raw_output = await loc.first.text_content(timeout=1000)
                            if raw_output:
                                break
                    except Exception:
                        continue

            # screenshot
            try:
                await page.screenshot(path=str(screenshot_path), full_page=True)
            except Exception:
                await page.screenshot(path=str(screenshot_path))

        finally:
            await browser.close()

    return {'fqdn': fqdn, 'raw_output': raw_output, 'screenshot': str(screenshot_path)}

async def perform_many_async(domains, headless=True, screenshot_dir='.', concurrency=4):
    """
    Run multiple lookups concurrently (bounded concurrency).
    """
    sem = asyncio.Semaphore(concurrency)
    results = []

    async def worker(domain):
        async with sem:
            return await perform_dns_lookup_async(domain, headless=headless, screenshot_dir=screenshot_dir)

    tasks = [asyncio.create_task(worker(d)) for d in domains]
    for t in asyncio.as_completed(tasks):
        try:
            res = await t
            results.append(res)
        except Exception as e:
            results.append({'fqdn': None, 'error': str(e)})
    return results

In [11]:
# Cell 3: Usage examples (run these cells manually in your notebook)

def lookup_sync_interactive(domain, headless=True, screenshot_dir='.'):
    """Call sync function from a notebook cell and print results nicely."""
    res = perform_dns_lookup(domain, headless=headless, screenshot_dir=screenshot_dir)
    print(f"=== Results for {res['fqdn']} ===")
    print(res['raw_output'] or "<no raw output captured>")
    print("Screenshot:", res['screenshot'])
    return res

# Async example helper:
async def _example_async_run():
    domains = ['www.apple.com', 'hsbc.co.uk']
    results = await perform_many_async(domains, headless=True, screenshot_dir='playwright_screens')
    for r in results:
        print("\n" + "="*60)
        print("Results for:", r.get('fqdn'))
        print("="*60)
        print(r.get('raw_output') or "<no raw output captured>")
        print("Screenshot:", r.get('screenshot'))

# -----------------------------------------------------------------------------
# Active example invocation (uncomment or run as-is in a notebook).
# This line will execute a synchronous lookup and write a screenshot to 'playwright_screens'.
# Set headless=True to avoid opening a visible browser window.
# -----------------------------------------------------------------------------
# Example call: change domain/headless/screenshot_dir as you like
lookup_sync_interactive('www.apple.com', headless=False, screenshot_dir='playwright_screens')

# To run the async example inside notebook, you could run:
# import asyncio
# asyncio.run(_example_async_run())

Error: It looks like you are using Playwright Sync API inside the asyncio loop.
Please use the Async API instead.