# core

> Setting up Playwright and adding helper methods to its objects, such as `Page` and `Locator`

In [None]:
#| default_exp core

In [None]:
#| hide
%load_ext autoreload
%autoreload 2
from nbdev.showdoc import *

In [None]:
#| export
from playwright.async_api import  *
from playwright_stealth import stealth_async
from typing import AsyncIterator, List, Tuple, Optional
from contextlib import asynccontextmanager
from dataclasses import dataclass
from fastcore.all import *  # For delegates
from fastcore.utils import *  # For log_args
from fastcore.xtras import * 
import traceback
from datetime import datetime
from functools import partialmethod
from html2text import HTML2Text

In [None]:
#| export 
from functools import wraps
import asyncio

def retry_async(max_retries:int=3, backoff_base:float=1.5):
    """
    Decorator for retrying an asynchronous function upon exception.

    The wrapped coroutine function is awaited up to `max_retries` times. After
    failed attempt number `attempt` (0-based) the wrapper sleeps for
    `backoff_base ** attempt` seconds before trying again. If all attempts
    fail, the exception from the last attempt is re-raised.

    Raises `ValueError` when `max_retries` is smaller than 1, because the
    wrapped function would otherwise never be called and the wrapper would
    silently return `None`.
    """
    if max_retries < 1:
        raise ValueError(f"max_retries must be at least 1, got {max_retries}")

    def _retry(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            for attempt in range(max_retries):
                try: return await func(*args, **kwargs)
                except Exception:
                    # Bare `raise` re-raises the active exception with its traceback untouched
                    if attempt == max_retries - 1: raise
                    await asyncio.sleep(backoff_base ** attempt)
        return wrapper
    return _retry

## setup_browser
Set up the browser by launching Chromium and creating the context and page objects that go with it.

In [None]:
#| export 
@dataclass
class BrowserResources:
    """Bundle of the Playwright objects that `setup_browser` opens and later closes."""
    # Every handle starts out as None and is filled in step by step by `setup_browser`.
    pw_obj: Optional[Playwright] = None  # result of `async_playwright().start()`
    brow: Optional[Browser] = None  # browser returned by `chromium.launch`
    ctx: Optional[BrowserContext] = None  # context that all pages are created in
    pages: Optional[List[Page]] = None  # pages created so far; None until the context exists
    is_valid: bool = False # for error handling: set to True once every requested page was created

class PageCreationError(Exception):
    """Raised when a page fails to be created."""
class BrowserCleanupError(Exception):
    """Raised when browser resources fail to cleanup."""

@asynccontextmanager
async def setup_browser(*args, n: int = 1, stealth: bool = False, **kwargs) -> AsyncIterator[BrowserResources]:
    """
    Async context manager that launches Chromium and yields a `BrowserResources` with `n` pages.

    `*args` and `**kwargs` are forwarded unchanged to `chromium.launch`.
    When `stealth` is true, `stealth_async` is applied to every page before it is handed out.

    Raises:
        ValueError: `n` is not a positive number.
        PageCreationError: a page could not be created (the underlying error is chained as the cause).
        BrowserCleanupError: one or more resources failed to close on exit.
    """
    # Validate before touching Playwright so nothing needs cleaning up on bad input
    if n <= 0:
        raise ValueError(f"Invalid number of pages: {n}")

    obj = BrowserResources()
    try:
        obj.pw_obj = await async_playwright().start()
        obj.brow = await obj.pw_obj.chromium.launch(*args, **kwargs)
        obj.ctx = await obj.brow.new_context(
            ignore_https_errors=True,
            bypass_csp=True,
            accept_downloads=True,
            storage_state={'cookies': [], 'origins': []})
        obj.pages = []

        for idx in range(n):
            try:
                pg = await obj.ctx.new_page()
                if stealth: await stealth_async(pg)
                obj.pages.append(pg)
            except Exception as e:
                raise PageCreationError(f"Failed to create page {idx}") from e

        obj.is_valid = True
        yield obj

    finally:
        # Close in reverse order of creation. Each close is attempted on its own so that
        # one failure (e.g. an already-closed page) does not leak the remaining resources.
        closers = [pg.close for pg in (obj.pages or [])]
        if obj.ctx: closers.append(obj.ctx.close)
        if obj.brow: closers.append(obj.brow.close)
        if obj.pw_obj: closers.append(obj.pw_obj.stop)

        errors = []
        for close in closers:
            try: await close()
            except Exception as e: errors.append(e)

        if errors:
            details = "; ".join(str(e) for e in errors)
            raise BrowserCleanupError(f"Failed to cleanup browser resources: {details}") from errors[0]

In [None]:
try:
        async with setup_browser(n=0, stealth=False) as obj:
                if obj.is_valid:
                        ...
except ValueError as e:
        assert str(e) == "Invalid number of pages: 0"

In [None]:
# Smoke test (needs network access): open a single page and load example.org.
async with setup_browser(n=1, stealth=False) as obj:
        if obj.is_valid:
            # `pg` holds whatever `goto` returned (presumably the navigation response), not the page itself
            pg = await obj.pages[0].goto('http://example.org')
            assert  pg.status == 200

## Monkey patching `Page`

In [None]:
#| export
async def _page_ready(page: Page, pause=50, timeout=5000):
    """
    Block until the main document of `page` looks ready.

    Waits for the `domcontentloaded` and `networkidle` load states in turn,
    pauses for `pause`, then gives a `meta` element up to `timeout` to become
    attached. A timeout on that last step is ignored.
    """
    for load_state in ('domcontentloaded', 'networkidle'):
        await page.wait_for_load_state(load_state)
    await page.wait_for_timeout(pause)
    try:
        await page.wait_for_selector('meta', state="attached", timeout=timeout)
    except TimeoutError:
        # A page without a `meta` element is still treated as ready
        return

async def _frames_ready(page: Page, pause=50, timeout=5000):
    """
    Block until every visible iframe on `page` has finished loading.

    Each iframe element is awaited until visible, followed by a `pause`;
    if it exposes a content frame, that frame must reach the
    `domcontentloaded` and `networkidle` states within `timeout`.
    """
    # `or ()` keeps the "nothing to do" case a plain no-op
    for handle in (await page.query_selector_all('iframe:visible')) or ():
        await handle.wait_for_element_state('visible', timeout=timeout)
        await page.wait_for_timeout(pause)
        inner = await handle.content_frame()
        if not inner:
            continue
        for load_state in ('domcontentloaded', 'networkidle'):
            await inner.wait_for_load_state(load_state, timeout=timeout)

async def wait_page(page: Page, pause=50, timeout=5000):
    """
    Monkey patched onto `Page` as `Page.wait`.
    Waits for the page itself and then for its visible iframes to be ready.
    """
    await _page_ready(page, pause=pause, timeout=timeout)
    await _frames_ready(page, pause=pause, timeout=timeout)

Page.wait = partialmethod(wait_page)

##  

In [None]:
# Smoke test (needs network access): the patched `Page.wait` runs after a navigation without raising.
async with setup_browser(n=1, headless=True) as obj:
        if obj.is_valid:
                # `pg` holds the value returned by `goto`; the page object stays `obj.pages[0]`
                pg = await obj.pages[0].goto('http://example.org')
                await obj.pages[0].wait()
                assert  pg.status == 200

In [None]:
#| export 
async def find_ele(page: Page, locator: str)->Optional[List[Locator]]:
    """
    Resolve `locator` on `page` and return all matching elements as a list.
    When nothing matches, or the lookup itself fails, the problem is printed
    and `None` is returned instead.
    """
    try:
        found = await page.locator(locator).all()
        if not found:
            # Routed through the handler below so that a miss and a failure are reported alike
            raise ValueError(f"Element not found.")
    except Exception as err:
        print(f"Error find : @{page.url}  for {locator} :->  {err}  ")
        return None
    return found

Page.find_ele = partialmethod(find_ele)

In [None]:
# Needs network access: `find_ele` returns a non-empty list for a match and None for a miss.
async with setup_browser(n=1, headless=True) as obj:
    if obj.is_valid:
        await obj.pages[0].goto('https://nbdev.fast.ai/')
        await obj.pages[0].wait()
        assert len(await obj.pages[0].find_ele("//span[contains(text(), 'Blog')]") ) != 0
        # The miss below also prints an "Error find" line as a side effect
        assert await obj.pages[0].find_ele("//span[contains(text(), 'blah')]") is None

Error find : @https://nbdev.fast.ai/  for //span[contains(text(), 'blah')] :->  Element not found.  


In [None]:
#| export 
from  urllib.parse import unquote
# Tag name -> attribute selector naming the attribute that carries the element's URL
LINK_SELECTORS = {
    'a': '[href]',
    'iframe': '[src]',
    'frame': '[src]',
    'embed': '[src]',
    'object': '[data]',
    'img': '[src]',
    'link': '[href]',
    'script': '[src]',
    'area': '[href]',
    'video': '[src]'
}
async def find_all_links(page: Page) -> List[str]:
    """
    Collect the URLs referenced by link-like elements on `page`.

    Elements are selected with the tag/attribute pairs in `LINK_SELECTORS`
    (e.g. `a[href]`, `img[src]`). For each match the browser reports its
    `href`, `src` or `data` property, and every non-empty URL is returned
    percent-decoded, in the order the elements were matched.
    """
    # Combine tag and attribute filter; joining the dict alone would only yield bare tag names
    selector = ', '.join(f'{tag}{attr}' for tag, attr in LINK_SELECTORS.items())

    urls = await page.locator(selector).evaluate_all('''
        elements => elements.map(el => el.href || el.src || el.data || null)
    ''')
    # Keep real strings only: missing values and non-string properties cannot be unquoted
    return [unquote(url) for url in urls if url and isinstance(url, str)]

Page.all_links = partialmethod(find_all_links)

In [None]:
async with setup_browser(n=1, headless=True) as obj:
        if obj.is_valid:
            pg = obj.pages[0]
            await pg.goto('https://nbdev.fast.ai/',timeout= 10000)
            await pg.wait(10000)

            links = await find_all_links(obj.pages[0])
            assert type(links) == list

## Monkey patching `Locator`

In [None]:
#| export 
async def left_click(element:Locator, timeout=5000):
    """
    Perform a single click on `element`.
    Returns True when the click went through; any failure is printed and reported as False.
    """
    try:
        await element.wait_for(state='visible', timeout=timeout)
        await element.scroll_into_view_if_needed()  # bring the element into view before clicking
        await element.click()
    except Exception as err:
        print(f"Click attempt failed: {err}")
        return False
    else:
        return True
Locator.left_click = partialmethod(left_click)

In [None]:
# Needs network access: clicking the 'Course Details' link should navigate away from `url`.
url = 'https://solveit.fast.ai/'
# NOTE(review): this is the only cell that launches with headless=False — confirm that is intended
async with setup_browser(n=1, headless=False) as obj:
    if obj.is_valid:
        await obj.pages[0].goto(url)
        await obj.pages[0].wait()
        loc = await obj.pages[0].find_ele("//a[contains(text(), 'Course Details')]") 
        assert len(loc) != 0
                        
        await loc[0].left_click()
        assert url != obj.pages[0].url

In [None]:
#| export 
async def enter_txt(element: Locator, text: str, timeout: int = 5000) -> bool:
    """
    Type `text` into the given `Locator` element, replacing what was there.
    Returns True on success; any failure is printed and reported as False.
    """
    succeeded = False
    try:
        await element.wait_for(state='visible', timeout=timeout)
        await element.scroll_into_view_if_needed()
        await element.focus()
        # First fill empties the field, second one writes the new text
        for value in ('', text):
            await element.fill(value)
        succeeded = True
    except Exception as err:
        print(f"Type operation failed: {err}")
    return succeeded

Locator.enter_txt = partialmethod(enter_txt)

In [None]:
# Needs network access: click the first button, then type into the first input and read the value back.
async with setup_browser(n=1, headless=True) as obj:
    if obj.is_valid:
        await obj.pages[0].goto('https://nbdev.fast.ai/')
        await obj.pages[0].wait()
        loc = await obj.pages[0].find_ele("//button[1]") 
        assert len(loc) != 0
                        
        await loc[0].left_click()
        await obj.pages[0].wait()
        # NOTE(review): presumably the button click reveals the input looked up here — confirm
        inp =  await obj.pages[0].find_ele("//input") 
        await inp[0].left_click()
        await inp[0].enter_txt("type_text")

        assert  await inp[0].input_value() == "type_text"

In [None]:
#| export 
async def get_text(element: Locator, timeout: int = 5000) -> Optional[str]:
    """
    Read the text content of a `Locator` element once it is visible.
    Any failure is printed and `None` is returned.
    """
    content = None
    try:
        await element.wait_for(state='visible', timeout=timeout)
        content = await element.text_content()
    except Exception as err:
        print(f"Get text failed: {err}")
    return content

Locator.get_text = partialmethod(get_text)

In [None]:
# Needs network access: the first link with role="button" on the nbdev home page reads "Get started".
async with setup_browser(n=1, headless=True) as obj:
    if obj.is_valid:
        await obj.pages[0].goto('https://nbdev.fast.ai/')
        await obj.pages[0].wait()
        loc = await obj.pages[0].find_ele('//a[@role="button"][1]') 
        assert len(loc) != 0
                        
        assert await loc[0].get_text() == "Get started"

In [1]:
#| hide
# Run nbdev's export step; presumably this writes the `#| export` cells to the module named by `#| default_exp`.
import nbdev; nbdev.nbdev_export()

Note nbdev2 no longer supports nbdev1 syntax. Run `nbdev_migrate` to upgrade.
See https://nbdev.fast.ai/getting_started.html for more information.
  warn(f"Notebook '{nbname}' uses `#|export` without `#|default_exp` cell.\n"
