diff --git a/examples/cdp_mode/playwright/raw_seatgeek_sync.py b/examples/cdp_mode/playwright/raw_seatgeek_sync.py new file mode 100644 index 00000000000..5f64a307ab3 --- /dev/null +++ b/examples/cdp_mode/playwright/raw_seatgeek_sync.py @@ -0,0 +1,24 @@ +from playwright.sync_api import sync_playwright +from seleniumbase import sb_cdp + +sb = sb_cdp.Chrome(locale="en", ad_block=True) +endpoint_url = sb.get_endpoint_url() + +with sync_playwright() as p: + browser = p.chromium.connect_over_cdp(endpoint_url) + context = browser.contexts[0] + page = context.pages[0] + page.goto("https://seatgeek.com/") + input_field = 'input[name="search"]' + page.wait_for_selector(input_field) + sb.sleep(1.6) + query = "Jerry Seinfeld" + sb.press_keys(input_field, query) + sb.sleep(1.6) + page.click("li#active-result-item") + sb.sleep(4.2) + print('*** SeatGeek Search for "%s":' % query) + items = page.locator('[data-testid="listing-item"]') + for i in range(items.count()): + item_text = items.nth(i).inner_text() + print(item_text.replace("\n\n", "\n")) diff --git a/examples/cdp_mode/raw_async.py b/examples/cdp_mode/raw_async.py index 745070feb4b..0b42701b86d 100644 --- a/examples/cdp_mode/raw_async.py +++ b/examples/cdp_mode/raw_async.py @@ -6,7 +6,7 @@ async def main(): - url = "seleniumbase.io/simple/login" + url = "https://seleniumbase.io/simple/login" driver = await cdp_driver.start_async() page = await driver.get(url, lang="en") print(await page.get_title()) diff --git a/examples/cdp_mode/raw_basic_async.py b/examples/cdp_mode/raw_basic_async.py index 5c3c1f6c536..bbba2e8d26a 100644 --- a/examples/cdp_mode/raw_basic_async.py +++ b/examples/cdp_mode/raw_basic_async.py @@ -4,7 +4,7 @@ async def main(): - url = "seleniumbase.io/simple/login" + url = "https://seleniumbase.io/simple/login" driver = await cdp_driver.start_async() page = await driver.get(url, lang="en") print(await page.get_title()) @@ -21,7 +21,6 @@ async def main(): driver.stop() if __name__ == "__main__": - # Call an 
async function with awaited methods loop = asyncio.new_event_loop() with decorators.print_runtime("raw_basic_async.py"): loop.run_until_complete(main()) diff --git a/examples/cdp_mode/raw_basic_cdp.py b/examples/cdp_mode/raw_basic_cdp.py new file mode 100644 index 00000000000..126ddcb8360 --- /dev/null +++ b/examples/cdp_mode/raw_basic_cdp.py @@ -0,0 +1,17 @@ +from seleniumbase import sb_cdp + +url = "https://seleniumbase.io/simple/login" +sb = sb_cdp.Chrome(url) +sb.type("#username", "demo_user") +sb.type("#password", "secret_pass") +sb.click('a:contains("Sign in")') +sb.assert_exact_text("Welcome!", "h1") +sb.assert_element("img#image1") +sb.highlight("#image1") +top_nav = sb.find_element("div.topnav") +links = top_nav.query_selector_all("a") +for nav_item in links: + print(nav_item.text) +sb.click_link("Sign out") +sb.assert_text("signed out", "#top_message") +sb.driver.stop() diff --git a/examples/cdp_mode/raw_cdp_login.py b/examples/cdp_mode/raw_cdp_login.py new file mode 100644 index 00000000000..062a994defc --- /dev/null +++ b/examples/cdp_mode/raw_cdp_login.py @@ -0,0 +1,25 @@ +from seleniumbase import decorators +from seleniumbase import sb_cdp + + +def main(): + url = "https://seleniumbase.io/simple/login" + sb = sb_cdp.Chrome(url) + sb.type("#username", "demo_user") + sb.type("#password", "secret_pass") + sb.click('a:contains("Sign in")') + sb.assert_exact_text("Welcome!", "h1") + sb.assert_element("img#image1") + sb.highlight("#image1") + top_nav = sb.find_element("div.topnav") + links = top_nav.query_selector_all("a") + for nav_item in links: + print(nav_item.text) + sb.click_link("Sign out") + sb.assert_text("signed out", "#top_message") + sb.driver.stop() + + +if __name__ == "__main__": + with decorators.print_runtime("raw_cdp_login.py"): + main() diff --git a/examples/cdp_mode/raw_cf.py b/examples/cdp_mode/raw_cf.py index 5be45bbdbd3..79dc5339a23 100644 --- a/examples/cdp_mode/raw_cf.py +++ b/examples/cdp_mode/raw_cf.py @@ -4,13 +4,13 @@ with 
SB(uc=True, test=True, locale="en", guest=True) as sb: url = "https://www.cloudflare.com/login" sb.activate_cdp_mode(url) - sb.sleep(4) + sb.sleep(3) sb.uc_gui_handle_captcha() # PyAutoGUI press Tab and Spacebar - sb.sleep(2) + sb.sleep(3) with SB(uc=True, test=True, locale="en", guest=True) as sb: url = "https://www.cloudflare.com/login" sb.activate_cdp_mode(url) sb.sleep(4) sb.uc_gui_click_captcha() # PyAutoGUI click. (Linux needs it) - sb.sleep(2) + sb.sleep(3) diff --git a/examples/cdp_mode/raw_cf_captcha.py b/examples/cdp_mode/raw_cf_captcha.py new file mode 100644 index 00000000000..7c91735dd03 --- /dev/null +++ b/examples/cdp_mode/raw_cf_captcha.py @@ -0,0 +1,8 @@ +from seleniumbase import SB + +with SB(uc=True, test=True, guest=True) as sb: + url = "https://www.cloudflare.com/login" + sb.activate_cdp_mode(url) + sb.sleep(3) + sb.solve_captcha() + sb.sleep(3) diff --git a/examples/cdp_mode/raw_homedepot.py b/examples/cdp_mode/raw_homedepot.py new file mode 100644 index 00000000000..9cba9eb94f1 --- /dev/null +++ b/examples/cdp_mode/raw_homedepot.py @@ -0,0 +1,34 @@ +from seleniumbase import SB + +with SB(uc=True, test=True, ad_block=True) as sb: + url = "https://www.homedepot.com/" + sb.activate_cdp_mode(url) + sb.sleep(1.8) + search_box = "input#typeahead-search-field-input" + search = "Computer Chair" + category = "Gaming Chairs" + required_text = "Chair" + sb.click(search_box) + sb.sleep(1.2) + sb.press_keys(search_box, search) + sb.sleep(0.6) + sb.click("button#typeahead-search-icon-button") + sb.sleep(3.8) + sb.click('a[aria-label="%s"]' % category) + sb.sleep(3.2) + print('*** Home Depot Search for "%s":' % search) + print(' (Results must contain "%s".)' % required_text) + unique_item_text = [] + items = sb.find_elements('div[data-testid="product-pod"]') + for item in items: + if required_text in item.text: + description = item.querySelector( + 'span[data-testid="attribute-product-label"]' + ) + if description and description.text not in 
unique_item_text: + unique_item_text.append(description.text) + print("* " + description.text) + price = item.querySelector('[class*="sm:sui-text-4xl"]') + if price: + price_text = "$%s" % price.text + print(" (" + price_text + ")") diff --git a/examples/cdp_mode/raw_mobile_async.py b/examples/cdp_mode/raw_mobile_async.py index dd1fc72d9f5..a6bfd024bbe 100644 --- a/examples/cdp_mode/raw_mobile_async.py +++ b/examples/cdp_mode/raw_mobile_async.py @@ -25,7 +25,6 @@ async def main(): driver.stop() if __name__ == "__main__": - # Call an async function with awaited methods loop = asyncio.new_event_loop() with decorators.print_runtime("raw_mobile_async.py"): loop.run_until_complete(main()) diff --git a/examples/cdp_mode/raw_priceline.py b/examples/cdp_mode/raw_priceline.py index 1ee38865fc9..3703bf4a8c7 100644 --- a/examples/cdp_mode/raw_priceline.py +++ b/examples/cdp_mode/raw_priceline.py @@ -1,12 +1,12 @@ from seleniumbase import SB -with SB(uc=True, test=True, locale="en", ad_block=True) as sb: +with SB(uc=True, test=True, locale="en", incognito=True) as sb: url = "https://www.priceline.com" sb.activate_cdp_mode(url) sb.sleep(2.5) sb.click('input[name="endLocation"]') sb.sleep(1.2) - location = "Portland, Oregon, US" + location = "Portland, OR" selection = "Oregon, United States" # (Dropdown option) sb.press_keys('input[name="endLocation"]', location) sb.sleep(1.5) diff --git a/examples/cdp_mode/raw_softpedia.py b/examples/cdp_mode/raw_softpedia.py new file mode 100644 index 00000000000..2777ab13202 --- /dev/null +++ b/examples/cdp_mode/raw_softpedia.py @@ -0,0 +1,26 @@ +from seleniumbase import SB + +with SB(uc=True, test=True, ad_block=True) as sb: + url = "https://www.softpedia.com/" + sb.activate_cdp_mode(url) + search_box = 'input[name="search_term"]' + search = "3D Model Lab" + sb.click(search_box) + sb.press_keys(search_box, search + "\n") + sb.sleep(2) + sb.remove_elements("#adcontainer1") + sb.sleep(2.5) + print('*** Softpedia Search for "%s":' % search) + 
links = [] + item_container = 'div[style="min-height:100px;"]' + sb.wait_for_element(item_container) + items = sb.find_elements(item_container) + for item in items: + result = item.querySelector("h4 a") + links.append(result.get_attribute("href")) + print("* " + result.text) + print(item.querySelector("p").get_attribute("title")) + for link in links: + sb.open(link) + sb.remove_elements("div.ad") + sb.sleep(2) diff --git a/help_docs/syntax_formats.md b/help_docs/syntax_formats.md index 9fe32f5381d..26013ccb530 100644 --- a/help_docs/syntax_formats.md +++ b/help_docs/syntax_formats.md @@ -32,8 +32,8 @@
async/await API is used. Here's an example:
```python
import asyncio
@@ -1053,9 +1053,33 @@ if __name__ == "__main__":
(See examples/cdp_mode/raw_basic_async.py for the test.)
-sb_cdp Sync API is used. Here's an example:
+
+```python
+from seleniumbase import sb_cdp
+
+url = "https://seleniumbase.io/simple/login"
+sb = sb_cdp.Chrome(url)
+sb.type("#username", "demo_user")
+sb.type("#password", "secret_pass")
+sb.click('a:contains("Sign in")')
+sb.assert_exact_text("Welcome!", "h1")
+sb.assert_element("img#image1")
+sb.highlight("#image1")
+top_nav = sb.find_element("div.topnav")
+links = top_nav.query_selector_all("a")
+for nav_item in links:
+ print(nav_item.text)
+sb.click_link("Sign out")
+sb.assert_text("signed out", "#top_message")
+sb.driver.stop()
+```
+
+(See examples/cdp_mode/raw_basic_cdp.py for the test.)
+
+Here's a Pure CDP Mode example that bypasses bot-detection to scrape data from a website:
```python
from seleniumbase import sb_cdp
diff --git a/mkdocs_build/requirements.txt b/mkdocs_build/requirements.txt
index 51eb1b0a35a..8e672b613e6 100644
--- a/mkdocs_build/requirements.txt
+++ b/mkdocs_build/requirements.txt
@@ -2,7 +2,7 @@
# Minimum Python version: 3.10 (for generating docs only)
regex>=2025.11.3
-pymdown-extensions>=10.18
+pymdown-extensions>=10.19
pipdeptree>=2.30.0
python-dateutil>=2.8.2
Markdown==3.10
diff --git a/seleniumbase/__version__.py b/seleniumbase/__version__.py
index 505895bdc30..6c126ba3490 100755
--- a/seleniumbase/__version__.py
+++ b/seleniumbase/__version__.py
@@ -1,2 +1,2 @@
# seleniumbase package
-__version__ = "4.45.1"
+__version__ = "4.45.2"
diff --git a/seleniumbase/core/browser_launcher.py b/seleniumbase/core/browser_launcher.py
index bfcd9bd273e..24e931a8367 100644
--- a/seleniumbase/core/browser_launcher.py
+++ b/seleniumbase/core/browser_launcher.py
@@ -2769,6 +2769,9 @@ def _set_chrome_options(
included_disabled_features.append("SidePanelPinning")
included_disabled_features.append("UserAgentClientHint")
included_disabled_features.append("DisableLoadExtensionCommandLineSwitch")
+ included_disabled_features.append("Bluetooth")
+ included_disabled_features.append("WebBluetooth")
+ included_disabled_features.append("UnifiedWebBluetooth")
included_disabled_features.append("WebAuthentication")
included_disabled_features.append("PasskeyAuth")
for item in extra_disabled_features:
@@ -4782,6 +4785,11 @@ def get_local_driver(
included_disabled_features.append(
"DisableLoadExtensionCommandLineSwitch"
)
+ included_disabled_features.append("Bluetooth")
+ included_disabled_features.append("WebBluetooth")
+ included_disabled_features.append("UnifiedWebBluetooth")
+ included_disabled_features.append("WebAuthentication")
+ included_disabled_features.append("PasskeyAuth")
for item in extra_disabled_features:
if item not in included_disabled_features:
included_disabled_features.append(item)
diff --git a/seleniumbase/core/sb_cdp.py b/seleniumbase/core/sb_cdp.py
index aad1d27ad0c..8e0dc607292 100644
--- a/seleniumbase/core/sb_cdp.py
+++ b/seleniumbase/core/sb_cdp.py
@@ -184,9 +184,13 @@ def get_rd_url(self):
and also applies nest-asyncio for nested event loops so
that SeleniumBase methods can be called from Playwright
without encountering event loop error messages such as:
- Cannot run the event loop while another loop is running."""
+ Cannot run the event loop while another loop is running.
+ Also sets NODE_NO_WARNINGS=1 in os.environ to hide this warning:
+ Deprecation: "url.parse() behavior is not standardized".
+ (github.com/microsoft/playwright-python/issues/3016)"""
import nest_asyncio
nest_asyncio.apply()
+ os.environ["NODE_NO_WARNINGS"] = "1"
driver = self.driver
if hasattr(driver, "cdp_base"):
driver = driver.cdp_base
diff --git a/seleniumbase/undetected/cdp_driver/browser.py b/seleniumbase/undetected/cdp_driver/browser.py
index 58e7fc96434..a375eb34734 100644
--- a/seleniumbase/undetected/cdp_driver/browser.py
+++ b/seleniumbase/undetected/cdp_driver/browser.py
@@ -633,7 +633,7 @@ async def start(self=None) -> Browser:
""" % (dashes, message, dashes)
)
self.connection = Connection(
- self.info.webSocketDebuggerUrl, _owner=self
+ self.info.webSocketDebuggerUrl, browser=self
)
if self.config.autodiscover_targets:
logger.info("Enabling autodiscover targets")
@@ -807,7 +807,7 @@ async def update_targets(self):
f"/{t.target_id}"
),
target=t,
- _owner=self,
+ browser=self,
)
)
await asyncio.sleep(0)
diff --git a/seleniumbase/undetected/cdp_driver/cdp_util.py b/seleniumbase/undetected/cdp_driver/cdp_util.py
index 6bb92657f4c..8b75d26ae69 100644
--- a/seleniumbase/undetected/cdp_driver/cdp_util.py
+++ b/seleniumbase/undetected/cdp_driver/cdp_util.py
@@ -719,18 +719,6 @@ def start_sync(*args, **kwargs) -> Browser:
loop = kwargs["loop"]
else:
loop = asyncio.new_event_loop()
- if "user_data_dir" in kwargs and kwargs["user_data_dir"]:
- headless = False
- if "headless" in kwargs:
- headless = kwargs["headless"]
- decoy_args = kwargs
- decoy_args["headless"] = True
- driver = loop.run_until_complete(start(**decoy_args))
- kwargs["headless"] = headless
- kwargs["user_data_dir"] = driver.config.user_data_dir
- time.sleep(0.2)
- driver.stop() # Due to Chrome-130, must stop & start
- time.sleep(0.1)
return loop.run_until_complete(start(*args, **kwargs))
diff --git a/seleniumbase/undetected/cdp_driver/config.py b/seleniumbase/undetected/cdp_driver/config.py
index 1f78f1148ed..31a29a1bd13 100644
--- a/seleniumbase/undetected/cdp_driver/config.py
+++ b/seleniumbase/undetected/cdp_driver/config.py
@@ -206,6 +206,7 @@ def __call__(self):
"OptimizationTargetPrediction,OptimizationGuideModelDownloading,"
"SidePanelPinning,UserAgentClientHint,PrivacySandboxSettings4,"
"OptimizationHintsFetching,InterestFeedContentSuggestions,"
+ "Bluetooth,WebBluetooth,UnifiedWebBluetooth,"
"DisableLoadExtensionCommandLineSwitch,"
"WebAuthentication,PasskeyAuth"
]
diff --git a/seleniumbase/undetected/cdp_driver/connection.py b/seleniumbase/undetected/cdp_driver/connection.py
index 35c83c22efb..3d192749112 100644
--- a/seleniumbase/undetected/cdp_driver/connection.py
+++ b/seleniumbase/undetected/cdp_driver/connection.py
@@ -184,13 +184,13 @@ def __init__(
self,
websocket_url=None,
target=None,
- _owner=None,
+ browser=None,
**kwargs,
):
super().__init__()
self._target = target
self.__count__ = itertools.count(0)
- self._owner = _owner
+ self.browser = browser
self.websocket_url: str = websocket_url
self.websocket = None
self.mapper = {}
@@ -426,8 +426,8 @@ async def send(
await self.aopen()
if not self.websocket or self.websocket.state is State.CLOSED:
return
- if self._owner:
- browser = self._owner
+ if self.browser:
+ browser = self.browser
if browser.config:
if browser.config.expert:
await self._prepare_expert()
@@ -610,11 +610,11 @@ async def listener_loop(self):
# Probably an event
try:
event = cdp.util.parse_json_event(message)
- event_tx = EventTransaction(event)
- if not self.connection.mapper:
- self.connection.__count__ = itertools.count(0)
- event_tx.id = next(self.connection.__count__)
- self.connection.mapper[event_tx.id] = event_tx
+ # event_tx = EventTransaction(event)
+ # if not self.connection.mapper:
+ # self.connection.__count__ = itertools.count(0)
+ # event_tx.id = next(self.connection.__count__)
+ # self.connection.mapper[event_tx.id] = event_tx
except Exception as e:
logger.info(
"%s: %s during parsing of json from event : %s"
@@ -639,12 +639,12 @@ async def listener_loop(self):
or inspect.iscoroutine(callback)
):
try:
- await callback(event, self.connection)
+ asyncio.create_task(callback(event, self))
except TypeError:
- await callback(event)
+ asyncio.create_task(callback(event))
else:
try:
- callback(event, self.connection)
+ callback(event, self)
except TypeError:
callback(event)
except Exception as e: