From c7cf942e81af23b816242361cde13fd4989ef9c3 Mon Sep 17 00:00:00 2001 From: Din Date: Thu, 4 Sep 2025 21:20:07 +0100 Subject: [PATCH 01/12] wip (attempt 0): don't enable rrweb if zone detected --- src/lmnr/sdk/browser/browser_use_cdp_otel.py | 20 +- src/lmnr/sdk/browser/cdp_utils.py | 259 +++++++++++++------ 2 files changed, 201 insertions(+), 78 deletions(-) diff --git a/src/lmnr/sdk/browser/browser_use_cdp_otel.py b/src/lmnr/sdk/browser/browser_use_cdp_otel.py index 910d63a6..4b4f5d04 100644 --- a/src/lmnr/sdk/browser/browser_use_cdp_otel.py +++ b/src/lmnr/sdk/browser/browser_use_cdp_otel.py @@ -1,3 +1,6 @@ +import asyncio +import uuid + from lmnr.sdk.client.asynchronous.async_client import AsyncLaminarClient from lmnr.sdk.browser.utils import with_tracer_and_client_wrapper from lmnr.version import __version__ @@ -12,7 +15,6 @@ from opentelemetry.trace import get_tracer, Tracer from typing import Collection from wrapt import wrap_function_wrapper -import uuid # Stable versions, e.g. 0.6.0, satisfy this condition too _instruments = ("browser-use >= 0.6.0rc1",) @@ -33,11 +35,9 @@ ] -@with_tracer_and_client_wrapper -async def _wrap( - tracer: Tracer, client: AsyncLaminarClient, to_wrap, wrapped, instance, args, kwargs -): - result = await wrapped(*args, **kwargs) +async def process_wrapped_result(result, instance, client, to_wrap): + if not instance._cached_browser_state_summary: + return if to_wrap.get("action") == "inject_session_recorder": is_registered = await is_recorder_present(result) @@ -50,6 +50,14 @@ async def _wrap( cdp_session = await instance.get_or_create_cdp_session(target_id) await take_full_snapshot(cdp_session) + +@with_tracer_and_client_wrapper +async def _wrap( + tracer: Tracer, client: AsyncLaminarClient, to_wrap, wrapped, instance, args, kwargs +): + result = await wrapped(*args, **kwargs) + asyncio.create_task(process_wrapped_result(result, instance, client, to_wrap)) + return result diff --git a/src/lmnr/sdk/browser/cdp_utils.py b/src/lmnr/sdk/browser/cdp_utils.py index a8136b67..d8e82269 100644 --- a/src/lmnr/sdk/browser/cdp_utils.py +++ b/src/lmnr/sdk/browser/cdp_utils.py @@ -458,51 +458,69 @@ }); return base64url.slice(base64url.indexOf(',') + 1); } - - window.lmnrRrweb.record({ - async emit(event) { - try { - const isLarge = isLargeEvent(event.type); - const compressedResult = isLarge ? - await compressLargeObject(event.data) : - await compressSmallObject(event.data); - - const base64Data = await bufferToBase64(compressedResult); - const eventToSend = { - ...event, - data: base64Data, - }; - window.lmnrRrwebEventsBatch.push(eventToSend); - } catch (error) { - console.warn('Failed to push event to batch', error); + + if (!window.lmnrStartedRecordingEvents) { + const detectionResults = { + hasZoneJs: !!(window.Zone || Object.keys(window).some(k => k.startsWith('__zone_symbol__'))), + mutationObserverState: { + current: window.MutationObserver, + isNative: window.MutationObserver.toString().includes('[native code]'), + hasZoneSymbols: false, + originalFound: null, + restorationMethod: null } - }, - recordCanvas: true, - collectFonts: true, - recordCrossOriginIframes: true, - maskInputOptions: { - password: true, - textarea: maskInputOptions.textarea || false, - text: maskInputOptions.text || false, - number: maskInputOptions.number || false, - select: maskInputOptions.select || false, - email: maskInputOptions.email || false, - tel: maskInputOptions.tel || false, + }; + + if (detectionResults.hasZoneJs) { + console.warn('Zone.js detected, skipping session recorder'); + return; } - }); - - function heartbeat() { - // Add heartbeat events - setInterval(() => { - window.lmnrRrweb.record.addCustomEvent('heartbeat', { - title: document.title, - url: document.URL, - }) - }, HEARTBEAT_INTERVAL - ); - } + window.lmnrRrweb.record({ + async emit(event) { + try { + const isLarge = isLargeEvent(event.type); + const compressedResult = isLarge ? + await compressLargeObject(event.data) : + await compressSmallObject(event.data); + + const base64Data = await bufferToBase64(compressedResult); + const eventToSend = { + ...event, + data: base64Data, + }; + window.lmnrRrwebEventsBatch.push(eventToSend); + } catch (error) { + console.warn('Failed to push event to batch', error); + } + }, + recordCanvas: true, + collectFonts: true, + recordCrossOriginIframes: true, + maskInputOptions: { + password: true, + textarea: maskInputOptions.textarea || false, + text: maskInputOptions.text || false, + number: maskInputOptions.number || false, + select: maskInputOptions.select || false, + email: maskInputOptions.email || false, + tel: maskInputOptions.tel || false, + } + }); - heartbeat(); + function heartbeat() { + // Add heartbeat events + setInterval(() => { + window.lmnrRrweb.record.addCustomEvent('heartbeat', { + title: document.title, + url: document.URL, + }) + }, HEARTBEAT_INTERVAL + ); + } + + heartbeat(); + window.lmnrStartedRecordingEvents = true; + } } """ @@ -534,10 +552,96 @@ def get_mask_input_setting() -> MaskInputOptions: ) -# browser_use.browser.session.CDPSession (browser-use >= 1.0.0) +async def is_error_page(cdp_session): + cdp_client = cdp_session.cdp_client + + try: + # Get the current page URL + result = await cdp_client.send.Runtime.evaluate( + { + "expression": "window.location.href", + "returnByValue": True, + }, + session_id=cdp_session.session_id, + ) + + url = result.get("result", {}).get("value", "") + + # Comprehensive list of browser error URLs + error_url_patterns = [ + # Chrome error pages + "chrome-error://", + "chrome://network-error/", + "chrome://network-errors/", + # Chrome crash and debugging pages + "chrome://crash/", + "chrome://crashdump/", + "chrome://kill/", + "chrome://hang/", + "chrome://shorthang/", + "chrome://gpuclean/", + "chrome://gpucrash/", + "chrome://gpuhang/", + "chrome://memory-exhaust/", + "chrome://memory-pressure-critical/", + "chrome://memory-pressure-moderate/", + "chrome://inducebrowsercrashforrealz/", + "chrome://inducebrowserdcheckforrealz/", + "chrome://inducebrowserheapcorruption/", + "chrome://heapcorruptioncrash/", + "chrome://badcastcrash/", + "chrome://ppapiflashcrash/", + "chrome://ppapiflashhang/", + "chrome://quit/", + "chrome://restart/", + # Firefox error pages + "about:neterror", + "about:certerror", + "about:blocked", + # Firefox crash and debugging pages + "about:crashcontent", + "about:crashparent", + "about:crashes", + "about:tabcrashed", + # Edge error pages (similar to Chrome) + "edge-error://", + "edge://crash/", + "edge://kill/", + "edge://hang/", + # Safari/WebKit error indicators (data URLs with error content) + "webkit-error://", + ] + + # Check if current URL matches any error pattern + if any(url.startswith(pattern) for pattern in error_url_patterns): + logger.debug(f"Detected browser error page from URL: {url}") + return True + + # Additional check for data URLs that might contain error pages + if url.startswith("data:") and any( + error_term in url.lower() + for error_term in ["error", "crash", "failed", "unavailable", "not found"] + ): + logger.debug(f"Detected error page from data URL: {url[:100]}...") + return True + + except Exception as e: + logger.debug(f"Error during session recorder injection: {e}") + return False + + +# browser_use.browser.session.CDPSession (browser-use >= 0.6.0) async def inject_session_recorder(cdp_session): cdp_client = cdp_session.cdp_client try: + # Check if this is an error page - if so, don't inject the recorder + try: + if await is_error_page(cdp_session): + logger.debug("Skipping session recorder injection on error page") + return + except Exception as e: + logger.debug(f"Failed to check if page is error page: {e}") + try: is_loaded = await is_recorder_present(cdp_session) except Exception as e: @@ -549,12 +653,14 @@ async def inject_session_recorder(cdp_session): async def load_session_recorder(): try: - await cdp_client.send.Runtime.evaluate( - { - "expression": f"({RRWEB_CONTENT})()", - "awaitPromise": True, - }, - session_id=cdp_session.session_id, + await asyncio.wait_for( + cdp_client.send.Runtime.evaluate( + { + "expression": f"({RRWEB_CONTENT})()", + }, + session_id=cdp_session.session_id, + ), + timeout=2.5, ) return True except Exception as e: @@ -563,26 +669,29 @@ async def load_session_recorder(): if not await retry_async( load_session_recorder, + retries=3, delay=1, - error_message="Failed to load session recorder", + error_message="Failed to load session recorder processor", ): return try: - await cdp_client.send.Runtime.evaluate( - { - "expression": f"({INJECT_PLACEHOLDER})({orjson.dumps(get_mask_input_setting()).decode("utf-8")})", - }, - session_id=cdp_session.session_id, + await asyncio.create_task( + cdp_client.send.Runtime.evaluate( + { + "expression": f"({INJECT_PLACEHOLDER})({orjson.dumps(get_mask_input_setting()).decode('utf-8')})", + }, + session_id=cdp_session.session_id, + ) ) except Exception as e: - logger.debug(f"Failed to inject session recorder placeholder: {e}") + logger.debug(f"Failed to inject recorder processor: {e}") except Exception as e: - logger.error(f"Error during session recorder injection: {e}") + logger.debug(f"Error during session recorder injection: {e}") -# browser_use.browser.session.CDPSession (browser-use >= 1.0.0) +# browser_use.browser.session.CDPSession (browser-use >= 0.6.0) @observe(name="cdp_use.session", ignore_input=True, ignore_output=True) async def start_recording_events( cdp_session, @@ -668,7 +777,7 @@ async def send_events_callback(event, cdp_session_id: str | None = None): register_on_target_created(cdp_session, lmnr_session_id, client) -# browser_use.browser.session.CDPSession (browser-use >= 1.0.0) +# browser_use.browser.session.CDPSession (browser-use >= 0.6.0) async def enable_target_discovery(cdp_session): cdp_client = cdp_session.cdp_client await cdp_client.send.Target.setDiscoverTargets( @@ -679,7 +788,7 @@ async def enable_target_discovery(cdp_session): ) -# browser_use.browser.session.CDPSession (browser-use >= 1.0.0) +# browser_use.browser.session.CDPSession (browser-use >= 0.6.0) def register_on_target_created( cdp_session, lmnr_session_id: str, client: AsyncLaminarClient ): @@ -692,21 +801,27 @@ def on_target_created(event, cdp_session_id: str | None = None): cdp_session.cdp_client.register.Target.targetCreated(on_target_created) -# browser_use.browser.session.CDPSession (browser-use >= 1.0.0) +# browser_use.browser.session.CDPSession (browser-use >= 0.6.0) async def is_recorder_present(cdp_session) -> bool: cdp_client = cdp_session.cdp_client - result = await cdp_client.send.Runtime.evaluate( - { - "expression": """(()=>{ - return typeof window.lmnrRrweb !== 'undefined'; - })()""", - }, - session_id=cdp_session.session_id, - ) - if result and "result" in result and "value" in result["result"]: - return result["result"]["value"] - return False + try: + result = await asyncio.wait_for( + cdp_client.send.Runtime.evaluate( + { + "expression": "typeof window.lmnrRrweb !== 'undefined'", + }, + session_id=cdp_session.session_id, + ), + timeout=1, + ) + if result and "result" in result and "value" in result["result"]: + return result["result"]["value"] + return False + except asyncio.TimeoutError: + return True + except Exception: + return True async def take_full_snapshot(cdp_session): From fcb5a112ad2692a7a20ddc97617414bb4058aa38 Mon Sep 17 00:00:00 2001 From: Din Date: Fri, 5 Sep 2025 00:27:08 +0100 Subject: [PATCH 02/12] wip (attempt 1): isolated world --- src/lmnr/sdk/browser/cdp_utils.py | 110 +++++++++--------------------- 1 file changed, 31 insertions(+), 79 deletions(-) diff --git a/src/lmnr/sdk/browser/cdp_utils.py b/src/lmnr/sdk/browser/cdp_utils.py index d8e82269..1588cfe0 100644 --- a/src/lmnr/sdk/browser/cdp_utils.py +++ b/src/lmnr/sdk/browser/cdp_utils.py @@ -1,3 +1,4 @@ +import uuid import orjson import logging import os @@ -17,6 +18,8 @@ OLD_BUFFER_TIMEOUT = 60 +frame_to_isolated_context_id = {} + current_dir = os.path.dirname(os.path.abspath(__file__)) with open(os.path.join(current_dir, "recorder", "record.umd.min.cjs"), "r") as f: RRWEB_CONTENT = f"() => {{ {f.read()} }}" @@ -552,96 +555,36 @@ def get_mask_input_setting() -> MaskInputOptions: ) -async def is_error_page(cdp_session): - cdp_client = cdp_session.cdp_client +async def get_isolated_context_id(cdp_session): + tree = await cdp_session.cdp_client.send.Page.getFrameTree( + session_id=cdp_session.session_id + ) + frame = tree.get("frameTree", {}).get("frame", {}) + frame_id = frame.get("id", str(uuid.uuid4())) + loader_id = frame.get("loaderI", str(uuid.uuid4())) + url = frame.get("url", "about:blank") - try: - # Get the current page URL - result = await cdp_client.send.Runtime.evaluate( + key = f"{frame_id}_{loader_id}_{url}" + + if key not in frame_to_isolated_context_id: + result = await cdp_session.cdp_client.send.Page.createIsolatedWorld( { - "expression": "window.location.href", - "returnByValue": True, + "frameId": frame_id, + "worldName": "laminar-isolated-context", }, session_id=cdp_session.session_id, ) - - url = result.get("result", {}).get("value", "") - - # Comprehensive list of browser error URLs - error_url_patterns = [ - # Chrome error pages - "chrome-error://", - "chrome://network-error/", - "chrome://network-errors/", - # Chrome crash and debugging pages - "chrome://crash/", - "chrome://crashdump/", - "chrome://kill/", - "chrome://hang/", - "chrome://shorthang/", - "chrome://gpuclean/", - "chrome://gpucrash/", - "chrome://gpuhang/", - "chrome://memory-exhaust/", - "chrome://memory-pressure-critical/", - "chrome://memory-pressure-moderate/", - "chrome://inducebrowsercrashforrealz/", - "chrome://inducebrowserdcheckforrealz/", - "chrome://inducebrowserheapcorruption/", - "chrome://heapcorruptioncrash/", - "chrome://badcastcrash/", - "chrome://ppapiflashcrash/", - "chrome://ppapiflashhang/", - "chrome://quit/", - "chrome://restart/", - # Firefox error pages - "about:neterror", - "about:certerror", - "about:blocked", - # Firefox crash and debugging pages - "about:crashcontent", - "about:crashparent", - "about:crashes", - "about:tabcrashed", - # Edge error pages (similar to Chrome) - "edge-error://", - "edge://crash/", - "edge://kill/", - "edge://hang/", - # Safari/WebKit error indicators (data URLs with error content) - "webkit-error://", - ] - - # Check if current URL matches any error pattern - if any(url.startswith(pattern) for pattern in error_url_patterns): - logger.debug(f"Detected browser error page from URL: {url}") - return True - - # Additional check for data URLs that might contain error pages - if url.startswith("data:") and any( - error_term in url.lower() - for error_term in ["error", "crash", "failed", "unavailable", "not found"] - ): - logger.debug(f"Detected error page from data URL: {url[:100]}...") - return True - - except Exception as e: - logger.debug(f"Error during session recorder injection: {e}") - return False + isolated_context_id = result["executionContextId"] + frame_to_isolated_context_id[key] = isolated_context_id + return isolated_context_id + else: + return frame_to_isolated_context_id[key] # browser_use.browser.session.CDPSession (browser-use >= 0.6.0) async def inject_session_recorder(cdp_session): cdp_client = cdp_session.cdp_client try: - # Check if this is an error page - if so, don't inject the recorder - try: - if await is_error_page(cdp_session): - logger.debug("Skipping session recorder injection on error page") - return - except Exception as e: - logger.debug(f"Failed to check if page is error page: {e}") - try: is_loaded = await is_recorder_present(cdp_session) except Exception as e: @@ -651,12 +594,16 @@ async def inject_session_recorder(cdp_session): if is_loaded: return + isolated_context_id = await get_isolated_context_id(cdp_session) + async def load_session_recorder(): try: + await asyncio.wait_for( cdp_client.send.Runtime.evaluate( { "expression": f"({RRWEB_CONTENT})()", + "contextId": isolated_context_id, }, session_id=cdp_session.session_id, ), @@ -680,6 +627,7 @@ async def load_session_recorder(): cdp_client.send.Runtime.evaluate( { "expression": f"({INJECT_PLACEHOLDER})({orjson.dumps(get_mask_input_setting()).decode('utf-8')})", + "contextId": isolated_context_id, }, session_id=cdp_session.session_id, ) @@ -804,12 +752,14 @@ def on_target_created(event, cdp_session_id: str | None = None): # browser_use.browser.session.CDPSession (browser-use >= 0.6.0) async def is_recorder_present(cdp_session) -> bool: cdp_client = cdp_session.cdp_client + isolated_context_id = await get_isolated_context_id(cdp_session) try: result = await asyncio.wait_for( cdp_client.send.Runtime.evaluate( { "expression": "typeof window.lmnrRrweb !== 'undefined'", + "contextId": isolated_context_id, }, session_id=cdp_session.session_id, ), @@ -826,6 +776,7 @@ async def is_recorder_present(cdp_session) -> bool: async def take_full_snapshot(cdp_session): cdp_client = cdp_session.cdp_client + isolated_context_id = await get_isolated_context_id(cdp_session) result = await cdp_client.send.Runtime.evaluate( { "expression": """(() => { @@ -840,6 +791,7 @@ async def take_full_snapshot(cdp_session): } return false; })()""", + "contextId": isolated_context_id, }, session_id=cdp_session.session_id, ) From db44831d9a57b8c39f1a5b948c81bd0f890cf657 Mon Sep 17 00:00:00 2001 From: Din Date: Fri, 5 Sep 2025 13:17:24 +0100 Subject: [PATCH 03/12] wip: isolated context continued --- src/lmnr/sdk/browser/browser_use_cdp_otel.py | 6 +- src/lmnr/sdk/browser/cdp_utils.py | 218 +++++++++++++++---- 2 files changed, 175 insertions(+), 49 deletions(-) diff --git a/src/lmnr/sdk/browser/browser_use_cdp_otel.py b/src/lmnr/sdk/browser/browser_use_cdp_otel.py index 4b4f5d04..dbb2b2fb 100644 --- a/src/lmnr/sdk/browser/browser_use_cdp_otel.py +++ b/src/lmnr/sdk/browser/browser_use_cdp_otel.py @@ -36,17 +36,16 @@ async def process_wrapped_result(result, instance, client, to_wrap): - if not instance._cached_browser_state_summary: - return - if to_wrap.get("action") == "inject_session_recorder": is_registered = await is_recorder_present(result) if not is_registered: await start_recording_events(result, str(uuid.uuid4()), client) if to_wrap.get("action") == "take_full_snapshot": + print("wrapped on switch tab event") target_id = result if target_id: + print(f"target id: {target_id}") cdp_session = await instance.get_or_create_cdp_session(target_id) await take_full_snapshot(cdp_session) @@ -63,6 +62,7 @@ async def _wrap( class BrowserUseInstrumentor(BaseInstrumentor): def __init__(self, async_client: AsyncLaminarClient): + print("BrowserUseInstrumentor with full snapshot logs") super().__init__() self.async_client = async_client diff --git a/src/lmnr/sdk/browser/cdp_utils.py b/src/lmnr/sdk/browser/cdp_utils.py index 1588cfe0..db637239 100644 --- a/src/lmnr/sdk/browser/cdp_utils.py +++ b/src/lmnr/sdk/browser/cdp_utils.py @@ -1,9 +1,9 @@ -import uuid -import orjson +import asyncio import logging +import orjson import os import time -import asyncio +import uuid from opentelemetry import trace @@ -17,8 +17,10 @@ logger = logging.getLogger(__name__) OLD_BUFFER_TIMEOUT = 60 +CDP_OPERATION_TIMEOUT_SECONDS = 10 -frame_to_isolated_context_id = {} +# CDP ContextId is int +frame_to_isolated_context_id: dict[str, int] = {} current_dir = os.path.dirname(os.path.abspath(__file__)) with open(os.path.join(current_dir, "recorder", "record.umd.min.cjs"), "r") as f: @@ -451,8 +453,6 @@ } } - setInterval(sendBatchIfReady, BATCH_TIMEOUT); - async function bufferToBase64(buffer) { const base64url = await new Promise(r => { const reader = new FileReader() @@ -463,21 +463,8 @@ } if (!window.lmnrStartedRecordingEvents) { - const detectionResults = { - hasZoneJs: !!(window.Zone || Object.keys(window).some(k => k.startsWith('__zone_symbol__'))), - mutationObserverState: { - current: window.MutationObserver, - isNative: window.MutationObserver.toString().includes('[native code]'), - hasZoneSymbols: false, - originalFound: null, - restorationMethod: null - } - }; + setInterval(sendBatchIfReady, BATCH_TIMEOUT); - if (detectionResults.hasZoneJs) { - console.warn('Zone.js detected, skipping session recorder'); - return; - } window.lmnrRrweb.record({ async emit(event) { try { @@ -528,6 +515,90 @@ """ +async def is_error_page(cdp_session): + cdp_client = cdp_session.cdp_client + + try: + # Get the current page URL + result = await asyncio.wait_for( + cdp_client.send.Runtime.evaluate( + { + "expression": "window.location.href", + "returnByValue": True, + }, + session_id=cdp_session.session_id, + ), + timeout=CDP_OPERATION_TIMEOUT_SECONDS, + ) + + url = result.get("result", {}).get("value", "") + + # Comprehensive list of browser error URLs + error_url_patterns = [ + # Chrome error pages + "chrome-error://", + "chrome://network-error/", + "chrome://network-errors/", + # Chrome crash and debugging pages + "chrome://crash/", + "chrome://crashdump/", + "chrome://kill/", + "chrome://hang/", + "chrome://shorthang/", + "chrome://gpuclean/", + "chrome://gpucrash/", + "chrome://gpuhang/", + "chrome://memory-exhaust/", + "chrome://memory-pressure-critical/", + "chrome://memory-pressure-moderate/", + "chrome://inducebrowsercrashforrealz/", + "chrome://inducebrowserdcheckforrealz/", + "chrome://inducebrowserheapcorruption/", + "chrome://heapcorruptioncrash/", + "chrome://badcastcrash/", + "chrome://ppapiflashcrash/", + "chrome://ppapiflashhang/", + "chrome://quit/", + "chrome://restart/", + # Firefox error pages + "about:neterror", + "about:certerror", + "about:blocked", + # Firefox crash and debugging pages + "about:crashcontent", + "about:crashparent", + "about:crashes", + "about:tabcrashed", + # Edge error pages (similar to Chrome) + "edge-error://", + "edge://crash/", + "edge://kill/", + "edge://hang/", + # Safari/WebKit error indicators (data URLs with error content) + "webkit-error://", + ] + + # Check if current URL matches any error pattern + if any(url.startswith(pattern) for pattern in error_url_patterns): + logger.debug(f"Detected browser error page from URL: {url}") + return True + + # Additional check for data URLs that might contain error pages + if url.startswith("data:") and any( + error_term in url.lower() + for error_term in ["error", "crash", "failed", "unavailable", "not found"] + ): + logger.debug(f"Detected error page from data URL: {url[:100]}...") + return True + + except asyncio.TimeoutError: + logger.debug("Timeout error when checking if error page") + return True + except Exception as e: + logger.debug(f"Error during checking if error page: {e}") + return False + + def get_mask_input_setting() -> MaskInputOptions: """Get the mask_input setting from session recording configuration.""" try: @@ -555,36 +626,54 @@ def get_mask_input_setting() -> MaskInputOptions: ) -async def get_isolated_context_id(cdp_session): +async def get_isolated_context_id(cdp_session) -> int | None: tree = await cdp_session.cdp_client.send.Page.getFrameTree( session_id=cdp_session.session_id ) frame = tree.get("frameTree", {}).get("frame", {}) frame_id = frame.get("id", str(uuid.uuid4())) - loader_id = frame.get("loaderI", str(uuid.uuid4())) + loader_id = frame.get("loaderId", str(uuid.uuid4())) url = frame.get("url", "about:blank") key = f"{frame_id}_{loader_id}_{url}" - if key not in frame_to_isolated_context_id: - result = await cdp_session.cdp_client.send.Page.createIsolatedWorld( - { - "frameId": frame_id, - "worldName": "laminar-isolated-context", - }, - session_id=cdp_session.session_id, - ) - isolated_context_id = result["executionContextId"] - frame_to_isolated_context_id[key] = isolated_context_id - return isolated_context_id - else: + if key in frame_to_isolated_context_id: + print(f"Key: {key} existing context id: {frame_to_isolated_context_id[key]}") return frame_to_isolated_context_id[key] + try: + result = await asyncio.wait_for( + cdp_session.cdp_client.send.Page.createIsolatedWorld( + { + "frameId": frame_id, + "worldName": "laminar-isolated-context", + }, + session_id=cdp_session.session_id, + ), + timeout=CDP_OPERATION_TIMEOUT_SECONDS, + ) + except Exception as e: + logger.debug(f"Failed to get isolated context id: {e}") + return None + isolated_context_id = result["executionContextId"] + frame_to_isolated_context_id[key] = isolated_context_id + print(f"Key: {key} new context id: {isolated_context_id}") + return isolated_context_id + # browser_use.browser.session.CDPSession (browser-use >= 0.6.0) async def inject_session_recorder(cdp_session): cdp_client = cdp_session.cdp_client try: + try: + is_error = await is_error_page(cdp_session) + except Exception as e: + logger.debug(f"Failed to check if error page: {e}") + is_error = False + + if is_error: + logger.debug("Error page detected, skipping session recorder injection") + return try: is_loaded = await is_recorder_present(cdp_session) except Exception as e: @@ -595,6 +684,9 @@ async def inject_session_recorder(cdp_session): return isolated_context_id = await get_isolated_context_id(cdp_session) + if isolated_context_id is None: + logger.debug("Failed to get isolated context id") + return async def load_session_recorder(): try: @@ -607,7 +699,7 @@ async def load_session_recorder(): }, session_id=cdp_session.session_id, ), - timeout=2.5, + timeout=CDP_OPERATION_TIMEOUT_SECONDS, ) return True except Exception as e: @@ -623,15 +715,18 @@ async def load_session_recorder(): return try: - await asyncio.create_task( + await asyncio.wait_for( cdp_client.send.Runtime.evaluate( { "expression": f"({INJECT_PLACEHOLDER})({orjson.dumps(get_mask_input_setting()).decode('utf-8')})", "contextId": isolated_context_id, }, session_id=cdp_session.session_id, - ) + ), + timeout=CDP_OPERATION_TIMEOUT_SECONDS, ) + except asyncio.TimeoutError: + logger.debug("Timeout error when injecting session recorder") except Exception as e: logger.debug(f"Failed to inject recorder processor: {e}") @@ -746,13 +841,21 @@ def on_target_created(event, cdp_session_id: str | None = None): if target_info["type"] == "page": asyncio.create_task(inject_session_recorder(cdp_session=cdp_session)) - cdp_session.cdp_client.register.Target.targetCreated(on_target_created) + try: + cdp_session.cdp_client.register.Target.targetCreated(on_target_created) + except Exception as e: + logger.debug(f"Failed to register on target created: {e}") # browser_use.browser.session.CDPSession (browser-use >= 0.6.0) async def is_recorder_present(cdp_session) -> bool: + # This function returns True on any error, because it is safer to not record + # events than to try to inject the recorder into a broken context. cdp_client = cdp_session.cdp_client isolated_context_id = await get_isolated_context_id(cdp_session) + if isolated_context_id is None: + logger.debug("Failed to get isolated context id") + return True try: result = await asyncio.wait_for( @@ -763,26 +866,38 @@ async def is_recorder_present(cdp_session) -> bool: }, session_id=cdp_session.session_id, ), - timeout=1, + timeout=CDP_OPERATION_TIMEOUT_SECONDS, ) if result and "result" in result and "value" in result["result"]: return result["result"]["value"] return False except asyncio.TimeoutError: + logger.debug("Timeout error when checking if session recorder is present") return True except Exception: + logger.debug("Exception when checking if session recorder is present") return True async def take_full_snapshot(cdp_session): cdp_client = cdp_session.cdp_client + print("Taking full snapshot") isolated_context_id = await get_isolated_context_id(cdp_session) - result = await cdp_client.send.Runtime.evaluate( - { - "expression": """(() => { + print(f"Isolated context id: {isolated_context_id}") + if isolated_context_id is None: + logger.debug("Failed to get isolated context id") + return False + + try: + result = await asyncio.wait_for( + cdp_client.send.Runtime.evaluate( + { + "expression": """(() => { if (window.lmnrRrweb) { + console.log("Taking full snapshot, lmnrRrweb is present") try { window.lmnrRrweb.record.takeFullSnapshot(); + console.log("Full snapshot taken successfully") return true; } catch (e) { console.error("Error taking full snapshot:", e); @@ -791,10 +906,21 @@ async def take_full_snapshot(cdp_session): } return false; })()""", - "contextId": isolated_context_id, - }, - session_id=cdp_session.session_id, - ) + "contextId": isolated_context_id, + }, + session_id=cdp_session.session_id, + ), + timeout=CDP_OPERATION_TIMEOUT_SECONDS, + ) + except asyncio.TimeoutError: + print("Timeout error when taking full snapshot") + logger.debug("Timeout error when taking full snapshot") + return False + except Exception as e: + print(f"Error when taking full snapshot: {e}") + logger.debug(f"Error when taking full snapshot: {e}") + return False + print(f"Result: {result}") if result and "result" in result and "value" in result["result"]: return result["result"]["value"] return False From 2cd0a30b810be2c5b240d78d042ab7131eaa59eb Mon Sep 17 00:00:00 2001 From: Din Date: Fri, 5 Sep 2025 13:22:05 +0100 Subject: [PATCH 04/12] fix full snapshots, by not storing URL in key --- src/lmnr/sdk/browser/browser_use_cdp_otel.py | 3 --- src/lmnr/sdk/browser/cdp_utils.py | 10 +--------- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/src/lmnr/sdk/browser/browser_use_cdp_otel.py b/src/lmnr/sdk/browser/browser_use_cdp_otel.py index dbb2b2fb..ced3d5cd 100644 --- a/src/lmnr/sdk/browser/browser_use_cdp_otel.py +++ b/src/lmnr/sdk/browser/browser_use_cdp_otel.py @@ -42,10 +42,8 @@ async def process_wrapped_result(result, instance, client, to_wrap): await start_recording_events(result, str(uuid.uuid4()), client) if to_wrap.get("action") == "take_full_snapshot": - print("wrapped on switch tab event") target_id = result if target_id: - print(f"target id: {target_id}") cdp_session = await instance.get_or_create_cdp_session(target_id) await take_full_snapshot(cdp_session) @@ -62,7 +60,6 @@ async def _wrap( class BrowserUseInstrumentor(BaseInstrumentor): def __init__(self, async_client: AsyncLaminarClient): - print("BrowserUseInstrumentor with full snapshot logs") super().__init__() self.async_client = async_client diff --git a/src/lmnr/sdk/browser/cdp_utils.py b/src/lmnr/sdk/browser/cdp_utils.py index db637239..beac0354 100644 --- a/src/lmnr/sdk/browser/cdp_utils.py +++ b/src/lmnr/sdk/browser/cdp_utils.py @@ -633,12 +633,10 @@ async def get_isolated_context_id(cdp_session) -> int | None: frame = tree.get("frameTree", {}).get("frame", {}) frame_id = frame.get("id", str(uuid.uuid4())) loader_id = frame.get("loaderId", str(uuid.uuid4())) - url = frame.get("url", "about:blank") - key = f"{frame_id}_{loader_id}_{url}" + key = f"{frame_id}_{loader_id}" if key in frame_to_isolated_context_id: - print(f"Key: {key} existing context id: {frame_to_isolated_context_id[key]}") return frame_to_isolated_context_id[key] try: @@ -657,7 +655,6 @@ async def get_isolated_context_id(cdp_session) -> int | None: return None isolated_context_id = result["executionContextId"] frame_to_isolated_context_id[key] = isolated_context_id - print(f"Key: {key} new context id: {isolated_context_id}") return isolated_context_id @@ -881,9 +878,7 @@ async def is_recorder_present(cdp_session) -> bool: async def take_full_snapshot(cdp_session): cdp_client = cdp_session.cdp_client - print("Taking full snapshot") isolated_context_id = await get_isolated_context_id(cdp_session) - print(f"Isolated context id: {isolated_context_id}") if isolated_context_id is None: logger.debug("Failed to get isolated context id") return False @@ -913,14 +908,11 @@ async def take_full_snapshot(cdp_session): timeout=CDP_OPERATION_TIMEOUT_SECONDS, ) except asyncio.TimeoutError: - print("Timeout error when taking full snapshot") logger.debug("Timeout error when taking full snapshot") return False except Exception as e: - print(f"Error when taking full snapshot: {e}") logger.debug(f"Error when taking full snapshot: {e}") return False - print(f"Result: {result}") if result and "result" in result and "value" in result["result"]: return result["result"]["value"] return False From 08eb9bfcb315c94c0c10c617c7a3f1dcf1ae88e0 Mon Sep 17 00:00:00 2001 From: Din Date: Fri, 5 Sep 2025 15:01:24 +0100 Subject: [PATCH 05/12] bubus instrumentation to keep spans hierarchy correct --- .../opentelemetry_lib/tracing/__init__.py | 14 +++ .../tracing/_instrument_initializers.py | 39 +++++++- .../opentelemetry_lib/tracing/instruments.py | 4 + src/lmnr/sdk/browser/bubus_otel.py | 90 +++++++++++++++++++ 4 files changed, 144 insertions(+), 3 deletions(-) create mode 100644 src/lmnr/sdk/browser/bubus_otel.py diff --git a/src/lmnr/opentelemetry_lib/tracing/__init__.py b/src/lmnr/opentelemetry_lib/tracing/__init__.py index 4f963b98..7e6a58e2 100644 --- a/src/lmnr/opentelemetry_lib/tracing/__init__.py +++ b/src/lmnr/opentelemetry_lib/tracing/__init__.py @@ -201,6 +201,20 @@ def push_span_context(self, span: trace.Span) -> Context: return new_context + def push_raw_span_context(self, span_context: trace.SpanContext) -> Context: + current_ctx = get_current_context() + new_context = trace.set_span_in_context( + trace.NonRecordingSpan(span_context), current_ctx + ) + token = attach_context(new_context) + + # Store the token for later detachment - tokens are much lighter than contexts + current_stack = get_token_stack().copy() + current_stack.append(token) + set_token_stack(current_stack) + + return new_context + def pop_span_context(self) -> None: """Pop the current span context from the stack.""" current_stack = get_token_stack().copy() diff --git a/src/lmnr/opentelemetry_lib/tracing/_instrument_initializers.py b/src/lmnr/opentelemetry_lib/tracing/_instrument_initializers.py index 201a9eb8..bed77114 100644 --- a/src/lmnr/opentelemetry_lib/tracing/_instrument_initializers.py +++ b/src/lmnr/opentelemetry_lib/tracing/_instrument_initializers.py @@ -51,9 +51,19 @@ def init_instrumentor(self, *args, **kwargs) -> BaseInstrumentor | None: class BrowserUseInstrumentorInitializer(InstrumentorInitializer): - def init_instrumentor( - self, client, async_client, *args, **kwargs - ) -> BaseInstrumentor | None: + """Instruments for different versions of browser-use: + + - browser-use < 0.5: BrowserUseLegacyInstrumentor to track agent_step and + other structure spans. Session instrumentation is controlled by + Instruments.PLAYWRIGHT (or Instruments.PATCHRIGHT for several versions + in 0.4.* that used patchright) + - browser-use ~= 0.5: Structure spans live in browser_use package itself. + Session instrumentation is controlled by Instruments.PLAYWRIGHT + - browser-use >= 0.6.0rc1: BubusInstrumentor to keep spans structure. + Session instrumentation is controlled by Instruments.BROWSER_USE_SESSION + """ + + def init_instrumentor(self, *args, **kwargs) -> BaseInstrumentor | None: if not is_package_installed("browser-use"): return None @@ -65,6 +75,19 @@ def init_instrumentor( return BrowserUseLegacyInstrumentor() + return None + + +class BrowserUseSessionInstrumentorInitializer(InstrumentorInitializer): + def init_instrumentor( + self, client, async_client, *args, **kwargs + ) -> BaseInstrumentor | None: + if not is_package_installed("browser-use"): + return None + + version = get_package_version("browser-use") + from packaging.version import parse + if version and parse(version) >= parse("0.6.0rc1"): from lmnr.sdk.browser.browser_use_cdp_otel import BrowserUseInstrumentor @@ -73,6 +96,16 @@ def init_instrumentor( return None +class BubusInstrumentorInitializer(InstrumentorInitializer): + def init_instrumentor(self, *args, **kwargs) -> BaseInstrumentor | None: + if not is_package_installed("bubus"): + return None + + from lmnr.sdk.browser.bubus_otel import BubusInstrumentor + + return BubusInstrumentor() + + class ChromaInstrumentorInitializer(InstrumentorInitializer): def init_instrumentor(self, *args, **kwargs) -> BaseInstrumentor | None: if not is_package_installed("chromadb"): diff --git a/src/lmnr/opentelemetry_lib/tracing/instruments.py b/src/lmnr/opentelemetry_lib/tracing/instruments.py index 909a132f..416057d3 100644 --- a/src/lmnr/opentelemetry_lib/tracing/instruments.py +++ b/src/lmnr/opentelemetry_lib/tracing/instruments.py @@ -17,6 +17,8 @@ class Instruments(Enum): ANTHROPIC = "anthropic" BEDROCK = "bedrock" BROWSER_USE = "browser_use" + BROWSER_USE_SESSION = "browser_use_session" + BUBUS = "bubus" CHROMA = "chroma" COHERE = "cohere" CREWAI = "crewai" @@ -60,6 +62,8 @@ class Instruments(Enum): Instruments.ANTHROPIC: initializers.AnthropicInstrumentorInitializer(), Instruments.BEDROCK: initializers.BedrockInstrumentorInitializer(), Instruments.BROWSER_USE: initializers.BrowserUseInstrumentorInitializer(), + Instruments.BROWSER_USE_SESSION: initializers.BrowserUseSessionInstrumentorInitializer(), + Instruments.BUBUS: initializers.BubusInstrumentorInitializer(), Instruments.CHROMA: initializers.ChromaInstrumentorInitializer(), Instruments.COHERE: initializers.CohereInstrumentorInitializer(), Instruments.CREWAI: initializers.CrewAIInstrumentorInitializer(), diff --git a/src/lmnr/sdk/browser/bubus_otel.py b/src/lmnr/sdk/browser/bubus_otel.py new file mode 100644 index 00000000..5de59588 --- /dev/null +++ b/src/lmnr/sdk/browser/bubus_otel.py @@ -0,0 +1,90 @@ +from typing import Collection + +from lmnr.opentelemetry_lib.tracing import TracerWrapper +from lmnr.opentelemetry_lib.tracing.context import get_current_context + +from opentelemetry import context as context_api +from opentelemetry.instrumentation.instrumentor import BaseInstrumentor +from opentelemetry.instrumentation.utils import unwrap +from opentelemetry.trace import get_current_span +from wrapt import wrap_function_wrapper + +from lmnr.sdk.log import get_default_logger + +_instruments = ("bubus >= 1.3.0",) +event_id_to_span_context = {} +logger = get_default_logger(__name__) + + +def wrap_dispatch(wrapped, instance, args, kwargs): + event = args[0] if args and len(args) > 0 else kwargs.get("event", None) + if event and hasattr(event, "event_id"): + event_id = event.event_id + if event_id: + span = get_current_span(get_current_context()) + event_id_to_span_context[event_id] = span.get_span_context() + return wrapped(*args, **kwargs) + + +async def wrap_process_event(wrapped, instance, args, kwargs): + event = args[0] if args and len(args) > 0 else kwargs.get("event", None) + span_context = None + if event and hasattr(event, "event_id"): + event_id = event.event_id + if event_id: + span_context = event_id_to_span_context.get(event_id) + if not span_context: + return await wrapped(*args, **kwargs) + if not TracerWrapper.verify_initialized(): + return await wrapped(*args, **kwargs) + wrapper = None + context = None + context_token = None + try: + wrapper = TracerWrapper() + context = wrapper.push_raw_span_context(span_context) + # Some auto-instrumentations are not under our control, so they + # don't have access to our isolated context. We attach the context + # to the OTEL global context, so that spans know their parent + # span and trace_id. + context_token = context_api.attach(context) + except Exception as e: + logger.debug("Error pushing span context: %s", e) + try: + return await wrapped(*args, **kwargs) + finally: + if context_token: + context_api.detach(context_token) + if wrapper and context: + wrapper.pop_span_context() + + +class BubusInstrumentor(BaseInstrumentor): + def __init__(self): + super().__init__() + + def instrumentation_dependencies(self) -> Collection[str]: + return _instruments + + def _instrument(self, **kwargs): + try: + wrap_function_wrapper("bubus.service", "EventBus.dispatch", wrap_dispatch) + except (ModuleNotFoundError, ImportError): + pass + try: + wrap_function_wrapper( + "bubus.service", "EventBus.process_event", wrap_process_event + ) + except (ModuleNotFoundError, ImportError): + pass + + def _uninstrument(self, **kwargs): + try: + unwrap("bubus.service", "EventBus.dispatch") + except (ModuleNotFoundError, ImportError): + pass + try: + unwrap("bubus.service", "EventBus.process_event") + except (ModuleNotFoundError, ImportError): + pass + event_id_to_span_context.clear() From f6365278fc8d3319ea57bab09ac49d502c65e2c6 Mon Sep 17 00:00:00 2001 From: Din Date: Fri, 5 Sep 2025 16:29:53 +0100 Subject: [PATCH 06/12] add about:blank, expose disabled_instruments in evaluate --- src/lmnr/sdk/browser/cdp_utils.py | 11 ++++++----- src/lmnr/sdk/evaluations.py | 24 ++++++++++++++++++++++-- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/src/lmnr/sdk/browser/cdp_utils.py b/src/lmnr/sdk/browser/cdp_utils.py index beac0354..525cd7e4 100644 --- a/src/lmnr/sdk/browser/cdp_utils.py +++ b/src/lmnr/sdk/browser/cdp_utils.py @@ -515,7 +515,8 @@ """ -async def is_error_page(cdp_session): +async def should_instrument_page(cdp_session): + """Checks if the page url is an error page or an empty page.""" cdp_client = cdp_session.cdp_client try: @@ -535,6 +536,7 @@ async def is_error_page(cdp_session): # Comprehensive list of browser error URLs error_url_patterns = [ + "about:blank", # Chrome error pages "chrome-error://", "chrome://network-error/", @@ -662,13 +664,13 @@ async def get_isolated_context_id(cdp_session) -> int | None: async def inject_session_recorder(cdp_session): cdp_client = cdp_session.cdp_client try: + should_instrument = False try: - is_error = await is_error_page(cdp_session) + should_instrument = await should_instrument_page(cdp_session) except Exception as e: logger.debug(f"Failed to check if error page: {e}") - is_error = False - if is_error: + if should_instrument: logger.debug("Error page detected, skipping session recorder injection") return try: @@ -687,7 +689,6 @@ async def inject_session_recorder(cdp_session): async def load_session_recorder(): try: - await asyncio.wait_for( cdp_client.send.Runtime.evaluate( { diff --git a/src/lmnr/sdk/evaluations.py b/src/lmnr/sdk/evaluations.py index 142880a1..8235b17f 100644 --- a/src/lmnr/sdk/evaluations.py +++ b/src/lmnr/sdk/evaluations.py @@ -112,7 +112,12 @@ def __init__( base_http_url: str | None = None, http_port: int | None = None, grpc_port: int | None = None, - instruments: set[Instruments] | None = None, + instruments: ( + set[Instruments] | list[Instruments] | tuple[Instruments] | None + ) = None, + disabled_instruments: ( + set[Instruments] | list[Instruments] | tuple[Instruments] | None + ) = None, max_export_batch_size: int | None = MAX_EXPORT_BATCH_SIZE, trace_export_timeout_seconds: int | None = None, ): @@ -172,6 +177,10 @@ def __init__( used. See https://docs.lmnr.ai/tracing/automatic-instrumentation Defaults to None. + disabled_instruments (set[Instruments] | None, optional): Set of modules\ + to disable auto-instrumentations. If None, only modules passed\ + as `instruments` will be disabled. + Defaults to None. """ if not evaluators: @@ -234,6 +243,7 @@ def __init__( http_port=http_port, grpc_port=grpc_port, instruments=instruments, + disabled_instruments=disabled_instruments, max_export_batch_size=max_export_batch_size, export_timeout_seconds=trace_export_timeout_seconds, ) @@ -432,7 +442,12 @@ def evaluate( base_http_url: str | None = None, http_port: int | None = None, grpc_port: int | None = None, - instruments: set[Instruments] | None = None, + instruments: ( + set[Instruments] | list[Instruments] | tuple[Instruments] | None + ) = None, + disabled_instruments: ( + set[Instruments] | list[Instruments] | tuple[Instruments] | None + ) = None, max_export_batch_size: int | None = MAX_EXPORT_BATCH_SIZE, trace_export_timeout_seconds: int | None = None, ) -> Awaitable[None] | None: @@ -493,6 +508,10 @@ def evaluate( auto-instrument. If None, all available instruments\ will be used. Defaults to None. + disabled_instruments (set[Instruments] | None, optional): Set of modules\ + to disable auto-instrumentations. If None, no\ + only modules passed as `instruments` will be disabled. + Defaults to None. trace_export_timeout_seconds (int | None, optional): The timeout for\ trace export on OpenTelemetry exporter. Defaults to None. """ @@ -510,6 +529,7 @@ def evaluate( http_port=http_port, grpc_port=grpc_port, instruments=instruments, + disabled_instruments=disabled_instruments, max_export_batch_size=max_export_batch_size, trace_export_timeout_seconds=trace_export_timeout_seconds, ) From 304506a1a5f914a6581eb8ddced45229da4e4b14 Mon Sep 17 00:00:00 2001 From: Din Date: Fri, 5 Sep 2025 16:50:58 +0100 Subject: [PATCH 07/12] further safety and bump version --- src/lmnr/sdk/browser/cdp_utils.py | 54 ++++++++++++++++++++++--------- src/lmnr/version.py | 2 +- 2 files changed, 39 insertions(+), 17 deletions(-) diff --git a/src/lmnr/sdk/browser/cdp_utils.py b/src/lmnr/sdk/browser/cdp_utils.py index 525cd7e4..185c78ef 100644 --- a/src/lmnr/sdk/browser/cdp_utils.py +++ b/src/lmnr/sdk/browser/cdp_utils.py @@ -3,7 +3,6 @@ import orjson import os import time -import uuid from opentelemetry import trace @@ -515,7 +514,7 @@ """ -async def should_instrument_page(cdp_session): +async def should_skip_page(cdp_session): """Checks if the page url is an error page or an empty page.""" cdp_client = cdp_session.cdp_client @@ -593,9 +592,11 @@ async def should_instrument_page(cdp_session): logger.debug(f"Detected error page from data URL: {url[:100]}...") return True + return False + except asyncio.TimeoutError: logger.debug("Timeout error when checking if error page") - return True + return False except Exception as e: logger.debug(f"Error during checking if error page: {e}") return False @@ -628,13 +629,30 @@ def get_mask_input_setting() -> MaskInputOptions: ) +# browser_use.browser.session.CDPSession (browser-use >= 0.6.0) async def get_isolated_context_id(cdp_session) -> int | None: - tree = await cdp_session.cdp_client.send.Page.getFrameTree( - session_id=cdp_session.session_id - ) + tree = {} + try: + tree = await asyncio.wait_for( + cdp_session.cdp_client.send.Page.getFrameTree( + session_id=cdp_session.session_id + ), + timeout=CDP_OPERATION_TIMEOUT_SECONDS, + ) + except asyncio.TimeoutError: + logger.debug("Timeout error when getting frame tree") + return None + except Exception as e: + logger.debug(f"Failed to get frame tree: {e}") + return None + frame = tree.get("frameTree", {}).get("frame", {}) - frame_id = frame.get("id", str(uuid.uuid4())) - loader_id = frame.get("loaderId", str(uuid.uuid4())) + frame_id = frame.get("id") + loader_id = frame.get("loaderId") + + if frame_id is None or loader_id is None: + logger.debug("Failed to get frame id or loader id") + return None key = f"{frame_id}_{loader_id}" @@ -652,6 +670,9 @@ async def get_isolated_context_id(cdp_session) -> int | None: ), timeout=CDP_OPERATION_TIMEOUT_SECONDS, ) + except asyncio.TimeoutError: + logger.debug("Timeout error when getting isolated context id") + return None except Exception as e: logger.debug(f"Failed to get isolated context id: {e}") return None @@ -664,13 +685,13 @@ async def get_isolated_context_id(cdp_session) -> int | None: async def inject_session_recorder(cdp_session): cdp_client = cdp_session.cdp_client try: - should_instrument = False + should_skip = True try: - should_instrument = await should_instrument_page(cdp_session) + should_skip = await should_skip_page(cdp_session) except Exception as e: logger.debug(f"Failed to check if error page: {e}") - if should_instrument: + if should_skip: logger.debug("Error page detected, skipping session recorder injection") return try: @@ -700,15 +721,18 @@ async def load_session_recorder(): timeout=CDP_OPERATION_TIMEOUT_SECONDS, ) return True + except asyncio.TimeoutError: + logger.debug("Timeout error when loading session recorder base") + return False except Exception as e: - logger.error(f"Failed to load session recorder: {e}") + logger.debug(f"Failed to load session recorder base: {e}") return False if not await retry_async( load_session_recorder, retries=3, delay=1, - error_message="Failed to load session recorder processor", + error_message="Failed to load session recorder", ): return @@ -726,7 +750,7 @@ async def load_session_recorder(): except asyncio.TimeoutError: logger.debug("Timeout error when injecting session recorder") except Exception as e: - logger.debug(f"Failed to inject recorder processor: {e}") + logger.debug(f"Failed to inject recorder: {e}") except Exception as e: logger.debug(f"Error during session recorder injection: {e}") @@ -890,10 +914,8 @@ async def take_full_snapshot(cdp_session): { "expression": """(() => { if (window.lmnrRrweb) { - console.log("Taking full snapshot, lmnrRrweb is present") try { window.lmnrRrweb.record.takeFullSnapshot(); - console.log("Full snapshot taken successfully") return true; } catch (e) { console.error("Error taking full snapshot:", e); diff --git a/src/lmnr/version.py b/src/lmnr/version.py index 98bdc785..88145e64 100644 --- a/src/lmnr/version.py +++ b/src/lmnr/version.py @@ -3,7 +3,7 @@ from packaging import version -__version__ = "0.7.10" +__version__ = "0.7.11" PYTHON_VERSION = f"{sys.version_info.major}.{sys.version_info.minor}" From 9d95cefadc966362346d6df3a060409e536723ef Mon Sep 17 00:00:00 2001 From: Din Date: Fri, 5 Sep 2025 16:52:56 +0100 Subject: [PATCH 08/12] also commit bumped pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 4f1f218d..aec8e50a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ [project] name = "lmnr" -version = "0.7.10" +version = "0.7.11" description = "Python SDK for Laminar" authors = [ { name = "lmnr.ai", email = "founders@lmnr.ai" } From ec9e39015bdbfec779b01917c271d6d6cf167172 Mon Sep 17 00:00:00 2001 From: Din Date: Fri, 5 Sep 2025 23:22:25 +0100 Subject: [PATCH 09/12] further context id checks, Laminar.use_span --- .../opentelemetry_lib/tracing/__init__.py | 14 -- src/lmnr/sdk/browser/bubus_otel.py | 29 +---- src/lmnr/sdk/browser/cdp_utils.py | 123 ++++++++++-------- src/lmnr/sdk/laminar.py | 6 +- 4 files changed, 79 insertions(+), 93 deletions(-) diff --git a/src/lmnr/opentelemetry_lib/tracing/__init__.py b/src/lmnr/opentelemetry_lib/tracing/__init__.py index 7e6a58e2..4f963b98 100644 --- a/src/lmnr/opentelemetry_lib/tracing/__init__.py +++ b/src/lmnr/opentelemetry_lib/tracing/__init__.py @@ -201,20 +201,6 @@ def push_span_context(self, span: trace.Span) -> Context: return new_context - def push_raw_span_context(self, span_context: trace.SpanContext) -> Context: - current_ctx = get_current_context() - new_context = trace.set_span_in_context( - trace.NonRecordingSpan(span_context), current_ctx - ) - token = attach_context(new_context) - - # Store the token for later detachment - tokens are much lighter than contexts - current_stack = get_token_stack().copy() - current_stack.append(token) - set_token_stack(current_stack) - - return new_context - def pop_span_context(self) -> None: """Pop the current span context from the stack.""" current_stack = get_token_stack().copy() diff --git a/src/lmnr/sdk/browser/bubus_otel.py b/src/lmnr/sdk/browser/bubus_otel.py index 5de59588..55155c18 100644 --- a/src/lmnr/sdk/browser/bubus_otel.py +++ b/src/lmnr/sdk/browser/bubus_otel.py @@ -1,15 +1,14 @@ from typing import Collection -from lmnr.opentelemetry_lib.tracing import TracerWrapper +from lmnr import Laminar from lmnr.opentelemetry_lib.tracing.context import get_current_context +from lmnr.sdk.log import get_default_logger -from opentelemetry import context as context_api from opentelemetry.instrumentation.instrumentor import BaseInstrumentor from opentelemetry.instrumentation.utils import unwrap -from opentelemetry.trace import get_current_span +from opentelemetry.trace import NonRecordingSpan, get_current_span from wrapt import wrap_function_wrapper -from lmnr.sdk.log import get_default_logger _instruments = ("bubus >= 1.3.0",) event_id_to_span_context = {} @@ -35,28 +34,10 @@ async def wrap_process_event(wrapped, instance, args, kwargs): span_context = event_id_to_span_context.get(event_id) if not span_context: return await wrapped(*args, **kwargs) - if not TracerWrapper.verify_initialized(): + if not Laminar.is_initialized(): return await wrapped(*args, **kwargs) - wrapper = None - context = None - context_token = None - try: - wrapper = TracerWrapper() - context = wrapper.push_raw_span_context(span_context) - # Some auto-instrumentations are not under our control, so they - # don't have access to our isolated context. We attach the context - # to the OTEL global context, so that spans know their parent - # span and trace_id. - context_token = context_api.attach(context) - except Exception as e: - logger.debug("Error pushing span context: %s", e) - try: + with Laminar.use_span(NonRecordingSpan(span_context)): return await wrapped(*args, **kwargs) - finally: - if context_token: - context_api.detach(context_token) - if wrapper and context: - wrapper.pop_span_context() class BubusInstrumentor(BaseInstrumentor): diff --git a/src/lmnr/sdk/browser/cdp_utils.py b/src/lmnr/sdk/browser/cdp_utils.py index 185c78ef..9b673f70 100644 --- a/src/lmnr/sdk/browser/cdp_utils.py +++ b/src/lmnr/sdk/browser/cdp_utils.py @@ -20,6 +20,7 @@ # CDP ContextId is int frame_to_isolated_context_id: dict[str, int] = {} +lock = asyncio.Lock() current_dir = os.path.dirname(os.path.abspath(__file__)) with open(os.path.join(current_dir, "recorder", "record.umd.min.cjs"), "r") as f: @@ -631,58 +632,61 @@ def get_mask_input_setting() -> MaskInputOptions: # browser_use.browser.session.CDPSession (browser-use >= 0.6.0) async def get_isolated_context_id(cdp_session) -> int | None: - tree = {} - try: - tree = await asyncio.wait_for( - cdp_session.cdp_client.send.Page.getFrameTree( - session_id=cdp_session.session_id - ), - timeout=CDP_OPERATION_TIMEOUT_SECONDS, - ) - except asyncio.TimeoutError: - logger.debug("Timeout error when getting frame tree") - return None - except Exception as e: - logger.debug(f"Failed to get frame tree: {e}") - return None - - frame = tree.get("frameTree", {}).get("frame", {}) - frame_id = frame.get("id") - loader_id = frame.get("loaderId") - - if frame_id is None or loader_id is None: - logger.debug("Failed to get frame id or loader id") - return None + async with lock: + tree = {} + try: + tree = await asyncio.wait_for( + cdp_session.cdp_client.send.Page.getFrameTree( + session_id=cdp_session.session_id + ), + timeout=CDP_OPERATION_TIMEOUT_SECONDS, + ) + except asyncio.TimeoutError: + logger.debug("Timeout error when getting frame tree") + return None + except Exception as e: + logger.debug(f"Failed to get frame tree: {e}") + return None + frame = tree.get("frameTree", {}).get("frame", {}) + frame_id = frame.get("id") + loader_id = frame.get("loaderId") - key = f"{frame_id}_{loader_id}" + if frame_id is None or loader_id is None: + logger.debug("Failed to get frame id or loader id") + return None + key = f"{frame_id}_{loader_id}" - if key in frame_to_isolated_context_id: - return frame_to_isolated_context_id[key] + if key in frame_to_isolated_context_id: + return frame_to_isolated_context_id[key] - try: - result = await asyncio.wait_for( - cdp_session.cdp_client.send.Page.createIsolatedWorld( - { - "frameId": frame_id, - "worldName": "laminar-isolated-context", - }, - session_id=cdp_session.session_id, - ), - timeout=CDP_OPERATION_TIMEOUT_SECONDS, - ) - except asyncio.TimeoutError: - logger.debug("Timeout error when getting isolated context id") - return None - except Exception as e: - logger.debug(f"Failed to get isolated context id: {e}") - return None - isolated_context_id = result["executionContextId"] - frame_to_isolated_context_id[key] = isolated_context_id - return isolated_context_id + try: + result = await asyncio.wait_for( + cdp_session.cdp_client.send.Page.createIsolatedWorld( + { + "frameId": frame_id, + "worldName": "laminar-isolated-context", + }, + session_id=cdp_session.session_id, + ), + timeout=CDP_OPERATION_TIMEOUT_SECONDS, + ) + except asyncio.TimeoutError: + logger.debug("Timeout error when getting isolated context id") + return None + except Exception as e: + logger.debug(f"Failed to get isolated context id: {e}") + return None + isolated_context_id = result["executionContextId"] + frame_to_isolated_context_id[key] = isolated_context_id + return isolated_context_id # browser_use.browser.session.CDPSession (browser-use >= 0.6.0) -async def inject_session_recorder(cdp_session): +async def inject_session_recorder(cdp_session) -> int | None: + """Injects the session recorder base as well as the recorder itself. + Returns the isolated context id if successful. + """ + isolated_context_id = None cdp_client = cdp_session.cdp_client try: should_skip = True @@ -692,10 +696,12 @@ async def inject_session_recorder(cdp_session): logger.debug(f"Failed to check if error page: {e}") if should_skip: - logger.debug("Error page detected, skipping session recorder injection") + logger.debug("Empty page detected, skipping session recorder injection") return + + isolated_context_id = await get_isolated_context_id(cdp_session) try: - is_loaded = await is_recorder_present(cdp_session) + is_loaded = await is_recorder_present(cdp_session, isolated_context_id) except Exception as e: logger.debug(f"Failed to check if session recorder is loaded: {e}") is_loaded = False @@ -703,7 +709,6 @@ async def inject_session_recorder(cdp_session): if is_loaded: return - isolated_context_id = await get_isolated_context_id(cdp_session) if isolated_context_id is None: logger.debug("Failed to get isolated context id") return @@ -747,6 +752,7 @@ async def load_session_recorder(): ), timeout=CDP_OPERATION_TIMEOUT_SECONDS, ) + return isolated_context_id except asyncio.TimeoutError: logger.debug("Timeout error when injecting session recorder") except Exception as e: @@ -770,7 +776,10 @@ async def start_recording_events( trace_id = format(span.get_span_context().trace_id, "032x") span.set_attribute("lmnr.internal.has_browser_session", True) - await inject_session_recorder(cdp_session) + isolated_context_id = await inject_session_recorder(cdp_session) + if isolated_context_id is None: + logger.debug("Failed to inject session recorder, not registering bindings") + return # Buffer for reassembling chunks chunk_buffers = {} @@ -828,11 +837,14 @@ async def send_events_from_browser(chunk): async def send_events_callback(event, cdp_session_id: str | None = None): if event["name"] != "lmnrSendEvents": return + if event["executionContextId"] != isolated_context_id: + return await send_events_from_browser(orjson.loads(event["payload"])) await cdp_client.send.Runtime.addBinding( { "name": "lmnrSendEvents", + "executionContextId": isolated_context_id, }, session_id=cdp_session.session_id, ) @@ -870,11 +882,14 @@ def on_target_created(event, cdp_session_id: str | None = None): # browser_use.browser.session.CDPSession (browser-use >= 0.6.0) -async def is_recorder_present(cdp_session) -> bool: +async def is_recorder_present( + cdp_session, isolated_context_id: int | None = None +) -> bool: # This function returns True on any error, because it is safer to not record # events than to try to inject the recorder into a broken context. cdp_client = cdp_session.cdp_client - isolated_context_id = await get_isolated_context_id(cdp_session) + if isolated_context_id is None: + isolated_context_id = await get_isolated_context_id(cdp_session) if isolated_context_id is None: logger.debug("Failed to get isolated context id") return True @@ -908,6 +923,10 @@ async def take_full_snapshot(cdp_session): logger.debug("Failed to get isolated context id") return False + if await should_skip_page(cdp_session): + logger.debug("Skipping full snapshot") + return False + try: result = await asyncio.wait_for( cdp_client.send.Runtime.evaluate( diff --git a/src/lmnr/sdk/laminar.py b/src/lmnr/sdk/laminar.py index 49dc657d..9af3999a 100644 --- a/src/lmnr/sdk/laminar.py +++ b/src/lmnr/sdk/laminar.py @@ -421,14 +421,14 @@ def start_span( Usage example: ```python - from src.lmnr import Laminar, use_span + from src.lmnr import Laminar def foo(span): - with use_span(span): + with Laminar.use_span(span): with Laminar.start_as_current_span("foo_inner"): some_function() def bar(): - with use_span(span): + with Laminar.use_span(span): openai_client.chat.completions.create() span = Laminar.start_span("outer") From 5fec2b879401eaf781c662bba90d28f78d6d7d14 Mon Sep 17 00:00:00 2001 From: Dinmukhamed Mailibay <47117969+dinmukhamedm@users.noreply.github.com> Date: Fri, 5 Sep 2025 23:27:49 +0100 Subject: [PATCH 10/12] Update src/lmnr/sdk/evaluations.py Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com> --- src/lmnr/sdk/evaluations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lmnr/sdk/evaluations.py b/src/lmnr/sdk/evaluations.py index 8235b17f..6c916783 100644 --- a/src/lmnr/sdk/evaluations.py +++ b/src/lmnr/sdk/evaluations.py @@ -510,7 +510,7 @@ def evaluate( Defaults to None. disabled_instruments (set[Instruments] | None, optional): Set of modules\ to disable auto-instrumentations. If None, no\ - only modules passed as `instruments` will be disabled. + If None, only modules passed as `instruments` will be disabled. Defaults to None. trace_export_timeout_seconds (int | None, optional): The timeout for\ trace export on OpenTelemetry exporter. Defaults to None. From 1fdc895d612c33e627c651c4745afadea270e3ca Mon Sep 17 00:00:00 2001 From: Din Date: Fri, 5 Sep 2025 23:34:32 +0100 Subject: [PATCH 11/12] return True from should skip in case of errors --- src/lmnr/sdk/browser/cdp_utils.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/lmnr/sdk/browser/cdp_utils.py b/src/lmnr/sdk/browser/cdp_utils.py index 9b673f70..f10435f9 100644 --- a/src/lmnr/sdk/browser/cdp_utils.py +++ b/src/lmnr/sdk/browser/cdp_utils.py @@ -516,7 +516,11 @@ async def should_skip_page(cdp_session): - """Checks if the page url is an error page or an empty page.""" + """Checks if the page url is an error page or an empty page. + Thius function returns True in case of any error in our code, because + it is safer to not record events than to try to inject the recorder + into something that is already broken. + """ cdp_client = cdp_session.cdp_client try: @@ -597,10 +601,10 @@ async def should_skip_page(cdp_session): except asyncio.TimeoutError: logger.debug("Timeout error when checking if error page") - return False + return True except Exception as e: logger.debug(f"Error during checking if error page: {e}") - return False + return True def get_mask_input_setting() -> MaskInputOptions: From 39f87957abe13911eeee9896b16d0c5e05d989b1 Mon Sep 17 00:00:00 2001 From: Dinmukhamed Mailibay <47117969+dinmukhamedm@users.noreply.github.com> Date: Fri, 5 Sep 2025 23:37:57 +0100 Subject: [PATCH 12/12] Update src/lmnr/sdk/browser/cdp_utils.py Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com> --- src/lmnr/sdk/browser/cdp_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lmnr/sdk/browser/cdp_utils.py b/src/lmnr/sdk/browser/cdp_utils.py index f10435f9..eae4e274 100644 --- a/src/lmnr/sdk/browser/cdp_utils.py +++ b/src/lmnr/sdk/browser/cdp_utils.py @@ -517,7 +517,7 @@ async def should_skip_page(cdp_session): """Checks if the page url is an error page or an empty page. - Thius function returns True in case of any error in our code, because + This function returns True in case of any error in our code, because it is safer to not record events than to try to inject the recorder into something that is already broken. """