From 19e28d8784adef90553da071ed891fc3252b2c63 Mon Sep 17 00:00:00 2001 From: Anurag Date: Sat, 6 May 2023 01:54:42 +0530 Subject: [PATCH] feat: Allow users to pass additional arguments to the WebDriver (#4121) This commit adds support for passing additional arguments to the `SeleniumURLLoader` when creating Chrome or Firefox web drivers. Previously, only a few arguments such as `headless` could be passed in. With this change, users can pass any additional arguments they need as a list of strings using the `arguments` parameter. The `arguments` parameter allows users to configure the driver with any options that are available for that particular browser. For example, users can now pass custom `user_agent` strings or `proxy` settings using this parameter. This change also includes updated documentation and type hints to reflect the new `arguments` parameter and its usage. fixes #4120 --- langchain/document_loaders/url_selenium.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/langchain/document_loaders/url_selenium.py b/langchain/document_loaders/url_selenium.py index 339d5ec4b8d9dc..2aed3dce0b4f69 100644 --- a/langchain/document_loaders/url_selenium.py +++ b/langchain/document_loaders/url_selenium.py @@ -22,6 +22,7 @@ class SeleniumURLLoader(BaseLoader): browser (str): The browser to use, either 'chrome' or 'firefox'. executable_path (Optional[str]): The path to the browser executable. headless (bool): If True, the browser will run in headless mode. + arguments [List[str]]: List of arguments to pass to the browser. 
""" def __init__( @@ -31,6 +32,7 @@ def __init__( browser: Literal["chrome", "firefox"] = "chrome", executable_path: Optional[str] = None, headless: bool = True, + arguments: List[str] = [], ): """Load a list of URLs using Selenium and unstructured.""" try: @@ -54,6 +56,7 @@ def __init__( self.browser = browser self.executable_path = executable_path self.headless = headless + self.arguments = arguments def _get_driver(self) -> Union["Chrome", "Firefox"]: """Create and return a WebDriver instance based on the specified browser. @@ -69,6 +72,10 @@ def _get_driver(self) -> Union["Chrome", "Firefox"]: from selenium.webdriver.chrome.options import Options as ChromeOptions chrome_options = ChromeOptions() + + for arg in self.arguments: + chrome_options.add_argument(arg) + if self.headless: chrome_options.add_argument("--headless") chrome_options.add_argument("--no-sandbox") @@ -80,6 +87,10 @@ def _get_driver(self) -> Union["Chrome", "Firefox"]: from selenium.webdriver.firefox.options import Options as FirefoxOptions firefox_options = FirefoxOptions() + + for arg in self.arguments: + firefox_options.add_argument(arg) + if self.headless: firefox_options.add_argument("--headless") if self.executable_path is None: