From fffb99e8bc58da138d4199713d9c93b7621d7668 Mon Sep 17 00:00:00 2001
From: yin1991 <84140478+xiaokuili@users.noreply.github.com>
Date: Tue, 30 Jan 2024 09:05:41 +0000
Subject: [PATCH 1/4] Add proxy support to PlaywrightURLLoader

---
 .../document_loaders/url_playwright.py        | 20 ++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/libs/community/langchain_community/document_loaders/url_playwright.py b/libs/community/langchain_community/document_loaders/url_playwright.py
index 8071d3717f7266..12c27391b815b4 100644
--- a/libs/community/langchain_community/document_loaders/url_playwright.py
+++ b/libs/community/langchain_community/document_loaders/url_playwright.py
@@ -2,7 +2,7 @@
 """
 import logging
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, List, Optional
+from typing import TYPE_CHECKING, List, Optional, Dict
 
 from langchain_core.documents import Document
 
@@ -111,6 +111,17 @@ class PlaywrightURLLoader(BaseLoader):
         urls (List[str]): List of URLs to load.
         continue_on_failure (bool): If True, continue loading other URLs on failure.
         headless (bool): If True, the browser will run in headless mode.
+        proxy (Optional[Dict[str, str]]): If set, the browser will access URLs through the specified proxy.
+
+    Examples
+    --------
+    from langchain_community.document_loaders import PlaywrightURLLoader
+
+    loader = UnstructuredHTMLLoader(
+        "example.html", mode="elements", strategy="fast",
+    )
+    docs = loader.load()
+    
     """
 
     def __init__(
@@ -120,6 +131,7 @@ def __init__(
         headless: bool = True,
         remove_selectors: Optional[List[str]] = None,
         evaluator: Optional[PlaywrightEvaluator] = None,
+        proxy: Optional[Dict[str, str]] = None
     ):
         """Load a list of URLs using Playwright."""
         try:
@@ -133,6 +145,7 @@ def __init__(
         self.urls = urls
         self.continue_on_failure = continue_on_failure
         self.headless = headless
+        self.proxy= proxy
 
         if remove_selectors and evaluator:
             raise ValueError(
@@ -153,7 +166,8 @@ def load(self) -> List[Document]:
         docs: List[Document] = list()
 
         with sync_playwright() as p:
-            browser = p.chromium.launch(headless=self.headless)
+         
+            browser = p.chromium.launch(headless=self.headless, proxy=self.proxy)
             for url in self.urls:
                 try:
                     page = browser.new_page()
@@ -186,7 +200,7 @@ async def aload(self) -> List[Document]:
         docs: List[Document] = list()
 
         async with async_playwright() as p:
-            browser = await p.chromium.launch(headless=self.headless)
+            browser = await p.chromium.launch(headless=self.headless, proxy=self.proxy)
             for url in self.urls:
                 try:
                     page = await browser.new_page()

From 6fae177e5f5136db743cb610dabcab4f9f4ccb74 Mon Sep 17 00:00:00 2001
From: root <root@ip-172-31-46-160.ap-southeast-1.compute.internal>
Date: Wed, 31 Jan 2024 09:17:25 +0000
Subject: [PATCH 2/4] Add example demonstrating crawling with a proxy

---
 .../document_loaders/url_playwright.py        | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/libs/community/langchain_community/document_loaders/url_playwright.py b/libs/community/langchain_community/document_loaders/url_playwright.py
index 12c27391b815b4..d5e9c72b353519 100644
--- a/libs/community/langchain_community/document_loaders/url_playwright.py
+++ b/libs/community/langchain_community/document_loaders/url_playwright.py
@@ -115,13 +115,18 @@ class PlaywrightURLLoader(BaseLoader):
 
     Examples
     --------
+    # crwal with proxy 
     from langchain_community.document_loaders import PlaywrightURLLoader
-
-    loader = UnstructuredHTMLLoader(
-        "example.html", mode="elements", strategy="fast",
-    )
-    docs = loader.load()
-    
+    urls = [
+        "https://api.ipify.org/?format=json",
+    ]
+    proxy={
+        "server": "https://xx.xx.xx:15818", # https://<host>:<port>
+        "username": "username",
+        "password": "password"
+    }
+    loader = PlaywrightURLLoader(urls=urls, proxy=proxy)
+    data = loader.load()
     """
 
     def __init__(
@@ -220,3 +225,5 @@ async def aload(self) -> List[Document]:
                         raise e
             await browser.close()
         return docs
+
+

From a8e422c0c40e670bac718f89136e5140102f23c2 Mon Sep 17 00:00:00 2001
From: root <root@ip-172-31-46-160.ap-southeast-1.compute.internal>
Date: Wed, 31 Jan 2024 09:48:39 +0000
Subject: [PATCH 3/4] lint and format file

---
 .../document_loaders/url_playwright.py             | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/libs/community/langchain_community/document_loaders/url_playwright.py b/libs/community/langchain_community/document_loaders/url_playwright.py
index d5e9c72b353519..ce7bec24a03b16 100644
--- a/libs/community/langchain_community/document_loaders/url_playwright.py
+++ b/libs/community/langchain_community/document_loaders/url_playwright.py
@@ -2,7 +2,7 @@
 """
 import logging
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, List, Optional, Dict
+from typing import TYPE_CHECKING, Dict, List, Optional
 
 from langchain_core.documents import Document
 
@@ -111,11 +111,12 @@ class PlaywrightURLLoader(BaseLoader):
         urls (List[str]): List of URLs to load.
         continue_on_failure (bool): If True, continue loading other URLs on failure.
         headless (bool): If True, the browser will run in headless mode.
-        proxy (Optional[Dict[str, str]]): If set, the browser will access URLs through the specified proxy.
+        proxy (Optional[Dict[str, str]]): If set, the browser will access URLs
+            through the specified proxy.
 
     Examples
     --------
-    # crwal with proxy 
+    # crwal with proxy
     from langchain_community.document_loaders import PlaywrightURLLoader
     urls = [
         "https://api.ipify.org/?format=json",
@@ -136,7 +137,7 @@ def __init__(
         headless: bool = True,
         remove_selectors: Optional[List[str]] = None,
         evaluator: Optional[PlaywrightEvaluator] = None,
-        proxy: Optional[Dict[str, str]] = None
+        proxy: Optional[Dict[str, str]] = None,
     ):
         """Load a list of URLs using Playwright."""
         try:
@@ -150,7 +151,7 @@ def __init__(
         self.urls = urls
         self.continue_on_failure = continue_on_failure
         self.headless = headless
-        self.proxy= proxy
+        self.proxy = proxy
 
         if remove_selectors and evaluator:
             raise ValueError(
@@ -171,7 +172,6 @@ def load(self) -> List[Document]:
         docs: List[Document] = list()
 
         with sync_playwright() as p:
-         
             browser = p.chromium.launch(headless=self.headless, proxy=self.proxy)
             for url in self.urls:
                 try:
@@ -225,5 +225,3 @@ async def aload(self) -> List[Document]:
                         raise e
             await browser.close()
         return docs
-
-

From 8a0783ea9f5607c59fc6b843e41de150945df7fa Mon Sep 17 00:00:00 2001
From: Bagatur <baskaryan@gmail.com>
Date: Mon, 12 Feb 2024 19:35:17 -0800
Subject: [PATCH 4/4] Reformat PlaywrightURLLoader docstring example as a code block

---
 .../document_loaders/url_playwright.py        | 27 +++++++++----------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/libs/community/langchain_community/document_loaders/url_playwright.py b/libs/community/langchain_community/document_loaders/url_playwright.py
index ce7bec24a03b16..106f15cee6f61e 100644
--- a/libs/community/langchain_community/document_loaders/url_playwright.py
+++ b/libs/community/langchain_community/document_loaders/url_playwright.py
@@ -114,20 +114,19 @@ class PlaywrightURLLoader(BaseLoader):
         proxy (Optional[Dict[str, str]]): If set, the browser will access URLs
             through the specified proxy.
 
-    Examples
-    --------
-    # crwal with proxy
-    from langchain_community.document_loaders import PlaywrightURLLoader
-    urls = [
-        "https://api.ipify.org/?format=json",
-    ]
-    proxy={
-        "server": "https://xx.xx.xx:15818", # https://<host>:<port>
-        "username": "username",
-        "password": "password"
-    }
-    loader = PlaywrightURLLoader(urls=urls, proxy=proxy)
-    data = loader.load()
+    Example:
+        .. code-block:: python
+
+            from langchain_community.document_loaders import PlaywrightURLLoader
+
+            urls = ["https://api.ipify.org/?format=json"]
+            proxy = {
+                "server": "https://xx.xx.xx:15818",  # https://<host>:<port>
+                "username": "username",
+                "password": "password",
+            }
+            loader = PlaywrightURLLoader(urls, proxy=proxy)
+            data = loader.load()
     """
 
     def __init__(