From 9c1a773893c4d258a154196bc1fea124d94ea36d Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Sat, 14 May 2022 15:37:30 -0300 Subject: [PATCH 01/10] Launch persistent context --- examples/persistent_context.py | 35 +++++++++++++++++++ scrapy_playwright/handler.py | 63 ++++++++++++++++++++++++++-------- 2 files changed, 83 insertions(+), 15 deletions(-) create mode 100644 examples/persistent_context.py diff --git a/examples/persistent_context.py b/examples/persistent_context.py new file mode 100644 index 00000000..906389ef --- /dev/null +++ b/examples/persistent_context.py @@ -0,0 +1,35 @@ +from pathlib import Path + +from scrapy import Spider, Request + + +class PersistentContextSpider(Spider): + """Use a persistent browser context""" + + name = "persistent_context" + custom_settings = { + "TWISTED_REACTOR": "twisted.internet.asyncioreactor.AsyncioSelectorReactor", + "DOWNLOAD_HANDLERS": { + "https": "scrapy_playwright.handler.ScrapyPlaywrightDownloadHandler", + # "http": "scrapy_playwright.handler.ScrapyPlaywrightDownloadHandler", + }, + "" + "PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS": { + "user_data_dir": str(Path.home() / "playwright-persistent-context"), + "java_script_enabled": False, + "extra_http_headers": {"Asdf": "Qwerty"}, + "user_agent": "foobar", + }, + "PLAYWRIGHT_PROCESS_REQUEST_HEADERS": None, + } + + def start_requests(self): + yield Request(url="https://httpbin.org/get", meta={"playwright": True}) + + def parse(self, response): + content = response.css("pre::text").get() + print(content) + return { + "url": response.url, + "context": response.meta["playwright_context"], + } diff --git a/scrapy_playwright/handler.py b/scrapy_playwright/handler.py index 64453697..e1a25f3f 100644 --- a/scrapy_playwright/handler.py +++ b/scrapy_playwright/handler.py @@ -41,6 +41,10 @@ logger = logging.getLogger("scrapy-playwright") +DEFAULT_CONTEXT_NAME = "default" +PERSISTENT_CONTEXT_NAME = "persistent" + + class ScrapyPlaywrightDownloadHandler(HTTPDownloadHandler): 
def __init__(self, crawler: Crawler) -> None: super().__init__(settings=crawler.settings, crawler=crawler) @@ -80,9 +84,22 @@ def __init__(self, crawler: Crawler) -> None: else: self.process_request_headers = use_scrapy_headers - self.context_kwargs: dict = crawler.settings.getdict("PLAYWRIGHT_CONTEXTS") + # if PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS is present we only launch one context self.contexts: Dict[str, BrowserContext] = {} + self.persistent_context: bool = False self.context_semaphores: Dict[str, asyncio.Semaphore] = {} + self.context_kwargs: dict = {} + if crawler.settings.get("PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS"): + self.persistent_context = True + ctx_kwargs = crawler.settings.getdict("PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS") + self.context_kwargs[PERSISTENT_CONTEXT_NAME] = ctx_kwargs + if crawler.settings.getdict("PLAYWRIGHT_CONTEXTS"): + logger.info( + "Both PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS and PLAYWRIGHT_CONTEXTS" + " are set, ignoring PLAYWRIGHT_CONTEXTS" + ) + else: + self.context_kwargs = crawler.settings.getdict("PLAYWRIGHT_CONTEXTS") self.abort_request: Optional[Callable[[PlaywrightRequest], Union[Awaitable, bool]]] = None if crawler.settings.get("PLAYWRIGHT_ABORT_REQUEST"): @@ -99,20 +116,32 @@ def _engine_started(self) -> Deferred: async def _launch_browser(self) -> None: self.playwright_context_manager = PlaywrightContextManager() self.playwright = await self.playwright_context_manager.start() - logger.info("Launching browser") - browser_launcher = getattr(self.playwright, self.browser_type).launch - self.browser = await browser_launcher(**self.launch_options) - logger.info(f"Browser {self.browser_type} launched") - contexts = await asyncio.gather( - *[ - self._create_browser_context(name, kwargs) - for name, kwargs in self.context_kwargs.items() - ] + browser_type = getattr(self.playwright, self.browser_type) + if self.persistent_context: + logger.info("Launching single persistent context") + self.contexts[PERSISTENT_CONTEXT_NAME] = await 
browser_type.launch_persistent_context( + **self.context_kwargs[PERSISTENT_CONTEXT_NAME] + ) + if self.default_navigation_timeout is not None: + self.contexts[PERSISTENT_CONTEXT_NAME].set_default_navigation_timeout( + self.default_navigation_timeout + ) + logger.info("Persistent context launched") + else: + logger.info(f"Launching browser {self.browser_type}") + self.browser = await browser_type.launch(**self.launch_options) + logger.info("Launching startup context(s)") + contexts = await asyncio.gather( + *[ + self._create_browser_context(name, kwargs) + for name, kwargs in self.context_kwargs.items() + ] + ) + self.contexts = dict(zip(self.context_kwargs.keys(), contexts)) + self.context_semaphores.update( + {name: asyncio.Semaphore(value=self.max_pages_per_context) for name in self.contexts} ) - self.contexts = dict(zip(self.context_kwargs.keys(), contexts)) - self.context_semaphores = { - name: asyncio.Semaphore(value=self.max_pages_per_context) for name in self.contexts - } + logger.info(f"Browser {self.browser_type} launched") async def _create_browser_context(self, name: str, context_kwargs: dict) -> BrowserContext: context = await self.browser.new_context(**context_kwargs) @@ -125,7 +154,11 @@ async def _create_browser_context(self, name: str, context_kwargs: dict) -> Brow async def _create_page(self, request: Request) -> Page: """Create a new page in a context, also creating a new context if necessary.""" - context_name = request.meta.setdefault("playwright_context", "default") + if self.persistent_context: + context_name = request.meta["playwright_context"] = PERSISTENT_CONTEXT_NAME + else: + context_name = request.meta.setdefault("playwright_context", DEFAULT_CONTEXT_NAME) + context = self.contexts.get(context_name) if context is None: context_kwargs = request.meta.get("playwright_context_kwargs") or {} From c5385d0c578bd524670376ea72c39beda7cc2d5c Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Sat, 14 May 2022 15:47:38 -0300 Subject: [PATCH 
02/10] Launch persistent context (docs) --- README.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/README.md b/README.md index c15e1757..0b3171b7 100644 --- a/README.md +++ b/README.md @@ -91,6 +91,14 @@ TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor" this will be used by all requests which do not explicitly specify a context. See the docs for [`Browser.new_context`](https://playwright.dev/python/docs/api/class-browser#browser-new-context). +* `PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS` (type `Optional[dict]`, default `None`) + + A dictionary of keyword arguments to be passed to + [`BrowserType.launch_persistent_context`](https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-persistent-context). + Use this setting to launch a **single** persistent context. + This setting takes precedence over `PLAYWRIGHT_CONTEXTS`, + i.e. `PLAYWRIGHT_CONTEXTS` is ignored if both are passed. + * `PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT` (type `Optional[float]`, default `None`) The timeout used when requesting pages by Playwright. If `None` or unset, @@ -258,6 +266,10 @@ class AwesomeSpiderWithPage(scrapy.Spider): Multiple [browser contexts](https://playwright.dev/python/docs/browser-contexts) to be launched at startup can be defined via the `PLAYWRIGHT_CONTEXTS` [setting](#settings). +**Note:** this section does not apply if using the +`PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS` setting. In that case, +only one context is used for all operations. 
+ ### Choosing a specific context for a request Pass the name of the desired context in the `playwright_context` meta key: From bc7fe9480605aa2bb76bdeae5ef164da18ddc56c Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Sat, 14 May 2022 17:35:07 -0300 Subject: [PATCH 03/10] Reduce duplication --- scrapy_playwright/handler.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/scrapy_playwright/handler.py b/scrapy_playwright/handler.py index e1a25f3f..a5a69bb7 100644 --- a/scrapy_playwright/handler.py +++ b/scrapy_playwright/handler.py @@ -84,7 +84,7 @@ def __init__(self, crawler: Crawler) -> None: else: self.process_request_headers = use_scrapy_headers - # if PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS is present we only launch one context + # context-related settings self.contexts: Dict[str, BrowserContext] = {} self.persistent_context: bool = False self.context_semaphores: Dict[str, asyncio.Semaphore] = {} @@ -114,21 +114,22 @@ def _engine_started(self) -> Deferred: return deferred_from_coro(self._launch_browser()) async def _launch_browser(self) -> None: + """Start the browser instance and the configured contexts. Alternatively, + start only one persistent context if PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS was set. 
+ """ + logger.info(f"Launching browser {self.browser_type}") self.playwright_context_manager = PlaywrightContextManager() self.playwright = await self.playwright_context_manager.start() browser_type = getattr(self.playwright, self.browser_type) if self.persistent_context: logger.info("Launching single persistent context") - self.contexts[PERSISTENT_CONTEXT_NAME] = await browser_type.launch_persistent_context( + context = await browser_type.launch_persistent_context( **self.context_kwargs[PERSISTENT_CONTEXT_NAME] ) - if self.default_navigation_timeout is not None: - self.contexts[PERSISTENT_CONTEXT_NAME].set_default_navigation_timeout( - self.default_navigation_timeout - ) + self._init_browser_context(PERSISTENT_CONTEXT_NAME, context) + self.contexts[PERSISTENT_CONTEXT_NAME] = context logger.info("Persistent context launched") else: - logger.info(f"Launching browser {self.browser_type}") self.browser = await browser_type.launch(**self.launch_options) logger.info("Launching startup context(s)") contexts = await asyncio.gather( @@ -142,15 +143,19 @@ async def _launch_browser(self) -> None: {name: asyncio.Semaphore(value=self.max_pages_per_context) for name in self.contexts} ) logger.info(f"Browser {self.browser_type} launched") + self.stats.set_value("playwright/page_count", self._get_total_page_count()) async def _create_browser_context(self, name: str, context_kwargs: dict) -> BrowserContext: context = await self.browser.new_context(**context_kwargs) + self._init_browser_context(name, context) + return context + + def _init_browser_context(self, name: str, context: BrowserContext) -> None: context.on("close", self._make_close_browser_context_callback(name)) logger.debug("Browser context started: '%s'", name) self.stats.inc_value("playwright/context_count") if self.default_navigation_timeout is not None: context.set_default_navigation_timeout(self.default_navigation_timeout) - return context async def _create_page(self, request: Request) -> Page: """Create a new 
page in a context, also creating a new context if necessary.""" @@ -177,6 +182,7 @@ async def _create_page(self, request: Request) -> Page: len(context.pages), self._get_total_page_count(), ) + self._set_max_concurrent_page_count() if self.default_navigation_timeout is not None: page.set_default_navigation_timeout(self.default_navigation_timeout) @@ -190,11 +196,13 @@ async def _create_page(self, request: Request) -> Page: return page def _get_total_page_count(self): - count = sum([len(context.pages) for context in self.contexts.values()]) + return sum([len(context.pages) for context in self.contexts.values()]) + + def _set_max_concurrent_page_count(self): + count = self._get_total_page_count() current_max_count = self.stats.get_value("playwright/page_count/max_concurrent") if current_max_count is None or count > current_max_count: self.stats.set_value("playwright/page_count/max_concurrent", count) - return count @inlineCallbacks def close(self) -> Deferred: @@ -202,6 +210,7 @@ def close(self) -> Deferred: yield deferred_from_coro(self._close()) async def _close(self) -> None: + await asyncio.gather(*[context.close() for context in self.contexts.values()]) self.contexts.clear() self.context_semaphores.clear() if getattr(self, "browser", None): From c06c791d768982e1cba916cd1e86646e4f2f7072 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Sun, 15 May 2022 00:20:38 -0300 Subject: [PATCH 04/10] Launch persistent context (tests) --- README.md | 9 +++++++ scrapy_playwright/handler.py | 4 ++-- tests/test_browser_contexts.py | 44 ++++++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0b3171b7..82e3fc0a 100644 --- a/README.md +++ b/README.md @@ -99,6 +99,15 @@ TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor" This setting takes precedence over `PLAYWRIGHT_CONTEXTS`, i.e. `PLAYWRIGHT_CONTEXTS` is ignored if both are passed. 
+ Example: + ```python + PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS = { + "user_data_dir": "/path/to/dir", # mandatory + "java_script_enabled": False, + "headless": False, + } + ``` + * `PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT` (type `Optional[float]`, default `None`) The timeout used when requesting pages by Playwright. If `None` or unset, diff --git a/scrapy_playwright/handler.py b/scrapy_playwright/handler.py index a5a69bb7..4d712a4f 100644 --- a/scrapy_playwright/handler.py +++ b/scrapy_playwright/handler.py @@ -93,8 +93,8 @@ def __init__(self, crawler: Crawler) -> None: self.persistent_context = True ctx_kwargs = crawler.settings.getdict("PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS") self.context_kwargs[PERSISTENT_CONTEXT_NAME] = ctx_kwargs - if crawler.settings.getdict("PLAYWRIGHT_CONTEXTS"): - logger.info( + if crawler.settings.get("PLAYWRIGHT_CONTEXTS"): + logger.warning( "Both PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS and PLAYWRIGHT_CONTEXTS" " are set, ignoring PLAYWRIGHT_CONTEXTS" ) diff --git a/tests/test_browser_contexts.py b/tests/test_browser_contexts.py index 0ff96542..c195f784 100644 --- a/tests/test_browser_contexts.py +++ b/tests/test_browser_contexts.py @@ -1,9 +1,15 @@ import asyncio +import logging import platform +import tempfile +from pathlib import Path +from uuid import uuid4 import pytest from scrapy import Spider, Request +from scrapy_playwright.handler import PERSISTENT_CONTEXT_NAME + from tests import make_handler from tests.mockserver import StaticMockServer @@ -97,6 +103,44 @@ async def test_contexts_startup(self): assert cookie["value"] == "bar" assert cookie["domain"] == "example.org" + @pytest.mark.asyncio + async def test_persistent_context(self, caplog): + temp_dir = f"{tempfile.gettempdir()}/{uuid4()}" + settings = { + "PLAYWRIGHT_BROWSER_TYPE": self.browser_type, + "PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT": 3000, + "PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS": { + "user_data_dir": temp_dir, + }, + "PLAYWRIGHT_CONTEXTS": { + "foo": {}, + }, + } + assert 
not Path(temp_dir).exists() + async with make_handler(settings) as handler: + assert handler.persistent_context + assert Path(temp_dir).is_dir() + assert PERSISTENT_CONTEXT_NAME in handler.contexts + assert len(handler.contexts) == 1 + assert len(handler.context_semaphores) == 1 + assert getattr(handler, "browser", None) is None + + with StaticMockServer() as server: + meta = { + "playwright": True, + "playwright_context": "will-be-ignored", + } + req = Request(server.urljoin("/index.html"), meta=meta) + resp = await handler._download_request(req, Spider("foo")) + assert resp.meta["playwright_context"] == PERSISTENT_CONTEXT_NAME + + assert ( + "scrapy-playwright", + logging.WARNING, + "Both PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS and PLAYWRIGHT_CONTEXTS" + " are set, ignoring PLAYWRIGHT_CONTEXTS", + ) in caplog.record_tuples + @pytest.mark.asyncio async def test_contexts_dynamic(self): async with make_handler({"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) as handler: From 10d101b98778b0e20acb9857b1b15221963376f9 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Sun, 15 May 2022 20:38:02 -0300 Subject: [PATCH 05/10] Refactor browser context creation --- scrapy_playwright/handler.py | 118 ++++++++++++++++++--------------- tests/__init__.py | 2 +- tests/test_browser_contexts.py | 4 -- 3 files changed, 64 insertions(+), 60 deletions(-) diff --git a/scrapy_playwright/handler.py b/scrapy_playwright/handler.py index 4d712a4f..a62d0896 100644 --- a/scrapy_playwright/handler.py +++ b/scrapy_playwright/handler.py @@ -2,12 +2,14 @@ import logging import warnings from contextlib import suppress +from dataclasses import dataclass from ipaddress import ip_address from time import time from typing import Awaitable, Callable, Dict, Generator, Optional, Tuple, Type, TypeVar, Union from playwright.async_api import ( BrowserContext, + BrowserType, Page, PlaywrightContextManager, Request as PlaywrightRequest, @@ -41,36 +43,45 @@ logger = logging.getLogger("scrapy-playwright") 
+DEFAULT_BROWSER_TYPE = "chromium" DEFAULT_CONTEXT_NAME = "default" PERSISTENT_CONTEXT_NAME = "persistent" +@dataclass +class BrowserContextWrapper: + context: BrowserContext + semaphore: asyncio.Semaphore + + class ScrapyPlaywrightDownloadHandler(HTTPDownloadHandler): def __init__(self, crawler: Crawler) -> None: - super().__init__(settings=crawler.settings, crawler=crawler) + settings = crawler.settings + super().__init__(settings=settings, crawler=crawler) verify_installed_reactor("twisted.internet.asyncioreactor.AsyncioSelectorReactor") crawler.signals.connect(self._engine_started, signals.engine_started) self.stats = crawler.stats - self.browser_type: str = crawler.settings.get("PLAYWRIGHT_BROWSER_TYPE") or "chromium" - self.max_pages_per_context: int = crawler.settings.getint( + self.browser_type_name = settings.get("PLAYWRIGHT_BROWSER_TYPE") or DEFAULT_BROWSER_TYPE + self.max_pages_per_context: int = settings.getint( "PLAYWRIGHT_MAX_PAGES_PER_CONTEXT" - ) or crawler.settings.getint("CONCURRENT_REQUESTS") - self.launch_options: dict = crawler.settings.getdict("PLAYWRIGHT_LAUNCH_OPTIONS") or {} + ) or settings.getint("CONCURRENT_REQUESTS") + self.launch_options: dict = settings.getdict("PLAYWRIGHT_LAUNCH_OPTIONS") or {} self.default_navigation_timeout: Optional[float] = None - if "PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT" in crawler.settings: + if "PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT" in settings: with suppress(TypeError, ValueError): self.default_navigation_timeout = float( - crawler.settings.get("PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT") + settings.get("PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT") ) - if "PLAYWRIGHT_PROCESS_REQUEST_HEADERS" in crawler.settings: - if crawler.settings["PLAYWRIGHT_PROCESS_REQUEST_HEADERS"] is None: + # header-related settings + if "PLAYWRIGHT_PROCESS_REQUEST_HEADERS" in settings: + if settings["PLAYWRIGHT_PROCESS_REQUEST_HEADERS"] is None: self.process_request_headers = None # use headers from the Playwright request else: 
self.process_request_headers = load_object( - crawler.settings["PLAYWRIGHT_PROCESS_REQUEST_HEADERS"] + settings["PLAYWRIGHT_PROCESS_REQUEST_HEADERS"] ) if self.process_request_headers is use_playwright_headers: warnings.warn( @@ -85,25 +96,24 @@ def __init__(self, crawler: Crawler) -> None: self.process_request_headers = use_scrapy_headers # context-related settings - self.contexts: Dict[str, BrowserContext] = {} + self.contexts: Dict[str, BrowserContextWrapper] = {} self.persistent_context: bool = False - self.context_semaphores: Dict[str, asyncio.Semaphore] = {} self.context_kwargs: dict = {} - if crawler.settings.get("PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS"): + if settings.get("PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS"): self.persistent_context = True - ctx_kwargs = crawler.settings.getdict("PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS") + ctx_kwargs = settings.getdict("PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS") self.context_kwargs[PERSISTENT_CONTEXT_NAME] = ctx_kwargs - if crawler.settings.get("PLAYWRIGHT_CONTEXTS"): + if settings.get("PLAYWRIGHT_CONTEXTS"): logger.warning( "Both PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS and PLAYWRIGHT_CONTEXTS" " are set, ignoring PLAYWRIGHT_CONTEXTS" ) else: - self.context_kwargs = crawler.settings.getdict("PLAYWRIGHT_CONTEXTS") + self.context_kwargs = settings.getdict("PLAYWRIGHT_CONTEXTS") self.abort_request: Optional[Callable[[PlaywrightRequest], Union[Awaitable, bool]]] = None - if crawler.settings.get("PLAYWRIGHT_ABORT_REQUEST"): - self.abort_request = load_object(crawler.settings["PLAYWRIGHT_ABORT_REQUEST"]) + if settings.get("PLAYWRIGHT_ABORT_REQUEST"): + self.abort_request = load_object(settings["PLAYWRIGHT_ABORT_REQUEST"]) @classmethod def from_crawler(cls: Type[PlaywrightHandler], crawler: Crawler) -> PlaywrightHandler: @@ -111,51 +121,54 @@ def from_crawler(cls: Type[PlaywrightHandler], crawler: Crawler) -> PlaywrightHa def _engine_started(self) -> Deferred: """Launch the browser. 
Use the engine_started signal as it supports returning deferreds.""" - return deferred_from_coro(self._launch_browser()) + return deferred_from_coro(self._launch()) - async def _launch_browser(self) -> None: - """Start the browser instance and the configured contexts. Alternatively, - start only one persistent context if PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS was set. + async def _launch(self) -> None: + """Start a browser instance and configured contexts. + Start only one persistent context if PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS was set. """ - logger.info(f"Launching browser {self.browser_type}") + logger.info(f"Launching browser {self.browser_type_name}") self.playwright_context_manager = PlaywrightContextManager() self.playwright = await self.playwright_context_manager.start() - browser_type = getattr(self.playwright, self.browser_type) + self.browser_type: BrowserType = getattr(self.playwright, self.browser_type_name) if self.persistent_context: logger.info("Launching single persistent context") - context = await browser_type.launch_persistent_context( - **self.context_kwargs[PERSISTENT_CONTEXT_NAME] + self.contexts[PERSISTENT_CONTEXT_NAME] = await self._create_browser_context( + name=PERSISTENT_CONTEXT_NAME, + context_kwargs=self.context_kwargs[PERSISTENT_CONTEXT_NAME], + persistent=True, ) - self._init_browser_context(PERSISTENT_CONTEXT_NAME, context) - self.contexts[PERSISTENT_CONTEXT_NAME] = context logger.info("Persistent context launched") else: - self.browser = await browser_type.launch(**self.launch_options) + self.browser = await self.browser_type.launch(**self.launch_options) logger.info("Launching startup context(s)") contexts = await asyncio.gather( *[ - self._create_browser_context(name, kwargs) + self._create_browser_context(name=name, context_kwargs=kwargs) for name, kwargs in self.context_kwargs.items() ] ) self.contexts = dict(zip(self.context_kwargs.keys(), contexts)) - self.context_semaphores.update( - {name: 
asyncio.Semaphore(value=self.max_pages_per_context) for name in self.contexts} - ) - logger.info(f"Browser {self.browser_type} launched") + logger.info("Startup context(s) launched") + logger.info(f"Browser {self.browser_type_name} launched") self.stats.set_value("playwright/page_count", self._get_total_page_count()) - async def _create_browser_context(self, name: str, context_kwargs: dict) -> BrowserContext: - context = await self.browser.new_context(**context_kwargs) - self._init_browser_context(name, context) - return context - - def _init_browser_context(self, name: str, context: BrowserContext) -> None: + async def _create_browser_context( + self, name: str, context_kwargs: dict, persistent: bool = False + ) -> BrowserContextWrapper: + if persistent: + context = await self.browser_type.launch_persistent_context(**context_kwargs) + else: + context = await self.browser.new_context(**context_kwargs) context.on("close", self._make_close_browser_context_callback(name)) logger.debug("Browser context started: '%s'", name) self.stats.inc_value("playwright/context_count") if self.default_navigation_timeout is not None: context.set_default_navigation_timeout(self.default_navigation_timeout) + return BrowserContextWrapper( + context=context, + semaphore=asyncio.Semaphore(value=self.max_pages_per_context), + ) async def _create_page(self, request: Request) -> Page: """Create a new page in a context, also creating a new context if necessary.""" @@ -167,19 +180,17 @@ async def _create_page(self, request: Request) -> Page: context = self.contexts.get(context_name) if context is None: context_kwargs = request.meta.get("playwright_context_kwargs") or {} - context = await self._create_browser_context(context_name, context_kwargs) - self.contexts[context_name] = context - self.context_semaphores[context_name] = asyncio.Semaphore( - value=self.max_pages_per_context + context = self.contexts[context_name] = await self._create_browser_context( + name=context_name, 
context_kwargs=context_kwargs, persistent=False ) - await self.context_semaphores[context_name].acquire() - page = await context.new_page() + await context.semaphore.acquire() + page = await context.context.new_page() self.stats.inc_value("playwright/page_count") logger.debug( "[Context=%s] New page created, page count is %i (%i for all contexts)", context_name, - len(context.pages), + len(context.context.pages), self._get_total_page_count(), ) self._set_max_concurrent_page_count() @@ -196,7 +207,7 @@ async def _create_page(self, request: Request) -> Page: return page def _get_total_page_count(self): - return sum([len(context.pages) for context in self.contexts.values()]) + return sum([len(ctx.context.pages) for ctx in self.contexts.values()]) def _set_max_concurrent_page_count(self): count = self._get_total_page_count() @@ -210,9 +221,8 @@ def close(self) -> Deferred: yield deferred_from_coro(self._close()) async def _close(self) -> None: - await asyncio.gather(*[context.close() for context in self.contexts.values()]) + await asyncio.gather(*[ctx.context.close() for ctx in self.contexts.values()]) self.contexts.clear() - self.context_semaphores.clear() if getattr(self, "browser", None): logger.info("Closing browser") await self.browser.close() @@ -344,8 +354,8 @@ def _increment_response_stats(self, response: PlaywrightResponse) -> None: def _make_close_page_callback(self, context_name: str) -> Callable: def close_page_callback() -> None: - if context_name in self.context_semaphores: - self.context_semaphores[context_name].release() + if context_name in self.contexts: + self.contexts[context_name].semaphore.release() return close_page_callback @@ -354,8 +364,6 @@ def close_browser_context_callback() -> None: logger.debug("Browser context closed: '%s'", name) if name in self.contexts: self.contexts.pop(name) - if name in self.context_semaphores: - self.context_semaphores.pop(name) return close_browser_context_callback @@ -376,7 +384,7 @@ async def 
_request_handler(route: Route, playwright_request: PlaywrightRequest) if self.process_request_headers is not None: overrides["headers"] = await _await_if_necessary( self.process_request_headers( - self.browser_type, playwright_request, scrapy_headers + self.browser_type_name, playwright_request, scrapy_headers ) ) # the request that reaches the callback should contain the final headers diff --git a/tests/__init__.py b/tests/__init__.py index 5f2a34bb..f5385194 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -11,7 +11,7 @@ async def make_handler(settings_dict: dict): crawler = get_crawler(settings_dict=settings_dict) handler = ScrapyPlaywrightDownloadHandler(crawler=crawler) try: - await handler._launch_browser() + await handler._launch() except: # noqa (E722), pylint: disable=bare-except pass else: diff --git a/tests/test_browser_contexts.py b/tests/test_browser_contexts.py index c195f784..8fbde126 100644 --- a/tests/test_browser_contexts.py +++ b/tests/test_browser_contexts.py @@ -83,7 +83,6 @@ async def test_contexts_startup(self): } async with make_handler(settings) as handler: assert len(handler.contexts) == 1 - assert len(handler.context_semaphores) == 1 with StaticMockServer() as server: meta = { @@ -122,7 +121,6 @@ async def test_persistent_context(self, caplog): assert Path(temp_dir).is_dir() assert PERSISTENT_CONTEXT_NAME in handler.contexts assert len(handler.contexts) == 1 - assert len(handler.context_semaphores) == 1 assert getattr(handler, "browser", None) is None with StaticMockServer() as server: @@ -145,7 +143,6 @@ async def test_persistent_context(self, caplog): async def test_contexts_dynamic(self): async with make_handler({"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) as handler: assert len(handler.contexts) == 0 - assert len(handler.context_semaphores) == 0 with StaticMockServer() as server: meta = { @@ -168,7 +165,6 @@ async def test_contexts_dynamic(self): resp = await handler._download_request(req, Spider("foo")) assert 
len(handler.contexts) == 1 - assert len(handler.context_semaphores) == 1 page = resp.meta["playwright_page"] storage_state = await page.context.storage_state() From 2ff494f2030a4857b5ec73b522bae74be6bd5da6 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Sun, 15 May 2022 21:20:02 -0300 Subject: [PATCH 06/10] Allow persistent and non-persistent contexts at the same time --- README.md | 41 ++++++------- examples/persistent_context.py | 17 ++++-- scrapy_playwright/handler.py | 103 +++++++++++++++------------------ tests/test_browser_contexts.py | 48 ++++++--------- 4 files changed, 96 insertions(+), 113 deletions(-) diff --git a/README.md b/README.md index 82e3fc0a..49aa51c7 100644 --- a/README.md +++ b/README.md @@ -78,35 +78,25 @@ TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor" It should be a mapping of (name, keyword arguments). For instance: ```python { - "first": { + "foobar": { "context_arg1": "value", "context_arg2": "value", }, - "second": { + "default": { + "context_arg1": "value", + "context_arg2": "value", + }, + "persistent": { + "user_data_dir": "/path/to/dir", # will be a persistent context + "context_arg1": "value", }, } ``` - A default context (called `default`) is created if no contexts are defined, - this will be used by all requests which do not explicitly specify a context. - See the docs for [`Browser.new_context`](https://playwright.dev/python/docs/api/class-browser#browser-new-context). -* `PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS` (type `Optional[dict]`, default `None`) + See the section on [Multiple browser contexts](#multiple-browser-contexts) + for more information. - A dictionary of keyword arguments to be passed to - [`BrowserType.launch_persistent_context`](https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-persistent-context). - Use this setting to launch a **single** persistent context. - This setting takes precedence over `PLAYWRIGHT_CONTEXTS`, - i.e. 
`PLAYWRIGHT_CONTEXTS` is ignored if both are passed. - - Example: - ```python - PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS = { - "user_data_dir": "/path/to/dir", # mandatory - "java_script_enabled": False, - "headless": False, - } - ``` + See also the docs for [`Browser.new_context`](https://playwright.dev/python/docs/api/class-browser#browser-new-context). * `PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT` (type `Optional[float]`, default `None`) @@ -290,6 +280,17 @@ yield scrapy.Request( ) ``` +### Default context + +If a request does not explicitly indicate a context via the `playwright_context` +meta key, it falls back to using a general context called `default`. This `default` +context can also be customized on startup via the `PLAYWRIGHT_CONTEXTS` setting. + +### Persistent contexts + +Pass a value for the `user_data_dir` keyword argument to launch a context as +**persistent** (see [`BrowserType.launch_persistent_context`](https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-persistent-context)). + ### Creating a context during a crawl If the context specified in the `playwright_context` meta key does not exist, it will be created. 
diff --git a/examples/persistent_context.py b/examples/persistent_context.py index 906389ef..f4d1cddb 100644 --- a/examples/persistent_context.py +++ b/examples/persistent_context.py @@ -14,17 +14,22 @@ class PersistentContextSpider(Spider): # "http": "scrapy_playwright.handler.ScrapyPlaywrightDownloadHandler", }, "" - "PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS": { - "user_data_dir": str(Path.home() / "playwright-persistent-context"), - "java_script_enabled": False, - "extra_http_headers": {"Asdf": "Qwerty"}, - "user_agent": "foobar", + "PLAYWRIGHT_CONTEXTS": { + "foobar": { + "user_data_dir": str(Path.home() / "playwright-persistent-context"), + "java_script_enabled": False, + "extra_http_headers": {"Asdf": "Qwerty"}, + "user_agent": "foobar", + } }, "PLAYWRIGHT_PROCESS_REQUEST_HEADERS": None, } def start_requests(self): - yield Request(url="https://httpbin.org/get", meta={"playwright": True}) + yield Request( + url="https://httpbin.org/get", + meta={"playwright": True, "playwright_context": "foobar"}, + ) def parse(self, response): content = response.css("pre::text").get() diff --git a/scrapy_playwright/handler.py b/scrapy_playwright/handler.py index a62d0896..dda76f9b 100644 --- a/scrapy_playwright/handler.py +++ b/scrapy_playwright/handler.py @@ -8,6 +8,7 @@ from typing import Awaitable, Callable, Dict, Generator, Optional, Tuple, Type, TypeVar, Union from playwright.async_api import ( + Browser, BrowserContext, BrowserType, Page, @@ -45,13 +46,14 @@ DEFAULT_BROWSER_TYPE = "chromium" DEFAULT_CONTEXT_NAME = "default" -PERSISTENT_CONTEXT_NAME = "persistent" +PERSISTENT_CONTEXT_PATH_KEY = "user_data_dir" @dataclass class BrowserContextWrapper: context: BrowserContext semaphore: asyncio.Semaphore + persistent: bool class ScrapyPlaywrightDownloadHandler(HTTPDownloadHandler): @@ -62,6 +64,9 @@ def __init__(self, crawler: Crawler) -> None: crawler.signals.connect(self._engine_started, signals.engine_started) self.stats = crawler.stats + self.browser_launch_lock = 
asyncio.Lock() + self.context_launch_lock = asyncio.Lock() + self.browser: Optional[Browser] = None self.browser_type_name = settings.get("PLAYWRIGHT_BROWSER_TYPE") or DEFAULT_BROWSER_TYPE self.max_pages_per_context: int = settings.getint( "PLAYWRIGHT_MAX_PAGES_PER_CONTEXT" @@ -97,19 +102,7 @@ def __init__(self, crawler: Crawler) -> None: # context-related settings self.contexts: Dict[str, BrowserContextWrapper] = {} - self.persistent_context: bool = False - self.context_kwargs: dict = {} - if settings.get("PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS"): - self.persistent_context = True - ctx_kwargs = settings.getdict("PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS") - self.context_kwargs[PERSISTENT_CONTEXT_NAME] = ctx_kwargs - if settings.get("PLAYWRIGHT_CONTEXTS"): - logger.warning( - "Both PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS and PLAYWRIGHT_CONTEXTS" - " are set, ignoring PLAYWRIGHT_CONTEXTS" - ) - else: - self.context_kwargs = settings.getdict("PLAYWRIGHT_CONTEXTS") + self.context_kwargs: dict = settings.getdict("PLAYWRIGHT_CONTEXTS") self.abort_request: Optional[Callable[[PlaywrightRequest], Union[Awaitable, bool]]] = None if settings.get("PLAYWRIGHT_ABORT_REQUEST"): @@ -124,65 +117,63 @@ def _engine_started(self) -> Deferred: return deferred_from_coro(self._launch()) async def _launch(self) -> None: - """Start a browser instance and configured contexts. - Start only one persistent context if PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS was set. 
- """ - logger.info(f"Launching browser {self.browser_type_name}") + """Launch Playwright manager and configured startup context(s).""" self.playwright_context_manager = PlaywrightContextManager() self.playwright = await self.playwright_context_manager.start() self.browser_type: BrowserType = getattr(self.playwright, self.browser_type_name) - if self.persistent_context: - logger.info("Launching single persistent context") - self.contexts[PERSISTENT_CONTEXT_NAME] = await self._create_browser_context( - name=PERSISTENT_CONTEXT_NAME, - context_kwargs=self.context_kwargs[PERSISTENT_CONTEXT_NAME], - persistent=True, - ) - logger.info("Persistent context launched") - else: - self.browser = await self.browser_type.launch(**self.launch_options) - logger.info("Launching startup context(s)") - contexts = await asyncio.gather( - *[ - self._create_browser_context(name=name, context_kwargs=kwargs) - for name, kwargs in self.context_kwargs.items() - ] - ) - self.contexts = dict(zip(self.context_kwargs.keys(), contexts)) - logger.info("Startup context(s) launched") - logger.info(f"Browser {self.browser_type_name} launched") + logger.info("Launching startup context(s)") + contexts = await asyncio.gather( + *[ + self._create_browser_context(name=name, context_kwargs=kwargs) + for name, kwargs in self.context_kwargs.items() + ] + ) + self.contexts = dict(zip(self.context_kwargs.keys(), contexts)) + logger.info("Startup context(s) launched") self.stats.set_value("playwright/page_count", self._get_total_page_count()) + async def _maybe_launch_browser(self) -> None: + async with self.browser_launch_lock: + if self.browser is None: + logger.info(f"Launching browser {self.browser_type.name}") + self.browser = await self.browser_type.launch(**self.launch_options) + logger.info(f"Browser {self.browser_type.name} launched") + async def _create_browser_context( - self, name: str, context_kwargs: dict, persistent: bool = False + self, name: str, context_kwargs: Optional[dict] ) -> 
BrowserContextWrapper: - if persistent: + """Create a new context, also launching a browser if necessary.""" + context_kwargs = context_kwargs or {} + if context_kwargs.get(PERSISTENT_CONTEXT_PATH_KEY): context = await self.browser_type.launch_persistent_context(**context_kwargs) + persistent = True else: + await self._maybe_launch_browser() + assert self.browser # nosec[assert_used] (typing) context = await self.browser.new_context(**context_kwargs) - context.on("close", self._make_close_browser_context_callback(name)) - logger.debug("Browser context started: '%s'", name) + persistent = False + context.on("close", self._make_close_browser_context_callback(name, persistent)) + logger.debug(f"Browser context started: '{name}' (persistent={persistent})") self.stats.inc_value("playwright/context_count") if self.default_navigation_timeout is not None: context.set_default_navigation_timeout(self.default_navigation_timeout) return BrowserContextWrapper( context=context, semaphore=asyncio.Semaphore(value=self.max_pages_per_context), + persistent=persistent, ) async def _create_page(self, request: Request) -> Page: """Create a new page in a context, also creating a new context if necessary.""" - if self.persistent_context: - context_name = request.meta["playwright_context"] = PERSISTENT_CONTEXT_NAME - else: - context_name = request.meta.setdefault("playwright_context", DEFAULT_CONTEXT_NAME) - - context = self.contexts.get(context_name) - if context is None: - context_kwargs = request.meta.get("playwright_context_kwargs") or {} - context = self.contexts[context_name] = await self._create_browser_context( - name=context_name, context_kwargs=context_kwargs, persistent=False - ) + context_name = request.meta.setdefault("playwright_context", DEFAULT_CONTEXT_NAME) + # this block needs to be locked because several attempts to launch a context + # with the same name could happen at the same time from different requests + async with self.context_launch_lock: + context = 
self.contexts.get(context_name) + if context is None: + context = self.contexts[context_name] = await self._create_browser_context( + name=context_name, context_kwargs=request.meta.get("playwright_context_kwargs") + ) await context.semaphore.acquire() page = await context.context.new_page() @@ -223,7 +214,7 @@ def close(self) -> Deferred: async def _close(self) -> None: await asyncio.gather(*[ctx.context.close() for ctx in self.contexts.values()]) self.contexts.clear() - if getattr(self, "browser", None): + if self.browser is not None: logger.info("Closing browser") await self.browser.close() await self.playwright_context_manager.__aexit__() @@ -359,9 +350,9 @@ def close_page_callback() -> None: return close_page_callback - def _make_close_browser_context_callback(self, name: str) -> Callable: + def _make_close_browser_context_callback(self, name: str, persistent: bool) -> Callable: def close_browser_context_callback() -> None: - logger.debug("Browser context closed: '%s'", name) + logger.debug(f"Browser context closed: '{name}' (persistent={persistent})") if name in self.contexts: self.contexts.pop(name) diff --git a/tests/test_browser_contexts.py b/tests/test_browser_contexts.py index 8fbde126..dc9880b0 100644 --- a/tests/test_browser_contexts.py +++ b/tests/test_browser_contexts.py @@ -1,5 +1,4 @@ import asyncio -import logging import platform import tempfile from pathlib import Path @@ -8,8 +7,6 @@ import pytest from scrapy import Spider, Request -from scrapy_playwright.handler import PERSISTENT_CONTEXT_NAME - from tests import make_handler from tests.mockserver import StaticMockServer @@ -103,41 +100,30 @@ async def test_contexts_startup(self): assert cookie["domain"] == "example.org" @pytest.mark.asyncio - async def test_persistent_context(self, caplog): - temp_dir = f"{tempfile.gettempdir()}/{uuid4()}" + async def test_persistent_contexts(self): + temp_dir_foo = f"{tempfile.gettempdir()}/{uuid4()}" + temp_dir_bar = f"{tempfile.gettempdir()}/{uuid4()}" 
settings = { "PLAYWRIGHT_BROWSER_TYPE": self.browser_type, "PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT": 3000, - "PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS": { - "user_data_dir": temp_dir, - }, "PLAYWRIGHT_CONTEXTS": { - "foo": {}, + "foo": { + "user_data_dir": temp_dir_foo, + }, + "bar": { + "user_data_dir": temp_dir_bar, + }, }, } - assert not Path(temp_dir).exists() + assert not Path(temp_dir_foo).exists() + assert not Path(temp_dir_bar).exists() async with make_handler(settings) as handler: - assert handler.persistent_context - assert Path(temp_dir).is_dir() - assert PERSISTENT_CONTEXT_NAME in handler.contexts - assert len(handler.contexts) == 1 - assert getattr(handler, "browser", None) is None - - with StaticMockServer() as server: - meta = { - "playwright": True, - "playwright_context": "will-be-ignored", - } - req = Request(server.urljoin("/index.html"), meta=meta) - resp = await handler._download_request(req, Spider("foo")) - assert resp.meta["playwright_context"] == PERSISTENT_CONTEXT_NAME - - assert ( - "scrapy-playwright", - logging.WARNING, - "Both PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS and PLAYWRIGHT_CONTEXTS" - " are set, ignoring PLAYWRIGHT_CONTEXTS", - ) in caplog.record_tuples + assert Path(temp_dir_foo).is_dir() + assert Path(temp_dir_bar).is_dir() + assert len(handler.contexts) == 2 + assert handler.contexts["foo"].persistent + assert handler.contexts["bar"].persistent + assert handler.browser is None @pytest.mark.asyncio async def test_contexts_dynamic(self): From 1fc7701872dea5fcf7028a483b3681287d00d41d Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Sun, 15 May 2022 21:32:31 -0300 Subject: [PATCH 07/10] Stats: persistent context count --- scrapy_playwright/handler.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scrapy_playwright/handler.py b/scrapy_playwright/handler.py index dda76f9b..a5e49de4 100644 --- a/scrapy_playwright/handler.py +++ b/scrapy_playwright/handler.py @@ -147,11 +147,13 @@ async def _create_browser_context( if 
context_kwargs.get(PERSISTENT_CONTEXT_PATH_KEY): context = await self.browser_type.launch_persistent_context(**context_kwargs) persistent = True + self.stats.inc_value("playwright/context_count/persistent") else: await self._maybe_launch_browser() assert self.browser # nosec[assert_used] (typing) context = await self.browser.new_context(**context_kwargs) persistent = False + self.stats.inc_value("playwright/context_count/non-persistent") context.on("close", self._make_close_browser_context_callback(name, persistent)) logger.debug(f"Browser context started: '{name}' (persistent={persistent})") self.stats.inc_value("playwright/context_count") From 76777d16f9018417ebd77cbfdd3fc3fe6bc73ff1 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Mon, 16 May 2022 01:35:45 -0300 Subject: [PATCH 08/10] Tidy up --- examples/persistent_context.py | 1 - scrapy_playwright/handler.py | 33 +++++++++++++++++---------------- tests/test_browser_contexts.py | 2 +- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/examples/persistent_context.py b/examples/persistent_context.py index f4d1cddb..b107b008 100644 --- a/examples/persistent_context.py +++ b/examples/persistent_context.py @@ -13,7 +13,6 @@ class PersistentContextSpider(Spider): "https": "scrapy_playwright.handler.ScrapyPlaywrightDownloadHandler", # "http": "scrapy_playwright.handler.ScrapyPlaywrightDownloadHandler", }, - "" "PLAYWRIGHT_CONTEXTS": { "foobar": { "user_data_dir": str(Path.home() / "playwright-persistent-context"), diff --git a/scrapy_playwright/handler.py b/scrapy_playwright/handler.py index a5e49de4..201fde89 100644 --- a/scrapy_playwright/handler.py +++ b/scrapy_playwright/handler.py @@ -66,7 +66,6 @@ def __init__(self, crawler: Crawler) -> None: self.browser_launch_lock = asyncio.Lock() self.context_launch_lock = asyncio.Lock() - self.browser: Optional[Browser] = None self.browser_type_name = settings.get("PLAYWRIGHT_BROWSER_TYPE") or DEFAULT_BROWSER_TYPE self.max_pages_per_context: int = 
settings.getint( "PLAYWRIGHT_MAX_PAGES_PER_CONTEXT" @@ -83,7 +82,7 @@ def __init__(self, crawler: Crawler) -> None: # header-related settings if "PLAYWRIGHT_PROCESS_REQUEST_HEADERS" in settings: if settings["PLAYWRIGHT_PROCESS_REQUEST_HEADERS"] is None: - self.process_request_headers = None # use headers from the Playwright request + self.process_request_headers = None else: self.process_request_headers = load_object( settings["PLAYWRIGHT_PROCESS_REQUEST_HEADERS"] @@ -118,25 +117,27 @@ def _engine_started(self) -> Deferred: async def _launch(self) -> None: """Launch Playwright manager and configured startup context(s).""" + logger.info("Starting download handler") self.playwright_context_manager = PlaywrightContextManager() self.playwright = await self.playwright_context_manager.start() self.browser_type: BrowserType = getattr(self.playwright, self.browser_type_name) - logger.info("Launching startup context(s)") - contexts = await asyncio.gather( - *[ - self._create_browser_context(name=name, context_kwargs=kwargs) - for name, kwargs in self.context_kwargs.items() - ] - ) - self.contexts = dict(zip(self.context_kwargs.keys(), contexts)) - logger.info("Startup context(s) launched") - self.stats.set_value("playwright/page_count", self._get_total_page_count()) + if self.context_kwargs: + logger.info(f"Launching {len(self.context_kwargs)} startup context(s)") + contexts = await asyncio.gather( + *[ + self._create_browser_context(name=name, context_kwargs=kwargs) + for name, kwargs in self.context_kwargs.items() + ] + ) + self.contexts = dict(zip(self.context_kwargs.keys(), contexts)) + logger.info("Startup context(s) launched") + self.stats.set_value("playwright/page_count", self._get_total_page_count()) async def _maybe_launch_browser(self) -> None: async with self.browser_launch_lock: - if self.browser is None: + if not hasattr(self, "browser"): logger.info(f"Launching browser {self.browser_type.name}") - self.browser = await 
self.browser_type.launch(**self.launch_options) + self.browser: Browser = await self.browser_type.launch(**self.launch_options) logger.info(f"Browser {self.browser_type.name} launched") async def _create_browser_context( @@ -150,7 +151,6 @@ async def _create_browser_context( self.stats.inc_value("playwright/context_count/persistent") else: await self._maybe_launch_browser() - assert self.browser # nosec[assert_used] (typing) context = await self.browser.new_context(**context_kwargs) persistent = False self.stats.inc_value("playwright/context_count/non-persistent") @@ -210,13 +210,14 @@ def _set_max_concurrent_page_count(self): @inlineCallbacks def close(self) -> Deferred: + logger.info("Closing download handler") yield super().close() yield deferred_from_coro(self._close()) async def _close(self) -> None: await asyncio.gather(*[ctx.context.close() for ctx in self.contexts.values()]) self.contexts.clear() - if self.browser is not None: + if hasattr(self, "browser"): logger.info("Closing browser") await self.browser.close() await self.playwright_context_manager.__aexit__() diff --git a/tests/test_browser_contexts.py b/tests/test_browser_contexts.py index dc9880b0..e6c02eca 100644 --- a/tests/test_browser_contexts.py +++ b/tests/test_browser_contexts.py @@ -123,7 +123,7 @@ async def test_persistent_contexts(self): assert len(handler.contexts) == 2 assert handler.contexts["foo"].persistent assert handler.contexts["bar"].persistent - assert handler.browser is None + assert not hasattr(handler, "browser") @pytest.mark.asyncio async def test_contexts_dynamic(self): From a6f58e09a12c46bbec4cd5072a4d90acdfeec4d4 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Mon, 16 May 2022 01:40:12 -0300 Subject: [PATCH 09/10] Remove outdated section in readme --- README.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/README.md b/README.md index 49aa51c7..7a0659db 100644 --- a/README.md +++ b/README.md @@ -265,10 +265,6 @@ class AwesomeSpiderWithPage(scrapy.Spider): 
Multiple [browser contexts](https://playwright.dev/python/docs/browser-contexts) to be launched at startup can be defined via the `PLAYWRIGHT_CONTEXTS` [setting](#settings). -**Note:** this section doesn not apply if using the -`PLAYWRIGHT_PERSISTENT_CONTEXT_KWARGS` setting. In that case, -only one context is used for all operations. - ### Choosing a specific context for a request Pass the name of the desired context in the `playwright_context` meta key: From e119c628576665a1ccb924643fafa51fdcb1ff22 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Tue, 17 May 2022 17:52:34 -0300 Subject: [PATCH 10/10] Adjust persistent context tests --- tests/test_browser_contexts.py | 68 ++++++++++++++++++++++++------- tests/test_playwright_requests.py | 22 ---------- 2 files changed, 54 insertions(+), 36 deletions(-) diff --git a/tests/test_browser_contexts.py b/tests/test_browser_contexts.py index e6c02eca..ee34e29a 100644 --- a/tests/test_browser_contexts.py +++ b/tests/test_browser_contexts.py @@ -5,7 +5,9 @@ from uuid import uuid4 import pytest +from playwright.async_api import Browser, TimeoutError as PlaywrightTimeoutError from scrapy import Spider, Request +from scrapy_playwright.page import PageMethod from tests import make_handler from tests.mockserver import StaticMockServer @@ -25,6 +27,28 @@ async def test_contexts_max_pages_setting(self): async with make_handler(settings) as handler: assert handler.max_pages_per_context == 9876 + @pytest.mark.asyncio + async def test_context_kwargs(self): + settings_dict = { + "PLAYWRIGHT_BROWSER_TYPE": self.browser_type, + "PLAYWRIGHT_CONTEXTS": { + "default": {"java_script_enabled": False}, + }, + } + async with make_handler(settings_dict) as handler: + with StaticMockServer() as server: + req = Request( + url=server.urljoin("/scroll.html"), + meta={ + "playwright": True, + "playwright_page_methods": [ + PageMethod("wait_for_selector", selector="div.quote", timeout=1000), + ], + }, + ) + with 
pytest.raises(PlaywrightTimeoutError): + await handler._download_request(req, Spider("foo")) + @pytest.mark.asyncio async def test_contexts_max_pages(self): settings = { @@ -100,30 +124,46 @@ async def test_contexts_startup(self): assert cookie["domain"] == "example.org" @pytest.mark.asyncio - async def test_persistent_contexts(self): - temp_dir_foo = f"{tempfile.gettempdir()}/{uuid4()}" - temp_dir_bar = f"{tempfile.gettempdir()}/{uuid4()}" + async def test_persistent_context(self): + temp_dir = f"{tempfile.gettempdir()}/{uuid4()}" + settings = { + "PLAYWRIGHT_BROWSER_TYPE": self.browser_type, + "PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT": 3000, + "PLAYWRIGHT_CONTEXTS": { + "persistent": { + "user_data_dir": temp_dir, + }, + }, + } + assert not Path(temp_dir).exists() + async with make_handler(settings) as handler: + assert Path(temp_dir).is_dir() + assert len(handler.contexts) == 1 + assert handler.contexts["persistent"].persistent + assert not hasattr(handler, "browser") + + @pytest.mark.asyncio + async def test_mixed_persistent_contexts(self): + temp_dir = f"{tempfile.gettempdir()}/{uuid4()}" settings = { "PLAYWRIGHT_BROWSER_TYPE": self.browser_type, "PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT": 3000, "PLAYWRIGHT_CONTEXTS": { - "foo": { - "user_data_dir": temp_dir_foo, + "persistent": { + "user_data_dir": temp_dir, }, - "bar": { - "user_data_dir": temp_dir_bar, + "non-persistent": { + "java_script_enabled": False, }, }, } - assert not Path(temp_dir_foo).exists() - assert not Path(temp_dir_bar).exists() + assert not Path(temp_dir).exists() async with make_handler(settings) as handler: - assert Path(temp_dir_foo).is_dir() - assert Path(temp_dir_bar).is_dir() + assert Path(temp_dir).is_dir() assert len(handler.contexts) == 2 - assert handler.contexts["foo"].persistent - assert handler.contexts["bar"].persistent - assert not hasattr(handler, "browser") + assert handler.contexts["persistent"].persistent + assert not handler.contexts["non-persistent"].persistent + assert 
isinstance(handler.browser, Browser) @pytest.mark.asyncio async def test_contexts_dynamic(self): diff --git a/tests/test_playwright_requests.py b/tests/test_playwright_requests.py index d207c14d..ed3fcd87 100644 --- a/tests/test_playwright_requests.py +++ b/tests/test_playwright_requests.py @@ -208,28 +208,6 @@ async def test_route_continue_exception(self, logger): with pytest.raises(ZeroDivisionError): await req_handler(route, playwright_request) - @pytest.mark.asyncio - async def test_context_kwargs(self): - settings_dict = { - "PLAYWRIGHT_BROWSER_TYPE": self.browser_type, - "PLAYWRIGHT_CONTEXTS": { - "default": {"java_script_enabled": False}, - }, - } - async with make_handler(settings_dict) as handler: - with StaticMockServer() as server: - req = Request( - url=server.urljoin("/scroll.html"), - meta={ - "playwright": True, - "playwright_page_methods": [ - PageMethod("wait_for_selector", selector="div.quote", timeout=1000), - ], - }, - ) - with pytest.raises(PlaywrightTimeoutError): - await handler._download_request(req, Spider("foo")) - @pytest.mark.asyncio async def test_page_method_screenshot(self): async with make_handler({"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) as handler: