Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions scrapy_playwright/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,12 +115,14 @@ async def _create_page(self, request: Request) -> Page:
@inlineCallbacks
def close(self) -> Deferred:
yield super().close()
for context in self.contexts.copy().values():
yield deferred_from_coro(context.close())
yield deferred_from_coro(self._close())

async def _close(self) -> None:
self.contexts.clear()
if getattr(self, "browser", None):
logger.info("Closing browser")
yield deferred_from_coro(self.browser.close())
yield deferred_from_coro(self.playwright_context_manager.__aexit__())
await self.browser.close()
await self.playwright_context_manager.__aexit__()

def download_request(self, request: Request, spider: Spider) -> Deferred:
if request.meta.get("playwright"):
Expand Down
20 changes: 20 additions & 0 deletions tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from contextlib import asynccontextmanager

from scrapy.utils.test import get_crawler


@asynccontextmanager
async def make_handler(settings_dict: dict):
    """Yield an initialized download handler and close it on exit.

    Builds a crawler from ``settings_dict``, instantiates
    ``ScrapyPlaywrightDownloadHandler`` with it and launches the browser
    before handing the handler to the ``async with`` body.

    The handler's ``_close()`` coroutine is always awaited on exit, whether
    the body finished normally, the body raised, or launching the browser
    failed.

    Any exception raised while launching the browser propagates to the
    caller unchanged. Swallowing it would end the generator without
    yielding, which ``asynccontextmanager`` reports as an unrelated
    ``RuntimeError("generator didn't yield")`` that hides the real cause.
    """
    # Imported lazily so that importing the tests package does not pull in
    # the handler module as a side effect.
    from scrapy_playwright.handler import ScrapyPlaywrightDownloadHandler

    crawler = get_crawler(settings_dict=settings_dict)
    handler = ScrapyPlaywrightDownloadHandler(crawler=crawler)
    try:
        await handler._launch_browser()
        yield handler
    finally:
        # Runs on success, on body failure and on launch failure alike.
        await handler._close()
153 changes: 70 additions & 83 deletions tests/test_browser_contexts.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,8 @@

import pytest
from scrapy import Spider, Request
from scrapy.utils.test import get_crawler

from scrapy_playwright.handler import ScrapyPlaywrightDownloadHandler

from tests import make_handler
from tests.mockserver import StaticMockServer


Expand All @@ -29,63 +27,56 @@ async def test_contexts_startup(self):
},
},
}
handler = ScrapyPlaywrightDownloadHandler(get_crawler(settings_dict=settings))
await handler._launch_browser()

with StaticMockServer() as server:
meta = {
"playwright": True,
"playwright_include_page": True,
"playwright_context": "first",
}
req = Request(server.urljoin("/index.html"), meta=meta)
resp = await handler._download_request(req, Spider("foo"))

page = resp.meta["playwright_page"]
storage_state = await page.context.storage_state()
cookie = storage_state["cookies"][0]
assert cookie["name"] == "foo"
assert cookie["value"] == "bar"
assert cookie["domain"] == "example.org"

await page.close()
await handler.browser.close()
async with make_handler(settings) as handler:
with StaticMockServer() as server:
meta = {
"playwright": True,
"playwright_include_page": True,
"playwright_context": "first",
}
req = Request(server.urljoin("/index.html"), meta=meta)
resp = await handler._download_request(req, Spider("foo"))

page = resp.meta["playwright_page"]
storage_state = await page.context.storage_state()
await page.context.close()
await page.close()
cookie = storage_state["cookies"][0]
assert cookie["name"] == "foo"
assert cookie["value"] == "bar"
assert cookie["domain"] == "example.org"

@pytest.mark.asyncio
async def test_contexts_dynamic(self):
settings = {"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}
handler = ScrapyPlaywrightDownloadHandler(get_crawler(settings_dict=settings))
await handler._launch_browser()

with StaticMockServer() as server:
meta = {
"playwright": True,
"playwright_include_page": True,
"playwright_context": "new",
"playwright_context_kwargs": {
"storage_state": {
"cookies": [
{
"url": "https://example.org",
"name": "asdf",
"value": "qwerty",
},
],
async with make_handler({"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) as handler:

with StaticMockServer() as server:
meta = {
"playwright": True,
"playwright_include_page": True,
"playwright_context": "new",
"playwright_context_kwargs": {
"storage_state": {
"cookies": [
{
"url": "https://example.org",
"name": "asdf",
"value": "qwerty",
},
],
},
},
},
}
req = Request(server.urljoin("/index.html"), meta=meta)
resp = await handler._download_request(req, Spider("foo"))

page = resp.meta["playwright_page"]
storage_state = await page.context.storage_state()
cookie = storage_state["cookies"][0]
assert cookie["name"] == "asdf"
assert cookie["value"] == "qwerty"
assert cookie["domain"] == "example.org"

await page.close()
await handler.browser.close()
}
req = Request(server.urljoin("/index.html"), meta=meta)
resp = await handler._download_request(req, Spider("foo"))

page = resp.meta["playwright_page"]
storage_state = await page.context.storage_state()
await page.close()
cookie = storage_state["cookies"][0]
assert cookie["name"] == "asdf"
assert cookie["value"] == "qwerty"
assert cookie["domain"] == "example.org"

@pytest.mark.asyncio
async def test_deprecated_setting(self):
Expand All @@ -104,33 +95,29 @@ async def test_deprecated_setting(self):
},
}
with warnings.catch_warnings(record=True) as warning_list:
handler = ScrapyPlaywrightDownloadHandler(get_crawler(settings_dict=settings))
await handler._launch_browser()

assert warning_list[0].category is DeprecationWarning
assert str(warning_list[0].message) == (
"The PLAYWRIGHT_CONTEXT_ARGS setting is deprecated, please use"
" PLAYWRIGHT_CONTEXTS instead. Keyword arguments defined in"
" PLAYWRIGHT_CONTEXT_ARGS will be used when creating the 'default' context"
)

with StaticMockServer() as server:
meta = {
"playwright": True,
"playwright_include_page": True,
}
req = Request(server.urljoin("/index.html"), meta=meta)
resp = await handler._download_request(req, Spider("foo"))

page = resp.meta["playwright_page"]
storage_state = await page.context.storage_state()
cookie = storage_state["cookies"][0]
assert cookie["name"] == "asdf"
assert cookie["value"] == "qwerty"
assert cookie["domain"] == "example.org"

await page.close()
await handler.browser.close()
async with make_handler(settings) as handler:
assert warning_list[0].category is DeprecationWarning
assert str(warning_list[0].message) == (
"The PLAYWRIGHT_CONTEXT_ARGS setting is deprecated, please use"
" PLAYWRIGHT_CONTEXTS instead. Keyword arguments defined in"
" PLAYWRIGHT_CONTEXT_ARGS will be used when creating the 'default' context"
)

with StaticMockServer() as server:
meta = {
"playwright": True,
"playwright_include_page": True,
}
req = Request(server.urljoin("/index.html"), meta=meta)
resp = await handler._download_request(req, Spider("foo"))

page = resp.meta["playwright_page"]
storage_state = await page.context.storage_state()
await page.close()
cookie = storage_state["cookies"][0]
assert cookie["name"] == "asdf"
assert cookie["value"] == "qwerty"
assert cookie["domain"] == "example.org"


class TestCaseMultipleContextsChromium(MixinTestCaseMultipleContexts):
Expand Down
Loading