From 09d76de536397bf2ce55bbc8f63a6a9789372036 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Sat, 9 Oct 2021 20:51:02 -0300 Subject: [PATCH 1/6] Closing coroutine, simplify tests --- scrapy_playwright/handler.py | 9 +- tests/test_playwright_requests.py | 433 +++++++++++++----------------- 2 files changed, 194 insertions(+), 248 deletions(-) diff --git a/scrapy_playwright/handler.py b/scrapy_playwright/handler.py index dfc94e1d..a9a7f0d4 100644 --- a/scrapy_playwright/handler.py +++ b/scrapy_playwright/handler.py @@ -115,12 +115,15 @@ async def _create_page(self, request: Request) -> Page: @inlineCallbacks def close(self) -> Deferred: yield super().close() + yield deferred_from_coro(self._close()) + + async def _close(self) -> None: for context in self.contexts.copy().values(): - yield deferred_from_coro(context.close()) + await context.close() if getattr(self, "browser", None): logger.info("Closing browser") - yield deferred_from_coro(self.browser.close()) - yield deferred_from_coro(self.playwright_context_manager.__aexit__()) + await self.browser.close() + await self.playwright_context_manager.__aexit__() def download_request(self, request: Request, spider: Spider) -> Deferred: if request.meta.get("playwright"): diff --git a/tests/test_playwright_requests.py b/tests/test_playwright_requests.py index 5ef7341e..41ef4d15 100644 --- a/tests/test_playwright_requests.py +++ b/tests/test_playwright_requests.py @@ -1,6 +1,7 @@ import logging import platform import subprocess +from contextlib import asynccontextmanager from tempfile import NamedTemporaryFile import pytest @@ -23,6 +24,18 @@ def get_mimetype(file): ).stdout.strip() +@asynccontextmanager +async def make_handler(settings_dict: dict): + """Convenience function to obtain an initialized handler and close it gracefully""" + try: + crawler = get_crawler(settings_dict=settings_dict) + handler = ScrapyPlaywrightDownloadHandler(crawler=crawler) + await handler._launch_browser() + yield handler + finally: + await handler._close() + + class DialogSpider(Spider): """A spider with a method to handle the "dialog" page event""" @@ -36,289 +49,221 @@ async def handle_dialog(self, dialog: Dialog) -> None: class MixinTestCase: @pytest.mark.asyncio async def test_basic_response(self): - handler = ScrapyPlaywrightDownloadHandler( - get_crawler(settings_dict={"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) - ) - await handler._launch_browser() - - with StaticMockServer() as server: - meta = {"playwright": True, "playwright_include_page": True} - req = Request(server.urljoin("/index.html"), meta=meta) - resp = await handler._download_request(req, Spider("foo")) - - assert isinstance(resp, HtmlResponse) - assert resp.request is req - assert resp.url == req.url - assert resp.status == 200 - assert "playwright" in resp.flags - assert resp.css("a::text").getall() == ["Lorem Ipsum", "Infinite Scroll"] - assert isinstance(resp.meta["playwright_page"], PlaywrightPage) - assert resp.meta["playwright_page"].url == resp.url - - await resp.meta["playwright_page"].close() - await handler.browser.close() + async with make_handler({"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) as handler: + with StaticMockServer() as server: + meta = {"playwright": True, "playwright_include_page": True} + req = Request(server.urljoin("/index.html"), meta=meta) + resp = await handler._download_request(req, Spider("foo")) + + assert isinstance(resp, HtmlResponse) + assert resp.request is req + assert resp.url == req.url + assert resp.status == 200 + assert "playwright" in resp.flags + assert resp.css("a::text").getall() == ["Lorem Ipsum", "Infinite Scroll"] + assert isinstance(resp.meta["playwright_page"], PlaywrightPage) + assert resp.meta["playwright_page"].url == resp.url + + await resp.meta["playwright_page"].close() @pytest.mark.asyncio async def test_post_request(self): - handler = ScrapyPlaywrightDownloadHandler( - get_crawler(settings_dict={"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) - ) - await handler._launch_browser() - - with MockServer() as server: - req = FormRequest( - server.urljoin("/"), meta={"playwright": True}, formdata={"foo": "bar"} - ) - resp = await handler._download_request(req, Spider("foo")) - - assert resp.request is req - assert resp.url == req.url - assert resp.status == 200 - assert "playwright" in resp.flags - assert "Request body: foo=bar" in resp.text - - await handler.browser.close() + async with make_handler({"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) as handler: + with MockServer() as server: + req = FormRequest( + server.urljoin("/"), meta={"playwright": True}, formdata={"foo": "bar"} + ) + resp = await handler._download_request(req, Spider("foo")) + + assert resp.request is req + assert resp.url == req.url + assert resp.status == 200 + assert "playwright" in resp.flags + assert "Request body: foo=bar" in resp.text @pytest.mark.asyncio async def test_page_coroutine_navigation(self): - handler = ScrapyPlaywrightDownloadHandler( - get_crawler(settings_dict={"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) - ) - await handler._launch_browser() - - with StaticMockServer() as server: - req = Request( - url=server.urljoin("/index.html"), - meta={ - "playwright": True, - "playwright_page_coroutines": [PageCoro("click", "a.lorem_ipsum")], - }, - ) - resp = await handler._download_request(req, Spider("foo")) - - assert isinstance(resp, HtmlResponse) - assert resp.request is req - assert resp.url == server.urljoin("/lorem_ipsum.html") - assert resp.status == 200 - assert "playwright" in resp.flags - assert resp.css("title::text").get() == "Lorem Ipsum" - text = resp.css("p::text").get() - assert text == "Lorem ipsum dolor sit amet, consectetur adipiscing elit." - - await handler.browser.close() + async with make_handler({"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) as handler: + with StaticMockServer() as server: + req = Request( + url=server.urljoin("/index.html"), + meta={ + "playwright": True, + "playwright_page_coroutines": [PageCoro("click", "a.lorem_ipsum")], + }, + ) + resp = await handler._download_request(req, Spider("foo")) + + assert isinstance(resp, HtmlResponse) + assert resp.request is req + assert resp.url == server.urljoin("/lorem_ipsum.html") + assert resp.status == 200 + assert "playwright" in resp.flags + assert resp.css("title::text").get() == "Lorem Ipsum" + text = resp.css("p::text").get() + assert text == "Lorem ipsum dolor sit amet, consectetur adipiscing elit." @pytest.mark.asyncio async def test_page_coroutine_infinite_scroll(self): - handler = ScrapyPlaywrightDownloadHandler( - get_crawler(settings_dict={"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) - ) - await handler._launch_browser() + async with make_handler({"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) as handler: + with StaticMockServer() as server: + req = Request( + url=server.urljoin("/scroll.html"), + headers={"User-Agent": "scrapy-playwright"}, + meta={ + "playwright": True, + "playwright_page_coroutines": [ + PageCoro("wait_for_selector", selector="div.quote"), + PageCoro("evaluate", "window.scrollBy(0, document.body.scrollHeight)"), + PageCoro("wait_for_selector", selector="div.quote:nth-child(11)"), + PageCoro("evaluate", "window.scrollBy(0, document.body.scrollHeight)"), + PageCoro("wait_for_selector", selector="div.quote:nth-child(21)"), + ], + }, + ) + resp = await handler._download_request(req, Spider("foo")) - with StaticMockServer() as server: - req = Request( - url=server.urljoin("/scroll.html"), - headers={"User-Agent": "scrapy-playwright"}, - meta={ - "playwright": True, - "playwright_page_coroutines": [ - PageCoro("wait_for_selector", selector="div.quote"), - PageCoro("evaluate", "window.scrollBy(0, document.body.scrollHeight)"), - PageCoro("wait_for_selector", selector="div.quote:nth-child(11)"), - PageCoro("evaluate", "window.scrollBy(0, document.body.scrollHeight)"), - PageCoro("wait_for_selector", selector="div.quote:nth-child(21)"), - ], - }, - ) - resp = await handler._download_request(req, Spider("foo")) - - assert isinstance(resp, HtmlResponse) - assert resp.request is req - assert resp.url == server.urljoin("/scroll.html") - assert resp.status == 200 - assert "playwright" in resp.flags - assert len(resp.css("div.quote")) == 30 - - await handler.browser.close() + assert isinstance(resp, HtmlResponse) + assert resp.request is req + assert resp.url == server.urljoin("/scroll.html") + assert resp.status == 200 + assert "playwright" in resp.flags + assert len(resp.css("div.quote")) == 30 @pytest.mark.asyncio async def test_timeout(self): - handler = ScrapyPlaywrightDownloadHandler( - get_crawler( - settings_dict={ - "PLAYWRIGHT_BROWSER_TYPE": self.browser_type, - "PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT": 1000, - } - ) - ) - await handler._launch_browser() - - with MockServer() as server: - req = Request(server.urljoin("/index.html"), meta={"playwright": True}) - with pytest.raises(TimeoutError): - await handler._download_request(req, Spider("foo")) - - await handler.browser.close() + settings_dict = { + "PLAYWRIGHT_BROWSER_TYPE": self.browser_type, + "PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT": 1000, + } + async with make_handler(settings_dict) as handler: + with MockServer() as server: + req = Request(server.urljoin("/index.html"), meta={"playwright": True}) + with pytest.raises(TimeoutError): + await handler._download_request(req, Spider("foo")) @pytest.mark.asyncio async def test_context_kwargs(self): - handler = ScrapyPlaywrightDownloadHandler( - get_crawler( - settings_dict={ - "PLAYWRIGHT_BROWSER_TYPE": self.browser_type, - "PLAYWRIGHT_CONTEXTS": { - "default": {"java_script_enabled": False}, + settings_dict = { + "PLAYWRIGHT_BROWSER_TYPE": self.browser_type, + "PLAYWRIGHT_CONTEXTS": { + "default": {"java_script_enabled": False}, + }, + } + async with make_handler(settings_dict) as handler: + with StaticMockServer() as server: + req = Request( + url=server.urljoin("/scroll.html"), + meta={ + "playwright": True, + "playwright_page_coroutines": [ + PageCoro("wait_for_selector", selector="div.quote", timeout=1000), + ], }, - } - ) - ) - await handler._launch_browser() - - with StaticMockServer() as server: - req = Request( - url=server.urljoin("/scroll.html"), - meta={ - "playwright": True, - "playwright_page_coroutines": [ - PageCoro("wait_for_selector", selector="div.quote", timeout=1000), - ], - }, - ) - with pytest.raises(TimeoutError): - await handler._download_request(req, Spider("foo")) - - await handler.browser.close() + ) + with pytest.raises(TimeoutError): + await handler._download_request(req, Spider("foo")) @pytest.mark.asyncio async def test_page_coroutine_screenshot(self): - png_file = NamedTemporaryFile(mode="w+b") - handler = ScrapyPlaywrightDownloadHandler( - get_crawler(settings_dict={"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) - ) - await handler._launch_browser() - - with StaticMockServer() as server: - req = Request( - url=server.urljoin("/index.html"), - meta={ - "playwright": True, - "playwright_page_coroutines": { - "png": PageCoro("screenshot", path=png_file.name, type="png"), - }, - }, - ) - await handler._download_request(req, Spider("foo")) - - assert get_mimetype(png_file) == "image/png" - - png_file.file.seek(0) - assert png_file.file.read() == req.meta["playwright_page_coroutines"]["png"].result - - png_file.close() - - await handler.browser.close() + async with make_handler({"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) as handler: + with NamedTemporaryFile(mode="w+b") as png_file: + with StaticMockServer() as server: + req = Request( + url=server.urljoin("/index.html"), + meta={ + "playwright": True, + "playwright_page_coroutines": { + "png": PageCoro("screenshot", path=png_file.name, type="png"), + }, + }, + ) + await handler._download_request(req, Spider("foo")) + + png_file.file.seek(0) + assert png_file.file.read() == req.meta["playwright_page_coroutines"]["png"].result + assert get_mimetype(png_file) == "image/png" @pytest.mark.asyncio async def test_page_coroutine_pdf(self): if self.browser_type != "chromium": pytest.skip("PDF generation is supported only in Chromium") - pdf_file = NamedTemporaryFile(mode="w+b") - handler = ScrapyPlaywrightDownloadHandler( - get_crawler(settings_dict={"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) - ) - await handler._launch_browser() - - with StaticMockServer() as server: - req = Request( - url=server.urljoin("/index.html"), - meta={ - "playwright": True, - "playwright_page_coroutines": { - "pdf": PageCoro("pdf", path=pdf_file.name), - }, - }, - ) - await handler._download_request(req, Spider("foo")) - - assert get_mimetype(pdf_file) == "application/pdf" - - pdf_file.file.seek(0) - assert pdf_file.file.read() == req.meta["playwright_page_coroutines"]["pdf"].result - - pdf_file.close() - - await handler.browser.close() + async with make_handler({"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) as handler: + with NamedTemporaryFile(mode="w+b") as pdf_file: + with StaticMockServer() as server: + req = Request( + url=server.urljoin("/index.html"), + meta={ + "playwright": True, + "playwright_page_coroutines": { + "pdf": PageCoro("pdf", path=pdf_file.name), + }, + }, + ) + await handler._download_request(req, Spider("foo")) + + pdf_file.file.seek(0) + assert pdf_file.file.read() == req.meta["playwright_page_coroutines"]["pdf"].result + assert get_mimetype(pdf_file) == "application/pdf" @pytest.mark.asyncio async def test_event_handler_dialog_callable(self): - crawler = get_crawler(settings_dict={"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) - handler = ScrapyPlaywrightDownloadHandler(crawler) - await handler._launch_browser() - - with StaticMockServer() as server: - spider = DialogSpider() - req = Request( - url=server.urljoin("/index.html"), - meta={ - "playwright": True, - "playwright_page_coroutines": [ - PageCoro("evaluate", "alert('foobar');"), - ], - "playwright_page_event_handlers": { - "dialog": spider.handle_dialog, + async with make_handler({"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) as handler: + with StaticMockServer() as server: + spider = DialogSpider() + req = Request( + url=server.urljoin("/index.html"), + meta={ + "playwright": True, + "playwright_page_coroutines": [ + PageCoro("evaluate", "alert('foobar');"), + ], + "playwright_page_event_handlers": { + "dialog": spider.handle_dialog, + }, }, - }, - ) - await handler._download_request(req, spider) - - assert spider.dialog_message == "foobar" + ) + await handler._download_request(req, spider) - await handler.browser.close() + assert spider.dialog_message == "foobar" @pytest.mark.asyncio async def test_event_handler_dialog_str(self): - crawler = get_crawler(settings_dict={"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) - handler = ScrapyPlaywrightDownloadHandler(crawler) - await handler._launch_browser() - - with StaticMockServer() as server: - spider = DialogSpider() - req = Request( - url=server.urljoin("/index.html"), - meta={ - "playwright": True, - "playwright_page_coroutines": [ - PageCoro("evaluate", "alert('foobar');"), - ], - "playwright_page_event_handlers": { - "dialog": "handle_dialog", + async with make_handler({"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) as handler: + with StaticMockServer() as server: + spider = DialogSpider() + req = Request( + url=server.urljoin("/index.html"), + meta={ + "playwright": True, + "playwright_page_coroutines": [ + PageCoro("evaluate", "alert('foobar');"), + ], + "playwright_page_event_handlers": { + "dialog": "handle_dialog", + }, }, - }, - ) - await handler._download_request(req, spider) - - assert spider.dialog_message == "foobar" + ) + await handler._download_request(req, spider) - await handler.browser.close() + assert spider.dialog_message == "foobar" @pytest.mark.asyncio async def test_event_handler_dialog_missing(self, caplog): - crawler = get_crawler(settings_dict={"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) - handler = ScrapyPlaywrightDownloadHandler(crawler) - await handler._launch_browser() - - with StaticMockServer() as server: - spider = DialogSpider() - req = Request( - url=server.urljoin("/index.html"), - meta={ - "playwright": True, - "playwright_page_event_handlers": { - "dialog": "missing_method", + async with make_handler({"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) as handler: + with StaticMockServer() as server: + spider = DialogSpider() + req = Request( + url=server.urljoin("/index.html"), + meta={ + "playwright": True, + "playwright_page_event_handlers": { + "dialog": "missing_method", + }, }, - }, - ) - await handler._download_request(req, spider) + ) + await handler._download_request(req, spider) assert ( "scrapy-playwright", @@ -328,8 +273,6 @@ async def test_event_handler_dialog_missing(self, caplog): ) in caplog.record_tuples assert getattr(spider, "dialog_message", None) is None - await handler.browser.close() - class TestCaseChromium(MixinTestCase): browser_type = "chromium" From 1f8aa5388a7e1cda11e03f41741a7bb372d4602c Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Sat, 9 Oct 2021 21:05:21 -0300 Subject: [PATCH 2/6] Simplify more tests --- tests/__init__.py | 17 ++++ tests/test_browser_contexts.py | 152 ++++++++++++++---------------- tests/test_playwright_requests.py | 16 +--- 3 files changed, 87 insertions(+), 98 deletions(-) diff --git a/tests/__init__.py b/tests/__init__.py index e69de29b..7ee7936c 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1,17 @@ +from contextlib import asynccontextmanager + +from scrapy.utils.test import get_crawler + + +@asynccontextmanager +async def make_handler(settings_dict: dict): + """Convenience function to obtain an initialized handler and close it gracefully""" + from scrapy_playwright.handler import ScrapyPlaywrightDownloadHandler + + try: + crawler = get_crawler(settings_dict=settings_dict) + handler = ScrapyPlaywrightDownloadHandler(crawler=crawler) + await handler._launch_browser() + yield handler + finally: + await handler._close() diff --git a/tests/test_browser_contexts.py b/tests/test_browser_contexts.py index ebca42d9..870d9f3a 100644 --- a/tests/test_browser_contexts.py +++ b/tests/test_browser_contexts.py @@ -3,10 +3,8 @@ import pytest from scrapy import Spider, Request -from scrapy.utils.test import get_crawler - -from scrapy_playwright.handler import ScrapyPlaywrightDownloadHandler +from tests import make_handler from tests.mockserver import StaticMockServer @@ -29,63 +27,55 @@ async def test_contexts_startup(self): }, }, } - handler = ScrapyPlaywrightDownloadHandler(get_crawler(settings_dict=settings)) - await handler._launch_browser() - - with StaticMockServer() as server: - meta = { - "playwright": True, - "playwright_include_page": True, - "playwright_context": "first", - } - req = Request(server.urljoin("/index.html"), meta=meta) - resp = await handler._download_request(req, Spider("foo")) - - page = resp.meta["playwright_page"] - storage_state = await page.context.storage_state() - cookie = storage_state["cookies"][0] - assert cookie["name"] == "foo" - assert cookie["value"] == "bar" - assert cookie["domain"] == "example.org" - - await page.close() - await handler.browser.close() + async with make_handler(settings) as handler: + with StaticMockServer() as server: + meta = { + "playwright": True, + "playwright_include_page": True, + "playwright_context": "first", + } + req = Request(server.urljoin("/index.html"), meta=meta) + resp = await handler._download_request(req, Spider("foo")) + + page = resp.meta["playwright_page"] + storage_state = await page.context.storage_state() + await page.close() + cookie = storage_state["cookies"][0] + assert cookie["name"] == "foo" + assert cookie["value"] == "bar" + assert cookie["domain"] == "example.org" @pytest.mark.asyncio async def test_contexts_dynamic(self): - settings = {"PLAYWRIGHT_BROWSER_TYPE": self.browser_type} - handler = ScrapyPlaywrightDownloadHandler(get_crawler(settings_dict=settings)) - await handler._launch_browser() - - with StaticMockServer() as server: - meta = { - "playwright": True, - "playwright_include_page": True, - "playwright_context": "new", - "playwright_context_kwargs": { - "storage_state": { - "cookies": [ - { - "url": "https://example.org", - "name": "asdf", - "value": "qwerty", - }, - ], + async with make_handler({"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) as handler: + + with StaticMockServer() as server: + meta = { + "playwright": True, + "playwright_include_page": True, + "playwright_context": "new", + "playwright_context_kwargs": { + "storage_state": { + "cookies": [ + { + "url": "https://example.org", + "name": "asdf", + "value": "qwerty", + }, + ], + }, }, - }, - } - req = Request(server.urljoin("/index.html"), meta=meta) - resp = await handler._download_request(req, Spider("foo")) - - page = resp.meta["playwright_page"] - storage_state = await page.context.storage_state() - cookie = storage_state["cookies"][0] - assert cookie["name"] == "asdf" - assert cookie["value"] == "qwerty" - assert cookie["domain"] == "example.org" - - await page.close() - await handler.browser.close() + } + req = Request(server.urljoin("/index.html"), meta=meta) + resp = await handler._download_request(req, Spider("foo")) + + page = resp.meta["playwright_page"] + storage_state = await page.context.storage_state() + await page.close() + cookie = storage_state["cookies"][0] + assert cookie["name"] == "asdf" + assert cookie["value"] == "qwerty" + assert cookie["domain"] == "example.org" @pytest.mark.asyncio async def test_deprecated_setting(self): @@ -104,33 +94,29 @@ async def test_deprecated_setting(self): }, } with warnings.catch_warnings(record=True) as warning_list: - handler = ScrapyPlaywrightDownloadHandler(get_crawler(settings_dict=settings)) - await handler._launch_browser() - - assert warning_list[0].category is DeprecationWarning - assert str(warning_list[0].message) == ( - "The PLAYWRIGHT_CONTEXT_ARGS setting is deprecated, please use" - " PLAYWRIGHT_CONTEXTS instead. Keyword arguments defined in" - " PLAYWRIGHT_CONTEXT_ARGS will be used when creating the 'default' context" - ) - - with StaticMockServer() as server: - meta = { - "playwright": True, - "playwright_include_page": True, - } - req = Request(server.urljoin("/index.html"), meta=meta) - resp = await handler._download_request(req, Spider("foo")) - - page = resp.meta["playwright_page"] - storage_state = await page.context.storage_state() - cookie = storage_state["cookies"][0] - assert cookie["name"] == "asdf" - assert cookie["value"] == "qwerty" - assert cookie["domain"] == "example.org" - - await page.close() - await handler.browser.close() + async with make_handler(settings) as handler: + assert warning_list[0].category is DeprecationWarning + assert str(warning_list[0].message) == ( + "The PLAYWRIGHT_CONTEXT_ARGS setting is deprecated, please use" + " PLAYWRIGHT_CONTEXTS instead. Keyword arguments defined in" + " PLAYWRIGHT_CONTEXT_ARGS will be used when creating the 'default' context" + ) + + with StaticMockServer() as server: + meta = { + "playwright": True, + "playwright_include_page": True, + } + req = Request(server.urljoin("/index.html"), meta=meta) + resp = await handler._download_request(req, Spider("foo")) + + page = resp.meta["playwright_page"] + storage_state = await page.context.storage_state() + await page.close() + cookie = storage_state["cookies"][0] + assert cookie["name"] == "asdf" + assert cookie["value"] == "qwerty" + assert cookie["domain"] == "example.org" class TestCaseMultipleContextsChromium(MixinTestCaseMultipleContexts): diff --git a/tests/test_playwright_requests.py b/tests/test_playwright_requests.py index 41ef4d15..c63c5cec 100644 --- a/tests/test_playwright_requests.py +++ b/tests/test_playwright_requests.py @@ -1,18 +1,16 @@ import logging import platform import subprocess -from contextlib import asynccontextmanager from tempfile import NamedTemporaryFile import pytest from playwright.async_api import Dialog, Page as PlaywrightPage, TimeoutError from scrapy import Spider, Request, FormRequest from scrapy.http.response.html import HtmlResponse -from scrapy.utils.test import get_crawler -from scrapy_playwright.handler import ScrapyPlaywrightDownloadHandler from scrapy_playwright.page import PageCoroutine as PageCoro +from tests import make_handler from tests.mockserver import MockServer, StaticMockServer @@ -24,18 +22,6 @@ def get_mimetype(file): ).stdout.strip() -@asynccontextmanager -async def make_handler(settings_dict: dict): - """Convenience function to obtain an initialized handler and close it gracefully""" - try: - crawler = get_crawler(settings_dict=settings_dict) - handler = ScrapyPlaywrightDownloadHandler(crawler=crawler) - await handler._launch_browser() - yield handler - finally: - await handler._close() - - class DialogSpider(Spider): """A spider with a method to handle the "dialog" page event""" From e9d0059b39227f0cdd8f5d31fd7b4e4347a76558 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Sat, 9 Oct 2021 21:11:03 -0300 Subject: [PATCH 3/6] Clear contexts dict when closing the handler (they are closed with the browser) --- scrapy_playwright/handler.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scrapy_playwright/handler.py b/scrapy_playwright/handler.py index a9a7f0d4..6785f34c 100644 --- a/scrapy_playwright/handler.py +++ b/scrapy_playwright/handler.py @@ -118,8 +118,7 @@ def close(self) -> Deferred: yield deferred_from_coro(self._close()) async def _close(self) -> None: - for context in self.contexts.copy().values(): - await context.close() + self.contexts.clear() if getattr(self, "browser", None): logger.info("Closing browser") await self.browser.close() From f8692608fd7039aae1fd2df48750c4491113266b Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Sat, 9 Oct 2021 21:54:05 -0300 Subject: [PATCH 4/6] Restore test coverage --- tests/test_browser_contexts.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_browser_contexts.py b/tests/test_browser_contexts.py index 870d9f3a..07bcaacf 100644 --- a/tests/test_browser_contexts.py +++ b/tests/test_browser_contexts.py @@ -39,6 +39,7 @@ async def test_contexts_startup(self): page = resp.meta["playwright_page"] storage_state = await page.context.storage_state() + await page.context.close() await page.close() cookie = storage_state["cookies"][0] assert cookie["name"] == "foo" From 1cee1d14a277b22a467228db17c1a9bc09f1c771 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta <1731933+elacuesta@users.noreply.github.com> Date: Sun, 10 Oct 2021 11:13:16 -0300 Subject: [PATCH 5/6] Update tests/__init__.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adrián Chaves --- tests/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/__init__.py b/tests/__init__.py index 7ee7936c..c9d67922 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -8,10 +8,11 @@ async def make_handler(settings_dict: dict): """Convenience function to obtain an initialized handler and close it gracefully""" from scrapy_playwright.handler import ScrapyPlaywrightDownloadHandler + crawler = get_crawler(settings_dict=settings_dict) + handler = ScrapyPlaywrightDownloadHandler(crawler=crawler) try: - crawler = get_crawler(settings_dict=settings_dict) - handler = ScrapyPlaywrightDownloadHandler(crawler=crawler) await handler._launch_browser() + else: yield handler finally: await handler._close() From ee0f5a6bcd62fa3ff6a3167d9f5fc7fdfdc436b6 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Sun, 10 Oct 2021 11:21:43 -0300 Subject: [PATCH 6/6] Fix SyntaxError --- tests/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/__init__.py b/tests/__init__.py index c9d67922..66b35fd1 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -12,6 +12,8 @@ async def make_handler(settings_dict: dict): handler = ScrapyPlaywrightDownloadHandler(crawler=crawler) try: await handler._launch_browser() + except: # noqa (E722) + pass else: yield handler finally: