Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 0 additions & 20 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -729,23 +729,3 @@ Deprecated features will be supported for at least six months
following the release that deprecated them. After that, they
may be removed at any time. See the [changelog](changelog.md)
for more information about deprecations and removals.

### Currently deprecated features

* `scrapy_playwright.headers.use_playwright_headers` function

    Deprecated since
    [`v0.0.16`](https://github.com/scrapy-plugins/scrapy-playwright/releases/tag/v0.0.16);
    set `PLAYWRIGHT_PROCESS_REQUEST_HEADERS=None` instead.

* `scrapy_playwright.page.PageCoroutine` class

    Deprecated since
    [`v0.0.14`](https://github.com/scrapy-plugins/scrapy-playwright/releases/tag/v0.0.14);
    use `scrapy_playwright.page.PageMethod` instead.

* `playwright_page_coroutines` Request meta key

    Deprecated since
    [`v0.0.14`](https://github.com/scrapy-plugins/scrapy-playwright/releases/tag/v0.0.14);
    use the `playwright_page_methods` Request meta key instead.
6 changes: 6 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
# scrapy-playwright changelog


### v0.0.22 (to be released) (2022-xx-xx)

* Remove deprecated code (`PageCoroutine` class, `playwright_page_coroutines` request meta key,
`use_playwright_headers` function).


### [v0.0.21](https://github.com/scrapy-plugins/scrapy-playwright/releases/tag/v0.0.21) (2022-08-08)

* Fixed TypeError exception when getting server IP address
Expand Down
23 changes: 1 addition & 22 deletions scrapy_playwright/handler.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import asyncio
import logging
import warnings
from contextlib import suppress
from dataclasses import dataclass
from ipaddress import ip_address
Expand All @@ -20,7 +19,6 @@
from scrapy import Spider, signals
from scrapy.core.downloader.handlers.http import HTTPDownloadHandler
from scrapy.crawler import Crawler
from scrapy.exceptions import ScrapyDeprecationWarning
from scrapy.http import Request, Response
from scrapy.http.headers import Headers
from scrapy.responsetypes import responsetypes
Expand All @@ -31,7 +29,7 @@
from twisted.internet.defer import Deferred, inlineCallbacks
from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding

from scrapy_playwright.headers import use_scrapy_headers, use_playwright_headers
from scrapy_playwright.headers import use_scrapy_headers
from scrapy_playwright.page import PageMethod


Expand Down Expand Up @@ -96,15 +94,6 @@ def __init__(self, crawler: Crawler) -> None:
self.process_request_headers = load_object(
settings["PLAYWRIGHT_PROCESS_REQUEST_HEADERS"]
)
if self.process_request_headers is use_playwright_headers:
warnings.warn(
"The 'scrapy_playwright.headers.use_playwright_headers' function is"
" deprecated, please set 'PLAYWRIGHT_PROCESS_REQUEST_HEADERS=None'"
" instead.",
category=ScrapyDeprecationWarning,
stacklevel=1,
)
self.process_request_headers = None
else:
self.process_request_headers = use_scrapy_headers

Expand Down Expand Up @@ -331,16 +320,6 @@ async def _download_request_with_page(self, request: Request, page: Page) -> Res

async def _apply_page_methods(self, page: Page, request: Request) -> None:
page_methods = request.meta.get("playwright_page_methods") or ()

if not page_methods and "playwright_page_coroutines" in request.meta:
page_methods = request.meta["playwright_page_coroutines"]
warnings.warn(
"The 'playwright_page_coroutines' request meta key is deprecated,"
" please use 'playwright_page_methods' instead.",
category=ScrapyDeprecationWarning,
stacklevel=1,
)

if isinstance(page_methods, dict):
page_methods = page_methods.values()
for pm in page_methods:
Expand Down
17 changes: 0 additions & 17 deletions scrapy_playwright/headers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,9 @@
This module includes functions to process request headers.
Refer to the PLAYWRIGHT_PROCESS_REQUEST_HEADERS setting for more information.
"""
import warnings
from urllib.parse import urlparse

from playwright.async_api import Request as PlaywrightRequest
from scrapy.exceptions import ScrapyDeprecationWarning
from scrapy.http.headers import Headers


Expand Down Expand Up @@ -34,18 +32,3 @@ async def use_scrapy_headers(
if scrapy_headers_str.get("user-agent"):
playwright_headers["user-agent"] = scrapy_headers_str["user-agent"]
return playwright_headers


async def use_playwright_headers(
browser_type: str,
playwright_request: PlaywrightRequest,
scrapy_headers: Headers,
) -> dict:
warnings.warn(
"The 'scrapy_playwright.headers.use_playwright_headers' function is"
" deprecated, please set 'PLAYWRIGHT_PROCESS_REQUEST_HEADERS=None'"
" instead.",
category=ScrapyDeprecationWarning,
stacklevel=1,
)
return await playwright_request.all_headers()
19 changes: 0 additions & 19 deletions scrapy_playwright/page.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
import warnings

from scrapy.exceptions import ScrapyDeprecationWarning

__all__ = ["PageMethod"]


Expand All @@ -21,18 +17,3 @@ def __str__(self):
return f"<{self.__class__.__name__} for method '{self.method}'>"

__repr__ = __str__


class PageCoroutine(PageMethod):
def __init__(self, method: str, *args, **kwargs) -> None:
warnings.warn(
f"The {_qualname(self.__class__)} class is deprecated,"
f" please use {_qualname(PageMethod)} instead.",
category=ScrapyDeprecationWarning,
stacklevel=2,
)
super().__init__(method, *args, **kwargs)


def _qualname(cls: type) -> str:
return f"{cls.__module__}.{cls.__qualname__}"
54 changes: 1 addition & 53 deletions tests/test_headers.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,12 @@
import json
import platform
import sys
import warnings

import pytest
from scrapy import Spider, Request
from scrapy.http.headers import Headers

from tests import make_handler
from tests.mockserver import MockServer

from scrapy_playwright.headers import use_playwright_headers


@pytest.mark.skipif(sys.version_info < (3, 8), reason="AsyncMock was added on Python 3.8")
@pytest.mark.asyncio
async def test_use_playwright_headers_deprecated():
from unittest.mock import AsyncMock

headers = {"foo": "bar", "a": "b"}
playwright_request = AsyncMock()
playwright_request.all_headers.return_value = headers
with warnings.catch_warnings(record=True) as warning_list:
processed_headers = await use_playwright_headers("foobar", playwright_request, Headers({}))
assert processed_headers == headers
assert str(warning_list[0].message) == (
"The 'scrapy_playwright.headers.use_playwright_headers' function is"
" deprecated, please set 'PLAYWRIGHT_PROCESS_REQUEST_HEADERS=None'"
" instead."
)


class MixinProcessHeadersTestCase:
@pytest.mark.asyncio
Expand Down Expand Up @@ -63,7 +40,7 @@ async def test_user_agent(self):
assert headers["user-agent"] == "foobar"

@pytest.mark.asyncio
async def test_use_playwright_headers(self):
async def test_playwright_headers(self):
"""Ignore Scrapy headers"""
settings_dict = {
"PLAYWRIGHT_BROWSER_TYPE": self.browser_type,
Expand All @@ -87,35 +64,6 @@ async def test_use_playwright_headers(self):
assert "asdf" not in req.headers
assert b"asdf" not in req.headers

@pytest.mark.asyncio
async def test_use_playwright_headers_deprecated(self):
"""Ignore Scrapy headers"""
settings_dict = {
"PLAYWRIGHT_BROWSER_TYPE": self.browser_type,
"PLAYWRIGHT_CONTEXTS": {"default": {"user_agent": self.browser_type}},
"PLAYWRIGHT_PROCESS_REQUEST_HEADERS": use_playwright_headers,
"PLAYWRIGHT_DEFAULT_NAVIGATION_TIMEOUT": 2000,
}
with warnings.catch_warnings(record=True) as warning_list:
async with make_handler(settings_dict) as handler:
with MockServer() as server:
req = Request(
url=server.urljoin("/headers"),
meta={"playwright": True},
headers={"User-Agent": "foobar", "Asdf": "qwerty"},
)
resp = await handler._download_request(req, Spider("foo"))
headers = json.loads(resp.css("pre::text").get())
headers = {key.lower(): value for key, value in headers.items()}
assert headers["user-agent"] == self.browser_type
assert "asdf" not in headers

assert str(warning_list[0].message) == (
"The 'scrapy_playwright.headers.use_playwright_headers' function is"
" deprecated, please set 'PLAYWRIGHT_PROCESS_REQUEST_HEADERS=None'"
" instead."
)

@pytest.mark.asyncio
async def test_use_custom_headers(self):
"""Custom header processing function"""
Expand Down
35 changes: 1 addition & 34 deletions tests/test_page_methods.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import logging
import platform
import warnings

import pytest
from scrapy import Spider, Request
from scrapy.http.response.html import HtmlResponse

from scrapy_playwright.page import PageMethod, PageCoroutine
from scrapy_playwright.page import PageMethod

from tests import make_handler
from tests.mockserver import StaticMockServer
Expand All @@ -22,16 +21,6 @@ async def test_page_methods():
assert str(screenshot) == "<PageMethod for method 'screenshot'>"


@pytest.mark.asyncio
async def test_deprecated_class():
with warnings.catch_warnings(record=True) as warning_list:
PageCoroutine("screenshot", "foo", 123, path="/tmp/file", type="png")
assert str(warning_list[0].message) == (
"The scrapy_playwright.page.PageCoroutine class is deprecated, "
"please use scrapy_playwright.page.PageMethod instead."
)


def assert_correct_response(response: HtmlResponse, request: Request) -> None:
assert isinstance(response, HtmlResponse)
assert response.request is request
Expand Down Expand Up @@ -93,28 +82,6 @@ async def test_page_mixed_page_methods(self, caplog):
assert not req.meta["playwright_page_methods"]["is_closed"].result
assert req.meta["playwright_page_methods"]["title"].result == "Awesome site"

@pytest.mark.asyncio
async def test_deprecated_request_meta_key(self, caplog):
async with make_handler({"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) as handler:
with StaticMockServer() as server:
req = Request(
url=server.urljoin("/index.html"),
meta={
"playwright": True,
"playwright_page_coroutines": [
PageMethod("is_closed"),
],
},
)
with warnings.catch_warnings(record=True) as warning_list:
resp = await handler._download_request(req, Spider("foo"))

assert_correct_response(resp, req)
assert str(warning_list[0].message) == (
"The 'playwright_page_coroutines' request meta key is deprecated,"
" please use 'playwright_page_methods' instead."
)


class TestPageMethodChromium(MixinPageMethodTestCase):
browser_type = "chromium"
Expand Down