Commit

Merge pull request #4178 from elacuesta/remove_spider_make_requests_from_url
wRAR committed Aug 20, 2021
2 parents bdf8355 + 91f8144 commit 731f2d3
Showing 2 changed files with 2 additions and 58 deletions.
27 changes: 2 additions & 25 deletions scrapy/spiders/__init__.py
@@ -4,14 +4,12 @@
 See documentation in docs/topics/spiders.rst
 """
 import logging
-import warnings
 from typing import Optional
 
 from scrapy import signals
 from scrapy.http import Request
 from scrapy.utils.trackref import object_ref
 from scrapy.utils.url import url_is_from_spider
-from scrapy.utils.deprecate import method_is_overridden
 
 
 class Spider(object_ref):
@@ -57,34 +55,13 @@ def _set_crawler(self, crawler):
         crawler.signals.connect(self.close, signals.spider_closed)
 
     def start_requests(self):
-        cls = self.__class__
         if not self.start_urls and hasattr(self, 'start_url'):
             raise AttributeError(
                 "Crawling could not start: 'start_urls' not found "
                 "or empty (but found 'start_url' attribute instead, "
                 "did you miss an 's'?)")
-        if method_is_overridden(cls, Spider, 'make_requests_from_url'):
-            warnings.warn(
-                "Spider.make_requests_from_url method is deprecated; it "
-                "won't be called in future Scrapy releases. Please "
-                "override Spider.start_requests method instead "
-                f"(see {cls.__module__}.{cls.__name__}).",
-            )
-            for url in self.start_urls:
-                yield self.make_requests_from_url(url)
-        else:
-            for url in self.start_urls:
-                yield Request(url, dont_filter=True)
-
-    def make_requests_from_url(self, url):
-        """ This method is deprecated. """
-        warnings.warn(
-            "Spider.make_requests_from_url method is deprecated: "
-            "it will be removed and not be called by the default "
-            "Spider.start_requests method in future Scrapy releases. "
-            "Please override Spider.start_requests method instead."
-        )
-        return Request(url, dont_filter=True)
+        for url in self.start_urls:
+            yield Request(url, dont_filter=True)
 
     def _parse(self, response, **kwargs):
         return self.parse(response, **kwargs)
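For a spider that customized make_requests_from_url, the deprecation message removed above already names the replacement: override start_requests instead. A minimal migration sketch (the MySpider name and the "/foo" URL tweak are illustrative only, not part of this patch):

from scrapy import Request, Spider


class MySpider(Spider):
    name = 'myspider'  # illustrative name, not from this patch
    start_urls = ['http://example.com']

    def start_requests(self):
        # Customization that used to live in make_requests_from_url()
        # now goes directly into start_requests().
        for url in self.start_urls:
            yield Request(url + "/foo", dont_filter=True)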
33 changes: 0 additions & 33 deletions tests/test_spider.py
@@ -584,39 +584,6 @@ def test_crawl_spider(self):
         assert issubclass(CrawlSpider, Spider)
         assert isinstance(CrawlSpider(name='foo'), Spider)
 
-    def test_make_requests_from_url_deprecated(self):
-        class MySpider4(Spider):
-            name = 'spider1'
-            start_urls = ['http://example.com']
-
-        class MySpider5(Spider):
-            name = 'spider2'
-            start_urls = ['http://example.com']
-
-            def make_requests_from_url(self, url):
-                return Request(url + "/foo", dont_filter=True)
-
-        with warnings.catch_warnings(record=True) as w:
-            # spider without overridden make_requests_from_url method
-            # doesn't issue a warning
-            spider1 = MySpider4()
-            self.assertEqual(len(list(spider1.start_requests())), 1)
-            self.assertEqual(len(w), 0)
-
-            # spider without overridden make_requests_from_url method
-            # should issue a warning when called directly
-            request = spider1.make_requests_from_url("http://www.example.com")
-            self.assertTrue(isinstance(request, Request))
-            self.assertEqual(len(w), 1)
-
-            # spider with overridden make_requests_from_url issues a warning,
-            # but the method still works
-            spider2 = MySpider5()
-            requests = list(spider2.start_requests())
-            self.assertEqual(len(requests), 1)
-            self.assertEqual(requests[0].url, 'http://example.com/foo')
-            self.assertEqual(len(w), 2)
-
 
 class NoParseMethodSpiderTest(unittest.TestCase):
 
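With the deprecation test removed, the default behaviour that remains — one unfiltered Request per entry in start_urls — can still be exercised directly. A minimal sketch, assuming nothing beyond the new Spider.start_requests shown above (the StartRequestsDefaultTest and SimpleSpider names are hypothetical, not part of this patch):

import unittest

from scrapy import Request, Spider


class StartRequestsDefaultTest(unittest.TestCase):  # hypothetical test class, not from this patch

    def test_start_requests_default(self):
        class SimpleSpider(Spider):
            name = 'simple'
            start_urls = ['http://example.com']

        # The default start_requests() yields one dont_filter Request per start URL.
        requests = list(SimpleSpider().start_requests())
        self.assertEqual(len(requests), 1)
        self.assertIsInstance(requests[0], Request)
        self.assertEqual(requests[0].url, 'http://example.com')
        self.assertTrue(requests[0].dont_filter)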
