Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Addressing issues identified in issue #5944 #5945

Closed
wants to merge 11 commits into from
2 changes: 1 addition & 1 deletion scrapy/cmdline.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,4 +183,4 @@ def _run_command_profiled(cmd, args, opts):
# Twisted prints errors in DebugInfo.__del__, but PyPy does not run gc.collect() on exit:
# http://doc.pypy.org/en/latest/cpython_differences.html
# ?highlight=gc.collect#differences-related-to-garbage-collection-strategies
garbage_collect()
garbage_collect()
2 changes: 1 addition & 1 deletion scrapy/commands/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,4 +105,4 @@ def run(self, args, opts):

result.printErrors()
result.printSummary(start, stop)
self.exitcode = int(not result.wasSuccessful())
self.exitcode = int(not result.wasSuccessful())
2 changes: 1 addition & 1 deletion scrapy/crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,4 +400,4 @@ def _stop_reactor(self, _=None):
try:
reactor.stop()
except RuntimeError: # raised if already stopped or in shutdown stage
pass
pass
2 changes: 1 addition & 1 deletion scrapy/mail.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,4 +206,4 @@ def _create_sender_factory(self, to_addrs, msg, d):
**factory_keywords
)
factory.noisy = False
return factory
return factory
30 changes: 4 additions & 26 deletions scrapy/statscollectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from typing import TYPE_CHECKING, Any, Dict, Optional

from scrapy import Spider
from scrapy.extensions.corestats import CoreStats

if TYPE_CHECKING:
from scrapy.crawler import Crawler
Expand Down Expand Up @@ -51,7 +52,8 @@ def clear_stats(self, spider: Optional[Spider] = None) -> None:
self._stats.clear()

def open_spider(self, spider: Spider) -> None:
pass
ext = spider.from_crawler(self.crawler)
ext.spider_opened(self.spider)

def close_spider(self, spider: Spider, reason: str) -> None:
if self._dump:
Expand All @@ -62,7 +64,7 @@ def close_spider(self, spider: Spider, reason: str) -> None:
self._persist_stats(self._stats, spider)

def _persist_stats(self, stats: StatsT, spider: Spider) -> None:
pass
spider.custom_settings = stats


class MemoryStatsCollector(StatsCollector):
Expand All @@ -72,27 +74,3 @@ def __init__(self, crawler: "Crawler"):

def _persist_stats(self, stats: StatsT, spider: Spider) -> None:
self.spider_stats[spider.name] = stats


class DummyStatsCollector(StatsCollector):
def get_value(
self, key: str, default: Any = None, spider: Optional[Spider] = None
) -> Any:
return default

def set_value(self, key: str, value: Any, spider: Optional[Spider] = None) -> None:
pass

def set_stats(self, stats: StatsT, spider: Optional[Spider] = None) -> None:
pass

def inc_value(
self, key: str, count: int = 1, start: int = 0, spider: Optional[Spider] = None
) -> None:
pass

def max_value(self, key: str, value: Any, spider: Optional[Spider] = None) -> None:
pass

def min_value(self, key: str, value: Any, spider: Optional[Spider] = None) -> None:
pass
Empty file.
Empty file.
98 changes: 91 additions & 7 deletions tests/test_link.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,46 +12,130 @@ def _assert_different_links(self, link1, link2):
self.assertNotEqual(link1, link2)
self.assertNotEqual(hash(link1), hash(link2))

def test_eq_and_hash(self):
def test_eq_and_hash_1(self):
    """
    A Link instance must compare equal (and hash equal) to itself.
    """
    # Fixed: the original also built l2 ("/other") and l3 (duplicate url)
    # but never used them in this test; they are exercised by
    # test_eq_and_hash_2 and test_eq_and_hash_3 respectively.
    l1 = Link("http://www.example.com")
    self._assert_same_links(l1, l1)

def test_eq_and_hash_2(self):
    """
    Two Link instances with different urls must compare unequal,
    even when one url is a prefix of the other.
    """
    base = Link("http://www.example.com")
    longer = Link("http://www.example.com/other")
    self._assert_different_links(base, longer)

def test_eq_and_hash_3(self):
    """
    Two independently created Link instances pointing to the same
    url must compare equal and hash equal.
    """
    first = Link("http://www.example.com")
    second = Link("http://www.example.com")
    self._assert_same_links(first, second)

def test_eq_and_hash_4(self):
    """
    A Link carrying a text attribute still compares equal to itself.
    """
    link = Link("http://www.example.com", text="test")
    self._assert_same_links(link, link)

def test_eq_and_hash_5(self):
    """
    Two Link instances with the same url but different text values
    must compare unequal (text participates in equality/hash).
    """
    # Fixed: the original also built l6 (a duplicate of l4) but never
    # used it here; that pairing is exercised by test_eq_and_hash_6.
    l4 = Link("http://www.example.com", text="test")
    l5 = Link("http://www.example.com", text="test2")

    self._assert_same_links(l4, l4)
    self._assert_different_links(l4, l5)

def test_eq_and_hash_6(self):
    """
    Two Link instances with identical url and identical text values
    must compare equal and hash equal.
    """
    left = Link("http://www.example.com", text="test")
    right = Link("http://www.example.com", text="test")
    self._assert_same_links(left, right)

def test_eq_and_hash_7(self):
    """
    Two Link instances that agree on url, text, fragment and
    nofollow=False must compare equal and hash equal.
    """
    kwargs = dict(text="test", fragment="something", nofollow=False)
    left = Link("http://www.example.com", **kwargs)
    right = Link("http://www.example.com", **kwargs)
    self._assert_same_links(left, right)

def test_eq_and_hash_8(self):
    """
    Two Link instances that agree on url, text and fragment but
    differ in the nofollow flag must compare unequal.
    """
    follow = Link(
        "http://www.example.com", text="test", fragment="something", nofollow=False
    )
    no_follow = Link(
        "http://www.example.com", text="test", fragment="something", nofollow=True
    )
    self._assert_different_links(follow, no_follow)

def test_eq_and_hash_9(self):
    """
    Two Link instances that agree on url, text and nofollow=False but
    have different fragment values must compare unequal.
    """
    l7 = Link(
        "http://www.example.com", text="test", fragment="something", nofollow=False
    )
    l10 = Link(
        "http://www.example.com", text="test", fragment="other", nofollow=False
    )
    # Fixed: the original asserted against l8 and l9, which are not
    # defined in this method (leftovers from the pre-split test) and
    # would raise NameError; those pairings are covered by
    # test_eq_and_hash_7 and test_eq_and_hash_8.
    self._assert_different_links(l7, l10)

def test_repr(self):
    """
    repr() of a Link must round-trip: evaluating it yields a Link
    equal to the original.
    """
    original = Link(
        "http://www.example.com", text="test", fragment="something", nofollow=True
    )
    # eval of repr is intentional here: it verifies repr is a valid
    # constructor expression (trusted, test-only input).
    reconstructed = eval(repr(original))
    self._assert_same_links(original, reconstructed)

def test_bytes_url(self):
    """
    Constructing a Link from a bytes url (rather than str) must
    raise TypeError.
    """
    self.assertRaises(TypeError, Link, b"http://www.example.com/\xc2\xa3")


# Allow running this test module directly (python tests/test_link.py)
# in addition to test-runner discovery.
if __name__ == "__main__":
    unittest.main()