Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Addressing issues identified in issue #5944 #5945

Closed
wants to merge 11 commits into from
2 changes: 1 addition & 1 deletion scrapy/cmdline.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,4 +183,4 @@ def _run_command_profiled(cmd, args, opts):
# Twisted prints errors in DebugInfo.__del__, but PyPy does not run gc.collect() on exit:
# http://doc.pypy.org/en/latest/cpython_differences.html
# ?highlight=gc.collect#differences-related-to-garbage-collection-strategies
garbage_collect()
garbage_collect()
2 changes: 1 addition & 1 deletion scrapy/commands/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,4 +105,4 @@ def run(self, args, opts):

result.printErrors()
result.printSummary(start, stop)
self.exitcode = int(not result.wasSuccessful())
self.exitcode = int(not result.wasSuccessful())
2 changes: 1 addition & 1 deletion scrapy/crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,4 +400,4 @@ def _stop_reactor(self, _=None):
try:
reactor.stop()
except RuntimeError: # raised if already stopped or in shutdown stage
pass
pass
2 changes: 1 addition & 1 deletion scrapy/mail.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,4 +206,4 @@ def _create_sender_factory(self, to_addrs, msg, d):
**factory_keywords
)
factory.noisy = False
return factory
return factory
30 changes: 4 additions & 26 deletions scrapy/statscollectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from typing import TYPE_CHECKING, Any, Dict, Optional

from scrapy import Spider
from scrapy.extensions.corestats import CoreStats

if TYPE_CHECKING:
from scrapy.crawler import Crawler
Expand Down Expand Up @@ -51,7 +52,8 @@ def clear_stats(self, spider: Optional[Spider] = None) -> None:
self._stats.clear()

def open_spider(self, spider: Spider) -> None:
pass
ext = spider.from_crawler(self.crawler)
ext.spider_opened(self.spider)

def close_spider(self, spider: Spider, reason: str) -> None:
if self._dump:
Expand All @@ -62,7 +64,7 @@ def close_spider(self, spider: Spider, reason: str) -> None:
self._persist_stats(self._stats, spider)

def _persist_stats(self, stats: StatsT, spider: Spider) -> None:
pass
spider.custom_settings = stats


class MemoryStatsCollector(StatsCollector):
Expand All @@ -72,27 +74,3 @@ def __init__(self, crawler: "Crawler"):

def _persist_stats(self, stats: StatsT, spider: Spider) -> None:
self.spider_stats[spider.name] = stats


class DummyStatsCollector(StatsCollector):
def get_value(
self, key: str, default: Any = None, spider: Optional[Spider] = None
) -> Any:
return default

def set_value(self, key: str, value: Any, spider: Optional[Spider] = None) -> None:
pass

def set_stats(self, stats: StatsT, spider: Optional[Spider] = None) -> None:
pass

def inc_value(
self, key: str, count: int = 1, start: int = 0, spider: Optional[Spider] = None
) -> None:
pass

def max_value(self, key: str, value: Any, spider: Optional[Spider] = None) -> None:
pass

def min_value(self, key: str, value: Any, spider: Optional[Spider] = None) -> None:
pass
Empty file.
Empty file.
98 changes: 91 additions & 7 deletions tests/test_link.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,46 +12,130 @@ def _assert_different_links(self, link1, link2):
self.assertNotEqual(link1, link2)
self.assertNotEqual(hash(link1), hash(link2))

def test_eq_and_hash(self):
def test_eq_and_hash_1(self):
    """
    A Link instance must compare equal (and hash equal) to itself.
    """
    # Fixed: the original also built l2 ("/other") and l3 (duplicate url)
    # but never used them in this test; they are exercised by
    # test_eq_and_hash_2 and test_eq_and_hash_3 respectively.
    l1 = Link("http://www.example.com")
    self._assert_same_links(l1, l1)

def test_eq_and_hash_2(self):
    """
    Two Link instances with different urls must compare unequal,
    even when one url is a prefix of the other.
    """
    base = Link("http://www.example.com")
    longer = Link("http://www.example.com/other")
    self._assert_different_links(base, longer)

def test_eq_and_hash_3(self):
    """
    Two independently created Link instances pointing to the same
    url must compare equal and hash equal.
    """
    first = Link("http://www.example.com")
    second = Link("http://www.example.com")
    self._assert_same_links(first, second)

def test_eq_and_hash_4(self):
    """
    A Link carrying a text attribute still compares equal to itself.
    """
    link = Link("http://www.example.com", text="test")
    self._assert_same_links(link, link)

def test_eq_and_hash_5(self):
    """
    Two Link instances with the same url but different text values
    must compare unequal (text participates in equality/hash).
    """
    # Fixed: the original also built l6 (a duplicate of l4) but never
    # used it here; that pairing is exercised by test_eq_and_hash_6.
    l4 = Link("http://www.example.com", text="test")
    l5 = Link("http://www.example.com", text="test2")

    self._assert_same_links(l4, l4)
    self._assert_different_links(l4, l5)

def test_eq_and_hash_6(self):
    """
    Two Link instances with identical url and identical text values
    must compare equal and hash equal.
    """
    left = Link("http://www.example.com", text="test")
    right = Link("http://www.example.com", text="test")
    self._assert_same_links(left, right)

def test_eq_and_hash_7(self):
    """
    Two Link instances that agree on url, text, fragment and
    nofollow=False must compare equal and hash equal.
    """
    kwargs = dict(text="test", fragment="something", nofollow=False)
    left = Link("http://www.example.com", **kwargs)
    right = Link("http://www.example.com", **kwargs)
    self._assert_same_links(left, right)

def test_eq_and_hash_8(self):
    """
    Two Link instances that agree on url, text and fragment but
    differ in the nofollow flag must compare unequal.
    """
    follow = Link(
        "http://www.example.com", text="test", fragment="something", nofollow=False
    )
    no_follow = Link(
        "http://www.example.com", text="test", fragment="something", nofollow=True
    )
    self._assert_different_links(follow, no_follow)

def test_eq_and_hash_9(self):
    """
    Two Link instances that agree on url, text and nofollow=False but
    have different fragment values must compare unequal.
    """
    l7 = Link(
        "http://www.example.com", text="test", fragment="something", nofollow=False
    )
    l10 = Link(
        "http://www.example.com", text="test", fragment="other", nofollow=False
    )
    # Fixed: the original asserted against l8 and l9, which are not
    # defined in this method (leftovers from the pre-split test) and
    # would raise NameError; those pairings are covered by
    # test_eq_and_hash_7 and test_eq_and_hash_8.
    self._assert_different_links(l7, l10)

def test_repr(self):
    """
    repr() of a Link must round-trip: evaluating it yields a Link
    equal to the original.
    """
    original = Link(
        "http://www.example.com", text="test", fragment="something", nofollow=True
    )
    # eval of repr is intentional here: it verifies repr is a valid
    # constructor expression (trusted, test-only input).
    reconstructed = eval(repr(original))
    self._assert_same_links(original, reconstructed)

def test_bytes_url(self):
    """
    Constructing a Link from a bytes url (rather than str) must
    raise TypeError.
    """
    self.assertRaises(TypeError, Link, b"http://www.example.com/\xc2\xa3")


# Allow running this test module directly (python tests/test_link.py)
# in addition to test-runner discovery.
if __name__ == "__main__":
    unittest.main()