From fe2979edf39ba583032a6af514bb7aad805f47ce Mon Sep 17 00:00:00 2001 From: Georgiy Zatserklianyi Date: Fri, 9 Feb 2024 13:57:21 +0200 Subject: [PATCH 1/6] cookiejars exposed --- scrapy/downloadermiddlewares/cookies.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/scrapy/downloadermiddlewares/cookies.py b/scrapy/downloadermiddlewares/cookies.py index 85781efd6a1..1f920fa2e94 100644 --- a/scrapy/downloadermiddlewares/cookies.py +++ b/scrapy/downloadermiddlewares/cookies.py @@ -16,7 +16,7 @@ from tldextract import TLDExtract -from scrapy import Request, Spider +from scrapy import Request, Spider, signals from scrapy.crawler import Crawler from scrapy.exceptions import NotConfigured from scrapy.http import Response @@ -51,7 +51,13 @@ def __init__(self, debug: bool = False): def from_crawler(cls, crawler: Crawler) -> Self: if not crawler.settings.getbool("COOKIES_ENABLED"): raise NotConfigured - return cls(crawler.settings.getbool("COOKIES_DEBUG")) + o = cls(crawler.settings.getbool("COOKIES_DEBUG")) + if crawler.settings.getbool("COOKIES_EXPOSE_JARS"): + crawler.signals.connect(o.spider_opened, signal=signals.spider_opened) + return o + + def spider_opened(self, spider: Spider) -> None: + spider.jars = self.jars def _process_cookies( self, cookies: Iterable[Cookie], *, jar: CookieJar, request: Request From b8f89608a6a626eb6e98d6deebf737fd2b09f0c3 Mon Sep 17 00:00:00 2001 From: Georgiy Zatserklianyi Date: Fri, 23 Feb 2024 11:22:46 +0100 Subject: [PATCH 2/6] cookiejars exposed --- scrapy/downloadermiddlewares/cookies.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/scrapy/downloadermiddlewares/cookies.py b/scrapy/downloadermiddlewares/cookies.py index 1f920fa2e94..cc70fe36668 100644 --- a/scrapy/downloadermiddlewares/cookies.py +++ b/scrapy/downloadermiddlewares/cookies.py @@ -52,12 +52,11 @@ def from_crawler(cls, crawler: Crawler) -> Self: if not crawler.settings.getbool("COOKIES_ENABLED"): raise NotConfigured o 
= cls(crawler.settings.getbool("COOKIES_DEBUG")) - if crawler.settings.getbool("COOKIES_EXPOSE_JARS"): - crawler.signals.connect(o.spider_opened, signal=signals.spider_opened) + crawler.signals.connect(o.spider_opened, signal=signals.spider_opened) return o def spider_opened(self, spider: Spider) -> None: - spider.jars = self.jars + spider.cookie_jars = self.jars def _process_cookies( self, cookies: Iterable[Cookie], *, jar: CookieJar, request: Request From 11022ac4d48cbbf244bf0e7763b61089b2a3a7f6 Mon Sep 17 00:00:00 2001 From: Georgiy Zatserklianyi Date: Fri, 7 Jun 2024 14:48:30 +0200 Subject: [PATCH 3/6] cookiejars exposed, docs added --- docs/faq.rst | 2 +- docs/topics/downloader-middleware.rst | 46 +++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/docs/faq.rst b/docs/faq.rst index 2113b096435..273d4bfcc83 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -307,7 +307,7 @@ Does Scrapy manage cookies automatically? Yes, Scrapy receives and keeps track of cookies sent by servers, and sends them back on subsequent requests, like any regular web browser does. -For more info see :ref:`topics-request-response` and :ref:`cookies-mw`. +For more info see :ref:`topics-request-response` and :ref:`cookiejars`. How can I see the cookies being sent and received from Scrapy? -------------------------------------------------------------- diff --git a/docs/topics/downloader-middleware.rst b/docs/topics/downloader-middleware.rst index 1abbc49684f..4940f914a97 100644 --- a/docs/topics/downloader-middleware.rst +++ b/docs/topics/downloader-middleware.rst @@ -293,6 +293,52 @@ Here's an example of a log with :setting:`COOKIES_DEBUG` enabled:: [...] +.. _cookiejars: + +Direct access to cookiejars from spider +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +On some cases it is required to directly set specific values to existing cookiejar + +.. 
code-block:: python + + import scrapy + from scrapy.crawler import CrawlerProcess + + + class Quotes(scrapy.Spider): + name = "quotes" + custom_settings = {"DOWNLOAD_DELAY": 1} + + def start_requests(self): + yield scrapy.Request( + url="https://quotes.toscrape.com/login", callback=self.login + ) + + def login(self, response): + self.logger.info(self.cookie_jars[None]) # scrapy.http.cookies.CookieJar object + self.logger.info(self.cookie_jars[None].jar) # http.cookiejar object + + locale_cookie = ( + self.cookie_jars[None]._cookies["quotes.toscrape.com"]["/"].get("session") + ) + locale_cookie.value = locale_cookie.value.upper() + self.logger.info(self.cookie_jars[None].jar) + + + if __name__ == "__main__": + p = CrawlerProcess() + p.crawl(Quotes) + p.start() + +Log output:: + + 2024-02-23 10:51:27 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://quotes.toscrape.com/login> (referer: None) + 2024-02-23 10:51:27 [quotes] INFO: <scrapy.http.cookies.CookieJar object at 0x...> + 2024-02-23 10:51:27 [quotes] INFO: <CookieJar[<Cookie session=... for quotes.toscrape.com/>]> + 2024-02-23 10:51:27 [quotes] INFO: <CookieJar[<Cookie session=... for quotes.toscrape.com/>]> + 2024-02-23 10:51:27 [scrapy.core.engine] INFO: Closing spider (finished) + + DefaultHeadersMiddleware ------------------------ From d8fb9d2e7ecd7117e56f27c1f15bf861fe68c15d Mon Sep 17 00:00:00 2001 From: Georgiy Zatserklianyi Date: Fri, 7 Jun 2024 15:08:44 +0200 Subject: [PATCH 4/6] cookiejars exposed, docs added --- docs/faq.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/faq.rst b/docs/faq.rst index 273d4bfcc83..100e850b0a2 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -307,7 +307,7 @@ Does Scrapy manage cookies automatically? Yes, Scrapy receives and keeps track of cookies sent by servers, and sends them back on subsequent requests, like any regular web browser does. -For more info see :ref:`topics-request-response` and :ref:`cookiejars`. +For more info see :ref:`topics-request-response` , :ref:`cookies-mw` and :ref:`cookiejars`. How can I see the cookies being sent and received from Scrapy? 
-------------------------------------------------------------- From 53db741532d7a117a999e71860532abc99dc88e5 Mon Sep 17 00:00:00 2001 From: Georgiy Zatserklianyi Date: Fri, 21 Jun 2024 10:10:58 +0200 Subject: [PATCH 5/6] Update docs/faq.rst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adrián Chaves --- docs/faq.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/faq.rst b/docs/faq.rst index 100e850b0a2..55a81761cfa 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -307,7 +307,7 @@ Does Scrapy manage cookies automatically? Yes, Scrapy receives and keeps track of cookies sent by servers, and sends them back on subsequent requests, like any regular web browser does. -For more info see :ref:`topics-request-response` , :ref:`cookies-mw` and :ref:`cookiejars`. +For more info see :ref:`topics-request-response`, :ref:`cookies-mw` and :ref:`cookiejars`. How can I see the cookies being sent and received from Scrapy? 
-------------------------------------------------------------- From 2f457179e8bbe57493fb88197e9b4a5ee9504f5a Mon Sep 17 00:00:00 2001 From: Georgiy Zatserklianyi Date: Fri, 21 Jun 2024 10:11:30 +0200 Subject: [PATCH 6/6] Update docs/topics/downloader-middleware.rst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adrián Chaves --- docs/topics/downloader-middleware.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/topics/downloader-middleware.rst b/docs/topics/downloader-middleware.rst index 4940f914a97..4ff0cf10924 100644 --- a/docs/topics/downloader-middleware.rst +++ b/docs/topics/downloader-middleware.rst @@ -297,7 +297,9 @@ Here's an example of a log with :setting:`COOKIES_DEBUG` enabled:: Direct access to cookiejars from spider ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -On some cases it is required to directly set specific values to existing cookiejar + +In some cases it is required to directly set specific values in an existing +cookiejar. .. code-block:: python