From e96dc42a7e60a9c135f9951a557837fb119aca22 Mon Sep 17 00:00:00 2001 From: HuanYang <33485493+baotlake@users.noreply.github.com> Date: Mon, 20 May 2024 19:04:14 +0800 Subject: [PATCH 1/2] Update README.rst Fix the issue of no line breaks in the Usage code --- README.rst | 91 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 48 insertions(+), 43 deletions(-) diff --git a/README.rst b/README.rst index cf0d221..b9953fe 100644 --- a/README.rst +++ b/README.rst @@ -27,52 +27,57 @@ To install Protego, simply use pip: Usage ===== ->>> from protego import Protego ->>> robotstxt = """ -... User-agent: * -... Disallow: / -... Allow: /about -... Allow: /account -... Disallow: /account/contact$ -... Disallow: /account/*/profile -... Crawl-delay: 4 -... Request-rate: 10/1m # 10 requests every 1 minute -... -... Sitemap: http://example.com/sitemap-index.xml -... Host: http://example.co.in -... """ ->>> rp = Protego.parse(robotstxt) ->>> rp.can_fetch("http://example.com/profiles", "mybot") -False ->>> rp.can_fetch("http://example.com/about", "mybot") -True ->>> rp.can_fetch("http://example.com/account", "mybot") -True ->>> rp.can_fetch("http://example.com/account/myuser/profile", "mybot") -False ->>> rp.can_fetch("http://example.com/account/contact", "mybot") -False ->>> rp.crawl_delay("mybot") -4.0 ->>> rp.request_rate("mybot") -RequestRate(requests=10, seconds=60, start_time=None, end_time=None) ->>> list(rp.sitemaps) -['http://example.com/sitemap-index.xml'] ->>> rp.preferred_host -'http://example.co.in' +.. code-block:: none + + >>> from protego import Protego + >>> robotstxt = """ + ... User-agent: * + ... Disallow: / + ... Allow: /about + ... Allow: /account + ... Disallow: /account/contact$ + ... Disallow: /account/*/profile + ... Crawl-delay: 4 + ... Request-rate: 10/1m # 10 requests every 1 minute + ... + ... Sitemap: http://example.com/sitemap-index.xml + ... Host: http://example.co.in + ... """ + >>> rp = Protego.parse(robotstxt) + >>> rp.can_fetch("http://example.com/profiles", "mybot") + False + >>> rp.can_fetch("http://example.com/about", "mybot") + True + >>> rp.can_fetch("http://example.com/account", "mybot") + True + >>> rp.can_fetch("http://example.com/account/myuser/profile", "mybot") + False + >>> rp.can_fetch("http://example.com/account/contact", "mybot") + False + >>> rp.crawl_delay("mybot") + 4.0 + >>> rp.request_rate("mybot") + RequestRate(requests=10, seconds=60, start_time=None, end_time=None) + >>> list(rp.sitemaps) + ['http://example.com/sitemap-index.xml'] + >>> rp.preferred_host + 'http://example.co.in' + Using Protego with Requests_: ->>> from protego import Protego ->>> import requests ->>> r = requests.get("https://google.com/robots.txt") ->>> rp = Protego.parse(r.text) ->>> rp.can_fetch("https://google.com/search", "mybot") -False ->>> rp.can_fetch("https://google.com/search/about", "mybot") -True ->>> list(rp.sitemaps) -['https://www.google.com/sitemap.xml'] +.. code-block:: none + + >>> from protego import Protego + >>> import requests + >>> r = requests.get("https://google.com/robots.txt") + >>> rp = Protego.parse(r.text) + >>> rp.can_fetch("https://google.com/search", "mybot") + False + >>> rp.can_fetch("https://google.com/search/about", "mybot") + True + >>> list(rp.sitemaps) + ['https://www.google.com/sitemap.xml'] .. _Requests: https://3.python-requests.org/ From d6d7c11331fa86814982114b49bebbff88581e11 Mon Sep 17 00:00:00 2001 From: HuanYang <33485493+baotlake@users.noreply.github.com> Date: Tue, 28 May 2024 19:09:13 +0800 Subject: [PATCH 2/2] fix `README.rst` code-block --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index b9953fe..8b8c689 100644 --- a/README.rst +++ b/README.rst @@ -27,7 +27,7 @@ To install Protego, simply use pip: Usage ===== -.. code-block:: none +.. code-block:: pycon >>> from protego import Protego >>> robotstxt = """ @@ -66,7 +66,7 @@ Usage Using Protego with Requests_: -.. code-block:: none +.. code-block:: pycon >>> from protego import Protego >>> import requests