improvements

Signed-off-by: Kenneth Reitz <me@kennethreitz.org>
kennethreitz · Mar 3, 2018 · 90de9b7 · 90de9b7
1 parent df1b037
commit 90de9b7
Show file tree

Hide file tree

Showing 2 changed files with 46 additions and 10 deletions.
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -144,6 +144,15 @@ XPath is also supported (`learn more <https://msdn.microsoft.com/en-us/library/m
    >>> r.html.xpath('a')
    [<Element 'a' class='btn' href='https://help.github.com/articles/supported-browsers'>]
 
+You can also select only elements containing certain text:
+
+.. code-block:: pycon
+
+    >>> r = session.get('http://python-requests.org/')
+    >>> r.html.find('a', containing=('kenneth',))
+    [<Element 'a' href='http://kennethreitz.com/pages/open-projects.html'>, <Element 'a' href='http://kennethreitz.org/'>, <Element 'a' href='https://twitter.com/kennethreitz' class=('twitter-follow-button',) data-show-count='false'>, <Element 'a' class=('reference', 'internal') href='dev/contributing/#kenneth-reitz-s-code-style'>]
+
+
 JavaScript Support
 ==================
 
@@ -162,6 +171,32 @@ Note, the first time you ever run the ``render()`` method, it will download
 Chromium into your home directory (e.g. ``~/.pyppeteer/``). This only happens
 once.
 
+Pagination
+==========
+
+There's also intelligent pagination support (always improving):
+
+.. code-block:: pycon
+
+    >>> r = session.get('https://reddit.com')
+    >>> for html in r.html:
+    ...     print(html)
+    <HTML url='https://www.reddit.com/'>
+    <HTML url='https://www.reddit.com/?count=25&after=t3_81puu5'>
+    <HTML url='https://www.reddit.com/?count=50&after=t3_81nevg'>
+    <HTML url='https://www.reddit.com/?count=75&after=t3_81lqtp'>
+    <HTML url='https://www.reddit.com/?count=100&after=t3_81k1c8'>
+    <HTML url='https://www.reddit.com/?count=125&after=t3_81p438'>
+    <HTML url='https://www.reddit.com/?count=150&after=t3_81nrcd'>
+    …
+
+You can also just request the next URL easily:
+
+.. code-block:: pycon
+
+    >>> r = session.get('https://reddit.com')
+    >>> r.html.next()
+    'https://www.reddit.com/?count=25&after=t3_81pm82'
 
 Using without Requests
 ======================

diff --git a/requests_html.py b/requests_html.py
@@ -37,6 +37,7 @@
 _LXML = HtmlElement
 _Text = str
 _Search = Result
+_Containing = Union[str, List[str]]
 _Links = Set[str]
 _Attrs = MutableMapping
 _Next = Union['HTML', List[str]]
@@ -59,11 +60,6 @@ class BaseParser:
 
     """
 
-    __slots__ = [
-        'element', 'url', 'skip_anchors', 'default_encoding', '_encoding',
-        '_encoding', '_html', '_lxml', '_pq', 'session'
-    ]
-
     def __init__(self, *, element, session: 'HTTPSession' = None, default_encoding: _DefaultEncoding = None, html: _HTML = None, url: _URL) -> None:
         self.element = element
         self.session = session or HTMLSession()
@@ -156,7 +152,7 @@ def full_text(self) -> _Text:
         """
         return self.lxml.text_content()
 
-    def next(self, fetch: bool = True) -> _Next:
+    def next(self, fetch: bool = False) -> _Next:
         """Attempts to find the next page, if there is one. If ``fetch``
         is ``True``, returns :class:`HTML <HTML>` object of
         next page. If ``fetch`` is ``False`` (default), simply returns the next URL.
@@ -186,7 +182,6 @@ def get_next():
             except IndexError:
                 return None
 
-
         next = get_next()
         if next:
             url = self._make_absolute(next)
@@ -198,8 +193,7 @@ def get_next():
         else:
             return url
 
-
-    def find(self, selector: str = "*", containing: Optional[str] = None, first: bool = False, _encoding: str = None) -> _Find:
+    def find(self, selector: str = "*", containing: _Containing = None, first: bool = False, _encoding: str = None) -> _Find:
         """Given a CSS Selector, returns a list of
         :class:`Element <Element>` objects or a single one.
 
@@ -223,6 +217,10 @@ def find(self, selector: str = "*", containing: Optional[str] = None, first: boo
         :class:`Element <Element>` found.
         """
 
+        # Convert a single containing into a list.
+        if isinstance(containing, str):
+            containing = [containing]
+
         encoding = _encoding or self.encoding
         elements = [
             Element(element=found, url=self.url, default_encoding=encoding)
@@ -357,7 +355,10 @@ class Element(BaseParser):
     :param default_encoding: Which encoding to default to.
     """
 
-    __slots__ = BaseParser.__slots__
+    __slots__ = [
+        'element', 'url', 'skip_anchors', 'default_encoding', '_encoding',
+        '_encoding', '_html', '_lxml', '_pq', 'session'
+    ]
 
     def __init__(self, *, element, url: _URL, default_encoding: _DefaultEncoding = None) -> None:
         super(Element, self).__init__(element=element, url=url, default_encoding=default_encoding)