
Commit

Merge branch 'hotfix-6.0.1' into master
kurtmckee committed Sep 15, 2020
2 parents 7cb26f7 + 9c592f8 commit 98d189f
Showing 33 changed files with 122 additions and 318 deletions.
4 changes: 4 additions & 0 deletions NEWS
@@ -1,5 +1,9 @@
coming in the next release:

6.0.1 - 15 September 2020
* Remove all Python 2 compatibility code (#228)
* Add *python_requires* to ``setup.py`` (#231)

6.0.0 - 12 September 2020
* Support Python 3.6, 3.7, 3.8 and 3.9
* Drop support for Python 2.4 through 2.7, and Python 3.0 through 3.5 (#169)
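
For context on the *python_requires* entry above: it is a setuptools keyword that makes pip refuse to install the package on unsupported interpreters. Below is a minimal sketch of such a stanza; the actual specifier in feedparser's ``setup.py`` is not part of this diff, so the >=3.6 bound is an assumption based on the 6.0.0 entry.

from setuptools import setup

# Hypothetical excerpt -- the real setup.py is not shown in this commit.
setup(
    name='feedparser',
    version='6.0.1',
    python_requires='>=3.6',  # assumed bound, matching "Support Python 3.6, 3.7, 3.8 and 3.9"
    # other arguments (packages, metadata, ...) omitted
)
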
5 changes: 1 addition & 4 deletions feedparser/__init__.py
@@ -25,17 +25,14 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE."""

from __future__ import absolute_import
from __future__ import unicode_literals

from .api import parse
from .datetimes import registerDateHandler
from .exceptions import *
from .util import FeedParserDict

__author__ = 'Kurt McKee <contactme@kurtmckee.org>'
__license__ = 'BSD 2-clause'
__version__ = '6.0.0'
__version__ = '6.0.1'

# HTTP "User-Agent" header to send to servers when downloading feeds.
# If you are embedding feedparser in a larger application, you should
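
The trimmed __init__.py still re-exports parse and FeedParserDict as the public API. A minimal usage sketch follows; the inline RSS document is made up, and passing a string works because _open_resource (see api.py below) accepts strings and bytes as well as URLs and file paths.

import feedparser

# parse() accepts a URL, a file path, an open stream, or a str/bytes document.
d = feedparser.parse(
    "<rss version='2.0'><channel>"
    "<title>Sample</title>"
    "<item><title>First entry</title></item>"
    "</channel></rss>"
)
print(d.feed.title)        # 'Sample'
print(d.entries[0].title)  # 'First entry'
print(d.bozo)              # falsy unless the document was malformed
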
49 changes: 11 additions & 38 deletions feedparser/api.py
@@ -26,29 +26,10 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import
from __future__ import unicode_literals

import io
import urllib.parse
import xml.sax

try:
from io import BytesIO as _StringIO
except ImportError:
# Python 2.7
try:
from cStringIO import StringIO as _StringIO
except ImportError:
from StringIO import StringIO as _StringIO

try:
import urllib.parse
except ImportError:
from urlparse import urlparse

class urllib(object):
class parse(object):
urlparse = staticmethod(urlparse)

from .datetimes import registerDateHandler, _parse_date
from .encodings import convert_to_utf8
from .exceptions import *
@@ -63,14 +44,6 @@ class parse(object):
from .urls import convert_to_idn, make_safe_absolute_uri
from .util import FeedParserDict

bytes_ = type(b'')
unicode_ = type('')
try:
unichr
basestring
except NameError:
unichr = chr
basestring = str

# List of preferred XML parsers, by SAX driver name. These will be tried first,
# but if they're not installed, Python will keep searching through its own list
@@ -130,13 +103,13 @@ def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, h
if request_headers is supplied it is a dictionary of HTTP request headers
that will override the values generated by FeedParser.
:return: A :class:`StringIO.StringIO` or :class:`io.BytesIO`.
:return: A bytes object.
"""

if hasattr(url_file_stream_or_string, 'read'):
return url_file_stream_or_string.read()

if isinstance(url_file_stream_or_string, basestring) \
if isinstance(url_file_stream_or_string, str) \
and urllib.parse.urlparse(url_file_stream_or_string)[0] in ('http', 'https', 'ftp', 'file', 'feed'):
return http.get(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers, result)

@@ -145,7 +118,7 @@ def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, h
with open(url_file_stream_or_string, 'rb') as f:
data = f.read()
except (IOError, UnicodeEncodeError, TypeError, ValueError):
# if url_file_stream_or_string is a unicode object that
# if url_file_stream_or_string is a str object that
# cannot be converted to the encoding returned by
# sys.getfilesystemencoding(), a UnicodeEncodeError
# will be thrown
@@ -157,19 +130,19 @@ def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, h
return data

# treat url_file_stream_or_string as string
if not isinstance(url_file_stream_or_string, bytes_):
if not isinstance(url_file_stream_or_string, bytes):
return url_file_stream_or_string.encode('utf-8')
return url_file_stream_or_string


LooseFeedParser = type(
str('LooseFeedParser'), # `str()` call required for Python 2.7
'LooseFeedParser',
(_LooseFeedParser, _FeedParserMixin, _BaseHTMLProcessor, object),
{},
)

StrictFeedParser = type(
str('StrictFeedParser'), # `str()` call required for Python 2.7
'StrictFeedParser',
(_StrictFeedParser, _FeedParserMixin, xml.sax.handler.ContentHandler, object),
{},
)
@@ -257,7 +230,7 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer
baseuri = make_safe_absolute_uri(href, contentloc) or make_safe_absolute_uri(contentloc) or href

baselang = result['headers'].get('content-language', None)
if isinstance(baselang, bytes_) and baselang is not None:
if isinstance(baselang, bytes) and baselang is not None:
baselang = baselang.decode('utf-8', 'ignore')

if not _XML_AVAILABLE:
@@ -277,14 +250,14 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer
saxparser.setContentHandler(feedparser)
saxparser.setErrorHandler(feedparser)
source = xml.sax.xmlreader.InputSource()
source.setByteStream(_StringIO(data))
source.setByteStream(io.BytesIO(data))
try:
saxparser.parse(source)
except xml.sax.SAXException as e:
result['bozo'] = 1
result['bozo_exception'] = feedparser.exc or e
use_strict_parser = 0
if not use_strict_parser and _SGML_AVAILABLE:
if not use_strict_parser:
feedparser = LooseFeedParser(baseuri, baselang, 'utf-8', entities)
feedparser.resolve_relative_uris = resolve_relative_uris
feedparser.sanitize_html = sanitize_html
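
The strict-parser path above now always feeds bytes to SAX through io.BytesIO instead of the old _StringIO alias. Here is a minimal, self-contained sketch of that pattern with a trivial handler (not feedparser's own parser classes); the RSS snippet is made up.

import io
import xml.sax


class TitleCollector(xml.sax.handler.ContentHandler):
    """Collects the text of every <title> element."""

    def __init__(self):
        super().__init__()
        self.titles = []
        self._in_title = False

    def startElement(self, name, attrs):
        if name == 'title':
            self._in_title = True
            self.titles.append('')

    def endElement(self, name):
        if name == 'title':
            self._in_title = False

    def characters(self, content):
        if self._in_title:
            self.titles[-1] += content


data = b'<rss><channel><title>Example</title></channel></rss>'

handler = TitleCollector()
saxparser = xml.sax.make_parser()
saxparser.setContentHandler(handler)
source = xml.sax.xmlreader.InputSource()
source.setByteStream(io.BytesIO(data))  # bytes in, exactly as api.py does above
saxparser.parse(source)
print(handler.titles)  # ['Example']
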
2 changes: 0 additions & 2 deletions feedparser/datetimes/__init__.py
@@ -25,8 +25,6 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import

from .asctime import _parse_date_asctime
from .greek import _parse_date_greek
from .hungarian import _parse_date_hungarian
3 changes: 0 additions & 3 deletions feedparser/datetimes/asctime.py
@@ -25,9 +25,6 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import
from __future__ import unicode_literals

from .rfc822 import _parse_date_rfc822

_months = [
17 changes: 7 additions & 10 deletions feedparser/datetimes/greek.py
@@ -25,9 +25,6 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import
from __future__ import unicode_literals

import re

from .rfc822 import _parse_date_rfc822
@@ -56,13 +53,13 @@
}

_greek_wdays = {
'\u039a\u03c5\u03c1': 'Sun', # caf5f1 in iso-8859-7
'\u0394\u03b5\u03c5': 'Mon', # c4e5f5 in iso-8859-7
'\u03a4\u03c1\u03b9': 'Tue', # d4f1e9 in iso-8859-7
'\u03a4\u03b5\u03c4': 'Wed', # d4e5f4 in iso-8859-7
'\u03a0\u03b5\u03bc': 'Thu', # d0e5ec in iso-8859-7
'\u03a0\u03b1\u03c1': 'Fri', # d0e1f1 in iso-8859-7
'\u03a3\u03b1\u03b2': 'Sat', # d3e1e2 in iso-8859-7
'\u039a\u03c5\u03c1': 'Sun', # caf5f1 in iso-8859-7
'\u0394\u03b5\u03c5': 'Mon', # c4e5f5 in iso-8859-7
'\u03a4\u03c1\u03b9': 'Tue', # d4f1e9 in iso-8859-7
'\u03a4\u03b5\u03c4': 'Wed', # d4e5f4 in iso-8859-7
'\u03a0\u03b5\u03bc': 'Thu', # d0e5ec in iso-8859-7
'\u03a0\u03b1\u03c1': 'Fri', # d0e1f1 in iso-8859-7
'\u03a3\u03b1\u03b2': 'Sat', # d3e1e2 in iso-8859-7
}

_greek_date_format_re = re.compile(r'([^,]+),\s+(\d{2})\s+([^\s]+)\s+(\d{4})\s+(\d{2}):(\d{2}):(\d{2})\s+([^\s]+)')
5 changes: 1 addition & 4 deletions feedparser/datetimes/hungarian.py
@@ -25,9 +25,6 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import
from __future__ import unicode_literals

import re

from .w3dtf import _parse_date_w3dtf
@@ -48,7 +45,7 @@
'december': '12',
}

_hungarian_date_format_re = re.compile(r'(\d{4})-([^-]+)-(\d{,2})T(\d{,2}):(\d{2})((\+|-)(\d{,2}:\d{2}))')
_hungarian_date_format_re = re.compile(r'(\d{4})-([^-]+)-(\d{,2})T(\d{,2}):(\d{2})([+-](\d{,2}:\d{2}))')


def _parse_date_hungarian(date_string):
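
The regex change above replaces the (\+|-) alternation with a [+-] character class, which also removes one capture group. A quick illustration of what the new pattern captures; the date string is made up and uses the only month name visible in the excerpt above.

import re

# Same pattern as the new _hungarian_date_format_re.
pattern = re.compile(r'(\d{4})-([^-]+)-(\d{,2})T(\d{,2}):(\d{2})([+-](\d{,2}:\d{2}))')

m = pattern.match('2004-december-03T09:15+01:00')
print(m.groups())
# ('2004', 'december', '03', '09', '15', '+01:00', '01:00')
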
3 changes: 0 additions & 3 deletions feedparser/datetimes/iso8601.py
@@ -25,9 +25,6 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import
from __future__ import unicode_literals

import re
import time

7 changes: 2 additions & 5 deletions feedparser/datetimes/korean.py
@@ -25,9 +25,6 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import
from __future__ import unicode_literals

import re

from .w3dtf import _parse_date_w3dtf
@@ -55,8 +52,8 @@ def _parse_date_onblog(dateString):
if not m:
return
w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)s' % \
{'year': m.group(1), 'month': m.group(2), 'day': m.group(3),\
'hour': m.group(4), 'minute': m.group(5), 'second': m.group(6),\
{'year': m.group(1), 'month': m.group(2), 'day': m.group(3),
'hour': m.group(4), 'minute': m.group(5), 'second': m.group(6),
'zonediff': '+09:00'}
return _parse_date_w3dtf(w3dtfdate)

13 changes: 3 additions & 10 deletions feedparser/datetimes/perforce.py
@@ -25,14 +25,7 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import
from __future__ import unicode_literals

try:
import rfc822
except ImportError:
from email import _parseaddr as rfc822

import email._parseaddr
import re
import time

@@ -48,6 +41,6 @@ def _parse_date_perforce(date_string):
dow, year, month, day, hour, minute, second, tz = m.groups()
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
new_date_string = "%s, %s %s %s %s:%s:%s %s" % (dow, day, months[int(month) - 1], year, hour, minute, second, tz)
tm = rfc822.parsedate_tz(new_date_string)
tm = email._parseaddr.parsedate_tz(new_date_string)
if tm:
return time.gmtime(rfc822.mktime_tz(tm))
return time.gmtime(email._parseaddr.mktime_tz(tm))
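
The Perforce date parser now calls the standard library directly instead of the removed Python 2 rfc822 module. The same two calls are also available through the public email.utils wrappers, which re-export parsedate_tz and mktime_tz from email._parseaddr; a small sketch with a made-up date string:

import email.utils
import time

tm = email.utils.parsedate_tz('Fri, 13 Jan 2006 09:50:39 -0500')
print(tm)                                      # 10-tuple; the last item is the UTC offset in seconds (-18000 here)
print(time.gmtime(email.utils.mktime_tz(tm)))  # struct_time in UTC (14:50:39 on that date)
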
3 changes: 0 additions & 3 deletions feedparser/datetimes/rfc822.py
@@ -25,9 +25,6 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import
from __future__ import unicode_literals

import datetime

timezone_names = {
3 changes: 0 additions & 3 deletions feedparser/datetimes/w3dtf.py
@@ -25,9 +25,6 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import
from __future__ import unicode_literals

import datetime

timezonenames = {
14 changes: 2 additions & 12 deletions feedparser/encodings.py
@@ -26,9 +26,6 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import
from __future__ import unicode_literals

import cgi
import codecs
import re
@@ -43,21 +40,14 @@
lazy_chardet_encoding = None
else:
def lazy_chardet_encoding(data):
chardet_encoding = chardet.detect(data)['encoding']
if not chardet_encoding:
chardet_encoding = ''
if isinstance(chardet_encoding, bytes_):
chardet_encoding = chardet_encoding.encode('ascii', 'ignore')
return chardet_encoding
return chardet.detect(data)['encoding'] or ''

from .exceptions import (
CharacterEncodingOverride,
CharacterEncodingUnknown,
NonXMLContentType,
)

bytes_ = type(b'')
unicode_ = type('')

# Each marker represents some of the characters of the opening XML
# processing instruction ('<?xm') in the specified encoding.
@@ -193,7 +183,7 @@ def convert_to_utf8(http_headers, data, result):
http_content_type = http_headers.get('content-type') or ''
http_content_type, params = cgi.parse_header(http_content_type)
http_encoding = params.get('charset', '').replace("'", "")
if isinstance(http_encoding, bytes_):
if isinstance(http_encoding, bytes):
http_encoding = http_encoding.decode('utf-8', 'ignore')

acceptable_content_type = 0
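
The rewritten lazy_chardet_encoding leans on chardet.detect() returning a dict whose 'encoding' value may be None. A minimal sketch of that fallback, assuming the optional chardet package is installed; the sample text is made up and the detected encoding may vary with chardet's version and the amount of input.

import chardet  # optional dependency; feedparser degrades gracefully without it

data = 'Ελληνικά κείμενα σε μια ροή ειδήσεων'.encode('iso-8859-7')
guess = chardet.detect(data)        # e.g. {'encoding': 'ISO-8859-7', 'confidence': ..., 'language': 'Greek'}
encoding = guess['encoding'] or ''  # same fallback to '' as above when detection returns None
print(encoding)
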
3 changes: 0 additions & 3 deletions feedparser/exceptions.py
@@ -26,9 +26,6 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import
from __future__ import unicode_literals

__all__ = [
'ThingsNobodyCaresAboutButMe',
'CharacterEncodingOverride',
13 changes: 2 additions & 11 deletions feedparser/html.py
@@ -25,18 +25,9 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import
from __future__ import unicode_literals

import html.entities
import re

try:
from html.entities import name2codepoint
except ImportError:
# Python 2
# noinspection PyUnresolvedReferences
from htmlentitydefs import name2codepoint

from .sgml import *

_cp1252 = {
@@ -251,7 +242,7 @@ def handle_entityref(self, ref):

# Called for each entity reference, e.g. '&copy;' will extract 'copy'
# Reconstruct the original entity reference.
if ref in name2codepoint or ref == 'apos':
if ref in html.entities.name2codepoint or ref == 'apos':
self.pieces.append('&%s;' % ref)
else:
self.pieces.append('&amp;%s' % ref)
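
The entity-reference check above now reads name2codepoint through the html.entities module instead of a bare imported name. A quick look at what that table contains, and why 'apos' still needs the explicit special case:

import html.entities

print(html.entities.name2codepoint['copy'])    # 169, i.e. U+00A9 '©'
print('apos' in html.entities.name2codepoint)  # False: the HTML 4 table has no 'apos', hence the `or ref == 'apos'` branch
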
(The remaining changed files are not shown.)

0 comments on commit 98d189f
