
Commit

Merge branch 'hotfix-6.0.1' into master
kurtmckee committed Sep 15, 2020
2 parents 7cb26f7 + 9c592f8 commit 98d189f
Showing 33 changed files with 122 additions and 318 deletions.
4 changes: 4 additions & 0 deletions NEWS
@@ -1,5 +1,9 @@
coming in the next release:

6.0.1 - 15 September 2020
* Remove all Python 2 compatibility code (#228)
* Add *python_requires* to ``setup.py`` (#231)

6.0.0 - 12 September 2020
* Support Python 3.6, 3.7, 3.8 and 3.9
* Drop support for Python 2.4 through 2.7, and Python 3.0 through 3.5 (#169)
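
For context on the *python_requires* entry above: it is a setuptools keyword that makes pip refuse to install the package on unsupported interpreters. Below is a minimal sketch of such a stanza; the actual specifier in feedparser's ``setup.py`` is not part of this diff, so the >=3.6 bound is an assumption based on the 6.0.0 entry.

from setuptools import setup

# Hypothetical excerpt -- the real setup.py is not shown in this commit.
setup(
    name='feedparser',
    version='6.0.1',
    python_requires='>=3.6',  # assumed bound, matching "Support Python 3.6, 3.7, 3.8 and 3.9"
    # other arguments (packages, metadata, ...) omitted
)
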
5 changes: 1 addition & 4 deletions feedparser/__init__.py
@@ -25,17 +25,14 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE."""

from __future__ import absolute_import
from __future__ import unicode_literals

from .api import parse
from .datetimes import registerDateHandler
from .exceptions import *
from .util import FeedParserDict

__author__ = 'Kurt McKee <contactme@kurtmckee.org>'
__license__ = 'BSD 2-clause'
__version__ = '6.0.0'
__version__ = '6.0.1'

# HTTP "User-Agent" header to send to servers when downloading feeds.
# If you are embedding feedparser in a larger application, you should
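
The trimmed __init__.py still re-exports parse and FeedParserDict as the public API. A minimal usage sketch follows; the inline RSS document is made up, and passing a string works because _open_resource (see api.py below) accepts strings and bytes as well as URLs and file paths.

import feedparser

# parse() accepts a URL, a file path, an open stream, or a str/bytes document.
d = feedparser.parse(
    "<rss version='2.0'><channel>"
    "<title>Sample</title>"
    "<item><title>First entry</title></item>"
    "</channel></rss>"
)
print(d.feed.title)        # 'Sample'
print(d.entries[0].title)  # 'First entry'
print(d.bozo)              # falsy unless the document was malformed
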
49 changes: 11 additions & 38 deletions feedparser/api.py
@@ -26,29 +26,10 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import
from __future__ import unicode_literals

import io
import urllib.parse
import xml.sax

try:
from io import BytesIO as _StringIO
except ImportError:
# Python 2.7
try:
from cStringIO import StringIO as _StringIO
except ImportError:
from StringIO import StringIO as _StringIO

try:
import urllib.parse
except ImportError:
from urlparse import urlparse

class urllib(object):
class parse(object):
urlparse = staticmethod(urlparse)

from .datetimes import registerDateHandler, _parse_date
from .encodings import convert_to_utf8
from .exceptions import *
@@ -63,14 +44,6 @@ class parse(object):
from .urls import convert_to_idn, make_safe_absolute_uri
from .util import FeedParserDict

bytes_ = type(b'')
unicode_ = type('')
try:
unichr
basestring
except NameError:
unichr = chr
basestring = str

# List of preferred XML parsers, by SAX driver name. These will be tried first,
# but if they're not installed, Python will keep searching through its own list
@@ -130,13 +103,13 @@ def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, h
if request_headers is supplied it is a dictionary of HTTP request headers
that will override the values generated by FeedParser.
:return: A :class:`StringIO.StringIO` or :class:`io.BytesIO`.
:return: A bytes object.
"""

if hasattr(url_file_stream_or_string, 'read'):
return url_file_stream_or_string.read()

if isinstance(url_file_stream_or_string, basestring) \
if isinstance(url_file_stream_or_string, str) \
and urllib.parse.urlparse(url_file_stream_or_string)[0] in ('http', 'https', 'ftp', 'file', 'feed'):
return http.get(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers, result)

@@ -145,7 +118,7 @@ def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, h
with open(url_file_stream_or_string, 'rb') as f:
data = f.read()
except (IOError, UnicodeEncodeError, TypeError, ValueError):
# if url_file_stream_or_string is a unicode object that
# if url_file_stream_or_string is a str object that
# cannot be converted to the encoding returned by
# sys.getfilesystemencoding(), a UnicodeEncodeError
# will be thrown
@@ -157,19 +130,19 @@ def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, h
return data

# treat url_file_stream_or_string as string
if not isinstance(url_file_stream_or_string, bytes_):
if not isinstance(url_file_stream_or_string, bytes):
return url_file_stream_or_string.encode('utf-8')
return url_file_stream_or_string


LooseFeedParser = type(
str('LooseFeedParser'), # `str()` call required for Python 2.7
'LooseFeedParser',
(_LooseFeedParser, _FeedParserMixin, _BaseHTMLProcessor, object),
{},
)

StrictFeedParser = type(
str('StrictFeedParser'), # `str()` call required for Python 2.7
'StrictFeedParser',
(_StrictFeedParser, _FeedParserMixin, xml.sax.handler.ContentHandler, object),
{},
)
@@ -257,7 +230,7 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer
baseuri = make_safe_absolute_uri(href, contentloc) or make_safe_absolute_uri(contentloc) or href

baselang = result['headers'].get('content-language', None)
if isinstance(baselang, bytes_) and baselang is not None:
if isinstance(baselang, bytes) and baselang is not None:
baselang = baselang.decode('utf-8', 'ignore')

if not _XML_AVAILABLE:
@@ -277,14 +250,14 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer
saxparser.setContentHandler(feedparser)
saxparser.setErrorHandler(feedparser)
source = xml.sax.xmlreader.InputSource()
source.setByteStream(_StringIO(data))
source.setByteStream(io.BytesIO(data))
try:
saxparser.parse(source)
except xml.sax.SAXException as e:
result['bozo'] = 1
result['bozo_exception'] = feedparser.exc or e
use_strict_parser = 0
if not use_strict_parser and _SGML_AVAILABLE:
if not use_strict_parser:
feedparser = LooseFeedParser(baseuri, baselang, 'utf-8', entities)
feedparser.resolve_relative_uris = resolve_relative_uris
feedparser.sanitize_html = sanitize_html
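
The strict-parser path above now always feeds bytes to SAX through io.BytesIO instead of the old _StringIO alias. Here is a minimal, self-contained sketch of that pattern with a trivial handler (not feedparser's own parser classes); the RSS snippet is made up.

import io
import xml.sax


class TitleCollector(xml.sax.handler.ContentHandler):
    """Collects the text of every <title> element."""

    def __init__(self):
        super().__init__()
        self.titles = []
        self._in_title = False

    def startElement(self, name, attrs):
        if name == 'title':
            self._in_title = True
            self.titles.append('')

    def endElement(self, name):
        if name == 'title':
            self._in_title = False

    def characters(self, content):
        if self._in_title:
            self.titles[-1] += content


data = b'<rss><channel><title>Example</title></channel></rss>'

handler = TitleCollector()
saxparser = xml.sax.make_parser()
saxparser.setContentHandler(handler)
source = xml.sax.xmlreader.InputSource()
source.setByteStream(io.BytesIO(data))  # bytes in, exactly as api.py does above
saxparser.parse(source)
print(handler.titles)  # ['Example']
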
2 changes: 0 additions & 2 deletions feedparser/datetimes/__init__.py
@@ -25,8 +25,6 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import

from .asctime import _parse_date_asctime
from .greek import _parse_date_greek
from .hungarian import _parse_date_hungarian
3 changes: 0 additions & 3 deletions feedparser/datetimes/asctime.py
@@ -25,9 +25,6 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import
from __future__ import unicode_literals

from .rfc822 import _parse_date_rfc822

_months = [
17 changes: 7 additions & 10 deletions feedparser/datetimes/greek.py
@@ -25,9 +25,6 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import
from __future__ import unicode_literals

import re

from .rfc822 import _parse_date_rfc822
@@ -56,13 +53,13 @@
}

_greek_wdays = {
'\u039a\u03c5\u03c1': 'Sun', # caf5f1 in iso-8859-7
'\u0394\u03b5\u03c5': 'Mon', # c4e5f5 in iso-8859-7
'\u03a4\u03c1\u03b9': 'Tue', # d4f1e9 in iso-8859-7
'\u03a4\u03b5\u03c4': 'Wed', # d4e5f4 in iso-8859-7
'\u03a0\u03b5\u03bc': 'Thu', # d0e5ec in iso-8859-7
'\u03a0\u03b1\u03c1': 'Fri', # d0e1f1 in iso-8859-7
'\u03a3\u03b1\u03b2': 'Sat', # d3e1e2 in iso-8859-7
'\u039a\u03c5\u03c1': 'Sun', # caf5f1 in iso-8859-7
'\u0394\u03b5\u03c5': 'Mon', # c4e5f5 in iso-8859-7
'\u03a4\u03c1\u03b9': 'Tue', # d4f1e9 in iso-8859-7
'\u03a4\u03b5\u03c4': 'Wed', # d4e5f4 in iso-8859-7
'\u03a0\u03b5\u03bc': 'Thu', # d0e5ec in iso-8859-7
'\u03a0\u03b1\u03c1': 'Fri', # d0e1f1 in iso-8859-7
'\u03a3\u03b1\u03b2': 'Sat', # d3e1e2 in iso-8859-7
}

_greek_date_format_re = re.compile(r'([^,]+),\s+(\d{2})\s+([^\s]+)\s+(\d{4})\s+(\d{2}):(\d{2}):(\d{2})\s+([^\s]+)')
5 changes: 1 addition & 4 deletions feedparser/datetimes/hungarian.py
@@ -25,9 +25,6 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import
from __future__ import unicode_literals

import re

from .w3dtf import _parse_date_w3dtf
@@ -48,7 +45,7 @@
'december': '12',
}

_hungarian_date_format_re = re.compile(r'(\d{4})-([^-]+)-(\d{,2})T(\d{,2}):(\d{2})((\+|-)(\d{,2}:\d{2}))')
_hungarian_date_format_re = re.compile(r'(\d{4})-([^-]+)-(\d{,2})T(\d{,2}):(\d{2})([+-](\d{,2}:\d{2}))')


def _parse_date_hungarian(date_string):
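
The regex change above replaces the (\+|-) alternation with a [+-] character class, which also removes one capture group. A quick illustration of what the new pattern captures; the date string is made up and uses the only month name visible in the excerpt above.

import re

# Same pattern as the new _hungarian_date_format_re.
pattern = re.compile(r'(\d{4})-([^-]+)-(\d{,2})T(\d{,2}):(\d{2})([+-](\d{,2}:\d{2}))')

m = pattern.match('2004-december-03T09:15+01:00')
print(m.groups())
# ('2004', 'december', '03', '09', '15', '+01:00', '01:00')
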
3 changes: 0 additions & 3 deletions feedparser/datetimes/iso8601.py
@@ -25,9 +25,6 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import
from __future__ import unicode_literals

import re
import time

7 changes: 2 additions & 5 deletions feedparser/datetimes/korean.py
@@ -25,9 +25,6 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import
from __future__ import unicode_literals

import re

from .w3dtf import _parse_date_w3dtf
@@ -55,8 +52,8 @@ def _parse_date_onblog(dateString):
if not m:
return
w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)s' % \
{'year': m.group(1), 'month': m.group(2), 'day': m.group(3),\
'hour': m.group(4), 'minute': m.group(5), 'second': m.group(6),\
{'year': m.group(1), 'month': m.group(2), 'day': m.group(3),
'hour': m.group(4), 'minute': m.group(5), 'second': m.group(6),
'zonediff': '+09:00'}
return _parse_date_w3dtf(w3dtfdate)

13 changes: 3 additions & 10 deletions feedparser/datetimes/perforce.py
@@ -25,14 +25,7 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import
from __future__ import unicode_literals

try:
import rfc822
except ImportError:
from email import _parseaddr as rfc822

import email._parseaddr
import re
import time

@@ -48,6 +41,6 @@ def _parse_date_perforce(date_string):
dow, year, month, day, hour, minute, second, tz = m.groups()
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
new_date_string = "%s, %s %s %s %s:%s:%s %s" % (dow, day, months[int(month) - 1], year, hour, minute, second, tz)
tm = rfc822.parsedate_tz(new_date_string)
tm = email._parseaddr.parsedate_tz(new_date_string)
if tm:
return time.gmtime(rfc822.mktime_tz(tm))
return time.gmtime(email._parseaddr.mktime_tz(tm))
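
The Perforce date parser now calls the standard library directly instead of the removed Python 2 rfc822 module. The same two calls are also available through the public email.utils wrappers, which re-export parsedate_tz and mktime_tz from email._parseaddr; a small sketch with a made-up date string:

import email.utils
import time

tm = email.utils.parsedate_tz('Fri, 13 Jan 2006 09:50:39 -0500')
print(tm)                                      # 10-tuple; the last item is the UTC offset in seconds (-18000 here)
print(time.gmtime(email.utils.mktime_tz(tm)))  # struct_time in UTC (14:50:39 on that date)
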
3 changes: 0 additions & 3 deletions feedparser/datetimes/rfc822.py
@@ -25,9 +25,6 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import
from __future__ import unicode_literals

import datetime

timezone_names = {
3 changes: 0 additions & 3 deletions feedparser/datetimes/w3dtf.py
@@ -25,9 +25,6 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import
from __future__ import unicode_literals

import datetime

timezonenames = {
14 changes: 2 additions & 12 deletions feedparser/encodings.py
@@ -26,9 +26,6 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import
from __future__ import unicode_literals

import cgi
import codecs
import re
@@ -43,21 +40,14 @@
lazy_chardet_encoding = None
else:
def lazy_chardet_encoding(data):
chardet_encoding = chardet.detect(data)['encoding']
if not chardet_encoding:
chardet_encoding = ''
if isinstance(chardet_encoding, bytes_):
chardet_encoding = chardet_encoding.encode('ascii', 'ignore')
return chardet_encoding
return chardet.detect(data)['encoding'] or ''

from .exceptions import (
CharacterEncodingOverride,
CharacterEncodingUnknown,
NonXMLContentType,
)

bytes_ = type(b'')
unicode_ = type('')

# Each marker represents some of the characters of the opening XML
# processing instruction ('<?xm') in the specified encoding.
@@ -193,7 +183,7 @@ def convert_to_utf8(http_headers, data, result):
http_content_type = http_headers.get('content-type') or ''
http_content_type, params = cgi.parse_header(http_content_type)
http_encoding = params.get('charset', '').replace("'", "")
if isinstance(http_encoding, bytes_):
if isinstance(http_encoding, bytes):
http_encoding = http_encoding.decode('utf-8', 'ignore')

acceptable_content_type = 0
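
The rewritten lazy_chardet_encoding leans on chardet.detect() returning a dict whose 'encoding' value may be None. A minimal sketch of that fallback, assuming the optional chardet package is installed; the sample text is made up and the detected encoding may vary with chardet's version and the amount of input.

import chardet  # optional dependency; feedparser degrades gracefully without it

data = 'Ελληνικά κείμενα σε μια ροή ειδήσεων'.encode('iso-8859-7')
guess = chardet.detect(data)        # e.g. {'encoding': 'ISO-8859-7', 'confidence': ..., 'language': 'Greek'}
encoding = guess['encoding'] or ''  # same fallback to '' as above when detection returns None
print(encoding)
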
3 changes: 0 additions & 3 deletions feedparser/exceptions.py
@@ -26,9 +26,6 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import
from __future__ import unicode_literals

__all__ = [
'ThingsNobodyCaresAboutButMe',
'CharacterEncodingOverride',
13 changes: 2 additions & 11 deletions feedparser/html.py
@@ -25,18 +25,9 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from __future__ import absolute_import
from __future__ import unicode_literals

import html.entities
import re

try:
from html.entities import name2codepoint
except ImportError:
# Python 2
# noinspection PyUnresolvedReferences
from htmlentitydefs import name2codepoint

from .sgml import *

_cp1252 = {
@@ -251,7 +242,7 @@ def handle_entityref(self, ref):

# Called for each entity reference, e.g. '&copy;' will extract 'copy'
# Reconstruct the original entity reference.
if ref in name2codepoint or ref == 'apos':
if ref in html.entities.name2codepoint or ref == 'apos':
self.pieces.append('&%s;' % ref)
else:
self.pieces.append('&amp;%s' % ref)
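
The entity-reference check above now reads name2codepoint through the html.entities module instead of a bare imported name. A quick look at what that table contains, and why 'apos' still needs the explicit special case:

import html.entities

print(html.entities.name2codepoint['copy'])    # 169, i.e. U+00A9 '©'
print('apos' in html.entities.name2codepoint)  # False: the HTML 4 table has no 'apos', hence the `or ref == 'apos'` branch
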
(The remaining changed files are not shown.)

0 comments on commit 98d189f
