Drop support for legacy Python 2 (#346)

* Drop support for legacy Python 2
* Add python_requires to help pip
* Upgrade Python syntax with pyupgrade
* Upgrade Python syntax with pyupgrade --py3-plus
* Python 3 imports
* Replace six
* Update CONTRIBUTING.md
* Added line to changelog

Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com>
pdfminer · Jan 4, 2020 · 3502dc9 · 3502dc9
1 parent f3ab1bc
commit 3502dc9
Show file tree

Hide file tree

Showing 43 changed files with 4,533 additions and 4,693 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -1,6 +1,5 @@
 language: python
 python:
-  - "2.7"
   - "3.4"
   - "3.5"
   - "3.6"

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 
 ## [Unreleased]
 
+### Removed
+- Support for Python 2 ([#346](https://github.com/pdfminer/pdfminer.six/pull/346))
+
 ### Changed
 - Enforce pep8 coding style by adding flake8 to CI ([#345](https://github.com/pdfminer/pdfminer.six/pull/345))
 

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -29,8 +29,9 @@ Any contribution is appreciated! You might want to:
 * Pull requests should be merged to develop, not master. This ensures that master always equals the released version.  
 * Include unit tests when possible. In case of bugs, this will help to prevent the same mistake in the future. In case 
   of features, this will show that your code works correctly.
-* Code should work for Python 2.7 and Python 3.x (for now), conform to PEP8 code style (enforced by 
-  [flake8](http://flake8.pycqa.org/en/latest/)) and properly documented with docstrings.
+* Code should work for Python 3.4+.
+* Code should conform to PEP8 coding style.
+* New features should be well documented using docstrings.
 * Check spelling and grammar.
 * Don't forget to update the [CHANGELOG.md](CHANGELOG.md#[Unreleased])
 

diff --git a/README.md b/README.md
@@ -37,8 +37,8 @@ Features
 How to use
 ----------
 
- * Install Python 2.7 or newer. Note that Python 2 support is dropped at
-  January, 2020.
+ * Install Python 3.4 or newer
+ * Install
 
     `pip install pdfminer.six`
 

diff --git a/pdfminer/__init__.py b/pdfminer/__init__.py
@@ -1,6 +1,5 @@
-# -*- coding: utf-8 -*-
 """
-Fork of PDFMiner using six for Python 2+3 compatibility
+Fork of PDFMiner
 
 PDFMiner is a tool for extracting information from PDF documents.
 Unlike other PDF-related tools, it focuses entirely on getting and analyzing
@@ -10,18 +9,8 @@
 formats (such as HTML). It has an extensible PDF parser that can be used for
 other purposes instead of text analysis.
 """
-import sys
-import warnings
 
 __version__ = '20191110'
 
-
-if sys.version_info < (3, 0):
-    warnings.warn('On January 1st, 2020, '
-                  'pdfminer.six will stop supporting Python 2. '
-                  'Please upgrade to Python 3. '
-                  'For more information see '
-                  'https://github.com/pdfminer/pdfminer.six/issues/194')
-
 if __name__ == '__main__':
     print(__version__)
diff --git a/pdfminer/arcfour.py b/pdfminer/arcfour.py
@@ -1,23 +1,19 @@
-
-
 """ Python implementation of Arcfour encryption algorithm.
 See https://en.wikipedia.org/wiki/RC4
 This code is in the public domain.
 
 """
 
-import six  # Python 2+3 compatibility
-
 
-class Arcfour(object):
+class Arcfour:
 
     def __init__(self, key):
         # because Py3 range is not indexable
         s = [i for i in range(256)]
         j = 0
         klen = len(key)
         for i in range(256):
-            j = (j + s[i] + six.indexbytes(key, i % klen)) % 256
+            j = (j + s[i] + key[i % klen]) % 256
             (s[i], s[j]) = (s[j], s[i])
         self.s = s
         (self.i, self.j) = (0, 0)
@@ -27,12 +23,12 @@ def process(self, data):
         (i, j) = (self.i, self.j)
         s = self.s
         r = b''
-        for c in six.iterbytes(data):
+        for c in iter(data):
             i = (i+1) % 256
             j = (j+s[i]) % 256
             (s[i], s[j]) = (s[j], s[i])
             k = s[(s[i]+s[j]) % 256]
-            r += six.int2byte(c ^ k)
+            r += bytes((c ^ k,))
         (self.i, self.j) = (i, j)
         return r
 

diff --git a/pdfminer/ascii85.py b/pdfminer/ascii85.py
@@ -1,5 +1,3 @@
-
-
 """ Python implementation of ASCII85/ASCIIHex decoder (Adobe version).
 
 This code is in the public domain.
@@ -9,8 +7,6 @@
 import re
 import struct
 
-import six  # Python 2+3 compatibility
-
 
 # ascii85decode(data)
 def ascii85decode(data):
@@ -26,8 +22,8 @@ def ascii85decode(data):
     """
     n = b = 0
     out = b''
-    for i in six.iterbytes(data):
-        c = six.int2byte(i)
+    for i in iter(data):
+        c = bytes((i,))
         if b'!' <= c and c <= b'u':
             n += 1
             b = b*85+(ord(c)-33)
@@ -47,9 +43,8 @@ def ascii85decode(data):
 
 
 # asciihexdecode(data)
-hex_re = re.compile(b'([a-f0-9]{2})', re.IGNORECASE)
-trail_re = re.compile(b'^(?:[a-f0-9]{2}|[ \t\n\r\f\v])*'
-                      b'([a-f0-9])[ \t\n\r\f\v>]*$', re.IGNORECASE)
+hex_re = re.compile(br'([a-f\d]{2})', re.IGNORECASE)
+trail_re = re.compile(br'^(?:[a-f\d]{2}|\s)*([a-f\d])[\s>]*$', re.IGNORECASE)
 
 
 def asciihexdecode(data):
@@ -64,7 +59,7 @@ def asciihexdecode(data):
     """
     def decode(x):
         i = int(x, 16)
-        return six.int2byte(i)
+        return bytes((i,))
 
     out = b''
     for x in hex_re.findall(data):

diff --git a/pdfminer/ccitt.py b/pdfminer/ccitt.py
@@ -1,4 +1,3 @@
-
 # CCITT Fax decoder
 #
 # Bugs: uncompressed mode untested.
@@ -15,20 +14,12 @@
 import sys
 import array
 
-import six  # Python 2+3 compatibility
-
-if six.PY3:
-    def get_bytes(data):
-        for byte in data:
-            yield byte
-else:
-    def get_bytes(data):
-        for char in data:
-            yield ord(char)
 
+def get_bytes(data):
+    yield from data
 
-class BitParser(object):
 
+class BitParser:
     def __init__(self):
         self._pos = 0
         return

diff --git a/pdfminer/cmapdb.py b/pdfminer/cmapdb.py
@@ -13,10 +13,7 @@
 import os
 import os.path
 import gzip
-try:
-    import cPickle as pickle
-except ImportError:
-    import pickle as pickle
+import pickle as pickle
 import struct
 import logging
 from .psparser import PSStackParser
@@ -29,7 +26,6 @@
 from .utils import choplist
 from .utils import nunpack
 
-import six
 
 log = logging.getLogger(__name__)
 
@@ -38,7 +34,7 @@ class CMapError(Exception):
     pass
 
 
-class CMapBase(object):
+class CMapBase:
 
     debug = 0
 
@@ -77,7 +73,7 @@ def use_cmap(self, cmap):
         assert isinstance(cmap, CMap), str(type(cmap))
 
         def copy(dst, src):
-            for (k, v) in six.iteritems(src):
+            for (k, v) in src.items():
                 if isinstance(v, dict):
                     d = {}
                     dst[k] = d
@@ -90,7 +86,7 @@ def copy(dst, src):
     def decode(self, code):
         log.debug('decode: %r, %r', self, code)
         d = self.code2cid
-        for i in six.iterbytes(code):
+        for i in iter(code):
             if i in d:
                 d = d[i]
                 if isinstance(d, int):
@@ -104,7 +100,7 @@ def dump(self, out=sys.stdout, code2cid=None, code=None):
         if code2cid is None:
             code2cid = self.code2cid
             code = ()
-        for (k, v) in sorted(six.iteritems(code2cid)):
+        for (k, v) in sorted(code2cid.items()):
             c = code+(k,)
             if isinstance(v, int):
                 out.write('code %r = cid %d\n' % (c, v))
@@ -148,7 +144,7 @@ def get_unichr(self, cid):
         return self.cid2unichr[cid]
 
     def dump(self, out=sys.stdout):
-        for (k, v) in sorted(six.iteritems(self.cid2unichr)):
+        for (k, v) in sorted(self.cid2unichr.items()):
             out.write('cid %d = unicode %r\n' % (k, v))
         return
 
@@ -183,7 +179,7 @@ def add_cid2unichr(self, cid, code):
             # Interpret as UTF-16BE.
             self.cid2unichr[cid] = code.decode('UTF-16BE', 'ignore')
         elif isinstance(code, int):
-            self.cid2unichr[cid] = six.unichr(code)
+            self.cid2unichr[cid] = chr(code)
         else:
             raise TypeError(code)
         return
@@ -211,7 +207,7 @@ def __init__(self, name, module, vertical):
         return
 
 
-class CMapDB(object):
+class CMapDB:
 
     _cmap_cache = {}
     _umap_cache = {}

diff --git a/pdfminer/converter.py b/pdfminer/converter.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import logging
 import re
 import sys
@@ -23,7 +22,6 @@
 from .utils import bbox2str
 from . import utils
 
-import six
 
 log = logging.getLogger(__name__)
 
@@ -115,7 +113,7 @@ def render_char(self, matrix, font, fontsize, scaling, rise, cid, ncs,
                     graphicstate):
         try:
             text = font.to_unichr(cid)
-            assert isinstance(text, six.text_type), str(type(text))
+            assert isinstance(text, str), str(type(text))
         except PDFUnicodeNotDefined:
             text = self.handle_undefined_char(font, cid)
         textwidth = font.char_width(cid)
@@ -168,7 +166,7 @@ def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1,
                 self.outfp_binary = False
             else:
                 try:
-                    self.outfp.write(u"é")
+                    self.outfp.write("é")
                     self.outfp_binary = False
                 except TypeError:
                     self.outfp_binary = True
@@ -186,7 +184,7 @@ def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None,
 
     def write_text(self, text):
         text = utils.compatible_encode_method(text, self.codec, 'ignore')
-        if six.PY3 and self.outfp_binary:
+        if self.outfp_binary:
             text = text.encode()
         self.outfp.write(text)
         return
@@ -285,7 +283,7 @@ def write_header(self):
         return
 
     def write_footer(self):
-        page_links = ['<a href="#%s">%s</a>' % (i, i)
+        page_links = ['<a href="#{}">{}</a>'.format(i, i)
                       for i in range(1, self.pageno)]
         s = '<div style="position:absolute; top:0px;">Page: %s</div>\n' % \
             ', '.join(page_links)
@@ -385,8 +383,8 @@ def render(item):
                 if self.showpageno:
                     self.write('<div style="position:absolute; top:%dpx;">' %
                                ((self._yoffset-item.y1)*self.scale))
-                    self.write('<a name="%s">Page %s</a></div>\n' % (
-                        item.pageid, item.pageid))
+                    self.write('<a name="{}">Page {}</a></div>\n'
+                               .format(item.pageid, item.pageid))
                 for child in item:
                     render(child)
                 if item.groups is not None:
@@ -449,7 +447,7 @@ def close(self):
 
 class XMLConverter(PDFConverter):
 
-    CONTROL = re.compile(u'[\x00-\x08\x0b-\x0c\x0e-\x1f]')
+    CONTROL = re.compile('[\x00-\x08\x0b-\x0c\x0e-\x1f]')
 
     def __init__(self, rsrcmgr, outfp, codec='utf-8', pageno=1, laparams=None,
                  imagewriter=None, stripcontrol=False):
@@ -480,7 +478,7 @@ def write_footer(self):
 
     def write_text(self, text):
         if self.stripcontrol:
-            text = self.CONTROL.sub(u'', text)
+            text = self.CONTROL.sub('', text)
         self.write(enc(text, None))
         return
 

diff --git a/pdfminer/encodingdb.py b/pdfminer/encodingdb.py
@@ -1,8 +1,6 @@
 import logging
 import re
 
-import six  # Python 2+3 compatibility
-
 from .glyphlist import glyphname2unicode
 from .latin_enc import ENCODING
 from .psparser import PSLiteral
@@ -45,7 +43,7 @@ def name2unicode(name):
                                   for i in range(0, len(name_without_uni), 4)]
                 for digit in unicode_digits:
                     raise_key_error_for_invalid_unicode(digit)
-                characters = map(six.unichr, unicode_digits)
+                characters = map(chr, unicode_digits)
                 return ''.join(characters)
 
         elif name.startswith('u'):
@@ -55,7 +53,7 @@ def name2unicode(name):
                     4 <= len(name_without_u) <= 6:
                 unicode_digit = int(name_without_u, base=16)
                 raise_key_error_for_invalid_unicode(unicode_digit)
-                return six.unichr(unicode_digit)
+                return chr(unicode_digit)
 
     raise KeyError('Could not convert unicode name "%s" to character because '
                    'it does not match specification' % name)
@@ -72,7 +70,7 @@ def raise_key_error_for_invalid_unicode(unicode_digit):
                        'it is in the range D800 through DFFF' % unicode_digit)
 
 
-class EncodingDB(object):
+class EncodingDB:
 
     std2unicode = {}
     mac2unicode = {}