all: remove "u" string prefix (#1536)
* all: remove "u" string prefix

* util: remove unirange

Since Python 3.3, all builds are wide-Unicode compatible.

* unistring: remove support for narrow-unicode builds,
  which stopped being relevant with Python 3.3
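
For context, a quick illustration of why the prefix is now redundant (an
assumed interpreter session, not part of the commit): PEP 414 made u''
literals legal but meaningless on Python 3, and PEP 393 (Python 3.3) made
every interpreter build a wide build.

    >>> u'Pygments' == 'Pygments'  # PEP 414: the u prefix changes nothing
    True
    >>> import sys
    >>> sys.maxunicode             # 0x10FFFF on all builds since Python 3.3
    1114111
    >>> len('\U0001F600')          # astral chars count as one character
    1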
birkenfeld committed Sep 8, 2020
1 parent d9a9e9e commit 9f56726
Showing 76 changed files with 3,632 additions and 3,745 deletions.
12 changes: 6 additions & 6 deletions doc/conf.py
@@ -34,8 +34,8 @@
 master_doc = 'index'

 # General information about the project.
-project = u'Pygments'
-copyright = u'2006-2020, Georg Brandl and Pygments contributors'
+project = 'Pygments'
+copyright = '2006-2020, Georg Brandl and Pygments contributors'

 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
@@ -180,8 +180,8 @@
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title, author, documentclass [howto/manual]).
 latex_documents = [
-    ('docs/index', 'Pygments.tex', u'Pygments Documentation',
-     u'Pygments authors', 'manual'),
+    ('docs/index', 'Pygments.tex', 'Pygments Documentation',
+     'Pygments authors', 'manual'),
 ]

 # The name of an image file (relative to this directory) to place at the top of
@@ -210,8 +210,8 @@
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
 man_pages = [
-    ('docs/index', 'pygments', u'Pygments Documentation',
-     [u'Pygments authors'], 1)
+    ('docs/index', 'pygments', 'Pygments Documentation',
+     ['Pygments authors'], 1)
 ]

 # If true, show URL addresses after external links.
2 changes: 1 addition & 1 deletion external/rst-directive.py
@@ -76,7 +76,7 @@ def run(self):
             lexer = TextLexer()
         # take an arbitrary option if more than one is given
         formatter = self.options and VARIANTS[list(self.options)[0]] or DEFAULT
-        parsed = highlight(u'\n'.join(self.content), lexer, formatter)
+        parsed = highlight('\n'.join(self.content), lexer, formatter)
         return [nodes.raw('', parsed, format='html')]

 directives.register_directive('sourcecode', Pygments)
1,124 changes: 562 additions & 562 deletions pygments/filters/__init__.py

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions pygments/formatters/html.py
@@ -27,11 +27,11 @@


 _escape_html_table = {
-    ord('&'): u'&amp;',
-    ord('<'): u'&lt;',
-    ord('>'): u'&gt;',
-    ord('"'): u'&quot;',
-    ord("'"): u'&#39;',
+    ord('&'): '&amp;',
+    ord('<'): '&lt;',
+    ord('>'): '&gt;',
+    ord('"'): '&quot;',
+    ord("'"): '&#39;',
 }

@@ -433,7 +433,7 @@ def __init__(self, **options):
         self.linenostep = abs(get_int_opt(options, 'linenostep', 1))
         self.linenospecial = abs(get_int_opt(options, 'linenospecial', 0))
         self.nobackground = get_bool_opt(options, 'nobackground', False)
-        self.lineseparator = options.get('lineseparator', u'\n')
+        self.lineseparator = options.get('lineseparator', '\n')
         self.lineanchors = options.get('lineanchors', '')
         self.linespans = options.get('linespans', '')
         self.anchorlinenos = get_bool_opt(options, 'anchorlinenos', False)
18 changes: 9 additions & 9 deletions pygments/formatters/latex.py
@@ -265,7 +265,7 @@ def __init__(self, **options):
             self.right = self.escapeinside[1]
         else:
             self.escapeinside = ''
-        self.envname = options.get('envname', u'Verbatim')
+        self.envname = options.get('envname', 'Verbatim')

         self._create_stylesheet()

@@ -336,17 +336,17 @@ def format_unencoded(self, tokensource, outfile):
             realoutfile = outfile
             outfile = StringIO()

-        outfile.write(u'\\begin{' + self.envname + u'}[commandchars=\\\\\\{\\}')
+        outfile.write('\\begin{' + self.envname + '}[commandchars=\\\\\\{\\}')
         if self.linenos:
             start, step = self.linenostart, self.linenostep
-            outfile.write(u',numbers=left' +
-                          (start and u',firstnumber=%d' % start or u'') +
-                          (step and u',stepnumber=%d' % step or u''))
+            outfile.write(',numbers=left' +
+                          (start and ',firstnumber=%d' % start or '') +
+                          (step and ',stepnumber=%d' % step or ''))
         if self.mathescape or self.texcomments or self.escapeinside:
-            outfile.write(u',codes={\\catcode`\\$=3\\catcode`\\^=7\\catcode`\\_=8}')
+            outfile.write(',codes={\\catcode`\\$=3\\catcode`\\^=7\\catcode`\\_=8}')
         if self.verboptions:
-            outfile.write(u',' + self.verboptions)
-        outfile.write(u']\n')
+            outfile.write(',' + self.verboptions)
+        outfile.write(']\n')

         for ttype, value in tokensource:
             if ttype in Token.Comment:
@@ -409,7 +409,7 @@ def format_unencoded(self, tokensource, outfile):
             else:
                 outfile.write(value)

-        outfile.write(u'\\end{' + self.envname + u'}\n')
+        outfile.write('\\end{' + self.envname + '}\n')

         if self.full:
             encoding = self.encoding or 'utf8'
8 changes: 4 additions & 4 deletions pygments/formatters/other.py
@@ -119,12 +119,12 @@ def write(text):
             flush()


-TESTCASE_BEFORE = u'''\
+TESTCASE_BEFORE = '''\
     def testNeedsName(lexer):
         fragment = %r
         tokens = [
     '''
-TESTCASE_AFTER = u'''\
+TESTCASE_AFTER = '''\
     ]
     assert list(lexer.get_tokens(fragment)) == tokens
     '''
@@ -152,8 +152,8 @@ def format(self, tokensource, outfile):
                 rawbuf.append(value)
                 outbuf.append('%s(%s, %r),\n' % (indentation, ttype, value))

-        before = TESTCASE_BEFORE % (u''.join(rawbuf),)
-        during = u''.join(outbuf)
+        before = TESTCASE_BEFORE % (''.join(rawbuf),)
+        during = ''.join(outbuf)
         after = TESTCASE_AFTER
         if self.encoding is None:
             outfile.write(before + during + after)
52 changes: 26 additions & 26 deletions pygments/formatters/rtf.py
@@ -10,7 +10,7 @@
 """

 from pygments.formatter import Formatter
-from pygments.util import get_int_opt, _surrogatepair
+from pygments.util import get_int_opt, surrogatepair


 __all__ = ['RtfFormatter']
@@ -65,14 +65,14 @@ def __init__(self, **options):
         self.fontsize = get_int_opt(options, 'fontsize', 0)

     def _escape(self, text):
-        return text.replace(u'\\', u'\\\\') \
-                   .replace(u'{', u'\\{') \
-                   .replace(u'}', u'\\}')
+        return text.replace('\\', '\\\\') \
+                   .replace('{', '\\{') \
+                   .replace('}', '\\}')

     def _escape_text(self, text):
         # empty strings, should give a small performance improvement
         if not text:
-            return u''
+            return ''

         # escape text
         text = self._escape(text)
@@ -85,21 +85,21 @@ def _escape_text(self, text):
                 buf.append(str(c))
             elif (2**7) <= cn < (2**16):
                 # single unicode escape sequence
-                buf.append(u'{\\u%d}' % cn)
+                buf.append('{\\u%d}' % cn)
             elif (2**16) <= cn:
                 # RTF limits unicode to 16 bits.
                 # Force surrogate pairs
-                buf.append(u'{\\u%d}{\\u%d}' % _surrogatepair(cn))
+                buf.append('{\\u%d}{\\u%d}' % surrogatepair(cn))

-        return u''.join(buf).replace(u'\n', u'\\par\n')
+        return ''.join(buf).replace('\n', '\\par\n')

     def format_unencoded(self, tokensource, outfile):
         # rtf 1.8 header
-        outfile.write(u'{\\rtf1\\ansi\\uc0\\deff0'
-                      u'{\\fonttbl{\\f0\\fmodern\\fprq1\\fcharset0%s;}}'
-                      u'{\\colortbl;' % (self.fontface and
-                                         u' ' + self._escape(self.fontface) or
-                                         u''))
+        outfile.write('{\\rtf1\\ansi\\uc0\\deff0'
+                      '{\\fonttbl{\\f0\\fmodern\\fprq1\\fcharset0%s;}}'
+                      '{\\colortbl;' % (self.fontface and
+                                        ' ' + self._escape(self.fontface) or
+                                        ''))

         # convert colors and save them in a mapping to access them later.
         color_mapping = {}
@@ -108,15 +108,15 @@ def format_unencoded(self, tokensource, outfile):
             for color in style['color'], style['bgcolor'], style['border']:
                 if color and color not in color_mapping:
                     color_mapping[color] = offset
-                    outfile.write(u'\\red%d\\green%d\\blue%d;' % (
+                    outfile.write('\\red%d\\green%d\\blue%d;' % (
                         int(color[0:2], 16),
                         int(color[2:4], 16),
                         int(color[4:6], 16)
                     ))
                     offset += 1
-        outfile.write(u'}\\f0 ')
+        outfile.write('}\\f0 ')
         if self.fontsize:
-            outfile.write(u'\\fs%d' % (self.fontsize))
+            outfile.write('\\fs%d' % self.fontsize)

         # highlight stream
         for ttype, value in tokensource:
@@ -125,23 +125,23 @@ def format_unencoded(self, tokensource, outfile):
             style = self.style.style_for_token(ttype)
             buf = []
             if style['bgcolor']:
-                buf.append(u'\\cb%d' % color_mapping[style['bgcolor']])
+                buf.append('\\cb%d' % color_mapping[style['bgcolor']])
             if style['color']:
-                buf.append(u'\\cf%d' % color_mapping[style['color']])
+                buf.append('\\cf%d' % color_mapping[style['color']])
             if style['bold']:
-                buf.append(u'\\b')
+                buf.append('\\b')
             if style['italic']:
-                buf.append(u'\\i')
+                buf.append('\\i')
             if style['underline']:
-                buf.append(u'\\ul')
+                buf.append('\\ul')
             if style['border']:
-                buf.append(u'\\chbrdr\\chcfpat%d' %
+                buf.append('\\chbrdr\\chcfpat%d' %
                            color_mapping[style['border']])
-            start = u''.join(buf)
+            start = ''.join(buf)
             if start:
-                outfile.write(u'{%s ' % start)
+                outfile.write('{%s ' % start)
             outfile.write(self._escape_text(value))
             if start:
-                outfile.write(u'}')
+                outfile.write('}')

-        outfile.write(u'}')
+        outfile.write('}')
12 changes: 6 additions & 6 deletions pygments/lexer.py
@@ -166,11 +166,11 @@ def get_tokens(self, text, unfiltered=False):
                 text = decoded
             else:
                 text = text.decode(self.encoding)
-                if text.startswith(u'\ufeff'):
-                    text = text[len(u'\ufeff'):]
+                if text.startswith('\ufeff'):
+                    text = text[len('\ufeff'):]
         else:
-            if text.startswith(u'\ufeff'):
-                text = text[len(u'\ufeff'):]
+            if text.startswith('\ufeff'):
+                text = text[len('\ufeff'):]

         # text now *is* a unicode string
         text = text.replace('\r\n', '\n')
@@ -663,7 +663,7 @@ def get_tokens_unprocessed(self, text, stack=('root',)):
                         # at EOL, reset state to "root"
                         statestack = ['root']
                         statetokens = tokendefs['root']
-                        yield pos, Text, u'\n'
+                        yield pos, Text, '\n'
                         pos += 1
                         continue
                     yield pos, Error, text[pos]
@@ -751,7 +751,7 @@ def get_tokens_unprocessed(self, text=None, context=None):
                         # at EOL, reset state to "root"
                         ctx.stack = ['root']
                         statetokens = tokendefs['root']
-                        yield ctx.pos, Text, u'\n'
+                        yield ctx.pos, Text, '\n'
                         ctx.pos += 1
                         continue
                     yield ctx.pos, Error, text[ctx.pos]
26 changes: 13 additions & 13 deletions pygments/lexers/apl.py
@@ -35,7 +35,7 @@ class APLLexer(RegexLexer):
             # Comment
             # =======
             # '⍝' is traditional; '#' is supported by GNU APL and NGN (but not Dyalog)
-            (u'[⍝#].*$', Comment.Single),
+            (r'[⍝#].*$', Comment.Single),
             #
             # Strings
             # =======
@@ -46,7 +46,7 @@ class APLLexer(RegexLexer):
             # ===========
             # This token type is used for diamond and parenthesis
             # but not for bracket and ; (see below)
-            (u'[⋄◇()]', Punctuation),
+            (r'[⋄◇()]', Punctuation),
             #
             # Array indexing
             # ==============
@@ -57,45 +57,45 @@ class APLLexer(RegexLexer):
             # Distinguished names
             # ===================
             # following IBM APL2 standard
-            (u'⎕[A-Za-zΔ∆⍙][A-Za-zΔ∆⍙_¯0-9]*', Name.Function),
+            (r'⎕[A-Za-zΔ∆⍙][A-Za-zΔ∆⍙_¯0-9]*', Name.Function),
             #
             # Labels
             # ======
             # following IBM APL2 standard
-            # (u'[A-Za-zΔ∆⍙][A-Za-zΔ∆⍙_¯0-9]*:', Name.Label),
+            # (r'[A-Za-zΔ∆⍙][A-Za-zΔ∆⍙_¯0-9]*:', Name.Label),
             #
             # Variables
             # =========
             # following IBM APL2 standard
-            (u'[A-Za-zΔ∆⍙][A-Za-zΔ∆⍙_¯0-9]*', Name.Variable),
+            (r'[A-Za-zΔ∆⍙][A-Za-zΔ∆⍙_¯0-9]*', Name.Variable),
             #
             # Numbers
             # =======
-            (u'¯?(0[Xx][0-9A-Fa-f]+|[0-9]*\\.?[0-9]+([Ee][+¯]?[0-9]+)?|¯|∞)'
-             u'([Jj]¯?(0[Xx][0-9A-Fa-f]+|[0-9]*\\.?[0-9]+([Ee][+¯]?[0-9]+)?|¯|∞))?',
+            (r'¯?(0[Xx][0-9A-Fa-f]+|[0-9]*\.?[0-9]+([Ee][+¯]?[0-9]+)?|¯|∞)'
+             r'([Jj]¯?(0[Xx][0-9A-Fa-f]+|[0-9]*\.?[0-9]+([Ee][+¯]?[0-9]+)?|¯|∞))?',
             Number),
             #
             # Operators
             # ==========
-            (u'[\\.\\\\\\/⌿⍀¨⍣⍨⍠⍤∘⌸&⌶@⌺⍥⍛⍢]', Name.Attribute),  # closest token type
-            (u'[+\\-×÷⌈⌊∣|⍳?*⍟○!⌹<≤=>≥≠≡≢∊⍷∪∩~∨∧⍱⍲⍴,⍪⌽⊖⍉↑↓⊂⊃⌷⍋⍒⊤⊥⍕⍎⊣⊢⍁⍂≈⌸⍯↗⊆⊇⍸√⌾…⍮]',
+            (r'[\.\\\/⌿⍀¨⍣⍨⍠⍤∘⌸&⌶@⌺⍥⍛⍢]', Name.Attribute),  # closest token type
+            (r'[+\-×÷⌈⌊∣|⍳?*⍟○!⌹<≤=>≥≠≡≢∊⍷∪∩~∨∧⍱⍲⍴,⍪⌽⊖⍉↑↓⊂⊃⌷⍋⍒⊤⊥⍕⍎⊣⊢⍁⍂≈⌸⍯↗⊆⊇⍸√⌾…⍮]',
              Operator),
             #
             # Constant
             # ========
-            (u'⍬', Name.Constant),
+            (r'⍬', Name.Constant),
             #
             # Quad symbol
             # ===========
-            (u'[⎕⍞]', Name.Variable.Global),
+            (r'[⎕⍞]', Name.Variable.Global),
             #
             # Arrows left/right
             # =================
-            (u'[←→]', Keyword.Declaration),
+            (r'[←→]', Keyword.Declaration),
             #
             # D-Fn
             # ====
-            (u'[⍺⍵⍶⍹∇:]', Name.Builtin.Pseudo),
+            (r'[⍺⍵⍶⍹∇:]', Name.Builtin.Pseudo),
             (r'[{}]', Keyword.Type),
         ],
     }
4 changes: 2 additions & 2 deletions pygments/lexers/archetype.py
@@ -212,9 +212,9 @@ class CadlLexer(AtomsLexer):
             (r'(not)\W', Operator),
             (r'(matches|is_in)\W', Operator),
             # is_in / not is_in char
-            (u'(\u2208|\u2209)', Operator),
+            ('(\u2208|\u2209)', Operator),
             # there_exists / not there_exists / for_all / and / or
-            (u'(\u2203|\u2204|\u2200|\u2227|\u2228|\u22BB|\u223C)',
+            ('(\u2203|\u2204|\u2200|\u2227|\u2228|\u22BB|\u223C)',
              Operator),
             # regex in slot or as string constraint
             (r'(\{)(\s*/[^}]+/\s*)(\})',
2 changes: 1 addition & 1 deletion pygments/lexers/erlang.py
@@ -504,7 +504,7 @@ def get_tokens_unprocessed(self, text):
         insertions = []
         for match in line_re.finditer(text):
             line = match.group()
-            if line.startswith(u'** '):
+            if line.startswith('** '):
                 in_error = True
                 insertions.append((len(curcode),
                                    [(0, Generic.Error, line[:-1])]))