all: remove "u" string prefix (#1536)
* all: remove "u" string prefix

* util: remove unirange

Since Python 3.3, all builds are wide-Unicode compatible.

* unistring: remove support for narrow-unicode builds,
  which stopped being relevant with Python 3.3
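
For context, a quick illustration of why the prefix is now redundant (an
assumed interpreter session, not part of the commit): PEP 414 made u''
literals legal but meaningless on Python 3, and PEP 393 (Python 3.3) made
every interpreter build a wide build.

    >>> u'Pygments' == 'Pygments'  # PEP 414: the u prefix changes nothing
    True
    >>> import sys
    >>> sys.maxunicode             # 0x10FFFF on all builds since Python 3.3
    1114111
    >>> len('\U0001F600')          # astral chars count as one character
    1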
birkenfeld committed Sep 8, 2020
1 parent d9a9e9e commit 9f56726
Showing 76 changed files with 3,632 additions and 3,745 deletions.
12 changes: 6 additions & 6 deletions doc/conf.py
@@ -34,8 +34,8 @@
 master_doc = 'index'

 # General information about the project.
-project = u'Pygments'
-copyright = u'2006-2020, Georg Brandl and Pygments contributors'
+project = 'Pygments'
+copyright = '2006-2020, Georg Brandl and Pygments contributors'

 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
@@ -180,8 +180,8 @@
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title, author, documentclass [howto/manual]).
 latex_documents = [
-    ('docs/index', 'Pygments.tex', u'Pygments Documentation',
-     u'Pygments authors', 'manual'),
+    ('docs/index', 'Pygments.tex', 'Pygments Documentation',
+     'Pygments authors', 'manual'),
 ]

 # The name of an image file (relative to this directory) to place at the top of
@@ -210,8 +210,8 @@
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
 man_pages = [
-    ('docs/index', 'pygments', u'Pygments Documentation',
-     [u'Pygments authors'], 1)
+    ('docs/index', 'pygments', 'Pygments Documentation',
+     ['Pygments authors'], 1)
 ]

 # If true, show URL addresses after external links.
2 changes: 1 addition & 1 deletion external/rst-directive.py
@@ -76,7 +76,7 @@ def run(self):
             lexer = TextLexer()
         # take an arbitrary option if more than one is given
         formatter = self.options and VARIANTS[list(self.options)[0]] or DEFAULT
-        parsed = highlight(u'\n'.join(self.content), lexer, formatter)
+        parsed = highlight('\n'.join(self.content), lexer, formatter)
         return [nodes.raw('', parsed, format='html')]

 directives.register_directive('sourcecode', Pygments)
1,124 changes: 562 additions & 562 deletions pygments/filters/__init__.py

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions pygments/formatters/html.py
@@ -27,11 +27,11 @@


 _escape_html_table = {
-    ord('&'): u'&amp;',
-    ord('<'): u'&lt;',
-    ord('>'): u'&gt;',
-    ord('"'): u'&quot;',
-    ord("'"): u'&#39;',
+    ord('&'): '&amp;',
+    ord('<'): '&lt;',
+    ord('>'): '&gt;',
+    ord('"'): '&quot;',
+    ord("'"): '&#39;',
 }

@@ -433,7 +433,7 @@ def __init__(self, **options):
         self.linenostep = abs(get_int_opt(options, 'linenostep', 1))
         self.linenospecial = abs(get_int_opt(options, 'linenospecial', 0))
         self.nobackground = get_bool_opt(options, 'nobackground', False)
-        self.lineseparator = options.get('lineseparator', u'\n')
+        self.lineseparator = options.get('lineseparator', '\n')
         self.lineanchors = options.get('lineanchors', '')
         self.linespans = options.get('linespans', '')
         self.anchorlinenos = get_bool_opt(options, 'anchorlinenos', False)
18 changes: 9 additions & 9 deletions pygments/formatters/latex.py
@@ -265,7 +265,7 @@ def __init__(self, **options):
             self.right = self.escapeinside[1]
         else:
             self.escapeinside = ''
-        self.envname = options.get('envname', u'Verbatim')
+        self.envname = options.get('envname', 'Verbatim')

         self._create_stylesheet()

@@ -336,17 +336,17 @@ def format_unencoded(self, tokensource, outfile):
             realoutfile = outfile
             outfile = StringIO()

-        outfile.write(u'\\begin{' + self.envname + u'}[commandchars=\\\\\\{\\}')
+        outfile.write('\\begin{' + self.envname + '}[commandchars=\\\\\\{\\}')
         if self.linenos:
             start, step = self.linenostart, self.linenostep
-            outfile.write(u',numbers=left' +
-                          (start and u',firstnumber=%d' % start or u'') +
-                          (step and u',stepnumber=%d' % step or u''))
+            outfile.write(',numbers=left' +
+                          (start and ',firstnumber=%d' % start or '') +
+                          (step and ',stepnumber=%d' % step or ''))
         if self.mathescape or self.texcomments or self.escapeinside:
-            outfile.write(u',codes={\\catcode`\\$=3\\catcode`\\^=7\\catcode`\\_=8}')
+            outfile.write(',codes={\\catcode`\\$=3\\catcode`\\^=7\\catcode`\\_=8}')
         if self.verboptions:
-            outfile.write(u',' + self.verboptions)
-        outfile.write(u']\n')
+            outfile.write(',' + self.verboptions)
+        outfile.write(']\n')

         for ttype, value in tokensource:
             if ttype in Token.Comment:
@@ -409,7 +409,7 @@ def format_unencoded(self, tokensource, outfile):
             else:
                 outfile.write(value)

-        outfile.write(u'\\end{' + self.envname + u'}\n')
+        outfile.write('\\end{' + self.envname + '}\n')

         if self.full:
             encoding = self.encoding or 'utf8'
8 changes: 4 additions & 4 deletions pygments/formatters/other.py
@@ -119,12 +119,12 @@ def write(text):
             flush()


-TESTCASE_BEFORE = u'''\
+TESTCASE_BEFORE = '''\
     def testNeedsName(lexer):
         fragment = %r
         tokens = [
     '''
-TESTCASE_AFTER = u'''\
+TESTCASE_AFTER = '''\
     ]
     assert list(lexer.get_tokens(fragment)) == tokens
     '''
@@ -152,8 +152,8 @@ def format(self, tokensource, outfile):
                 rawbuf.append(value)
                 outbuf.append('%s(%s, %r),\n' % (indentation, ttype, value))

-        before = TESTCASE_BEFORE % (u''.join(rawbuf),)
-        during = u''.join(outbuf)
+        before = TESTCASE_BEFORE % (''.join(rawbuf),)
+        during = ''.join(outbuf)
         after = TESTCASE_AFTER
         if self.encoding is None:
             outfile.write(before + during + after)
52 changes: 26 additions & 26 deletions pygments/formatters/rtf.py
@@ -10,7 +10,7 @@
 """

 from pygments.formatter import Formatter
-from pygments.util import get_int_opt, _surrogatepair
+from pygments.util import get_int_opt, surrogatepair


 __all__ = ['RtfFormatter']
@@ -65,14 +65,14 @@ def __init__(self, **options):
         self.fontsize = get_int_opt(options, 'fontsize', 0)

     def _escape(self, text):
-        return text.replace(u'\\', u'\\\\') \
-                   .replace(u'{', u'\\{') \
-                   .replace(u'}', u'\\}')
+        return text.replace('\\', '\\\\') \
+                   .replace('{', '\\{') \
+                   .replace('}', '\\}')

     def _escape_text(self, text):
         # empty strings, should give a small performance improvement
         if not text:
-            return u''
+            return ''

         # escape text
         text = self._escape(text)
@@ -85,21 +85,21 @@ def _escape_text(self, text):
                 buf.append(str(c))
             elif (2**7) <= cn < (2**16):
                 # single unicode escape sequence
-                buf.append(u'{\\u%d}' % cn)
+                buf.append('{\\u%d}' % cn)
             elif (2**16) <= cn:
                 # RTF limits unicode to 16 bits.
                 # Force surrogate pairs
-                buf.append(u'{\\u%d}{\\u%d}' % _surrogatepair(cn))
+                buf.append('{\\u%d}{\\u%d}' % surrogatepair(cn))

-        return u''.join(buf).replace(u'\n', u'\\par\n')
+        return ''.join(buf).replace('\n', '\\par\n')

     def format_unencoded(self, tokensource, outfile):
         # rtf 1.8 header
-        outfile.write(u'{\\rtf1\\ansi\\uc0\\deff0'
-                      u'{\\fonttbl{\\f0\\fmodern\\fprq1\\fcharset0%s;}}'
-                      u'{\\colortbl;' % (self.fontface and
-                                         u' ' + self._escape(self.fontface) or
-                                         u''))
+        outfile.write('{\\rtf1\\ansi\\uc0\\deff0'
+                      '{\\fonttbl{\\f0\\fmodern\\fprq1\\fcharset0%s;}}'
+                      '{\\colortbl;' % (self.fontface and
+                                        ' ' + self._escape(self.fontface) or
+                                        ''))

         # convert colors and save them in a mapping to access them later.
         color_mapping = {}
@@ -108,15 +108,15 @@ def format_unencoded(self, tokensource, outfile):
             for color in style['color'], style['bgcolor'], style['border']:
                 if color and color not in color_mapping:
                     color_mapping[color] = offset
-                    outfile.write(u'\\red%d\\green%d\\blue%d;' % (
+                    outfile.write('\\red%d\\green%d\\blue%d;' % (
                         int(color[0:2], 16),
                         int(color[2:4], 16),
                         int(color[4:6], 16)
                     ))
                     offset += 1
-        outfile.write(u'}\\f0 ')
+        outfile.write('}\\f0 ')
         if self.fontsize:
-            outfile.write(u'\\fs%d' % (self.fontsize))
+            outfile.write('\\fs%d' % self.fontsize)

         # highlight stream
         for ttype, value in tokensource:
@@ -125,23 +125,23 @@ def format_unencoded(self, tokensource, outfile):
             style = self.style.style_for_token(ttype)
             buf = []
             if style['bgcolor']:
-                buf.append(u'\\cb%d' % color_mapping[style['bgcolor']])
+                buf.append('\\cb%d' % color_mapping[style['bgcolor']])
             if style['color']:
-                buf.append(u'\\cf%d' % color_mapping[style['color']])
+                buf.append('\\cf%d' % color_mapping[style['color']])
             if style['bold']:
-                buf.append(u'\\b')
+                buf.append('\\b')
             if style['italic']:
-                buf.append(u'\\i')
+                buf.append('\\i')
             if style['underline']:
-                buf.append(u'\\ul')
+                buf.append('\\ul')
             if style['border']:
-                buf.append(u'\\chbrdr\\chcfpat%d' %
+                buf.append('\\chbrdr\\chcfpat%d' %
                            color_mapping[style['border']])
-            start = u''.join(buf)
+            start = ''.join(buf)
             if start:
-                outfile.write(u'{%s ' % start)
+                outfile.write('{%s ' % start)
             outfile.write(self._escape_text(value))
             if start:
-                outfile.write(u'}')
+                outfile.write('}')

-        outfile.write(u'}')
+        outfile.write('}')
12 changes: 6 additions & 6 deletions pygments/lexer.py
@@ -166,11 +166,11 @@ def get_tokens(self, text, unfiltered=False):
                 text = decoded
             else:
                 text = text.decode(self.encoding)
-                if text.startswith(u'\ufeff'):
-                    text = text[len(u'\ufeff'):]
+                if text.startswith('\ufeff'):
+                    text = text[len('\ufeff'):]
         else:
-            if text.startswith(u'\ufeff'):
-                text = text[len(u'\ufeff'):]
+            if text.startswith('\ufeff'):
+                text = text[len('\ufeff'):]

         # text now *is* a unicode string
         text = text.replace('\r\n', '\n')
@@ -663,7 +663,7 @@ def get_tokens_unprocessed(self, text, stack=('root',)):
                         # at EOL, reset state to "root"
                         statestack = ['root']
                         statetokens = tokendefs['root']
-                        yield pos, Text, u'\n'
+                        yield pos, Text, '\n'
                         pos += 1
                         continue
                     yield pos, Error, text[pos]
@@ -751,7 +751,7 @@ def get_tokens_unprocessed(self, text=None, context=None):
                         # at EOL, reset state to "root"
                         ctx.stack = ['root']
                         statetokens = tokendefs['root']
-                        yield ctx.pos, Text, u'\n'
+                        yield ctx.pos, Text, '\n'
                         ctx.pos += 1
                         continue
                     yield ctx.pos, Error, text[ctx.pos]
26 changes: 13 additions & 13 deletions pygments/lexers/apl.py
@@ -35,7 +35,7 @@ class APLLexer(RegexLexer):
             # Comment
             # =======
             # '⍝' is traditional; '#' is supported by GNU APL and NGN (but not Dyalog)
-            (u'[⍝#].*$', Comment.Single),
+            (r'[⍝#].*$', Comment.Single),
             #
             # Strings
             # =======
@@ -46,7 +46,7 @@ class APLLexer(RegexLexer):
             # ===========
             # This token type is used for diamond and parenthesis
             # but not for bracket and ; (see below)
-            (u'[⋄◇()]', Punctuation),
+            (r'[⋄◇()]', Punctuation),
             #
             # Array indexing
             # ==============
@@ -57,45 +57,45 @@ class APLLexer(RegexLexer):
             # Distinguished names
             # ===================
             # following IBM APL2 standard
-            (u'⎕[A-Za-zΔ∆⍙][A-Za-zΔ∆⍙_¯0-9]*', Name.Function),
+            (r'⎕[A-Za-zΔ∆⍙][A-Za-zΔ∆⍙_¯0-9]*', Name.Function),
             #
             # Labels
             # ======
             # following IBM APL2 standard
-            # (u'[A-Za-zΔ∆⍙][A-Za-zΔ∆⍙_¯0-9]*:', Name.Label),
+            # (r'[A-Za-zΔ∆⍙][A-Za-zΔ∆⍙_¯0-9]*:', Name.Label),
             #
             # Variables
             # =========
             # following IBM APL2 standard
-            (u'[A-Za-zΔ∆⍙][A-Za-zΔ∆⍙_¯0-9]*', Name.Variable),
+            (r'[A-Za-zΔ∆⍙][A-Za-zΔ∆⍙_¯0-9]*', Name.Variable),
             #
             # Numbers
             # =======
-            (u'¯?(0[Xx][0-9A-Fa-f]+|[0-9]*\\.?[0-9]+([Ee][+¯]?[0-9]+)?|¯|∞)'
-             u'([Jj]¯?(0[Xx][0-9A-Fa-f]+|[0-9]*\\.?[0-9]+([Ee][+¯]?[0-9]+)?|¯|∞))?',
+            (r'¯?(0[Xx][0-9A-Fa-f]+|[0-9]*\.?[0-9]+([Ee][+¯]?[0-9]+)?|¯|∞)'
+             r'([Jj]¯?(0[Xx][0-9A-Fa-f]+|[0-9]*\.?[0-9]+([Ee][+¯]?[0-9]+)?|¯|∞))?',
             Number),
             #
             # Operators
             # ==========
-            (u'[\\.\\\\\\/⌿⍀¨⍣⍨⍠⍤∘⌸&⌶@⌺⍥⍛⍢]', Name.Attribute),  # closest token type
-            (u'[+\\-×÷⌈⌊∣|⍳?*⍟○!⌹<≤=>≥≠≡≢∊⍷∪∩~∨∧⍱⍲⍴,⍪⌽⊖⍉↑↓⊂⊃⌷⍋⍒⊤⊥⍕⍎⊣⊢⍁⍂≈⌸⍯↗⊆⊇⍸√⌾…⍮]',
+            (r'[\.\\\/⌿⍀¨⍣⍨⍠⍤∘⌸&⌶@⌺⍥⍛⍢]', Name.Attribute),  # closest token type
+            (r'[+\-×÷⌈⌊∣|⍳?*⍟○!⌹<≤=>≥≠≡≢∊⍷∪∩~∨∧⍱⍲⍴,⍪⌽⊖⍉↑↓⊂⊃⌷⍋⍒⊤⊥⍕⍎⊣⊢⍁⍂≈⌸⍯↗⊆⊇⍸√⌾…⍮]',
              Operator),
             #
             # Constant
             # ========
-            (u'⍬', Name.Constant),
+            (r'⍬', Name.Constant),
             #
             # Quad symbol
             # ===========
-            (u'[⎕⍞]', Name.Variable.Global),
+            (r'[⎕⍞]', Name.Variable.Global),
             #
             # Arrows left/right
             # =================
-            (u'[←→]', Keyword.Declaration),
+            (r'[←→]', Keyword.Declaration),
             #
             # D-Fn
             # ====
-            (u'[⍺⍵⍶⍹∇:]', Name.Builtin.Pseudo),
+            (r'[⍺⍵⍶⍹∇:]', Name.Builtin.Pseudo),
             (r'[{}]', Keyword.Type),
         ],
     }
4 changes: 2 additions & 2 deletions pygments/lexers/archetype.py
@@ -212,9 +212,9 @@ class CadlLexer(AtomsLexer):
             (r'(not)\W', Operator),
             (r'(matches|is_in)\W', Operator),
             # is_in / not is_in char
-            (u'(\u2208|\u2209)', Operator),
+            ('(\u2208|\u2209)', Operator),
             # there_exists / not there_exists / for_all / and / or
-            (u'(\u2203|\u2204|\u2200|\u2227|\u2228|\u22BB|\u223C)',
+            ('(\u2203|\u2204|\u2200|\u2227|\u2228|\u22BB|\u223C)',
              Operator),
             # regex in slot or as string constraint
             (r'(\{)(\s*/[^}]+/\s*)(\})',
2 changes: 1 addition & 1 deletion pygments/lexers/erlang.py
@@ -504,7 +504,7 @@ def get_tokens_unprocessed(self, text):
         insertions = []
         for match in line_re.finditer(text):
             line = match.group()
-            if line.startswith(u'** '):
+            if line.startswith('** '):
                 in_error = True
                 insertions.append((len(curcode),
                                    [(0, Generic.Error, line[:-1])]))