Skip to content

Commit

Permalink
#455 Fix bug with tricky unicode symbols (#1047)
Browse files Browse the repository at this point in the history
* Add a test for special Unicode symbols that the standard re module cannot process correctly
Add the regex library, which supports the Unicode 12.1.0 standard
Replace usage of re throughout the project in favor of regex

* #455 fix dependency
  • Loading branch information
dyus authored and zsol committed Oct 13, 2019
1 parent faaa2c8 commit 6aef6c9
Show file tree
Hide file tree
Showing 11 changed files with 100 additions and 73 deletions.
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ toml = ">=0.9.4"
black = {path = ".",extras = ["d"],editable = true}
aiohttp-cors = "*"
typed-ast = ">=1.3.1"
regex = "*"

[dev-packages]
pre-commit = "*"
Expand Down
142 changes: 76 additions & 66 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions black.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import os
from pathlib import Path
import pickle
import re
import regex as re
import signal
import sys
import tempfile
Expand Down Expand Up @@ -3810,7 +3810,8 @@ def re_compile_maybe_verbose(regex: str) -> Pattern[str]:
"""
if "\n" in regex:
regex = "(?x)" + regex
return re.compile(regex)
compiled: Pattern[str] = re.compile(regex)
return compiled


def enumerate_reversed(sequence: Sequence[T]) -> Iterator[Tuple[Index, T]]:
Expand Down
2 changes: 1 addition & 1 deletion blib2to3/pgen2/conv.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
"""

# Python imports
import re
import regex as re

# Local imports
from pgen2 import grammar, token
Expand Down
2 changes: 1 addition & 1 deletion blib2to3/pgen2/literals.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

"""Safely evaluate Python string literals without using eval()."""

import re
import regex as re

simple_escapes = {"a": "\a",
"b": "\b",
Expand Down
2 changes: 1 addition & 1 deletion blib2to3/pgen2/tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
__credits__ = \
'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'

import re
import regex as re
from codecs import BOM_UTF8, lookup
from blib2to3.pgen2.token import *

Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
from pathlib import Path
import re
import regex as re
import shutil
import string

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ click = "^6.5"
toml = "^0.9.4"
appdirs = "^1.4"
aiohttp = { version = "^3.4", optional = true }
regex = "^2019.8"

[tool.poetry.extras]
d = ["aiohttp"]
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def get_long_description() -> str:
"appdirs",
"toml>=0.9.4",
"typed-ast>=1.3.1",
"regex",
],
extras_require={"d": ["aiohttp>=3.3.2", "aiohttp-cors"]},
test_suite="tests.test_black",
Expand Down
6 changes: 6 additions & 0 deletions tests/data/tricky_unicode_symbols.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
ä = 1
µ = 2
蟒 = 3
x󠄀 = 4
មុ = 1
Q̇_per_meter = 4
9 changes: 8 additions & 1 deletion tests/test_black.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from io import BytesIO, TextIOWrapper
import os
from pathlib import Path
import re
import regex as re
import sys
from tempfile import TemporaryDirectory
from typing import Any, BinaryIO, Generator, List, Tuple, Iterator, TypeVar
Expand Down Expand Up @@ -1245,6 +1245,13 @@ def test_read_cache_line_lengths(self) -> None:
two = black.read_cache(short_mode)
self.assertNotIn(path, two)

def test_tricky_unicode_symbols(self) -> None:
# Load the "tricky_unicode_symbols" fixture as a (source, expected) pair.
source, expected = read_data("tricky_unicode_symbols")
# NOTE(review): fs() is defined outside this view -- presumably it formats the
# source string with black; confirm at its definition.
actual = fs(source)
self.assertFormatEqual(expected, actual)
# Beyond the text comparison, run black's own equivalence and stability
# checks on the result, using a default FileMode.
black.assert_equivalent(source, actual)
black.assert_stable(source, actual, black.FileMode())

def test_single_file_force_pyi(self) -> None:
reg_mode = black.FileMode()
pyi_mode = black.FileMode(is_pyi=True)
Expand Down

0 comments on commit 6aef6c9

Please sign in to comment.