Skip to content

Commit

Permalink
Git issue 473: Emoji classified as letter
Browse files: browse the repository at this point in the history.
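The value IDs for GC:Assigned and GC:LC (Cased_Letter) were swapped in the generated property-value table: Assigned should be 37 and Cased_Letter/LC should be 38.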
  • Loading branch information
Matthew Barnett committed Jul 8, 2022
1 parent 8a514bc commit 5c9b260
Show file tree
Hide file tree
Showing 5 changed files with 13 additions and 9 deletions.
6 changes: 3 additions & 3 deletions regex_3/_regex_unicode.c
Expand Up @@ -2451,9 +2451,9 @@ RE_PropertyValue re_property_values[] = {
{1226, 6, 36}, /* SEPARATOR */
{1455, 6, 36}, /* Z */
{1456, 6, 36}, /* Z& */
{ 317, 6, 37}, /* CASEDLETTER */
{ 820, 6, 37}, /* LC */
{ 224, 6, 38}, /* ASSIGNED */
{ 224, 6, 37}, /* ASSIGNED */
{ 317, 6, 38}, /* CASEDLETTER */
{ 820, 6, 38}, /* LC */
{1092, 7, 0}, /* OTHER */
{1439, 7, 0}, /* XX */
{ 445, 7, 1}, /* CONTROL */
Expand Down
2 changes: 1 addition & 1 deletion regex_3/regex.py
Expand Up @@ -241,7 +241,7 @@
"VERSION1", "X", "VERBOSE", "W", "WORD", "error", "Regex", "__version__",
"__doc__", "RegexFlag"]

__version__ = "2.5.115"
__version__ = "2.5.116"

# --------------------------------------------------------------------
# Public interface.
Expand Down
4 changes: 4 additions & 0 deletions regex_3/test_regex.py
Expand Up @@ -4313,6 +4313,10 @@ def test_hg_bugs(self):
self.assertEqual(regex.match(r'(?a:\w)\w', 'd\N{CYRILLIC SMALL LETTER ZHE}').span(), (0, 2))
self.assertEqual(regex.match(r'(?a:\w)(?u:\w)', 'd\N{CYRILLIC SMALL LETTER ZHE}').span(), (0, 2))

# Git issue 473: Emoji classified as letter
self.assertEqual(regex.match(r'^\p{LC}+$', '\N{SMILING CAT FACE WITH OPEN MOUTH}'), None)
self.assertEqual(regex.match(r'^\p{So}+$', '\N{SMILING CAT FACE WITH OPEN MOUTH}').span(), (0, 1))

def test_fuzzy_ext(self):
self.assertEqual(bool(regex.fullmatch(r'(?r)(?:a){e<=1:[a-z]}', 'e')),
True)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Expand Up @@ -7,7 +7,7 @@

setup(
name='regex',
version='2022.6.2',
version='2022.7.9',
description='Alternative regular expression module, to replace re.',
long_description=long_description,
long_description_content_type='text/x-rst',
Expand Down
8 changes: 4 additions & 4 deletions tools/build_regex_unicode.py
Expand Up @@ -1398,7 +1398,7 @@ def make_key(value):
val_list = list(unique(property['values'].values(), key=id))

for value in sorted(val_list, key=lambda val: val['id']):
valueset.append(tuple(value['names']))
valueset.append((value['id'], tuple(value['names'])))

valueset_id = valueset_dict.setdefault(tuple(valueset),
len(valueset_dict))
Expand Down Expand Up @@ -1477,10 +1477,10 @@ def make_key(names):

for valset, valset_id in sorted(valueset_dict.items(), key=lambda pair:
pair[1]):
if valset_id == gc_valset_id:
valset = sorted(valset, key=make_key)
for val_id, names in valset:
if valset_id == gc_valset_id:
names = sorted(names, key=make_key)

for val_id, names in enumerate(valset):
for name in names:
c_file.write(''' {{{:4}, {:2}, {:3}}}, /* {} */\n'''.format(strings_dict[munge(name)],
valset_id, val_id, munge(name)))
Expand Down

0 comments on commit 5c9b260

Please sign in to comment.