Skip to content

Commit

Permalink
Git issue 477: \v for vertical spacing
Browse files Browse the repository at this point in the history
Added \p{HorizSpace} (\p{H}) and \p{VertSpace} (\p{V}).
  • Loading branch information
Matthew Barnett committed Aug 17, 2022
1 parent 0175830 commit 71be78a
Show file tree
Hide file tree
Showing 8 changed files with 1,304 additions and 1,226 deletions.
227 changes: 89 additions & 138 deletions README.rst

Large diffs are not rendered by default.

171 changes: 74 additions & 97 deletions docs/Features.html

Large diffs are not rendered by default.

2,073 changes: 1,102 additions & 971 deletions regex_3/_regex_unicode.c

Large diffs are not rendered by default.

34 changes: 18 additions & 16 deletions regex_3/_regex_unicode.h
Expand Up @@ -34,9 +34,9 @@ typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 codepoint);

#define RE_PROP_GC 0x1E
#define RE_PROP_CASED 0xA
#define RE_PROP_UPPERCASE 0x56
#define RE_PROP_LOWERCASE 0x33
#define RE_PROP_SCX 0x50
#define RE_PROP_UPPERCASE 0x57
#define RE_PROP_LOWERCASE 0x34
#define RE_PROP_SCX 0x51

#define RE_PROP_C 30
#define RE_PROP_L 31
Expand Down Expand Up @@ -95,16 +95,16 @@ typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 codepoint);
#define RE_PROP_CNTRL 0x1E0001
#define RE_PROP_DIGIT 0x1E0009
#define RE_PROP_GRAPH 0x1F0001
#define RE_PROP_LOWER 0x330001
#define RE_PROP_PRINT 0x4B0001
#define RE_PROP_SPACE 0x580001
#define RE_PROP_UPPER 0x560001
#define RE_PROP_WORD 0x590001
#define RE_PROP_XDIGIT 0x5B0001
#define RE_PROP_POSIX_ALNUM 0x460001
#define RE_PROP_POSIX_DIGIT 0x470001
#define RE_PROP_POSIX_PUNCT 0x480001
#define RE_PROP_POSIX_XDIGIT 0x490001
#define RE_PROP_LOWER 0x340001
#define RE_PROP_PRINT 0x4C0001
#define RE_PROP_SPACE 0x5A0001
#define RE_PROP_UPPER 0x570001
#define RE_PROP_WORD 0x5B0001
#define RE_PROP_XDIGIT 0x5D0001
#define RE_PROP_POSIX_ALNUM 0x470001
#define RE_PROP_POSIX_DIGIT 0x480001
#define RE_PROP_POSIX_PUNCT 0x490001
#define RE_PROP_POSIX_XDIGIT 0x4A0001

#define RE_WBREAK_OTHER 0
#define RE_WBREAK_LF 1
Expand Down Expand Up @@ -193,11 +193,11 @@ typedef RE_UINT32 (*RE_GetPropertyFunc)(RE_UINT32 codepoint);
#define RE_LBREAK_REGIONALINDICATOR 41
#define RE_LBREAK_EMODIFIER 42

extern char* re_strings[1472];
extern RE_Property re_properties[173];
extern char* re_strings[1474];
extern RE_Property re_properties[177];
extern RE_PropertyValue re_property_values[1619];
extern RE_UINT16 re_expand_on_folding[104];
extern RE_GetPropertyFunc re_get_property[94];
extern RE_GetPropertyFunc re_get_property[96];

RE_UINT32 re_get_alphabetic(RE_UINT32 codepoint);
RE_UINT32 re_get_alphanumeric(RE_UINT32 codepoint);
Expand Down Expand Up @@ -237,6 +237,7 @@ RE_UINT32 re_get_grapheme_extend(RE_UINT32 codepoint);
RE_UINT32 re_get_grapheme_link(RE_UINT32 codepoint);
RE_UINT32 re_get_hangul_syllable_type(RE_UINT32 codepoint);
RE_UINT32 re_get_hex_digit(RE_UINT32 codepoint);
RE_UINT32 re_get_horiz_space(RE_UINT32 codepoint);
RE_UINT32 re_get_hyphen(RE_UINT32 codepoint);
RE_UINT32 re_get_id_continue(RE_UINT32 codepoint);
RE_UINT32 re_get_ideographic(RE_UINT32 codepoint);
Expand Down Expand Up @@ -287,6 +288,7 @@ RE_UINT32 re_get_terminal_punctuation(RE_UINT32 codepoint);
RE_UINT32 re_get_unified_ideograph(RE_UINT32 codepoint);
RE_UINT32 re_get_uppercase(RE_UINT32 codepoint);
RE_UINT32 re_get_variation_selector(RE_UINT32 codepoint);
RE_UINT32 re_get_vert_space(RE_UINT32 codepoint);
RE_UINT32 re_get_white_space(RE_UINT32 codepoint);
RE_UINT32 re_get_word(RE_UINT32 codepoint);
RE_UINT32 re_get_word_break(RE_UINT32 codepoint);
Expand Down
2 changes: 1 addition & 1 deletion regex_3/regex.py
Expand Up @@ -241,7 +241,7 @@
"VERSION1", "X", "VERBOSE", "W", "WORD", "error", "Regex", "__version__",
"__doc__", "RegexFlag"]

__version__ = "2.5.118"
__version__ = "2.5.119"

# --------------------------------------------------------------------
# Public interface.
Expand Down
4 changes: 4 additions & 0 deletions regex_3/test_regex.py
Expand Up @@ -4321,6 +4321,10 @@ def test_hg_bugs(self):
self.assertEqual(regex.match(r'(.)+', 'abc').allcaptures(), (['abc'], ['a', 'b', 'c']))
self.assertEqual(regex.match(r'(.)+', 'abc').allspans(), ([(0, 3)], [(0, 1), (1, 2), (2, 3)]))

# Git issue 477: \v for vertical spacing
self.assertEqual(bool(regex.fullmatch(r'\p{HorizSpace}+', '\t \xA0\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u202F\u205F\u3000')), True)
self.assertEqual(bool(regex.fullmatch(r'\p{VertSpace}+', '\n\v\f\r\x85\u2028\u2029')), True)

def test_fuzzy_ext(self):
self.assertEqual(bool(regex.fullmatch(r'(?r)(?:a){e<=1:[a-z]}', 'e')),
True)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Expand Up @@ -7,7 +7,7 @@

setup(
name='regex',
version='2022.7.25',
version='2022.8.17',
description='Alternative regular expression module, to replace re.',
long_description=long_description,
long_description_content_type='text/x-rst',
Expand Down
17 changes: 15 additions & 2 deletions tools/build_regex_unicode.py
Expand Up @@ -511,6 +511,9 @@ def make_binary_property(properties, names, codepoints):
for name in names:
properties[munge(name)] = property

def make_ranges(*values):
    """Return a Ranges built from one single-value span per argument.

    Each positional argument ``v`` is supplied to ``Ranges`` as the pair
    ``(v, v)``, i.e. a span whose lower and upper bounds are both ``v``.
    """
    singleton_spans = ((point, point) for point in values)
    return Ranges(singleton_spans)

def make_additional_properties(unicode_data):

def get_values(prop_name):
Expand Down Expand Up @@ -616,6 +619,16 @@ def get_codepoints(prop_name, val_name):

make_binary_property(properties, ['Posix_XDigit'], posix_xdigit)

# Make the 'Horiz_Space' property.
horiz_space = make_ranges(0x09, 0x20, 0xA0, 0x1680, 0x180E) | Ranges([(0x2000, 0x200A)]) | make_ranges(0x202F, 0x205F, 0x3000)

make_binary_property(properties, ['Horiz_Space', 'H'], horiz_space)

# Make the 'Vert_Space' property.
vert_space = Ranges([(0x0A, 0x0D)]) | make_ranges(0x85, 0x2028, 0x2029)

make_binary_property(properties, ['Vert_Space', 'V'], vert_space)

def preferred(d):
return munge(d['names'][0])

Expand Down Expand Up @@ -1310,7 +1323,7 @@ def generate_code(unicode_data, tools_folder):
prop_list = list(unique(properties.values(), key=id))
prop_list.sort(key=preferred)

unicode_data['property_tablee_count'] = len(properties)
unicode_data['property_table_count'] = len(properties)
unicode_data['property_count'] = len(prop_list)

no_yes_maybe = {
Expand Down Expand Up @@ -1680,7 +1693,7 @@ def make_key(names):
h_file.write('\n')

h_file.write('extern char* re_strings[{}];\n'.format(unicode_data['string_count']))
h_file.write('extern RE_Property re_properties[{}];\n'.format(unicode_data['property_tablee_count']))
h_file.write('extern RE_Property re_properties[{}];\n'.format(unicode_data['property_table_count']))
h_file.write('extern RE_PropertyValue re_property_values[{}];\n'.format(unicode_data['valueset_table_count']))
h_file.write('extern RE_UINT16 re_expand_on_folding[{}];\n'.format(unicode_data['expanded_count']))
h_file.write('extern RE_GetPropertyFunc re_get_property[{}];\n'.format(unicode_data['property_count']))
Expand Down

0 comments on commit 71be78a

Please sign in to comment.