Skip to content

Commit

Permalink
Update to PCRE2 10.35
Browse files Browse the repository at this point in the history
  • Loading branch information
cmb69 committed Jun 30, 2020
1 parent b3698ed commit b419f96
Show file tree
Hide file tree
Showing 39 changed files with 4,595 additions and 3,261 deletions.
10 changes: 7 additions & 3 deletions ext/pcre/pcre2lib/pcre2.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/* This is the public header file for the PCRE library, second API, to be
#included by applications that call PCRE2 functions.
Copyright (c) 2016-2019 University of Cambridge
Copyright (c) 2016-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
/* The current PCRE version information. */

#define PCRE2_MAJOR 10
#define PCRE2_MINOR 34
#define PCRE2_MINOR 35
#define PCRE2_PRERELEASE
#define PCRE2_DATE 2019-11-21
#define PCRE2_DATE 2020-05-09

/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE2, the appropriate
Expand Down Expand Up @@ -181,6 +181,9 @@ pcre2_jit_match() ignores the latter since it bypasses all sanity checks). */
#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u /* pcre2_substitute() only */
#define PCRE2_NO_JIT 0x00002000u /* Not for pcre2_dfa_match() */
#define PCRE2_COPY_MATCHED_SUBJECT 0x00004000u
#define PCRE2_SUBSTITUTE_LITERAL 0x00008000u /* pcre2_substitute() only */
#define PCRE2_SUBSTITUTE_MATCHED 0x00010000u /* pcre2_substitute() only */
#define PCRE2_SUBSTITUTE_REPLACEMENT_ONLY 0x00020000u /* pcre2_substitute() only */

/* Options for pcre2_pattern_convert(). */

Expand Down Expand Up @@ -445,6 +448,7 @@ released, the numbers must not be changed. */
#define PCRE2_CONFIG_HEAPLIMIT 12
#define PCRE2_CONFIG_NEVER_BACKSLASH_C 13
#define PCRE2_CONFIG_COMPILED_WIDTHS 14
#define PCRE2_CONFIG_TABLES_LENGTH 15


/* Types for code units in patterns and subject strings. */
Expand Down
34 changes: 20 additions & 14 deletions ext/pcre/pcre2lib/pcre2_auto_possess.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2019 University of Cambridge
New API code Copyright (c) 2016-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -292,6 +292,7 @@ possessification, and if so, fills a list with its properties.
Arguments:
code points to start of expression
utf TRUE if in UTF mode
ucp TRUE if in UCP mode
fcc points to the case-flipping table
list points to output list
list[0] will be filled with the opcode
Expand All @@ -304,7 +305,7 @@ Returns: points to the start of the next opcode if *code is accepted
*/

static PCRE2_SPTR
get_chr_property_list(PCRE2_SPTR code, BOOL utf, const uint8_t *fcc,
get_chr_property_list(PCRE2_SPTR code, BOOL utf, BOOL ucp, const uint8_t *fcc,
uint32_t *list)
{
PCRE2_UCHAR c = *code;
Expand All @@ -316,7 +317,8 @@ uint32_t chr;
uint32_t *clist_dest;
const uint32_t *clist_src;
#else
(void)utf; /* Suppress "unused parameter" compiler warning */
(void)utf; /* Suppress "unused parameter" compiler warnings */
(void)ucp;
#endif

list[0] = c;
Expand Down Expand Up @@ -396,7 +398,7 @@ switch(c)
list[2] = chr;

#ifdef SUPPORT_UNICODE
if (chr < 128 || (chr < 256 && !utf))
if (chr < 128 || (chr < 256 && !utf && !ucp))
list[3] = fcc[chr];
else
list[3] = UCD_OTHERCASE(chr);
Expand Down Expand Up @@ -503,6 +505,7 @@ which case the base cannot be possessified.
Arguments:
code points to the byte code
utf TRUE in UTF mode
ucp TRUE in UCP mode
cb compile data block
base_list the data list of the base opcode
base_end the end of the base opcode
Expand All @@ -512,7 +515,7 @@ Returns: TRUE if the auto-possessification is possible
*/

static BOOL
compare_opcodes(PCRE2_SPTR code, BOOL utf, const compile_block *cb,
compare_opcodes(PCRE2_SPTR code, BOOL utf, BOOL ucp, const compile_block *cb,
const uint32_t *base_list, PCRE2_SPTR base_end, int *rec_limit)
{
PCRE2_UCHAR c;
Expand Down Expand Up @@ -651,7 +654,7 @@ for(;;)

while (*next_code == OP_ALT)
{
if (!compare_opcodes(code, utf, cb, base_list, base_end, rec_limit))
if (!compare_opcodes(code, utf, ucp, cb, base_list, base_end, rec_limit))
return FALSE;
code = next_code + 1 + LINK_SIZE;
next_code += GET(next_code, 1);
Expand All @@ -672,7 +675,8 @@ for(;;)
/* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */

next_code += 1 + LINK_SIZE;
if (!compare_opcodes(next_code, utf, cb, base_list, base_end, rec_limit))
if (!compare_opcodes(next_code, utf, ucp, cb, base_list, base_end,
rec_limit))
return FALSE;

code += PRIV(OP_lengths)[c];
Expand All @@ -688,7 +692,7 @@ for(;;)
/* We now have the next appropriate opcode to compare with the base. Check
for a supported opcode, and load its properties. */

code = get_chr_property_list(code, utf, cb->fcc, list);
code = get_chr_property_list(code, utf, ucp, cb->fcc, list);
if (code == NULL) return FALSE; /* Unsupported */

/* If either opcode is a small character list, set pointers for comparing
Expand Down Expand Up @@ -1100,21 +1104,22 @@ leaving the remainder of the pattern unpossessified.
Arguments:
code points to start of the byte code
utf TRUE in UTF mode
cb compile data block
Returns: 0 for success
-1 if a non-existant opcode is encountered
*/

int
PRIV(auto_possessify)(PCRE2_UCHAR *code, BOOL utf, const compile_block *cb)
PRIV(auto_possessify)(PCRE2_UCHAR *code, const compile_block *cb)
{
PCRE2_UCHAR c;
PCRE2_SPTR end;
PCRE2_UCHAR *repeat_opcode;
uint32_t list[8];
int rec_limit = 1000; /* Was 10,000 but clang+ASAN uses a lot of stack. */
BOOL utf = (cb->external_options & PCRE2_UTF) != 0;
BOOL ucp = (cb->external_options & PCRE2_UCP) != 0;

for (;;)
{
Expand All @@ -1126,10 +1131,11 @@ for (;;)
{
c -= get_repeat_base(c) - OP_STAR;
end = (c <= OP_MINUPTO) ?
get_chr_property_list(code, utf, cb->fcc, list) : NULL;
get_chr_property_list(code, utf, ucp, cb->fcc, list) : NULL;
list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;

if (end != NULL && compare_opcodes(end, utf, cb, list, end, &rec_limit))
if (end != NULL && compare_opcodes(end, utf, ucp, cb, list, end,
&rec_limit))
{
switch(c)
{
Expand Down Expand Up @@ -1181,11 +1187,11 @@ for (;;)
if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
{
/* end must not be NULL. */
end = get_chr_property_list(code, utf, cb->fcc, list);
end = get_chr_property_list(code, utf, ucp, cb->fcc, list);

list[1] = (c & 1) == 0;

if (compare_opcodes(end, utf, cb, list, end, &rec_limit))
if (compare_opcodes(end, utf, ucp, cb, list, end, &rec_limit))
{
switch (c)
{
Expand Down
38 changes: 21 additions & 17 deletions ext/pcre/pcre2lib/pcre2_chartables.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,21 @@
* Perl-Compatible Regular Expressions *
*************************************************/

/* This file was automatically written by the dftables auxiliary
/* This file was automatically written by the pcre2_dftables auxiliary
program. It contains character tables that are used when no external
tables are passed to PCRE2 by the application that calls it. The tables
are used only for characters whose code values are less than 256. */

/*The dftables program (which is distributed with PCRE2) can be used to
build alternative versions of this file. This is necessary if you are
/* This set of tables was written in the C locale. */

/* The pcre2_ftables program (which is distributed with PCRE2) can be used
to build alternative versions of this file. This is necessary if you are
running in an EBCDIC environment, or if you want to default to a different
encoding, for example ISO-8859-1. When dftables is run, it creates these
tables in the current locale. This happens automatically if PCRE2 is
configured with --enable-rebuild-chartables. */
encoding, for example ISO-8859-1. When pcre2_dftables is run, it creates
these tables in the "C" locale by default. This happens automatically if
PCRE2 is configured with --enable-rebuild-chartables. However, you can run
pcre2_dftables manually with the -L option to build tables using the LC_ALL
locale. */

/* The following #include is present because without it gcc 4.x may remove
the array definition from the final binary if PCRE2 is built into a static
Expand Down Expand Up @@ -102,54 +106,54 @@ const uint8_t PRIV(default_tables)[] = {
/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
graph print, punct, and cntrl. Other classes are built from combinations. */
graph, print, punct, and cntrl. Other classes are built from combinations. */

0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00, /* space */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* xdigit */
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* digit */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* upper */
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* lower */
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03, /* word */
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff, /* graph */
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff, /* print */
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc, /* punct */
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00, /* cntrl */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
Expand Down

0 comments on commit b419f96

Please sign in to comment.