php
diff --git a/‎NEWS
Lines changed: 3 additions & 0 deletions b/‎NEWS
Lines changed: 3 additions & 0 deletions
diff --git a/‎ext/pcre/pcre2lib/pcre2.h
Lines changed: 7 additions & 5 deletions b/‎ext/pcre/pcre2lib/pcre2.h
Lines changed: 7 additions & 5 deletions
diff --git a/‎ext/pcre/pcre2lib/pcre2_compile.c
Lines changed: 24 additions & 10 deletions b/‎ext/pcre/pcre2lib/pcre2_compile.c
Lines changed: 24 additions & 10 deletions
diff --git a/‎ext/pcre/pcre2lib/pcre2_dfa_match.c
Lines changed: 44 additions & 25 deletions b/‎ext/pcre/pcre2lib/pcre2_dfa_match.c
Lines changed: 44 additions & 25 deletions
diff --git a/‎ext/pcre/pcre2lib/pcre2_error.c
Lines changed: 2 additions & 1 deletion b/‎ext/pcre/pcre2lib/pcre2_error.c
Lines changed: 2 additions & 1 deletion
@@ -6,6 +6,9 @@ PHP                                                                        NEWS
   . Fixed bug #81649 (imap_(un)delete accept sequences, not single numbers).
     (cmb)
 
+- PCRE:
+  . Update bundled PCRE2 to 10.39 (cmb)
+
 25 Nov 2021, PHP 8.1.0
 
 - Core:
 
@@ -5,7 +5,7 @@
 /* This is the public header file for the PCRE library, second API, to be
 #included by applications that call PCRE2 functions.
 
-           Copyright (c) 2016-2020 University of Cambridge
+           Copyright (c) 2016-2021 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
 /* The current PCRE version information. */
 
 #define PCRE2_MAJOR           10
-#define PCRE2_MINOR           37
+#define PCRE2_MINOR           39
 #define PCRE2_PRERELEASE      
-#define PCRE2_DATE            2021-05-26
+#define PCRE2_DATE            2021-10-29
 
 /* When an application links to a PCRE DLL in Windows, the symbols that are
 imported have to be identified as such. When building PCRE2, the appropriate
@@ -84,8 +84,8 @@ set, we ensure here that it has no effect. */
 /* Have to include limits.h, stdlib.h, and inttypes.h to ensure that size_t and
 uint8_t, UCHAR_MAX, etc are defined. Some systems that do have inttypes.h do
 not have stdint.h, which is why we use inttypes.h, which according to the C
-standard is a superset of stdint.h. If none of these headers are available,
-the relevant values must be provided by some other means. */
+standard is a superset of stdint.h. If inttypes.h is not available the build
+will break and the relevant values must be provided by some other means. */
 
 #include <limits.h>
 #include <stdlib.h>
@@ -152,6 +152,7 @@ D   is inspected during pcre2_dfa_match() execution
 #define PCRE2_EXTRA_MATCH_LINE               0x00000008u  /* C */
 #define PCRE2_EXTRA_ESCAPED_CR_IS_LF         0x00000010u  /* C */
 #define PCRE2_EXTRA_ALT_BSUX                 0x00000020u  /* C */
+#define PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK     0x00000040u  /* C */
 
 /* These are for pcre2_jit_compile(). */
 
@@ -311,6 +312,7 @@ pcre2_pattern_convert(). */
 #define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE       196
 #define PCRE2_ERROR_TOO_MANY_CAPTURES              197
 #define PCRE2_ERROR_CONDITION_ATOMIC_ASSERTION_EXPECTED  198
+#define PCRE2_ERROR_BACKSLASH_K_IN_LOOKAROUND      199
 
 
 /* "Expected" matching error codes: no match and partial match. */
 
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2020 University of Cambridge
+          New API code Copyright (c) 2016-2021 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -137,7 +137,7 @@ static BOOL
 
 static int
   check_lookbehinds(uint32_t *, uint32_t **, parsed_recurse_check *,
-    compile_block *);
+    compile_block *, int *);
 
 
 /*************************************************
@@ -782,12 +782,15 @@ are allowed. */
 #define PUBLIC_COMPILE_EXTRA_OPTIONS \
    (PUBLIC_LITERAL_COMPILE_EXTRA_OPTIONS| \
     PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES|PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL| \
-    PCRE2_EXTRA_ESCAPED_CR_IS_LF|PCRE2_EXTRA_ALT_BSUX)
+    PCRE2_EXTRA_ESCAPED_CR_IS_LF|PCRE2_EXTRA_ALT_BSUX| \
+    PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK)
 
 /* Compile time error code numbers. They are given names so that they can more
 easily be tracked. When a new number is added, the tables called eint1 and
 eint2 in pcre2posix.c may need to be updated, and a new error text must be
-added to compile_error_texts in pcre2_error.c. */
+added to compile_error_texts in pcre2_error.c. Also, the error codes in
+pcre2.h.in must be updated - their values are exactly 100 greater than these
+values. */
 
 enum { ERR0 = COMPILE_ERROR_BASE,
        ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,  ERR10,
@@ -799,7 +802,7 @@ enum { ERR0 = COMPILE_ERROR_BASE,
        ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
        ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
        ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90,
-       ERR91, ERR92, ERR93, ERR94, ERR95, ERR96, ERR97, ERR98 };
+       ERR91, ERR92, ERR93, ERR94, ERR95, ERR96, ERR97, ERR98, ERR99 };
 
 /* This is a table of start-of-pattern options such as (*UTF) and settings such
 as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
@@ -7799,6 +7802,16 @@ for (;; pptr++)
       }
 #endif
 
+    /* \K is forbidden in lookarounds since 10.38 because that's what Perl has
+    done. However, there's an option, in case anyone was relying on it. */
+
+    if (cb->assert_depth > 0 && meta_arg == ESC_K &&
+        (cb->cx->extra_options & PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK) == 0)
+      {
+      *errorcodeptr = ERR99;
+      return 0;
+      }
+
     /* For the rest (including \X when Unicode is supported - if not it's
     faulted at parse time), the OP value is the escape value when PCRE2_UCP is
     not set; if it is set, these escapes do not show up here because they are
@@ -9148,7 +9161,7 @@ for (;; pptr++)
     case META_LOOKAHEAD:
     case META_LOOKAHEADNOT:
     case META_LOOKAHEAD_NA:
-    *errcodeptr = check_lookbehinds(pptr + 1, &pptr, recurses, cb);
+    *errcodeptr = check_lookbehinds(pptr + 1, &pptr, recurses, cb, lcptr);
     if (*errcodeptr != 0) return -1;
 
     /* Ignore any qualifiers that follow a lookahead assertion. */
@@ -9488,16 +9501,16 @@ Arguments
   retptr    if not NULL, return the ket pointer here
   recurses  chain of recurse_check to catch mutual recursion
   cb        points to the compile block
+  lcptr     points to loop counter
 
 Returns:    0 on success, or an errorcode (cb->erroroffset will be set)
 */
 
 static int
 check_lookbehinds(uint32_t *pptr, uint32_t **retptr,
-  parsed_recurse_check *recurses, compile_block *cb)
+  parsed_recurse_check *recurses, compile_block *cb, int *lcptr)
 {
 int errorcode = 0;
-int loopcount = 0;
 int nestlevel = 0;
 
 cb->erroroffset = PCRE2_UNSET;
@@ -9623,7 +9636,7 @@ for (; *pptr != META_END; pptr++)
     case META_LOOKBEHIND:
     case META_LOOKBEHINDNOT:
     case META_LOOKBEHIND_NA:
-    if (!set_lookbehind_lengths(&pptr, &errorcode, &loopcount, recurses, cb))
+    if (!set_lookbehind_lengths(&pptr, &errorcode, lcptr, recurses, cb))
       return errorcode;
     break;
     }
@@ -10078,7 +10091,8 @@ lengths. */
 
 if (has_lookbehind)
   {
-  errorcode = check_lookbehinds(cb.parsed_pattern, NULL, NULL, &cb);
+  int loopcount = 0;
+  errorcode = check_lookbehinds(cb.parsed_pattern, NULL, NULL, &cb, &loopcount);
   if (errorcode != 0) goto HAD_CB_ERROR;
   }
 
 
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2020 University of Cambridge
+          New API code Copyright (c) 2016-2021 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -3256,8 +3256,8 @@ BOOL has_first_cu = FALSE;
 BOOL has_req_cu = FALSE;
 
 #if PCRE2_CODE_UNIT_WIDTH == 8
-BOOL memchr_not_found_first_cu = FALSE;
-BOOL memchr_not_found_first_cu2 = FALSE;
+PCRE2_SPTR memchr_found_first_cu = NULL;
+PCRE2_SPTR memchr_found_first_cu2 = NULL;
 #endif
 
 PCRE2_UCHAR first_cu = 0;
@@ -3648,57 +3648,76 @@ for (;;)
         }
       }
 
-    /* Not anchored. Advance to a unique first code unit if there is one. In
-    8-bit mode, the use of memchr() gives a big speed up, even though we have
-    to call it twice in caseless mode, in order to find the earliest occurrence
-    of the character in either of its cases. If a call to memchr() that
-    searches the rest of the subject fails to find one case, remember that in
-    order not to keep on repeating the search. This can make a huge difference
-    when the strings are very long and only one case is present. */
+    /* Not anchored. Advance to a unique first code unit if there is one. */
 
     else
       {
       if (has_first_cu)
         {
         if (first_cu != first_cu2)  /* Caseless */
           {
+          /* In 16-bit and 32_bit modes we have to do our own search, so can
+          look for both cases at once. */
+
 #if PCRE2_CODE_UNIT_WIDTH != 8
           PCRE2_UCHAR smc;
           while (start_match < end_subject &&
                 (smc = UCHAR21TEST(start_match)) != first_cu &&
-                  smc != first_cu2)
+                 smc != first_cu2)
             start_match++;
+#else
+          /* In 8-bit mode, the use of memchr() gives a big speed up, even
+          though we have to call it twice in order to find the earliest
+          occurrence of the code unit in either of its cases. Caching is used
+          to remember the positions of previously found code units. This can
+          make a huge difference when the strings are very long and only one
+          case is actually present. */
 
-#else  /* 8-bit code units */
           PCRE2_SPTR pp1 = NULL;
           PCRE2_SPTR pp2 = NULL;
-          PCRE2_SIZE cu2size = end_subject - start_match;
+          PCRE2_SIZE searchlength = end_subject - start_match;
 
-          if (!memchr_not_found_first_cu)
+          /* If we haven't got a previously found position for first_cu, or if
+          the current starting position is later, we need to do a search. If
+          the code unit is not found, set it to the end. */
+
+          if (memchr_found_first_cu == NULL ||
+              start_match > memchr_found_first_cu)
             {
-            pp1 = memchr(start_match, first_cu, end_subject - start_match);
-            if (pp1 == NULL) memchr_not_found_first_cu = TRUE;
-              else cu2size = pp1 - start_match;
+            pp1 = memchr(start_match, first_cu, searchlength);
+            memchr_found_first_cu = (pp1 == NULL)? end_subject : pp1;
             }
 
-          /* If pp1 is not NULL, we have arranged to search only as far as pp1,
-          to see if the other case is earlier, so we can set "not found" only
-          when both searches have returned NULL. */
+          /* If the start is before a previously found position, use the
+          previous position, or NULL if a previous search failed. */
+
+          else pp1 = (memchr_found_first_cu == end_subject)? NULL :
+            memchr_found_first_cu;
 
-          if (!memchr_not_found_first_cu2)
+          /* Do the same thing for the other case. */
+
+          if (memchr_found_first_cu2 == NULL ||
+              start_match > memchr_found_first_cu2)
             {
-            pp2 = memchr(start_match, first_cu2, cu2size);
-            memchr_not_found_first_cu2 = (pp2 == NULL && pp1 == NULL);
+            pp2 = memchr(start_match, first_cu2, searchlength);
+            memchr_found_first_cu2 = (pp2 == NULL)? end_subject : pp2;
             }
 
+          else pp2 = (memchr_found_first_cu2 == end_subject)? NULL :
+            memchr_found_first_cu2;
+
+          /* Set the start to the end of the subject if neither case was found.
+          Otherwise, use the earlier found point. */
+
           if (pp1 == NULL)
             start_match = (pp2 == NULL)? end_subject : pp2;
           else
             start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2;
-#endif
+
+#endif  /* 8-bit handling */
           }
 
-        /* The caseful case */
+        /* The caseful case is much simpler. */
 
         else
           {
 
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2019 University of Cambridge
+          New API code Copyright (c) 2016-2021 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -186,6 +186,7 @@ static const unsigned char compile_error_texts[] =
   "script runs require Unicode support, which this version of PCRE2 does not have\0"
   "too many capturing groups (maximum 65535)\0"
   "atomic assertion expected after (?( or (?(?C)\0"
+  "\\K is not allowed in lookarounds (but see PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK)\0"
   ;
 
 /* Match-time and UTF error texts are in the same format. */