Skip to content

Commit

Permalink
Switch from pcre to pcre2
Browse files Browse the repository at this point in the history
The issue at [0] was opened and I just took a stab at it. I have no
prior experience with pcre and pcre2, but using [1,2] I hacked together
something that seems to work. Next, Michael told me to turn that
patch/hack into a PR, so here we are.

The dependency in meson.build now uses version:'>=10', but this is more
a random guess than actual knowledge.

There was a while loop in regex_new() that dealt with an error when pcre
was not compiled with UTF-8 support. This loop uses a magic constant of
32 for the error code. I just dropped this loop, because I was just
writing a hack and did not intend to turn this into a PR. Also, a quick "grep
32 /usr/include/pcre.h" does not find anything useful, so... *shrug*

pcre_study() was removed without replacement, so the corresponding code
is also simply removed.

Testing done: The test suite passes for me. YMMV.

[0]: i3#4682
[1]: https://www.pcre.org/current/doc/html/pcre2api.html
[2]: https://www.pcre.org/current/doc/html/pcre2demo.html

Signed-off-by: Uli Schlachter <psychon@znc.in>
Fixes: i3#4682
  • Loading branch information
psychon committed Nov 29, 2021
1 parent d44e144 commit 5cb544b
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 27 deletions.
7 changes: 4 additions & 3 deletions include/data.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,13 @@
*/
#pragma once

#define PCRE2_CODE_UNIT_WIDTH 8

#define SN_API_NOT_YET_FROZEN 1
#include <libsn/sn-launcher.h>

#include <xcb/randr.h>
#include <pcre.h>
#include <pcre2.h>
#include <sys/time.h>
#include <cairo/cairo.h>

Expand Down Expand Up @@ -248,8 +250,7 @@ struct Startup_Sequence {
*/
struct regex {
char *pattern;
pcre *regex;
pcre_extra *extra;
pcre2_code *regex;
};

/**
Expand Down
2 changes: 1 addition & 1 deletion meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ xcb_util_xrm_dep = dependency('xcb-xrm', method: 'pkg-config')
xkbcommon_dep = dependency('xkbcommon', method: 'pkg-config')
xkbcommon_x11_dep = dependency('xkbcommon-x11', method: 'pkg-config')
yajl_dep = dependency('yajl', method: 'pkg-config')
libpcre_dep = dependency('libpcre', version: '>=8.10', method: 'pkg-config')
libpcre_dep = dependency('libpcre2-8', version: '>=10', method: 'pkg-config')
cairo_dep = dependency('cairo', version: '>=1.14.4', method: 'pkg-config')
pangocairo_dep = dependency('pangocairo', method: 'pkg-config')
glib_dep = dependency('glib-2.0', method: 'pkg-config')
Expand Down
39 changes: 16 additions & 23 deletions src/regex.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,34 +20,23 @@
*
*/
struct regex *regex_new(const char *pattern) {
const char *error;
int errorcode, offset;
int errorcode;
PCRE2_SIZE offset;

struct regex *re = scalloc(1, sizeof(struct regex));
re->pattern = sstrdup(pattern);
int options = PCRE_UTF8;
uint32_t options = PCRE2_UTF;
/* We use PCRE2_UCP so that \B, \b, \D, \d, \S, \s, \W, \w and some POSIX
* character classes play nicely with Unicode */
options |= PCRE_UCP;
while (!(re->regex = pcre_compile2(pattern, options, &errorcode, &error, &offset, NULL))) {
/* If the error is that PCRE was not compiled with UTF-8 support we
* disable it and try again */
if (errorcode == 32) {
options &= ~PCRE_UTF8;
continue;
}
ELOG("PCRE regular expression compilation failed at %d: %s\n",
offset, error);
options |= PCRE2_UCP;
if (!(re->regex = pcre2_compile((PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED, options, &errorcode, &offset, NULL))) {
PCRE2_UCHAR buffer[256];
pcre2_get_error_message(errorcode, buffer, sizeof(buffer));
ELOG("PCRE regular expression compilation failed at %lu: %s\n",
offset, buffer);
regex_free(re);
return NULL;
}
re->extra = pcre_study(re->regex, 0, &error);
/* If an error happened, we print the error message, but continue.
* Studying the regular expression leads to faster matching, but it’s not
* absolutely necessary. */
if (error) {
ELOG("PCRE regular expression studying failed: %s\n", error);
}
return re;
}

Expand All @@ -60,7 +49,6 @@ void regex_free(struct regex *regex) {
return;
FREE(regex->pattern);
FREE(regex->regex);
FREE(regex->extra);
FREE(regex);
}

Expand All @@ -71,17 +59,22 @@ void regex_free(struct regex *regex) {
*
*/
bool regex_matches(struct regex *regex, const char *input) {
pcre2_match_data *match_data;
int rc;

match_data = pcre2_match_data_create_from_pattern(regex->regex, NULL);

/* We use strlen() because pcre2_match() expects the length of the input
* string in code units, which are bytes for the 8-bit library */
if ((rc = pcre_exec(regex->regex, regex->extra, input, strlen(input), 0, 0, NULL, 0)) == 0) {
rc = pcre2_match(regex->regex, (PCRE2_SPTR)input, strlen(input), 0, 0, match_data, NULL);
pcre2_match_data_free(match_data);
if (rc > 0) {
LOG("Regular expression \"%s\" matches \"%s\"\n",
regex->pattern, input);
return true;
}

if (rc == PCRE_ERROR_NOMATCH) {
if (rc == PCRE2_ERROR_NOMATCH) {
LOG("Regular expression \"%s\" does not match \"%s\"\n",
regex->pattern, input);
return false;
Expand Down

0 comments on commit 5cb544b

Please sign in to comment.