Skip to content

Commit

Permalink
Added cregex_replace*() [implemented in utf8code.c]. Added examples/regex_replace.c.
Browse files Browse the repository at this point in the history
Docs not ready, i.e. API not fixed. Some other refactoring and minor fixes/improvements. cstr_assign_sv() now returns char* like the other cstr_assign*().
  • Loading branch information
Tyge Lovset committed Jul 20, 2022
1 parent 78cb613 commit 3f89c29
Show file tree
Hide file tree
Showing 9 changed files with 181 additions and 108 deletions.
2 changes: 1 addition & 1 deletion examples/regex2.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ int main()
printf("input: %s\n", inputs[i]);
if (cregex_match(&re, inputs[i], 20, m, 0) > 0)
{
c_forrange (j, cregex_captures(re))
c_forrange (j, cregex_captures(&re))
{
printf(" submatch %" PRIuMAX ": %" c_PRIsv "\n", j, c_ARGsv(m[j]));
}
Expand Down
6 changes: 3 additions & 3 deletions examples/regex_match.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,22 @@ int main()
{
int res = cregex_compile(&re, "[+-]?([0-9]*\\.)?\\d+([Ee][+-]?\\d+)?", 0);
printf("%d\n", res);
cregmatch m[10];
csview m[10];
if (cregex_match(&re, s, 10, m, 0) > 0) {
printf("Found digits at position %" PRIuMAX "-%" PRIuMAX "\n", m[0].str - s, m[0].str - s + m[0].size);
} else {
printf("Could not find any digits\n");
}

while (cregex_match(&re, s, 10, m, creg_next) > 0) {
while (cregex_match(&re, s, 10, m, cregex_NEXT) > 0) {
printf("%" c_PRIsv " ; ", c_ARGsv(m[0]));
}
puts("");

res = cregex_compile(&re, "(.+)\\b(.+)", 0);
printf("groups: %d\n", res);
if ((res = cregex_match(&re, "hello@wørld", 10, m, 0)) > 0) {
c_forrange (i, res)
c_forrange (i, res)
printf("match: [%" c_PRIsv "]\n", c_ARGsv(m[i]));
} else
printf("err: %d\n", res);
Expand Down
35 changes: 35 additions & 0 deletions examples/regex_replace.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#define i_implement
#include <stc/cstr.h>
#include <stc/cregex.h>
#include <stc/csview.h>

/* Match callback for cregex_replace_fn(): rewrites capture group 1 (the year)
 * as that year minus 20, formatted with "%04d"; every other group is copied
 * through unchanged.
 *
 *   i  index of the capture group being substituted
 *   m  text of that capture group
 *
 * Returns a new cstr holding the replacement text for the group.
 * NOTE(review): the caller presumably takes ownership of the returned cstr -
 * confirm against cregex_build_replace().
 */
cstr sub_20y(int i, csview m) {
    if (i == 1) { // year
        int year = 0;
        /* A csview carries an explicit size, so the text is presumably not
         * NUL-terminated at m.size; the field width stops the scan after
         * four characters. Only use the value if a number was actually read;
         * otherwise fall through and emit the match as-is instead of
         * formatting an indeterminate int. */
        if (sscanf(m.str, "%4d", &year) == 1) {
            return cstr_from_fmt("%04d", year - 20);
        }
    }
    return cstr_from_sv(m);
}

int main()
{
const char* pattern = "\\b(\\d\\d\\d\\d)-(1[0-2]|0[1-9])-(3[01]|[12][0-9]|0[1-9])\\b";
const char* input = "start date: 2015-12-31, end date: 2022-02-28";

c_auto (cregex, re)
c_auto (cstr, str1, str2)
{
printf("input: %s\n", input);
/* European date format */
str1 = cregex_replace(input, pattern, "\\3.\\2.\\1");
printf("euros: %s\n", cstr_str(&str1));

/* US date format, and subtract 20 years: */
str2 = cregex_replace_fn(input, pattern, "\\1/\\3/\\2", sub_20y, 0, 0);
printf("us-20: %s\n", cstr_str(&str2));
}
}

#include "../src/cregex.c"
#include "../src/utf8code.c"
2 changes: 1 addition & 1 deletion include/stc/ccommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ typedef const char* crawstr;
#define crawstr_cmp(xp, yp) strcmp(*(xp), *(yp))
#define crawstr_hash(p) c_strhash(*(p))
#define c_strlen_lit(literal) (sizeof "" literal - 1U)
#define c_sv(lit) c_make(csview){lit, c_strlen_lit(lit)}
#define c_sv(lit) (c_make(csview){lit, c_strlen_lit(lit)})
#define c_PRIsv ".*s"
#define c_ARGsv(sv) (int)(sv).size, (sv).str

Expand Down
60 changes: 36 additions & 24 deletions include/stc/cregex.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,32 +34,33 @@ THE SOFTWARE.
#include "forward.h" // csview

typedef enum {
creg_nomatch = -1,
creg_matcherror = -2,
creg_outofmemory = -3,
creg_unmatchedleftparenthesis = -4,
creg_unmatchedrightparenthesis = -5,
creg_toomanysubexpressions = -6,
creg_toomanycharacterclasses = -7,
creg_malformedcharacterclass = -8,
creg_missingoperand = -9,
creg_unknownoperator = -10,
creg_operandstackoverflow = -11,
creg_operatorstackoverflow = -12,
creg_operatorstackunderflow = -13,
creg_success = 1,
creg_nomatch = 0,
creg_matcherror = -1,
creg_outofmemory = -2,
creg_unmatchedleftparenthesis = -3,
creg_unmatchedrightparenthesis = -4,
creg_toomanysubexpressions = -5,
creg_toomanycharacterclasses = -6,
creg_malformedcharacterclass = -7,
creg_missingoperand = -8,
creg_unknownoperator = -9,
creg_operandstackoverflow = -10,
creg_operatorstackoverflow = -11,
creg_operatorstackunderflow = -12,
} cregex_error_t;

enum {
/* compile flags */
creg_dotall = 1<<0,
creg_caseless = 1<<1,
cregex_DOTALL = 1<<0,
cregex_CASELESS = 1<<1,
/* execution flags */
creg_fullmatch = 1<<2,
creg_next = 1<<3,
creg_startend = 1<<4,
cregex_FULLMATCH = 1<<2,
cregex_NEXT = 1<<3,
cregex_STARTEND = 1<<4,
/* limits */
creg_max_classes = 16,
creg_max_captures = 32,
cregex_MAXCLASSES = 16,
cregex_MAXCAPTURES = 32,
};

typedef struct {
Expand All @@ -76,15 +77,26 @@ static inline cregex cregex_init(void) {
int cregex_compile(cregex *self, const char* pattern, int cflags);

/* number of capture groups in a regex pattern */
int cregex_captures(cregex rx);
int cregex_captures(const cregex* self);

/* return number of capture groups on success, or (negative) error code on failure. */
int cregex_match(const cregex *self, const char* string,
size_t nmatch, cregmatch match[], int mflags);
unsigned nmatch, csview match[], int mflags);

void cregex_replace(const char* src, char* dst, int dsize,
int nmatch, const cregmatch match[]);
/* replace regular expression */
void cregex_build_replace(const char* repl, unsigned nmatch, const csview match[],
cstr (*mfun)(int i, csview match), cstr* out);

cstr cregex_replace_re(const char* input, const cregex* re, const char* repl,
cstr (*mfun)(int i, csview match), int cflags, unsigned count);

cstr cregex_replace_fn(const char* input, const char* pattern, const char* replace,
cstr (*mfun)(int i, csview match), int cflags, unsigned count);
/* Convenience form of cregex_replace_fn(): forwards input, pattern and
 * replace unchanged, passing no match callback (NULL), cflags 0 and count 0.
 * NOTE(review): count 0 presumably means "replace every match" - confirm in
 * the implementation.
 */
static inline cstr
cregex_replace(const char* input, const char* pattern, const char* replace)
{
    cstr result = cregex_replace_fn(input, pattern, replace, NULL, 0, 0);
    return result;
}

/* destroy regex */
void cregex_drop(cregex* self);

#endif
10 changes: 6 additions & 4 deletions include/stc/cstr.h
Original file line number Diff line number Diff line change
Expand Up @@ -404,9 +404,9 @@ STC_DEF char* cstr_reserve(cstr* self, const size_t cap) {
if (cap > cstr_s_cap) {
char* data = (char *)c_malloc(cap + 1);
const size_t len = cstr_s_size(self);
memcpy(data, self->sml.data, len);
memcpy(data, self->sml.data, cstr_s_cap + 1);
self->lon.data = data;
cstr_l_set_size(self, len);
self->lon.size = len;
cstr_l_set_cap(self, cap);
return data;
}
Expand Down Expand Up @@ -525,15 +525,17 @@ STC_DEF int cstr_vfmt(cstr* self, const char* fmt, va_list args) {

STC_DEF cstr cstr_from_fmt(const char* fmt, ...) {
cstr s = cstr_null;
va_list args; va_start(args, fmt);
va_list args;
va_start(args, fmt);
cstr_vfmt(&s, fmt, args);
va_end(args);
return s;
}

STC_DEF int cstr_printf(cstr* self, const char* fmt, ...) {
cstr s = cstr_null;
va_list args; va_start(args, fmt);
va_list args;
va_start(args, fmt);
const int n = cstr_vfmt(&s, fmt, args);
va_end(args);
cstr_drop(self); *self = s;
Expand Down
4 changes: 2 additions & 2 deletions include/stc/csview.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,8 @@ STC_INLINE csview cstr_substr_ex(const cstr* self, intptr_t pos, size_t n)
/* Slice a cstr: wraps self in a csview via csview_from_s() and forwards p1/p2
 * to csview_slice_ex(), returning the view it produces.
 * NOTE(review): the signed intptr_t bounds suggest negative (from-the-end)
 * positions are accepted - confirm in csview_slice_ex().
 */
STC_INLINE csview cstr_slice_ex(const cstr* self, intptr_t p1, intptr_t p2)
{
    const csview whole = csview_from_s(self);
    return csview_slice_ex(whole, p1, p2);
}

STC_INLINE csview cstr_assign_sv(cstr* self, csview sv)
{ return c_make(csview){cstr_assign_n(self, sv.str, sv.size), sv.size}; }
/* Assign the text of a csview to self by forwarding sv.str and sv.size to
 * cstr_assign_n(); returns whatever pointer cstr_assign_n() returns.
 */
STC_INLINE char* cstr_assign_sv(cstr* self, csview sv)
{
    char* assigned = cstr_assign_n(self, sv.str, sv.size);
    return assigned;
}

/* Append the text of a csview to self by forwarding sv.str and sv.size to
 * cstr_append_n(); nothing is returned.
 */
STC_INLINE void cstr_append_sv(cstr* self, csview sv)
{
    cstr_append_n(self, sv.str, sv.size);
}
Expand Down
Loading

0 comments on commit 3f89c29

Please sign in to comment.