/*
 * benchmark.c
 * Forked from mariomka/regex-benchmark.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#if __has_include(<time.h>)
#include <time.h>
#endif
#define PCRE2_CODE_UNIT_WIDTH 8
#include <pcre2.h>
/*
 * Read the entire contents of `filename` into a newly allocated,
 * NUL-terminated buffer.
 *
 * The file is opened in binary mode and its size is obtained by seeking to
 * the end and calling ftell(). One extra byte is allocated for the trailing
 * '\0' so the result is safe to pass to strlen() and other C-string
 * functions. An empty file yields an empty string.
 *
 * Returns a malloc()ed buffer owned by the caller, who must free() it.
 * On any failure (open, seek/tell, allocation, short read) a message is
 * written to stderr and the process exits with status 1, so the function
 * never returns NULL.
 */
char *read_file(char *filename)
{
    FILE *fh = fopen(filename, "rb");
    if (fh == NULL)
    {
        perror(filename);
        exit(1);
    }

    /* Determine the file size by seeking to the end. */
    if (fseek(fh, 0, SEEK_END) != 0)
    {
        fputs("err", stderr);
        exit(1);
    }
    long length = ftell(fh);
    if (length < 0 || fseek(fh, 0, SEEK_SET) != 0)
    {
        fputs("err", stderr);
        exit(1);
    }

    /* +1 leaves room for the NUL terminator appended below. */
    char *data = malloc((size_t) length + 1);
    if (data == NULL)
    {
        fputs("err", stderr);
        exit(1);
    }

    /* Read byte-wise so that a zero-length file is not treated as an error. */
    size_t wanted = (size_t) length;
    if (fread(data, 1, wanted, fh) != wanted)
    {
        fputs("err", stderr);
        exit(1);
    }
    data[wanted] = '\0';

    fclose(fh);
    return data;
}
void measure(char *data, char *pattern)
{
int count = 0;
double elapsed;
struct timespec start, end;
pcre2_code *re;
int errorcode;
PCRE2_SIZE erroroffset;
pcre2_match_data *match_data;
int length;
PCRE2_SIZE offset = 0;
PCRE2_SIZE *ovector;
clock_gettime(CLOCK_MONOTONIC, &start);
re = pcre2_compile((PCRE2_SPTR) pattern, PCRE2_ZERO_TERMINATED, 0, &errorcode, &erroroffset, NULL);
match_data = pcre2_match_data_create_from_pattern(re, NULL);
length = strlen(data);
while (pcre2_match(re, (PCRE2_SPTR8) data, length, offset, 0, match_data, NULL) == 1)
{
count++;
ovector = pcre2_get_ovector_pointer(match_data);
offset = ovector[1];
}
clock_gettime(CLOCK_MONOTONIC, &end);
elapsed = ((end.tv_sec - start.tv_sec) * 1e9 + end.tv_nsec - start.tv_nsec) / 1e6;
printf("%f - %d\n", elapsed, count);
pcre2_match_data_free(match_data);
pcre2_code_free(re);
}
/*
 * Entry point. Expects exactly one argument, the path of the input file.
 *
 * Loads the file with read_file() and passes its contents to measure() once
 * per benchmark pattern, in the order listed in the table below. With any
 * other argument count a usage line is printed and the exit status is 1.
 */
int main(int argc, char **argv)
{
    /* Benchmark patterns, run in this order. */
    char *patterns[] = {
        /* Email */
        "[\\w\\.+-]+@[\\w\\.-]+\\.[\\w\\.-]+",
        /* URI */
        "[\\w]+://[^/\\s?#]+[^\\s?#]+(?:\\?[^\\s#]*)?(?:#[^\\s]*)?",
        /* IP */
        "(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])",
    };
    const size_t pattern_count = sizeof patterns / sizeof patterns[0];

    if (argc != 2)
    {
        printf("Usage: benchmark <filename>");
        return 1;
    }

    char *contents = read_file(argv[1]);

    for (size_t i = 0; i < pattern_count; i++)
    {
        measure(contents, patterns[i]);
    }

    free(contents);
    return 0;
}