-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrb_u_string_sub.c
More file actions
147 lines (137 loc) · 5.18 KB
/
rb_u_string_sub.c
File metadata and controls
147 lines (137 loc) · 5.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#include "rb_includes.h"
#include "rb_u_re.h"
/* @overload sub(pattern, replacement)
*
* Returns the receiver with the first match of PATTERN replaced by
* REPLACEMENT, inheriting any taint and untrust from the receiver and from
* REPLACEMENT, or nil if there’s no match.
*
* The REPLACEMENT is used as a specification for what to replace matches
* with:
*
* <table>
* <thead>
* <tr><th>Specification</th><th>Replacement</th></tr>
* </thead>
* <tbody>
* <tr>
* <td><code>\1</code>, <code>\2</code>, …, <code>\</code><em>n</em></td>
* <td>Numbered sub-match <em>n</em></td>
* </tr>
* <tr>
* <td><code>\k<</code><em>name</em><code>></code></td>
* <td>Named sub-match <em>name</em></td>
* </tr>
* </tbody>
* </table>
*
* The Regexp special variables `$&`, `$'`, <code>$\`</code>, `$1`, `$2`, …,
* `$`_n_ are updated accordingly.
*
* @param [Regexp, #to_str] pattern
* @param [#to_str] replacement
* @return [U::String, nil]
*
* @overload sub(pattern, replacements)
*
* Returns the receiver with the first match of PATTERN replaced by
* REPLACEMENTS#[_match_], where _match_ is the matched substring, inheriting
* any taint and untrust from the receiver, REPLACEMENTS, and
* REPLACEMENTS#[_match_], or nil if there’s no match.
*
* The Regexp special variables `$&`, `$'`, <code>$\`</code>, `$1`, `$2`, …,
* `$`_n_ are updated accordingly.
*
* @param [Regexp, #to_str] pattern
* @param [#to_hash] replacements
* @raise [Exception] Any error raised by REPLACEMENTS#default, if it gets
* called
* @return [U::String, nil]
*
* @overload sub(pattern){ |match| … }
*
* Returns the receiver with the first match of PATTERN replaced by the result
* of the given block, inheriting any taint and untrust from the receiver and
* from the result of the given block, or nil if there’s no match.
*
* The Regexp special variables `$&`, `$'`, <code>$\`</code>, `$1`, `$2`, …,
* `$`_n_ are updated accordingly.
*
* @param [Regexp, #to_str] pattern
* @yieldparam [U::String] match
* @yieldreturn [#to_str]
* @return [U::String, nil] */
VALUE
rb_u_string_sub(int argc, VALUE *argv, VALUE self)
{
        VALUE pattern, replacement;
        VALUE replacements = Qnil;
        bool tainted = false;
        bool untrusted = false;

        /* With only PATTERN given, the replacement comes from the block. */
        bool use_block = argc == 1;

        if (rb_scan_args(argc, argv, "11", &pattern, &replacement) == 2) {
                /* The second argument is either something convertible to a
                 * Hash (looked up by matched text) or a replacement
                 * specification string. */
                replacements = rb_check_convert_type(replacement, T_HASH,
                                                     "Hash", "to_hash");
                if (NIL_P(replacements))
                        StringValue(replacement);
                if (OBJ_TAINTED(replacement))
                        tainted = true;
                if (OBJ_UNTRUSTED(replacement))
                        untrusted = true;
        }

        pattern = rb_u_pattern_argument(pattern, true);

        VALUE str = rb_str_to_str(self);
        long begin = rb_reg_search(pattern, str, 0, 0);
        if (begin < 0)
                return Qnil;

        VALUE match = rb_backref_get();
        struct re_registers *registers = RMATCH_REGS(match);

        /* Copy the bounds of the whole match before running any user code.
         * The block, or the Hash lookup (default proc, #hash, #eql?), may
         * perform further regexp matches, after which the registers that
         * REGISTERS points at can no longer be relied upon. */
        const long match_begin = registers->beg[0];
        const long match_end = registers->end[0];

        VALUE result;
        if (use_block) {
                VALUE ustr = rb_u_string_new_rb(rb_reg_nth_match(0, match));
                result = rb_u_string_object_as_string(rb_yield(ustr));
        } else if (!NIL_P(replacements)) {
                VALUE ustr = rb_u_string_new_c(self,
                                               RSTRING_PTR(str) + match_begin,
                                               match_end - match_begin);
                result = rb_u_string_object_as_string(rb_hash_aref(replacements, ustr));
        } else {
                /* Expand \1, \k<name>, … in REPLACEMENT; REGISTERS is still
                 * fresh here, as nothing has run since the search. */
#ifdef HAVE_RB_REG_REGSUB4
                result = rb_reg_regsub(replacement, str, registers, pattern);
#else
                result = rb_reg_regsub(replacement, str, registers);
#endif
        }

        if (OBJ_TAINTED(result))
                tainted = true;
        if (OBJ_UNTRUSTED(result))
                untrusted = true;

        /* Splice together: text before the match, the replacement, and the
         * text after the match. */
        const struct rb_u_string *value = RVAL2USTRING_ANY(result);
        const long tail_length = RSTRING_LEN(str) - match_end;
        size_t length = match_begin + USTRING_LENGTH(value) + tail_length;

        char *base = ALLOC_N(char, length + 1);
        MEMCPY(base,
               RSTRING_PTR(str),
               char,
               match_begin);
        MEMCPY(base + match_begin,
               USTRING_STR(value),
               char,
               USTRING_LENGTH(value));
        MEMCPY(base + match_begin + USTRING_LENGTH(value),
               RSTRING_PTR(str) + match_end,
               char,
               tail_length);
        base[length] = '\0';

        /* BASE is handed over to the new string (the "_own" constructor). */
        VALUE substituted = rb_u_string_new_c_own(self, base, length);
        if (tainted)
                OBJ_TAINT(substituted);
        if (untrusted)
                OBJ_UNTRUST(substituted);

        return substituted;
}