-
Notifications
You must be signed in to change notification settings - Fork 0
/
rb_u_string_each_line.c
142 lines (127 loc) · 4.43 KB
/
rb_u_string_each_line.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#include "rb_includes.h"
#include "yield.h"
/* Passes each '\n'-terminated line of SELF to YIELD.
 *
 * A line runs from the end of the previous line up to and including the next
 * '\n' byte.  Any content left after the last '\n' is passed on as a final,
 * unterminated line.  Nothing is passed on for an empty receiver. */
static void
rb_u_string_each_line_default(VALUE self, struct yield *yield)
{
        const struct rb_u_string *string = RVAL2USTRING(self);
        const char *line = USTRING_STR(string);
        const char *end = USTRING_END(string);

        while (line < end) {
                const char *newline = memchr(line, '\n', end - line);
                if (newline == NULL)
                        break;

                /* The line includes its terminating '\n'. */
                const char *next = newline + 1;
                yield_call(yield, rb_u_string_new_c(self, line, next - line));
                line = next;
        }

        /* Trailing content that isn't terminated by a '\n'. */
        if (line != end)
                yield_call(yield, rb_u_string_new_c(self, line, end - line));
}
/* Passes each line of SELF, as delimited by SEPARATOR, to YIELD.
 *
 * Every line passed on includes its trailing separator.  Any content left
 * after the last separator is passed on as a final line.
 *
 * An empty SEPARATOR selects paragraph mode: a line then ends after a run of
 * two or more '\n' characters, or after a single '\n' that is the very last
 * character of SELF.  The whole run of '\n' characters belongs to the line
 * that it ends.
 *
 * Otherwise, the input is decoded character by character.  Whenever a
 * character equals the first character of SEPARATOR, the bytes at that
 * position are compared against the whole of SEPARATOR (the comparison is
 * skipped for separators shorter than two bytes, as the first character then
 * is the whole separator). */
static void
rb_u_string_each_line_separator(VALUE self, const struct rb_u_string *separator,
                                struct yield *yield)
{
        const struct rb_u_string *string = RVAL2USTRING(self);
        const long separator_length = USTRING_LENGTH(separator);
        const int paragraphs = separator_length == 0;

        /* Receives the position following the most recently decoded character. */
        const char *next;
        uint32_t first;
        if (paragraphs)
                first = '\n';
        else
                first = u_decode(&next, USTRING_STR(separator), USTRING_END(separator));

        const char *line = USTRING_STR(string);
        const char *cursor = line;
        const char *end = USTRING_END(string);

        while (cursor < end) {
                uint32_t c = u_decode(&next, cursor, end);

                if (paragraphs && c == first) {
                        cursor = next;
                        if (cursor < end) {
                                c = u_decode(&next, cursor, end);
                                if (c != first) {
                                        /* A lone '\n' doesn't end a paragraph;
                                         * step over the character following it
                                         * and keep scanning. */
                                        cursor = next;
                                        continue;
                                }
                        }
                        /* Swallow the remainder of the run of '\n'. */
                        while (cursor < end && u_decode(&next, cursor, end) == first)
                                cursor = next;
                }

                /* Not at a separator: move on to the next character. */
                if (c != first ||
                    (separator_length >= 2 &&
                     (end - cursor < separator_length ||
                      memcmp(USTRING_STR(separator), cursor, separator_length) != 0))) {
                        cursor = next;
                        continue;
                }

                /* In paragraph mode the cursor is already past the run of
                 * '\n' and separator_length is 0, so this is a no-op there. */
                cursor += separator_length;
                yield_call(yield, rb_u_string_new_c(self, line, cursor - line));
                line = cursor;
        }

        /* Trailing content that isn't terminated by a separator. */
        if (line != end)
                yield_call(yield, rb_u_string_new_c(self, line, end - line));
}
/* Shared implementation of #each_line and #lines: works out what separator to
 * use from ARGC/ARGV and passes each line of SELF to YIELD.
 *
 * Without arguments, the separator is rb_rs.  A nil separator passes SELF on
 * as the one and only line.  A separator that is the rb_default_rs object
 * itself is handled by the '\n'-specific scanner; anything else goes through
 * the general separator scanner. */
static void
each(int argc, VALUE *argv, VALUE self, struct yield *yield)
{
        VALUE rs = rb_rs;

        if (argc != 0)
                rb_scan_args(argc, argv, "01", &rs);

        if (NIL_P(rs)) {
                yield_call(yield, self);
                return;
        }

        /* Converted up front, before choosing a scanner. */
        const struct rb_u_string *separator = RVAL2USTRING_ANY(rs);

        if (rs != rb_default_rs)
                rb_u_string_each_line_separator(self, separator, yield);
        else
                rb_u_string_each_line_default(self, yield);
}
/* @overload each_line(separator = $/){ |lp| … }
 *
 *   Yields each line of the receiver in turn.  Every line yielded inherits
 *   any taint and untrust.
 *
 *   A nil SEPARATOR yields self once.  An {#empty?} SEPARATOR selects
 *   paragraph mode, in which lines (paragraphs) are separated by two or more
 *   U+000A LINE FEED characters.
 *
 *   @param [U::String, #to_str] separator
 *   @yieldparam [U::String, self] lp
 *   @return [self]
 *
 * @overload each_line(separator = $/)
 *
 *   Returns an Enumerator over the lines of the receiver.
 *
 *   A nil SEPARATOR makes the Enumerator yield self.  An {#empty?} SEPARATOR
 *   selects paragraph mode, in which lines (paragraphs) are separated by two
 *   or more U+000A LINE FEED characters.
 *
 *   @param [U::String, #to_str] separator
 *   @return [Enumerator] */
VALUE
rb_u_string_each_line(int argc, VALUE *argv, VALUE self)
{
        /* Covers the block-less overload. */
        RETURN_ENUMERATOR(self, argc, argv);

        struct yield block = YIELD_INIT;

        each(argc, argv, self, &block);

        return self;
}
/* @overload lines(separator = $/)
 *
 *   Returns the lines of the receiver, inheriting any taint and untrust.
 *
 *   A nil SEPARATOR makes self the only line collected.  An {#empty?}
 *   SEPARATOR selects paragraph mode, in which lines (paragraphs) are
 *   separated by two or more U+000A LINE FEED characters.
 *
 *   @param [U::String, #to_str] separator
 *   @return [Array<U::String>] */
VALUE
rb_u_string_lines(int argc, VALUE *argv, VALUE self)
{
        /* Same line splitting as #each_line, but with the lines going to an
         * array-backed yield instead of to a block. */
        struct yield_array collector = YIELD_ARRAY_INIT;

        each(argc, argv, self, &collector.yield);

        return collector.array;
}