@@ -19,7 +19,39 @@ import kernel
19
19
import collection ::array
20
20
intrude import text ::flat
21
21
22
# An entity that can be looked up inside a sequence of `Byte`
#
# Concrete patterns supply the abstract primitives below; the two
# convenience searches (`first_index_in`, `last_index_in`) delegate to them.
interface BytePattern
	# Index of the first occurrence of `self` in `b`, or -1 if there is none
	#
	# Delegates to `first_index_in_from` with a start position of 0.
	fun first_index_in(b: SequenceRead[Byte]): Int do
		return first_index_in_from(b, 0)
	end

	# Index of the first occurrence of `self` in `b` at or after `from`, or -1 if there is none
	fun first_index_in_from(b: SequenceRead[Byte], from: Int): Int is abstract

	# Index of the last occurrence of `self` in `b`, or -1 if there is none
	#
	# Delegates to `last_index_in_from`, starting at the last index of `b`.
	fun last_index_in(b: SequenceRead[Byte]): Int do
		var last_pos = b.length - 1
		return last_index_in_from(b, last_pos)
	end

	# Index of the last occurrence of `self` in `b` when searching back from `from`, or -1 if there is none
	fun last_index_in_from(b: SequenceRead[Byte], from: Int): Int is abstract

	# Indexes of all the occurrences of `self` in `b`
	fun search_all_in(b: SequenceRead[Byte]): SequenceRead[Int] is abstract

	# Number of bytes in the pattern
	fun pattern_length: Int is abstract

	# Appends `self` at the end of `b`
	fun append_to(b: Sequence[Byte]) is abstract

	# Does `b` start with `self`?
	fun is_prefix(b: SequenceRead[Byte]): Bool is abstract

	# Does `b` end with `self`?
	fun is_suffix(b: SequenceRead[Byte]): Bool is abstract
end
51
+
22
52
redef class Byte
53
+ super BytePattern
54
+
23
55
# Write self as a string into `ns` at position `pos`
24
56
private fun add_digest_at (ns : NativeString , pos : Int ) do
25
57
var tmp = (0xF0u8 & self ) >> 4
@@ -72,13 +104,47 @@ redef class Byte
72
104
# i.e. this abort is here to please the compiler
73
105
abort
74
106
end
107
+
108
# Scans `b` forward from index `from` and returns the first index holding `self`, or -1
redef fun first_index_in_from(b, from) do
	var limit = b.length
	var idx = from
	while idx < limit do
		if b[idx] == self then return idx
		idx += 1
	end
	return -1
end
112
+
113
# Returns the first index visited in `[0 .. from]` with a step of -1 that holds `self`, or -1
#
# NOTE(review): this relies on `Range.step` with a negative step walking
# `[0 .. from]` from `from` down to 0 - confirm against the `Range` API.
redef fun last_index_in_from(b, from) do
	for idx in [0 .. from].step(-1) do
		if b[idx] == self then return idx
	end
	return -1
end
117
+
118
# Collects, in increasing order, every index of `b` that holds `self`
redef fun search_all_in(b) do
	var found = new Array[Int]
	var cursor = first_index_in_from(b, 0)
	while cursor != -1 do
		found.add cursor
		# Resume right after the hit that was just recorded
		cursor = first_index_in_from(b, cursor + 1)
	end
	return found
end
128
+
129
# A single `Byte` is a pattern of exactly one byte
redef fun pattern_length do
	return 1
end
130
+
131
# Pushes `self` at the end of `b`
redef fun append_to(b) do
	b.push self
end
132
+
133
# Is `self` the last byte of `b`? Always false on an empty `b`.
#
#     assert 'b'.ascii.is_suffix("baqsdb".to_bytes)
#     assert not 'b'.ascii.is_suffix("baqsd".to_bytes)
redef fun is_suffix(b) do
	if b.length == 0 then return false
	return b.last == self
end
136
+
137
# Is `self` the first byte of `b`? Always false on an empty `b`.
#
#     assert 'b'.ascii.is_prefix("baqsdb".to_bytes)
#     assert not 'b'.ascii.is_prefix("aqsdb".to_bytes)
redef fun is_prefix(b) do
	if b.length == 0 then return false
	return b.first == self
end
75
140
end
76
141
77
142
# A buffer containing Byte-manipulation facilities
78
143
#
79
144
# Uses Copy-On-Write when persisted
80
145
class Bytes
81
146
super AbstractArray [Byte ]
147
+ super BytePattern
82
148
83
149
# A NativeString being a char*, it can be used as underlying representation here.
84
150
var items : NativeString
@@ -107,7 +173,9 @@ class Bytes
107
173
init (ns , 0 , cap )
108
174
end
109
175
110
- redef fun is_empty do return length != 0
176
# Used as a pattern, a `Bytes` is as long as its content
redef fun pattern_length do
	return length
end
177
+
178
# True when `self` holds no byte
redef fun is_empty do
	return length == 0
end
111
179
112
180
# var b = new Bytes.empty
113
181
# b.add 101u8
@@ -118,6 +186,71 @@ class Bytes
118
186
return items [i ]
119
187
end
120
188
189
# Builds a new `Bytes` holding the same content as `self`
#
# The copy is created with a capacity of `length` and filled by appending `self`.
fun clone: Bytes do
	var copy = new Bytes.with_capacity(length)
	copy.append(self)
	return copy
end
195
+
196
# Returns a copy of `self` without its leading and trailing whitespaces
#
#     var b = "102041426E6F1020".hexdigest_to_bytes
#     assert b.trim.hexdigest == "41426E6F"
#
# NOTE: A whitespace is defined here as a byte whose value is <= 0x20
fun trim: Bytes do
	# Find the first byte to keep; nothing but whitespaces gives an empty result
	var first_kept = 0
	loop
		if first_kept >= length then return new Bytes.empty
		if self[first_kept] > 0x20u8 then break
		first_kept += 1
	end
	# Walk back from the end to the last byte to keep
	var last_kept = length - 1
	while last_kept > 0 and self[last_kept] <= 0x20u8 do
		last_kept -= 1
	end
	return slice(first_kept, last_kept - first_kept + 1)
end
216
+
217
# Copies `count` bytes of `self`, starting at index `from`, into a new `Bytes`
#
# A negative `from` is brought back to 0 and shortens `count` accordingly;
# a `count` running past the end is truncated to the available bytes.
#
#     var b = "abcd".to_bytes
#     assert b.slice(1, 2).hexdigest == "6263"
#     assert b.slice(-1, 2).hexdigest == "61"
#     assert b.slice(1, 0).hexdigest == ""
#     assert b.slice(2, 5).hexdigest == "6364"
fun slice(from, count: Int): Bytes do
	if count <= 0 then return new Bytes.empty

	# The part requested before index 0 does not exist: drop it from `count`
	if from < 0 then
		count = count + from
		if count < 0 then count = 0
		from = 0
	end

	# Never read past the end of `self`
	var available = length - from
	if count > available then count = available
	if count <= 0 then return new Bytes.empty

	var piece = new Bytes.with_capacity(count)
	piece.append_ns(items.fast_cstring(from), count)
	return piece
end
241
+
242
# Copies the content of `self` from index `from` up to the end
#
# A negative `from` is treated as 0; a `from` at or past the end gives an empty `Bytes`.
#
#     var b = "abcd".to_bytes
#     assert b.slice_from(1).hexdigest == "626364"
#     assert b.slice_from(-1).hexdigest == "61626364"
#     assert b.slice_from(2).hexdigest == "6364"
fun slice_from(from: Int): Bytes do
	if from >= length then return new Bytes.empty
	var start = from
	if start < 0 then start = 0
	# `slice` truncates the count to what is left after `start`
	return slice(start, length)
end
253
+
121
254
# Returns self as a hexadecimal digest
122
255
fun hexdigest : String do
123
256
var elen = length * 2
@@ -218,6 +351,15 @@ class Bytes
218
351
length += ln
219
352
end
220
353
354
# Appends the bytes of `s` to `self`
#
# Each substring of `s` is appended in turn through `append_ns`.
fun append_text(s: Text) do
	for chunk in s.substrings do append_ns(chunk.fast_cstring, chunk.bytelen)
end
360
+
361
# Appends the whole content of `self` at the end of `b`
redef fun append_to(b) do
	b.append self
end
362
+
221
363
redef fun enlarge (sz ) do
222
364
if capacity >= sz then return
223
365
persisted = false
@@ -237,6 +379,157 @@ class Bytes
237
379
238
380
# Returns a `BytesIterator` built over `self`
redef fun iterator do
	return new BytesIterator.with_buffer(self)
end
239
381
382
# Index in `b` of the first full occurrence of `self` starting at or after `from`, or -1
#
# An empty pattern is never found. A negative `from` is treated as 0.
#
#     var hay = "abcabc".to_bytes
#     assert "bc".to_bytes.first_index_in_from(hay, 0) == 1
#     assert "bc".to_bytes.first_index_in_from(hay, 2) == 4
#     assert "bd".to_bytes.first_index_in_from(hay, 0) == -1
#     assert "abcabcd".to_bytes.first_index_in_from(hay, 0) == -1
redef fun first_index_in_from(b, from) do
	var plen = length
	if plen == 0 then return -1
	var start = from
	if start < 0 then start = 0
	# Highest index at which a full match still fits inside `b`
	var last_start = b.length - plen
	while start <= last_start do
		# Compare the pattern with the window of `b` opening at `start`
		var i = 0
		while i < plen and self[i] == b[start + i] do
			i += 1
		end
		if i == plen then return start
		start += 1
	end
	return -1
end

# Index in `b` of the last full occurrence of `self` ending at or before `from`, or -1
#
# `from` is the highest index of `b` a match may cover; this mirrors the
# `Byte` implementation, which only looks at the indexes of `[0 .. from]`.
# An empty pattern is never found.
#
# NOTE(review): the returned value is the index of the first byte of the
# match, like `first_index_in_from` - confirm that callers expect this.
#
#     var hay = "abcabc".to_bytes
#     assert "bc".to_bytes.last_index_in_from(hay, 5) == 4
#     assert "bc".to_bytes.last_index_in_from(hay, 4) == 1
#     assert "bc".to_bytes.last_index_in_from(hay, 1) == -1
redef fun last_index_in_from(b, from) do
	var plen = length
	if plen == 0 then return -1
	# Clamp the right bound so the window never leaves `b`
	var stop = from
	if stop >= b.length then stop = b.length - 1
	# Candidate index of the first byte of the match, walked right to left
	var start = stop - plen + 1
	while start >= 0 do
		var i = 0
		while i < plen and self[i] == b[start + i] do
			i += 1
		end
		if i == plen then return start
		start -= 1
	end
	return -1
end

# Start indexes of all the non-overlapping occurrences of `self` in `b`, left to right
#
# After a hit the search resumes right after the matched bytes, which is
# what `split_with` and `replace` rely on when they skip `pattern_length` bytes.
#
#     var hits = "aa".to_bytes.search_all_in("aaaaa".to_bytes)
#     assert hits.length == 2
#     assert hits[0] == 0
#     assert hits[1] == 2
redef fun search_all_in(b) do
	var ret = new Array[Int]
	# An empty pattern matches nothing (and would never advance the cursor)
	if is_empty then return ret
	var pos = first_index_in_from(b, 0)
	while pos != -1 do
		ret.add pos
		pos = first_index_in_from(b, pos + length)
	end
	return ret
end
417
+
418
# Splits `self` on every occurrence of `b`; the occurrences themselves are dropped
#
# When `b` is not found, the result is a single copy of `self`.
#
#     var a = "String is string".to_bytes.split_with('s'.ascii)
#     assert a.length == 3
#     assert a[0].hexdigest == "537472696E672069"
#     assert a[1].hexdigest == "20"
#     assert a[2].hexdigest == "7472696E67"
fun split_with(b: BytePattern): Array[Bytes] do
	var hits = b.search_all_in(self)
	if hits.is_empty then return [clone]
	var parts = new Array[Bytes]
	# `cursor` is the index right after the previous separator
	var cursor = 0
	for hit in hits do
		var piece = slice(cursor, hit - cursor)
		parts.add piece
		cursor = hit + b.pattern_length
	end
	# Whatever follows the last separator
	parts.add slice_from(cursor)
	return parts
end
437
+
438
# Cuts `self` in two around the first occurrence of `b`, which is dropped
#
# When `b` is not found, the result is a single copy of `self`.
#
#     var a = "String is string".to_bytes.split_once_on('s'.ascii)
#     assert a[0].hexdigest == "537472696E672069"
#     assert a[1].hexdigest == "20737472696E67"
fun split_once_on(b: BytePattern): Array[Bytes] do
	var cut = b.first_index_in(self)
	if cut == -1 then return [clone]
	var halves = new Array[Bytes].with_capacity(2)
	var head = slice(0, cut)
	halves.add head
	var tail = slice_from(cut + b.pattern_length)
	halves.add tail
	return halves
end
451
+
452
# Returns a copy of `self` where every occurrence of `pattern` is replaced by `bytes`
#
# An empty `self` gives an empty result; without any occurrence the result is a plain copy.
#
#     var b = "String is string".to_bytes.replace(0x20u8, 0x41u8)
#     assert b.hexdigest == "537472696E6741697341737472696E67"
fun replace(pattern: BytePattern, bytes: BytePattern): Bytes do
	if is_empty then return new Bytes.empty
	var hits = pattern.search_all_in(self)
	if hits.is_empty then return clone
	var out = new Bytes.with_capacity(length)
	# `cursor` is the index right after the previous occurrence
	var cursor = 0
	for hit in hits do
		# Copy the untouched bytes found before this occurrence, then the replacement
		out.append_ns(items.fast_cstring(cursor), hit - cursor)
		bytes.append_to out
		cursor = hit + pattern.pattern_length
	end
	# Copy what remains after the last occurrence
	var tail = slice_from(hits.last + pattern.pattern_length)
	out.append(tail)
	return out
end
470
+
471
# Decode `self` from percent (or URL) encoding to a clear string
#
# Every `%` followed by two hexadecimal digits is replaced by the byte they encode.
# Invalid uses of '%' (not followed by two hexadecimal digits, including a
# truncated escape at the end of `self`) are replaced with '?'.
#
#     assert "aBc09-._~".to_bytes.from_percent_encoding == "aBc09-._~".to_bytes
#     assert "%25%28%29%3c%20%3e".to_bytes.from_percent_encoding == "%()< >".to_bytes
#     assert ".com%2fpost%3fe%3dasdf%26f%3d123".to_bytes.from_percent_encoding == ".com/post?e=asdf&f=123".to_bytes
#     assert "%25%28%29%3C%20%3E".to_bytes.from_percent_encoding == "%()< >".to_bytes
#     assert "incomplete %".to_bytes.from_percent_encoding == "incomplete ?".to_bytes
#     assert "incomplete %2".to_bytes.from_percent_encoding == "incomplete ?2".to_bytes
#     assert "invalid % usage".to_bytes.from_percent_encoding == "invalid ? usage".to_bytes
#     assert "%c3%a9%e3%81%82%e3%81%84%e3%81%86".to_bytes.from_percent_encoding == "éあいう".to_bytes
fun from_percent_encoding: Bytes do
	var percent = '%'.ascii
	var invalid = '?'.ascii
	var tmp = new Bytes.with_capacity(length)
	var pos = 0
	while pos < length do
		var b = self[pos]
		if b != percent then
			tmp.add b
			pos += 1
			continue
		end
		# A complete escape is 3 bytes long ('%' and two digits); with fewer
		# bytes left, reading `self[pos + 2]` would go out of bounds.
		if length - pos < 3 then
			tmp.add invalid
			pos += 1
			continue
		end
		var bn = self[pos + 1]
		var bnn = self[pos + 2]
		if not bn.is_valid_hexdigit or not bnn.is_valid_hexdigit then
			# Only the '%' is consumed: the following bytes are decoded on their own
			tmp.add invalid
			pos += 1
			continue
		end
		tmp.add((bn.hexdigit_to_byteval << 4) + bnn.hexdigit_to_byteval)
		pos += 3
	end
	return tmp
end
509
+
510
# Does `self` start with `b`?
#
# The check is delegated to `b.is_prefix`.
fun has_prefix(b: BytePattern): Bool do
	return b.is_prefix(self)
end
512
+
513
# Does `self` end with `b`?
#
# The check is delegated to `b.is_suffix`.
fun has_suffix(b: BytePattern): Bool do
	return b.is_suffix(self)
end
515
+
516
# Does `b` end with the whole content of `self`?
#
# A pattern longer than `b` is never a suffix; an empty pattern always is.
#
#     assert "sdb".to_bytes.is_suffix("baqsdb".to_bytes)
#     assert not "xdb".to_bytes.is_suffix("baqsdb".to_bytes)
#     assert not "baqsdbb".to_bytes.is_suffix("baqsdb".to_bytes)
redef fun is_suffix(b) do
	if length > b.length then return false
	# Compare both sequences from their last byte down to index 0 of `self`
	var j = b.length - 1
	var i = length - 1
	while i >= 0 do
		if self[i] != b[j] then return false
		i -= 1
		j -= 1
	end
	return true
end
527
+
528
# Does `b` start with the whole content of `self`?
#
# A pattern longer than `b` is never a prefix.
redef fun is_prefix(b) do
	var plen = length
	if plen > b.length then return false
	var idx = 0
	while idx < plen do
		if self[idx] != b[idx] then return false
		idx += 1
	end
	return true
end
240
533
end
241
534
242
535
private class BytesIterator
@@ -397,3 +690,20 @@ redef class NativeString
397
690
return new Bytes (nns , len , len )
398
691
end
399
692
end
693
+
694
# Concatenates the elements of `arr`, inserting `sep` between consecutive elements
#
# A null `sep` behaves like an empty separator; an empty `arr` gives an empty `Bytes`.
#
#     assert join_bytes(["String".to_bytes, "is".to_bytes, "string".to_bytes], ' '.ascii).hexdigest == "537472696E6720697320737472696E67"
fun join_bytes(arr: Array[Bytes], sep: nullable BytePattern): Bytes do
	if arr.is_empty then return new Bytes.empty
	var glue = sep or else new Bytes.empty
	# Size of the result: one separator per gap plus every element
	var total = glue.pattern_length * (arr.length - 1)
	for chunk in arr do total += chunk.length
	var joined = new Bytes.with_capacity(total)
	var is_first = true
	for chunk in arr do
		# No separator in front of the first element
		if is_first then
			is_first = false
		else
			glue.append_to(joined)
		end
		joined.append chunk
	end
	return joined
end
0 commit comments