mem: add split_rev

Over the last couple of weeks I needed to iterate over a collection backwards at least twice. Do we want to have this in stdlib? If yes, click "Merge" and start using it today! Free shipping and returns (before 1.0). Why is this useful? ------------------- I need this for building an error wrapper: errors are added in the wrapper from "lowest" level to "highest" level, and then printed in reverse order. Imagine an `UpdateUsers` call, which needs to return `error.InvalidInput` and a wrappable error context. In Go we would add a context to the error when returning it: // if update_user fails, add context on which user we are operating if err := update_user(user); err != nil { return fmt.Errorf("user id=%d: %w", user.id, err) } Since Zig cannot pass anything else than u16 with an error (ziglang#2647), I will pass an `err_ctx: *Err`, to the callers, where they can, besides returning an error, augment it with auxiliary data. `Err` is a preallocated array that can add zero-byte-separated strings. For a concrete example, imagine such a call graph: update_user(User, *Err) error{InvalidInput}!<...> validate_user([]const u8, *Err) error{InvalidInput}!<...> Where `validate_user` would like, besides only the error, signal the invalid field. And `update_user`, besides the error, would signal the offending user id. We also don't want the low-level functions to know in which context they are operating to construct a meaningful error message: if validation fails, they append their "context" to the buffer. To translate/augment the Go example above: pub fn validate_user(err_ctx: *Err, user: User) error{InvalidInput}!void { if (!ascii.isAlpha(name)) { err_ctx.print("name '{s}' must be ascii-letters only", .{name}); return error.InvalidInput; } <...> } // update_user validates each user and does something with it. 
pub fn update_user(err_ctx: *Err, user: User) error{InvalidInput}!void { // validate the user before updating it validate_user(err_ctx, user) catch { err_ctx.print("user id={d}", .{user.id}); return error.InvalidInput; }; <...> } Then the top-level function (in my case, CLI) will read the buffer backwards (splitting on `"\x00"`) and print: user id=123: name 'žemas' must be ascii-letters only To read that buffer backwards, dear readers of this commit message, I need `mem.split_rev`.
motiejus · Jun 22, 2022 · b11ba83 · b11ba83
1 parent e44f927
commit b11ba83
Showing 1 changed file with 123 additions and 6 deletions.
diff --git a/lib/std/mem.zig b/lib/std/mem.zig
@@ -1690,7 +1690,6 @@ test "tokenize (reset)" {
 /// If `delimiter` does not exist in buffer,
 /// the iterator will return `buffer`, null, in that order.
 /// The delimiter length must not be zero.
-/// See also the related function `tokenize`.
 pub fn split(comptime T: type, buffer: []const T, delimiter: []const T) SplitIterator(T) {
     assert(delimiter.len != 0);
     return .{
@@ -1754,11 +1753,100 @@ test "split (multibyte)" {
         std.unicode.utf8ToUtf16LeStringLiteral("a, b ,, c, d, e"),
         std.unicode.utf8ToUtf16LeStringLiteral(", "),
     );
-    try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("a")));
-    try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("b ,")));
-    try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("c")));
-    try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("d")));
-    try testing.expect(eql(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("e")));
+    try testing.expectEqualSlices(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("a"));
+    try testing.expectEqualSlices(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("b ,"));
+    try testing.expectEqualSlices(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("c"));
+    try testing.expectEqualSlices(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("d"));
+    try testing.expectEqualSlices(u16, it16.next().?, std.unicode.utf8ToUtf16LeStringLiteral("e"));
+    try testing.expect(it16.next() == null);
+}
+
+/// Returns an iterator that iterates backwards over the slices of `buffer`
+/// that are separated by the full `delimiter` sequence.
+/// split_rev(u8, "abc|def||ghi", "|")
+/// will return slices for "ghi", "", "def", "abc", null, in that order.
+/// If `delimiter` does not exist in buffer,
+/// the iterator will return `buffer`, null, in that order.
+/// The delimiter length must not be zero.
+/// See also the related functions `split` and `tokenize`.
+pub fn split_rev(comptime T: type, buffer: []const T, delimiter: []const T) SplitIteratorRev(T) {
+    assert(delimiter.len != 0);
+    return SplitIteratorRev(T){
+        .index = buffer.len,
+        .buffer = buffer,
+        .delimiter = delimiter,
+    };
+}
+
+test "split_rev" {
+    var it = split_rev(u8, "abc|def||ghi", "|"); // fields come back last-to-first
+    try testing.expectEqualSlices(u8, "abc|def||ghi", it.rest());
+    try testing.expectEqualSlices(u8, "ghi", it.next().?);
+
+    try testing.expectEqualSlices(u8, "abc|def|", it.rest());
+    try testing.expectEqualSlices(u8, "", it.next().?);
+
+    try testing.expectEqualSlices(u8, "abc|def", it.rest());
+    try testing.expectEqualSlices(u8, "def", it.next().?);
+
+    try testing.expectEqualSlices(u8, "abc", it.rest());
+    try testing.expectEqualSlices(u8, "abc", it.next().?);
+
+    try testing.expectEqualSlices(u8, "", it.rest());
+    try testing.expect(it.next() == null);
+
+    it = split_rev(u8, "", "|"); // empty buffer: exactly one empty field
+    try testing.expectEqualSlices(u8, "", it.next().?);
+    try testing.expect(it.next() == null);
+
+    it = split_rev(u8, "|", "|"); // lone delimiter: an empty field on each side
+    try testing.expectEqualSlices(u8, "", it.next().?);
+    try testing.expectEqualSlices(u8, "", it.next().?);
+    try testing.expect(it.next() == null);
+
+    it = split_rev(u8, "hello", " "); // delimiter absent: the whole buffer, then null
+    try testing.expectEqualSlices(u8, "hello", it.next().?);
+    try testing.expect(it.next() == null);
+
+    var it16 = split_rev(
+        u16,
+        std.unicode.utf8ToUtf16LeStringLiteral("hello"),
+        std.unicode.utf8ToUtf16LeStringLiteral(" "),
+    );
+    try testing.expectEqualSlices(u16, std.unicode.utf8ToUtf16LeStringLiteral("hello"), it16.next().?);
+    try testing.expect(it16.next() == null);
+}
+
+test "split_rev (multibyte)" {
+    var it = split_rev(u8, "a, b ,, c, d, e", ", "); // two-element delimiter
+    try testing.expectEqualSlices(u8, "a, b ,, c, d, e", it.rest());
+    try testing.expectEqualSlices(u8, "e", it.next().?);
+
+    try testing.expectEqualSlices(u8, "a, b ,, c, d", it.rest());
+    try testing.expectEqualSlices(u8, "d", it.next().?);
+
+    try testing.expectEqualSlices(u8, "a, b ,, c", it.rest());
+    try testing.expectEqualSlices(u8, "c", it.next().?);
+
+    try testing.expectEqualSlices(u8, "a, b ,", it.rest());
+    try testing.expectEqualSlices(u8, "b ,", it.next().?);
+
+    try testing.expectEqualSlices(u8, "a", it.rest());
+    try testing.expectEqualSlices(u8, "a", it.next().?);
+
+    try testing.expectEqualSlices(u8, "", it.rest());
+    try testing.expect(it.next() == null);
+
+    var it16 = split_rev(
+        u16,
+        std.unicode.utf8ToUtf16LeStringLiteral("a, b ,, c, d, e"),
+        std.unicode.utf8ToUtf16LeStringLiteral(", "),
+    );
+    try testing.expectEqualSlices(u16, std.unicode.utf8ToUtf16LeStringLiteral("e"), it16.next().?);
+    try testing.expectEqualSlices(u16, std.unicode.utf8ToUtf16LeStringLiteral("d"), it16.next().?);
+    try testing.expectEqualSlices(u16, std.unicode.utf8ToUtf16LeStringLiteral("c"), it16.next().?);
+    try testing.expectEqualSlices(u16, std.unicode.utf8ToUtf16LeStringLiteral("b ,"), it16.next().?);
+    try testing.expectEqualSlices(u16, std.unicode.utf8ToUtf16LeStringLiteral("a"), it16.next().?);
     try testing.expect(it16.next() == null);
 }
 
@@ -1858,6 +1946,35 @@ pub fn SplitIterator(comptime T: type) type {
     };
 }
 
+/// Iterator returned by `split_rev`: walks `buffer` from its end to its start.
+pub fn SplitIteratorRev(comptime T: type) type {
+    return struct {
+        buffer: []const T,
+        /// End of the part of `buffer` not yet returned; null once exhausted.
+        index: ?usize,
+        delimiter: []const T,
+
+        const Self = @This();
+
+        /// Returns the next field counting from the end of the buffer,
+        /// or null once every field has been returned.
+        pub fn next(self: *Self) ?[]const T {
+            const stop = self.index orelse return null;
+            const found = lastIndexOf(T, self.buffer[0..stop], self.delimiter);
+            // The unread part now ends where the delimiter starts (null: none left).
+            self.index = found;
+            const begin = if (found) |pos| pos + self.delimiter.len else 0;
+            return self.buffer[begin..stop];
+        }
+
+        /// Returns the part of `buffer` not yet returned. Does not affect iterator state.
+        pub fn rest(self: Self) []const T {
+            const upto = self.index orelse 0;
+            return self.buffer[0..upto];
+        }
+    };
+}
+
 /// Naively combines a series of slices with a separator.
 /// Allocates memory for the result, which must be freed by the caller.
 pub fn join(allocator: Allocator, separator: []const u8, slices: []const []const u8) ![]u8 {