From b052bb57d1d50d70196db4636c175fb7ba6e3571 Mon Sep 17 00:00:00 2001
From: hatf0 <harrison@0xcc.pw>
Date: Mon, 15 Nov 2021 15:19:22 -0500
Subject: [PATCH 1/4] Add simple base64 de/encoding support

---
 source/mir/base64.d | 317 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 317 insertions(+)
 create mode 100644 source/mir/base64.d

diff --git a/source/mir/base64.d b/source/mir/base64.d
new file mode 100644
index 00000000..08933e46
--- /dev/null
+++ b/source/mir/base64.d
@@ -0,0 +1,317 @@
+/++
+$(H1 @nogc Simple Base64 parsing)
+
+License: $(HTTP www.apache.org/licenses/LICENSE-2.0, Apache-2.0)
+Authors: Harrison Ford
+Copyright: 2021 Harrison Ford, Kaleidic Associates Advisory Limited, Symmetry Investments
++/
+module mir.base64;
+import mir.ndslice.topology;
+import core.bitop : bswap;
+
+package static immutable base64DecodeInvalidCharMsg = "Invalid character encountered.";
+package static immutable base64DecodeInvalidLenMsg = "Cannot decode a buffer with given length (not a multiple of 4, missing padding?)";
+version(D_Exceptions) {
+    package static immutable base64DecodeInvalidCharException = new Exception(base64DecodeInvalidCharMsg);
+    package static immutable base64DecodeInvalidLenException = new Exception(base64DecodeInvalidLenMsg);
+}
+
+// NOTE: I do not know if this would work on big-endian systems.
+// Needs further testing to figure out if it *does* work on them.
+
+// Technique borrowed from http://0x80.pl/notesen/2016-01-12-sse-base64-encoding.html#branchless-code-for-lookup-table
+private char lookup_encoding(ubyte i) @safe @nogc pure {
+    assert(i < 64);
+
+    ubyte shift;
+
+    if (i < 26)
+    {
+        // range A-Z
+        shift = 'A';
+    }
+    else if (i >= 26 && i < 52)
+    {
+        // range a-z
+        shift = 'a' - 26;
+    }
+    else if (i >= 52 && i < 62)
+    {
+        // range 0-9
+        shift = cast(ubyte)('0' - 52);
+    }
+    else if (i == 62)
+    {
+        // character plus
+        shift = cast(ubyte)('+' - 62);
+    }
+    else if (i == 63)
+    {
+        // character slash
+        shift = cast(ubyte)('/' - 63);
+    }
+
+    return cast(char)(i + shift);
+}
+
+// Do the inverse of above (convert an ASCII value into the Base64 character set)
+private ubyte lookup_decoding(char i) @safe @nogc pure
+{
+    // Branching bad, but this isn't performance sensitive
+    if (i <= 'Z' && i >= 'A') {
+        return cast(ubyte)(i - 'A');
+    }
+    else if (i <= 'z' && i >= 'a') {
+        return cast(ubyte)(i - 'a' + 26); 
+    }
+    else if (i <= '9' && i >= '0') {
+        return cast(ubyte)(i - '0' + 52);
+    }
+    else if (i == '+') {
+        return 62;
+    }
+    else if (i == '/') {
+        return 63;
+    }
+    // Just return 0 for padding,
+    // as it typically means nothing.
+    else if (i == '=') {
+        return 0;
+    }
+    else {
+        version(D_Exceptions) {
+            throw base64DecodeInvalidCharException;
+        } else {
+            assert(0, base64DecodeInvalidCharMsg);
+        }
+    }
+
+}
+
+/++
+Decode a Base64 encoded value, returning the buffer.
++/
+ubyte[] decodeBase64(scope ubyte[] buf) @safe pure
+{
+    import mir.appender : scopedBuffer;
+    auto app = scopedBuffer!ubyte;
+    decodeBase64(buf, app);
+    return app.data.dup;
+}
+
+/++
+Decode a Base64 encoded value, placing the result onto an Appender.
++/
+void decodeBase64(Appender)(scope ubyte[] input, ref Appender appender) @safe @nogc pure
+{
+    // We expect data should be well-formed (with padding),
+    // so we should throw if it is not well-formed.
+    if (input.length % 4 != 0)
+    {
+        version(D_Exceptions) {
+            throw base64DecodeInvalidLenException;
+        } else {
+            assert(0, base64DecodeInvalidLenMsg);
+        }
+    }
+    foreach(group; input.bytegroup!(4, uint).map!bswap)
+    {
+        // We only expect valid ASCII values for these,
+        // hence the 0x7f.
+        const(ubyte) a = lookup_decoding((group >> 24) & 0x7f);
+        const(ubyte) b = lookup_decoding((group >> 16) & 0x7f);
+        const(ubyte) c = lookup_decoding((group >> 8) & 0x7f);
+        const(ubyte) d = lookup_decoding((group) & 0x7f);
+
+        // We do the inverse of how we encoded it...
+        uint transformed_group = (a << 26) | (b << 20) | (c << 14) | (d << 8);
+
+        const(ubyte) t_a = (transformed_group >> 24) & 0xff;
+        const(ubyte) t_b = (transformed_group >> 16) & 0xff;
+        const(ubyte) t_c = (transformed_group >> 8) & 0xff;
+        const(ubyte) t_d = (transformed_group) & 0xff;
+
+        // We should *always* have enough for at least
+        // one, but we don't need to have enough for the rest..
+        appender.put(t_a);
+
+        // Only emit transformed groups if we have enough data for them.
+        if (t_b == 0 && t_c == 0 && t_d == 0)
+        {
+            return;
+        }
+        else if (t_c == 0 && t_d == 0)
+        {
+            appender.put(t_b);
+        } 
+        else if (t_d == 0)
+        {
+            appender.put(t_b);
+            appender.put(t_c);
+        }
+        else
+        {
+            appender.put(t_b);
+            appender.put(t_c);
+            appender.put(t_d);
+        }
+    }
+}
+
+/// Test decoding of data which has a length which can be
+/// cleanly decoded.
+unittest
+{
+    {
+        ubyte[] data = cast(ubyte[])"QUJD";
+        assert(data.decodeBase64 == "ABC");
+    }
+
+    {
+        ubyte[] data = cast(ubyte[])"QQ==";
+        assert(data.decodeBase64 == "A");
+    }
+
+    {
+        ubyte[] data = cast(ubyte[])"YSBiIGMgZCBlIGYgZyBoIGkgaiBrIGwgbSBuIG8gcCBxIHIgcyB0IHUgdiB3IHggeSB6";
+        assert(data.decodeBase64 == "a b c d e f g h i j k l m n o p q r s t u v w x y z");
+    }
+
+    {
+        ubyte[] data = cast(ubyte[])"LCAuIDsgLyBbICcgXSBcID0gLSAwIDkgOCA3IDYgNSA0IDMgMiAxIGAgfiAhIEAgIyAkICUgXiAmICogKCApIF8gKyB8IDogPCA+ID8=";
+        assert(data.decodeBase64 == ", . ; / [ ' ] \\ = - 0 9 8 7 6 5 4 3 2 1 ` ~ ! @ # $ % ^ & * ( ) _ + | : < > ?");
+    }
+}
+
+/++
+Encode a ubyte array as Base64, returning the encoded value.
++/
+ubyte[] encodeBase64(scope ubyte[] buf) @safe pure
+{
+    import mir.appender : scopedBuffer;
+    auto app = scopedBuffer!ubyte;
+    encodeBase64(buf, app);
+    return app.data.dup;
+}
+
+/++
+Encode a ubyte array as Base64, placing the result onto an Appender.
++/
+void encodeBase64(Appender)(scope ubyte[] input, ref Appender appender) @safe @nogc pure
+{
+    // Slice our input array so that n % 3 == 0 (we have a multiple of 3) 
+    // If we have less then 3, then this is effectively a no-op (will result in a 0-length slice)
+    ubyte[] window = input[0 .. input.length - (input.length % 3)];
+    foreach(group; window.bytegroup!(3, uint).map!bswap) {
+        const(ubyte) a = (group >> 26) & 0x3f;
+        const(ubyte) b = (group >> 20) & 0x3f;
+        const(ubyte) c = (group >> 14) & 0x3f;
+        const(ubyte) d = (group >> 8) & 0x3f;
+
+        appender.put(a.lookup_encoding);
+        appender.put(b.lookup_encoding);
+        appender.put(c.lookup_encoding);
+        appender.put(d.lookup_encoding);
+    }
+
+    // If it's a clean multiple of 3, then it requires no padding.
+    // If not, then we need to add padding.
+    if (input.length % 3 != 0)
+    {
+        window = input[window.length .. input.length];
+
+        uint group = (window[0] << 24);
+
+        if (window.length == 1) {
+            const(ubyte) a = (group >> 26) & 0x3f;
+            const(ubyte) b = (group >> 20) & 0x3f;
+            appender.put(a.lookup_encoding);
+            appender.put(b.lookup_encoding);
+            appender.put('=');
+            appender.put('=');
+        }
+        else {
+            // Just in case math fails or something
+            assert(window.length == 2);
+
+            group |= (window[1] << 16);
+            const(ubyte) a = (group >> 26) & 0x3f;
+            const(ubyte) b = (group >> 20) & 0x3f;
+            const(ubyte) c = (group >> 14) & 0x3f;
+            appender.put(a.lookup_encoding);
+            appender.put(b.lookup_encoding);
+            appender.put(c.lookup_encoding);
+            appender.put('=');
+        }
+    }
+}
+
+/// Test encoding of data which has a length that can be cleanly
+/// encoded.
+unittest
+{
+    // 3 bytes
+    {
+        ubyte[] data = cast(ubyte[])"ABC";
+        assert(data.encodeBase64 == cast(ubyte[])"QUJD");
+    }
+
+    // 6 bytes
+    {
+        ubyte[] data = cast(ubyte[])"ABCDEF";
+        assert(data.encodeBase64 == cast(ubyte[])"QUJDREVG");
+    }
+
+    // 9 bytes
+    {
+        ubyte[] data = cast(ubyte[])"ABCDEFGHI";
+        assert(data.encodeBase64 == cast(ubyte[])"QUJDREVGR0hJ");
+    }
+
+    // 12 bytes
+    {
+        ubyte[] data = cast(ubyte[])"ABCDEFGHIJKL";
+        assert(data.encodeBase64 == cast(ubyte[])"QUJDREVGR0hJSktM");
+    }
+}
+
+/// Test encoding of data which has a length which CANNOT be cleanly encoded.
+/// This typically means that there's padding.
+unittest
+{
+    // 1 byte 
+    {
+        ubyte[] data = cast(ubyte[])"A";
+        assert(data.encodeBase64 == cast(ubyte[])"QQ==");
+    }
+    // 2 bytes
+    {
+        ubyte[] data = cast(ubyte[])"AB";
+        assert(data.encodeBase64 == cast(ubyte[])"QUI=");
+    }
+    // 4 bytes
+    {
+        ubyte[] data = [0xDE, 0xAD, 0xBA, 0xBE];
+        assert(data.encodeBase64 == cast(ubyte[])"3q26vg==");
+    }
+    // 37 bytes
+    {
+        ubyte[] data = cast(ubyte[])"A Very Very Very Very Large Test Blob";
+        assert(data.encodeBase64 == cast(ubyte[])"QSBWZXJ5IFZlcnkgVmVyeSBWZXJ5IExhcmdlIFRlc3QgQmxvYg==");
+    }
+}
+
+/// Make sure we can decode what we encode.
+unittest
+{
+    // Test an example string
+    {
+        enum ubyte[] data = cast(ubyte[])"abc123!?$*&()'-=@~";
+        assert(data.encodeBase64.decodeBase64 == data);
+    }
+    // Test an example from Ion data
+    {
+        enum ubyte[] data = cast(ubyte[])"a b c d e f g h i j k l m n o p q r s t u v w x y z";
+        assert(data.encodeBase64.decodeBase64 == data);
+    }
+}
\ No newline at end of file

From 74bc1d4a05ecf5823e491a9bebf0d51c2ef5969b Mon Sep 17 00:00:00 2001
From: hatf0 <harrison@0xcc.pw>
Date: Wed, 17 Nov 2021 10:24:31 -0500
Subject: [PATCH 2/4] Add fixes, improve coverage

---
 source/mir/base64.d | 274 ++++++++++++++++++++++++++++++--------------
 1 file changed, 191 insertions(+), 83 deletions(-)

diff --git a/source/mir/base64.d b/source/mir/base64.d
index 08933e46..c645ee2f 100644
--- a/source/mir/base64.d
+++ b/source/mir/base64.d
@@ -7,7 +7,6 @@ Copyright: 2021 Harrison Ford, Kaleidic Associates Advisory Limited, Symmetry In
 +/
 module mir.base64;
 import mir.ndslice.topology;
-import core.bitop : bswap;
 
 package static immutable base64DecodeInvalidCharMsg = "Invalid character encountered.";
 package static immutable base64DecodeInvalidLenMsg = "Cannot decode a buffer with given length (not a multiple of 4, missing padding?)";
@@ -20,7 +19,7 @@ version(D_Exceptions) {
 // Needs further testing to figure out if it *does* work on them.
 
 // Technique borrowed from http://0x80.pl/notesen/2016-01-12-sse-base64-encoding.html#branchless-code-for-lookup-table
-private char lookup_encoding(ubyte i) @safe @nogc pure {
+private char lookup_encoding(char PlusChar = '+', char SlashChar = '/')(ubyte i) @safe @nogc pure {
     assert(i < 64);
 
     ubyte shift;
@@ -43,19 +42,19 @@ private char lookup_encoding(ubyte i) @safe @nogc pure {
     else if (i == 62)
     {
         // character plus
-        shift = cast(ubyte)('+' - 62);
+        shift = cast(ubyte)(PlusChar - 62);
     }
     else if (i == 63)
     {
         // character slash
-        shift = cast(ubyte)('/' - 63);
+        shift = cast(ubyte)(SlashChar - 63);
     }
 
     return cast(char)(i + shift);
 }
 
 // Do the inverse of above (convert an ASCII value into the Base64 character set)
-private ubyte lookup_decoding(char i) @safe @nogc pure
+private ubyte lookup_decoding(char PlusChar = '+', char SlashChar = '/')(char i) @safe @nogc pure
 {
     // Branching bad, but this isn't performance sensitive
     if (i <= 'Z' && i >= 'A') {
@@ -67,10 +66,10 @@ private ubyte lookup_decoding(char i) @safe @nogc pure
     else if (i <= '9' && i >= '0') {
         return cast(ubyte)(i - '0' + 52);
     }
-    else if (i == '+') {
+    else if (i == PlusChar) {
         return 62;
     }
-    else if (i == '/') {
+    else if (i == SlashChar) {
         return 63;
     }
     // Just return 0 for padding,
@@ -91,22 +90,25 @@ private ubyte lookup_decoding(char i) @safe @nogc pure
 /++
 Decode a Base64 encoded value, returning the buffer.
 +/
-ubyte[] decodeBase64(scope ubyte[] buf) @safe pure
+ubyte[] decodeBase64(char PlusChar = '+', char SlashChar = '/')(scope const(char)[] data) @safe pure
 {
     import mir.appender : scopedBuffer;
     auto app = scopedBuffer!ubyte;
-    decodeBase64(buf, app);
+    decodeBase64!(PlusChar, SlashChar)(data, app);
     return app.data.dup;
 }
 
 /++
 Decode a Base64 encoded value, placing the result onto an Appender.
 +/
-void decodeBase64(Appender)(scope ubyte[] input, ref Appender appender) @safe @nogc pure
+void decodeBase64(char PlusChar = '+', char SlashChar = '/', Appender)(scope const(char)[] data,
+                                                                       scope return ref Appender appender) @safe pure
 {
+    import mir.ndslice.slice : sliced;
+    import mir.ndslice.chunks : chunks;
     // We expect data should be well-formed (with padding),
     // so we should throw if it is not well-formed.
-    if (input.length % 4 != 0)
+    if (data.length % 4 != 0)
     {
         version(D_Exceptions) {
             throw base64DecodeInvalidLenException;
@@ -114,47 +116,93 @@ void decodeBase64(Appender)(scope ubyte[] input, ref Appender appender) @safe @n
             assert(0, base64DecodeInvalidLenMsg);
         }
     }
-    foreach(group; input.bytegroup!(4, uint).map!bswap)
+    
+    ubyte[3] decodedByteGroup;
+    ubyte sz = 0;
+    auto groups = data.sliced.chunks(4);
+    for (size_t i = 0; i < groups.length; i++)
     {
-        // We only expect valid ASCII values for these,
-        // hence the 0x7f.
-        const(ubyte) a = lookup_decoding((group >> 24) & 0x7f);
-        const(ubyte) b = lookup_decoding((group >> 16) & 0x7f);
-        const(ubyte) c = lookup_decoding((group >> 8) & 0x7f);
-        const(ubyte) d = lookup_decoding((group) & 0x7f);
-
-        // We do the inverse of how we encoded it...
-        uint transformed_group = (a << 26) | (b << 20) | (c << 14) | (d << 8);
-
-        const(ubyte) t_a = (transformed_group >> 24) & 0xff;
-        const(ubyte) t_b = (transformed_group >> 16) & 0xff;
-        const(ubyte) t_c = (transformed_group >> 8) & 0xff;
-        const(ubyte) t_d = (transformed_group) & 0xff;
-
-        // We should *always* have enough for at least
-        // one, but we don't need to have enough for the rest..
-        appender.put(t_a);
-
-        // Only emit transformed groups if we have enough data for them.
-        if (t_b == 0 && t_c == 0 && t_d == 0)
+        auto group = groups[i];
+
+        ubyte[4] decodedBytes;
+        decodedBytes[0] = lookup_decoding!(PlusChar, SlashChar)(group[0]);
+        decodedBytes[1] = lookup_decoding!(PlusChar, SlashChar)(group[1]);
+
+        uint transformed_group = (decodedBytes[0] << 26) | (decodedBytes[1] << 20);
+
+        // According to RFC4648 Section 3.3, we don't have to accept extra padding characters,
+        // and we can safely throw (and stay within spec).
+        // x=== is also invalid, so we can just throw on that here.
+        if (group[0] == '=' || group[1] == '=')
         {
-            return;
+            version(D_Exceptions)
+                throw base64DecodeInvalidCharException;
+            else
+                assert(0, base64DecodeInvalidCharMsg);
         }
-        else if (t_c == 0 && t_d == 0)
+
+        // xx=(=)?
+        if (group[2] == '=')
         {
-            appender.put(t_b);
-        } 
-        else if (t_d == 0)
+            // If we are not at the end of a string, according to RFC4648,
+            // we can safely treat a padding character as "non-alphabet data",
+            // and as such, we should throw. See RFC4648 Section 3.3 for more information
+            if (i != (groups.length - 1))
+            {
+                version(D_Exceptions)
+                    throw base64DecodeInvalidCharException;
+                else
+                    assert(0, base64DecodeInvalidCharMsg);
+            }
+
+            if (group[3] == '=')
+            {
+                // xx==
+                sz = 1;
+            }
+            // xx=x (invalid)
+            // Padding should not be in the middle of a chunk
+            else
+            {
+                version(D_Exceptions)
+                    throw base64DecodeInvalidCharException;
+                else
+                    assert(0, base64DecodeInvalidCharMsg);
+            }
+        }
+        // xxx=
+        else if (group[3] == '=')
         {
-            appender.put(t_b);
-            appender.put(t_c);
+            // If we are not at the end of a string, according to RFC4648,
+            // we can safely treat a padding character as "non-alphabet data",
+            // and as such, we should throw. See RFC4648 Section 3.3 for more information
+            if (i != (groups.length - 1))
+            {
+                version(D_Exceptions)
+                    throw base64DecodeInvalidCharException;
+                else
+                    assert(0, base64DecodeInvalidCharMsg);
+            }
+
+            decodedBytes[2] = lookup_decoding!(PlusChar, SlashChar)(group[2]);
+            transformed_group |= (decodedBytes[2] << 14);
+            sz = 2;
         }
-        else
+        // xxxx
+        else 
         {
-            appender.put(t_b);
-            appender.put(t_c);
-            appender.put(t_d);
+            decodedBytes[2] = lookup_decoding!(PlusChar, SlashChar)(group[2]);
+            decodedBytes[3] = lookup_decoding!(PlusChar, SlashChar)(group[3]);
+            transformed_group |= ((decodedBytes[2] << 14) | (decodedBytes[3] << 8)); 
+            sz = 3;
         }
+
+        decodedByteGroup[0] = (transformed_group >> 24) & 0xff;
+        decodedByteGroup[1] = (transformed_group >> 16) & 0xff;
+        decodedByteGroup[2] = (transformed_group >> 8) & 0xff;
+
+        // Only emit the transformed bytes that we got data for. 
+        appender.put(decodedByteGroup[0 .. sz]);
     }
 }
 
@@ -163,55 +211,113 @@ void decodeBase64(Appender)(scope ubyte[] input, ref Appender appender) @safe @n
 unittest
 {
     {
-        ubyte[] data = cast(ubyte[])"QUJD";
+        enum data = "QUJD";
         assert(data.decodeBase64 == "ABC");
     }
 
     {
-        ubyte[] data = cast(ubyte[])"QQ==";
+        enum data = "QQ==";
         assert(data.decodeBase64 == "A");
     }
 
     {
-        ubyte[] data = cast(ubyte[])"YSBiIGMgZCBlIGYgZyBoIGkgaiBrIGwgbSBuIG8gcCBxIHIgcyB0IHUgdiB3IHggeSB6";
+        enum data = "YSBiIGMgZCBlIGYgZyBoIGkgaiBrIGwgbSBuIG8gcCBxIHIgcyB0IHUgdiB3IHggeSB6";
         assert(data.decodeBase64 == "a b c d e f g h i j k l m n o p q r s t u v w x y z");
     }
 
     {
-        ubyte[] data = cast(ubyte[])"LCAuIDsgLyBbICcgXSBcID0gLSAwIDkgOCA3IDYgNSA0IDMgMiAxIGAgfiAhIEAgIyAkICUgXiAmICogKCApIF8gKyB8IDogPCA+ID8=";
+        enum data = "LCAuIDsgLyBbICcgXSBcID0gLSAwIDkgOCA3IDYgNSA0IDMgMiAxIGAgfiAhIEAgIyAkICUgXiAmICogKCApIF8gKyB8IDogPCA+ID8=";
         assert(data.decodeBase64 == ", . ; / [ ' ] \\ = - 0 9 8 7 6 5 4 3 2 1 ` ~ ! @ # $ % ^ & * ( ) _ + | : < > ?");
     }
+
+    {
+        enum data = "AAA=";
+        assert(data.decodeBase64 == "\x00\x00");
+    }
+
+    {
+        enum data = "AAAABBCC";
+        assert(data.decodeBase64 == "\x00\x00\x00\x04\x10\x82");
+    }
+
+    {
+        enum data = "AA==";
+        assert(data.decodeBase64 == "\x00");
+    }
+    
+    {
+        enum data = "AA/=";
+        assert(data.decodeBase64 == "\x00\x0f");
+    }
+}
+
+/// Test decoding invalid data
+unittest
+{
+    void testFail(const(char)[] input)
+    {
+        bool thrown = false;
+        try {
+            ubyte[] decoded = input.decodeBase64;
+        } catch (Throwable t) {
+            thrown = true;
+        }
+
+        assert(thrown);
+    }
+
+    testFail("===A");
+    testFail("A=");
+    testFail("AA=");
+    testFail("A=AA");
+    testFail("AA=A");
+    testFail("AA=A====");
+    testFail("=AAA");
+    testFail("AAA=QUJD");
+    // This fails because we don't allow extra padding (than what is necessary)
+    testFail("AA======");
+    // This fails because we don't allow padding before the end of the string (otherwise we'd have a side-channel)
+    testFail("QU==QUJD");
+    testFail("QU======QUJD");
+    // Invalid data that's out of the alphabet
+    testFail("!@##@@!@");
 }
 
 /++
 Encode a ubyte array as Base64, returning the encoded value.
 +/
-ubyte[] encodeBase64(scope ubyte[] buf) @safe pure
+const(char)[] encodeBase64(char PlusChar = '+', char SlashChar = '/')(scope const(ubyte)[] buf) @safe pure
 {
     import mir.appender : scopedBuffer;
-    auto app = scopedBuffer!ubyte;
-    encodeBase64(buf, app);
+    // XXX: is a stringBuf more appropriate here?
+    auto app = scopedBuffer!char;
+    encodeBase64!(PlusChar, SlashChar)(buf, app);
     return app.data.dup;
 }
 
 /++
 Encode a ubyte array as Base64, placing the result onto an Appender.
 +/
-void encodeBase64(Appender)(scope ubyte[] input, ref Appender appender) @safe @nogc pure
+void encodeBase64(char PlusChar = '+', char SlashChar = '/', Appender)(scope const(ubyte)[] input,
+                                                                       scope return ref Appender appender) @safe pure
 {
+    import mir.ndslice.topology : bytegroup, map;
+    import core.bitop : bswap;
     // Slice our input array so that n % 3 == 0 (we have a multiple of 3) 
     // If we have less then 3, then this is effectively a no-op (will result in a 0-length slice)
-    ubyte[] window = input[0 .. input.length - (input.length % 3)];
+    char[4] encodedByteGroup;
+    const(ubyte)[] window = input[0 .. input.length - (input.length % 3)];
     foreach(group; window.bytegroup!(3, uint).map!bswap) {
         const(ubyte) a = (group >> 26) & 0x3f;
         const(ubyte) b = (group >> 20) & 0x3f;
         const(ubyte) c = (group >> 14) & 0x3f;
         const(ubyte) d = (group >> 8) & 0x3f;
 
-        appender.put(a.lookup_encoding);
-        appender.put(b.lookup_encoding);
-        appender.put(c.lookup_encoding);
-        appender.put(d.lookup_encoding);
+        encodedByteGroup[0] = a.lookup_encoding!(PlusChar, SlashChar);
+        encodedByteGroup[1] = b.lookup_encoding!(PlusChar, SlashChar);
+        encodedByteGroup[2] = c.lookup_encoding!(PlusChar, SlashChar);
+        encodedByteGroup[3] = d.lookup_encoding!(PlusChar, SlashChar);
+        appender.put(encodedByteGroup[]);
     }
 
     // If it's a clean multiple of 3, then it requires no padding.
@@ -225,10 +331,11 @@ void encodeBase64(Appender)(scope ubyte[] input, ref Appender appender) @safe @n
         if (window.length == 1) {
             const(ubyte) a = (group >> 26) & 0x3f;
             const(ubyte) b = (group >> 20) & 0x3f;
-            appender.put(a.lookup_encoding);
-            appender.put(b.lookup_encoding);
-            appender.put('=');
-            appender.put('=');
+            encodedByteGroup[0] = a.lookup_encoding!(PlusChar, SlashChar);
+            encodedByteGroup[1] = b.lookup_encoding!(PlusChar, SlashChar);
+            encodedByteGroup[2] = '=';
+            encodedByteGroup[3] = '=';
+            appender.put(encodedByteGroup[]);
         }
         else {
             // Just in case math fails or something
@@ -238,10 +345,11 @@ void encodeBase64(Appender)(scope ubyte[] input, ref Appender appender) @safe @n
             const(ubyte) a = (group >> 26) & 0x3f;
             const(ubyte) b = (group >> 20) & 0x3f;
             const(ubyte) c = (group >> 14) & 0x3f;
-            appender.put(a.lookup_encoding);
-            appender.put(b.lookup_encoding);
-            appender.put(c.lookup_encoding);
-            appender.put('=');
+            encodedByteGroup[0] = a.lookup_encoding!(PlusChar, SlashChar);
+            encodedByteGroup[1] = b.lookup_encoding!(PlusChar, SlashChar);
+            encodedByteGroup[2] = c.lookup_encoding!(PlusChar, SlashChar);
+            encodedByteGroup[3] = '=';
+            appender.put(encodedByteGroup[]);
         }
     }
 }
@@ -252,26 +360,26 @@ unittest
 {
     // 3 bytes
     {
-        ubyte[] data = cast(ubyte[])"ABC";
-        assert(data.encodeBase64 == cast(ubyte[])"QUJD");
+        enum data = cast(ubyte[])"ABC";
+        assert(data.encodeBase64 == "QUJD");
     }
 
     // 6 bytes
     {
-        ubyte[] data = cast(ubyte[])"ABCDEF";
-        assert(data.encodeBase64 == cast(ubyte[])"QUJDREVG");
+        enum data = cast(ubyte[])"ABCDEF";
+        assert(data.encodeBase64 == "QUJDREVG");
     }
 
     // 9 bytes
     {
-        ubyte[] data = cast(ubyte[])"ABCDEFGHI";
-        assert(data.encodeBase64 == cast(ubyte[])"QUJDREVGR0hJ");
+        enum data = cast(ubyte[])"ABCDEFGHI";
+        assert(data.encodeBase64 == "QUJDREVGR0hJ");
     }
 
     // 12 bytes
     {
-        ubyte[] data = cast(ubyte[])"ABCDEFGHIJKL";
-        assert(data.encodeBase64 == cast(ubyte[])"QUJDREVGR0hJSktM");
+        enum data = cast(ubyte[])"ABCDEFGHIJKL";
+        assert(data.encodeBase64 == "QUJDREVGR0hJSktM");
     }
 }
 
@@ -281,23 +389,23 @@ unittest
 {
     // 1 byte 
     {
-        ubyte[] data = cast(ubyte[])"A";
-        assert(data.encodeBase64 == cast(ubyte[])"QQ==");
+        enum data = cast(ubyte[])"A";
+        assert(data.encodeBase64 == "QQ==");
     }
     // 2 bytes
     {
-        ubyte[] data = cast(ubyte[])"AB";
-        assert(data.encodeBase64 == cast(ubyte[])"QUI=");
+        enum data = cast(ubyte[])"AB";
+        assert(data.encodeBase64 == "QUI=");
     }
     // 4 bytes
     {
-        ubyte[] data = [0xDE, 0xAD, 0xBA, 0xBE];
-        assert(data.encodeBase64 == cast(ubyte[])"3q26vg==");
+        enum data = [0xDE, 0xAD, 0xBA, 0xBE];
+        assert(data.encodeBase64 == "3q26vg==");
     }
     // 37 bytes
     {
-        ubyte[] data = cast(ubyte[])"A Very Very Very Very Large Test Blob";
-        assert(data.encodeBase64 == cast(ubyte[])"QSBWZXJ5IFZlcnkgVmVyeSBWZXJ5IExhcmdlIFRlc3QgQmxvYg==");
+        enum data = cast(ubyte[])"A Very Very Very Very Large Test Blob";
+        assert(data.encodeBase64 == "QSBWZXJ5IFZlcnkgVmVyeSBWZXJ5IExhcmdlIFRlc3QgQmxvYg==");
     }
 }
 
@@ -306,12 +414,12 @@ unittest
 {
     // Test an example string
     {
-        enum ubyte[] data = cast(ubyte[])"abc123!?$*&()'-=@~";
+        enum data = cast(ubyte[])"abc123!?$*&()'-=@~";
         assert(data.encodeBase64.decodeBase64 == data);
     }
     // Test an example from Ion data
     {
-        enum ubyte[] data = cast(ubyte[])"a b c d e f g h i j k l m n o p q r s t u v w x y z";
+        enum data = cast(ubyte[])"a b c d e f g h i j k l m n o p q r s t u v w x y z";
         assert(data.encodeBase64.decodeBase64 == data);
     }
 }
\ No newline at end of file

From 217898ee187ddce13a245cb5fd9e42633e3c3384 Mon Sep 17 00:00:00 2001
From: hatf0 <harrison@0xcc.pw>
Date: Wed, 17 Nov 2021 10:32:41 -0500
Subject: [PATCH 3/4] Fix build

---
 source/mir/base64.d | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/source/mir/base64.d b/source/mir/base64.d
index c645ee2f..f305e514 100644
--- a/source/mir/base64.d
+++ b/source/mir/base64.d
@@ -104,8 +104,6 @@ Decode a Base64 encoded value, placing the result onto an Appender.
 void decodeBase64(char PlusChar = '+', char SlashChar = '/', Appender)(scope const(char)[] data,
                                                                        scope return ref Appender appender) @safe pure
 {
-    import mir.ndslice.slice : sliced;
-    import mir.ndslice.chunks : chunks;
     // We expect data should be well-formed (with padding),
     // so we should throw if it is not well-formed.
     if (data.length % 4 != 0)
@@ -119,10 +117,12 @@ void decodeBase64(char PlusChar = '+', char SlashChar = '/', Appender)(scope con
     
     ubyte[3] decodedByteGroup;
     ubyte sz = 0;
-    auto groups = data.sliced.chunks(4);
-    for (size_t i = 0; i < groups.length; i++)
+    
+    // We can't use mir.ndslice.chunk.chunks here, as it violates
+    // the scope requirements.
+    for (size_t i = 0; i < data.length; i += 4)
     {
-        auto group = groups[i];
+        auto group = data[i .. (i + 4)];
 
         ubyte[4] decodedBytes;
         decodedBytes[0] = lookup_decoding!(PlusChar, SlashChar)(group[0]);
@@ -147,7 +147,7 @@ void decodeBase64(char PlusChar = '+', char SlashChar = '/', Appender)(scope con
             // If we are not at the end of a string, according to RFC4648,
             // we can safely treat a padding character as "non-alphabet data",
             // and as such, we should throw. See RFC4648 Section 3.3 for more information
-            if (i != (groups.length - 1))
+            if ((i / 4) != ((data.length / 4) - 1))
             {
                 version(D_Exceptions)
                     throw base64DecodeInvalidCharException;
@@ -176,7 +176,7 @@ void decodeBase64(char PlusChar = '+', char SlashChar = '/', Appender)(scope con
             // If we are not at the end of a string, according to RFC4648,
             // we can safely treat a padding character as "non-alphabet data",
             // and as such, we should throw. See RFC4648 Section 3.3 for more information
-            if (i != (groups.length - 1))
+            if ((i / 4) != ((data.length / 4) - 1))
             {
                 version(D_Exceptions)
                     throw base64DecodeInvalidCharException;

From 511ab90a173b6b2f553d74b0d4e0ee5293ae4bd4 Mon Sep 17 00:00:00 2001
From: hatf0 <harrison@0xcc.pw>
Date: Fri, 19 Nov 2021 10:59:06 -0500
Subject: [PATCH 4/4] Remove Kaleidic from copyright, add fixes requested

---
 source/mir/base64.d | 133 +++++++++++++++++++++++++++-----------------
 1 file changed, 81 insertions(+), 52 deletions(-)

diff --git a/source/mir/base64.d b/source/mir/base64.d
index f305e514..9b754f93 100644
--- a/source/mir/base64.d
+++ b/source/mir/base64.d
@@ -3,7 +3,7 @@ $(H1 @nogc Simple Base64 parsing)
 
 License: $(HTTP www.apache.org/licenses/LICENSE-2.0, Apache-2.0)
 Authors: Harrison Ford
-Copyright: 2021 Harrison Ford, Kaleidic Associates Advisory Limited, Symmetry Investments
+Copyright: 2021 Harrison Ford, Symmetry Investments
 +/
 module mir.base64;
 import mir.ndslice.topology;
@@ -19,7 +19,7 @@ version(D_Exceptions) {
 // Needs further testing to figure out if it *does* work on them.
 
 // Technique borrowed from http://0x80.pl/notesen/2016-01-12-sse-base64-encoding.html#branchless-code-for-lookup-table
-private char lookup_encoding(char PlusChar = '+', char SlashChar = '/')(ubyte i) @safe @nogc pure {
+private char lookup_encoding(ubyte i, char plusChar = '+', char slashChar = '/') @safe @nogc pure {
     assert(i < 64);
 
     ubyte shift;
@@ -42,19 +42,19 @@ private char lookup_encoding(char PlusChar = '+', char SlashChar = '/')(ubyte i)
     else if (i == 62)
     {
         // character plus
-        shift = cast(ubyte)(PlusChar - 62);
+        shift = cast(ubyte)(plusChar - 62);
     }
     else if (i == 63)
     {
         // character slash
-        shift = cast(ubyte)(SlashChar - 63);
+        shift = cast(ubyte)(slashChar - 63);
     }
 
     return cast(char)(i + shift);
 }
 
 // Do the inverse of above (convert an ASCII value into the Base64 character set)
-private ubyte lookup_decoding(char PlusChar = '+', char SlashChar = '/')(char i) @safe @nogc pure
+private ubyte lookup_decoding(char i, char plusChar = '+', char slashChar = '/') @safe @nogc pure
 {
     // Branching bad, but this isn't performance sensitive
     if (i <= 'Z' && i >= 'A') {
@@ -66,10 +66,10 @@ private ubyte lookup_decoding(char PlusChar = '+', char SlashChar = '/')(char i)
     else if (i <= '9' && i >= '0') {
         return cast(ubyte)(i - '0' + 52);
     }
-    else if (i == PlusChar) {
+    else if (i == plusChar) {
         return 62;
     }
-    else if (i == SlashChar) {
+    else if (i == slashChar) {
         return 63;
     }
     // Just return 0 for padding,
@@ -90,19 +90,21 @@ private ubyte lookup_decoding(char PlusChar = '+', char SlashChar = '/')(char i)
 /++
 Decode a Base64 encoded value, returning the buffer.
 +/
-ubyte[] decodeBase64(char PlusChar = '+', char SlashChar = '/')(scope const(char)[] data) @safe pure
+ubyte[] decodeBase64(scope const(char)[] data, char plusChar = '+', char slashChar = '/') @safe pure
 {
     import mir.appender : scopedBuffer;
     auto app = scopedBuffer!ubyte;
-    decodeBase64!(PlusChar, SlashChar)(data, app);
+    decodeBase64(data, app, plusChar, slashChar);
     return app.data.dup;
 }
 
 /++
 Decode a Base64 encoded value, placing the result onto an Appender.
 +/
-void decodeBase64(char PlusChar = '+', char SlashChar = '/', Appender)(scope const(char)[] data,
-                                                                       scope return ref Appender appender) @safe pure
+void decodeBase64(Appender)(scope const(char)[] data,
+                            scope return ref Appender appender,
+                            char plusChar = '+',
+                            char slashChar = '/') @safe pure
 {
     // We expect data should be well-formed (with padding),
     // so we should throw if it is not well-formed.
@@ -117,7 +119,7 @@ void decodeBase64(char PlusChar = '+', char SlashChar = '/', Appender)(scope con
     
     ubyte[3] decodedByteGroup;
     ubyte sz = 0;
-    
+
     // We can't use mir.ndslice.chunk.chunks here, as it violates
     // the scope requirements.
     for (size_t i = 0; i < data.length; i += 4)
@@ -125,8 +127,8 @@ void decodeBase64(char PlusChar = '+', char SlashChar = '/', Appender)(scope con
         auto group = data[i .. (i + 4)];
 
         ubyte[4] decodedBytes;
-        decodedBytes[0] = lookup_decoding!(PlusChar, SlashChar)(group[0]);
-        decodedBytes[1] = lookup_decoding!(PlusChar, SlashChar)(group[1]);
+        decodedBytes[0] = lookup_decoding(group[0], plusChar, slashChar);
+        decodedBytes[1] = lookup_decoding(group[1], plusChar, slashChar);
 
         uint transformed_group = (decodedBytes[0] << 26) | (decodedBytes[1] << 20);
 
@@ -184,15 +186,15 @@ void decodeBase64(char PlusChar = '+', char SlashChar = '/', Appender)(scope con
                     assert(0, base64DecodeInvalidCharMsg);
             }
 
-            decodedBytes[2] = lookup_decoding!(PlusChar, SlashChar)(group[2]);
+            decodedBytes[2] = lookup_decoding(group[2], plusChar, slashChar);
             transformed_group |= (decodedBytes[2] << 14);
             sz = 2;
         }
         // xxxx
         else 
         {
-            decodedBytes[2] = lookup_decoding!(PlusChar, SlashChar)(group[2]);
-            decodedBytes[3] = lookup_decoding!(PlusChar, SlashChar)(group[3]);
+            decodedBytes[2] = lookup_decoding(group[2], plusChar, slashChar);
+            decodedBytes[3] = lookup_decoding(group[3], plusChar, slashChar);
             transformed_group |= ((decodedBytes[2] << 14) | (decodedBytes[3] << 8)); 
             sz = 3;
         }
@@ -208,7 +210,7 @@ void decodeBase64(char PlusChar = '+', char SlashChar = '/', Appender)(scope con
 
 /// Test decoding of data which has a length which can be
 /// cleanly decoded.
-unittest
+@safe pure unittest
 {
     {
         enum data = "QUJD";
@@ -252,14 +254,14 @@ unittest
 }
 
 /// Test decoding invalid data
-unittest
+@safe pure unittest
 {
-    void testFail(const(char)[] input)
+    void testFail(const(char)[] input) @safe pure
     {
         bool thrown = false;
         try {
             ubyte[] decoded = input.decodeBase64;
-        } catch (Throwable t) {
+        } catch (Exception t) {
             thrown = true;
         }
 
@@ -286,23 +288,24 @@ unittest
 /++
 Encode a ubyte array as Base64, returning the encoded value.
 +/
-const(char)[] encodeBase64(char PlusChar = '+', char SlashChar = '/')(scope const(ubyte)[] buf) @safe pure
+const(char)[] encodeBase64(scope const(ubyte)[] buf, char plusChar = '+', char slashChar = '/') @safe pure
 {
     import mir.appender : scopedBuffer;
-    // XXX: is a stringBuf more appropriate here?
     auto app = scopedBuffer!char;
-    encodeBase64!(PlusChar, SlashChar)(buf, app);
+    encodeBase64(buf, app, plusChar, slashChar);
     return app.data.dup;
 }
 
 /++
 Encode a ubyte array as Base64, placing the result onto an Appender.
 +/
-void encodeBase64(char PlusChar = '+', char SlashChar = '/', Appender)(scope const(ubyte)[] input,
-                                                                       scope return ref Appender appender) @safe pure
+void encodeBase64(Appender)(scope const(ubyte)[] input,
+                            scope return ref Appender appender,
+                            char plusChar = '+',
+                            char slashChar = '/') @safe pure
 {
-    import mir.ndslice.topology : bytegroup, map;
     import core.bitop : bswap;
+    import mir.ndslice.topology : bytegroup, map;
     // Slice our input array so that n % 3 == 0 (we have a multiple of 3) 
     // If we have less then 3, then this is effectively a no-op (will result in a 0-length slice)
     char[4] encodedByteGroup;
@@ -313,10 +316,10 @@ void encodeBase64(char PlusChar = '+', char SlashChar = '/', Appender)(scope con
         const(ubyte) c = (group >> 14) & 0x3f;
         const(ubyte) d = (group >> 8) & 0x3f;
 
-        encodedByteGroup[0] = a.lookup_encoding!(PlusChar, SlashChar);
-        encodedByteGroup[1] = b.lookup_encoding!(PlusChar, SlashChar);
-        encodedByteGroup[2] = c.lookup_encoding!(PlusChar, SlashChar);
-        encodedByteGroup[3] = d.lookup_encoding!(PlusChar, SlashChar);
+        encodedByteGroup[0] = a.lookup_encoding(plusChar, slashChar);
+        encodedByteGroup[1] = b.lookup_encoding(plusChar, slashChar);
+        encodedByteGroup[2] = c.lookup_encoding(plusChar, slashChar);
+        encodedByteGroup[3] = d.lookup_encoding(plusChar, slashChar);
         appender.put(encodedByteGroup[]);
     }
 
@@ -331,72 +334,77 @@ void encodeBase64(char PlusChar = '+', char SlashChar = '/', Appender)(scope con
         if (window.length == 1) {
             const(ubyte) a = (group >> 26) & 0x3f;
             const(ubyte) b = (group >> 20) & 0x3f;
-            encodedByteGroup[0] = a.lookup_encoding!(PlusChar, SlashChar);
-            encodedByteGroup[1] = b.lookup_encoding!(PlusChar, SlashChar);
+            encodedByteGroup[0] = a.lookup_encoding(plusChar, slashChar);
+            encodedByteGroup[1] = b.lookup_encoding(plusChar, slashChar);
             encodedByteGroup[2] = '=';
             encodedByteGroup[3] = '=';
-            appender.put(encodedByteGroup[]);
         }
         else {
-            // Just in case math fails or something
+            // Sanity check: this branch expects exactly 2 leftover bytes
             assert(window.length == 2);
 
             group |= (window[1] << 16);
             const(ubyte) a = (group >> 26) & 0x3f;
             const(ubyte) b = (group >> 20) & 0x3f;
             const(ubyte) c = (group >> 14) & 0x3f;
-            encodedByteGroup[0] = a.lookup_encoding!(PlusChar, SlashChar);
-            encodedByteGroup[1] = b.lookup_encoding!(PlusChar, SlashChar);
-            encodedByteGroup[2] = c.lookup_encoding!(PlusChar, SlashChar);
+            encodedByteGroup[0] = a.lookup_encoding(plusChar, slashChar);
+            encodedByteGroup[1] = b.lookup_encoding(plusChar, slashChar);
+            encodedByteGroup[2] = c.lookup_encoding(plusChar, slashChar);
             encodedByteGroup[3] = '=';
-            appender.put(encodedByteGroup[]);
         }
+
+        appender.put(encodedByteGroup[]);
     }
 }
 
 /// Test encoding of data which has a length that can be cleanly
 /// encoded.
-unittest
+@safe pure unittest
 {
     // 3 bytes
     {
-        enum data = cast(ubyte[])"ABC";
+        enum data = cast(immutable(ubyte)[])"ABC";
         assert(data.encodeBase64 == "QUJD");
     }
 
     // 6 bytes
     {
-        enum data = cast(ubyte[])"ABCDEF";
+        enum data = cast(immutable(ubyte)[])"ABCDEF";
         assert(data.encodeBase64 == "QUJDREVG");
     }
 
     // 9 bytes
     {
-        enum data = cast(ubyte[])"ABCDEFGHI";
+        enum data = cast(immutable(ubyte)[])"ABCDEFGHI";
         assert(data.encodeBase64 == "QUJDREVGR0hJ");
     }
 
     // 12 bytes
     {
-        enum data = cast(ubyte[])"ABCDEFGHIJKL";
+        enum data = cast(immutable(ubyte)[])"ABCDEFGHIJKL";
         assert(data.encodeBase64 == "QUJDREVGR0hJSktM");
     }
 }
 
 /// Test encoding of data which has a length which CANNOT be cleanly encoded.
 /// This typically means that there's padding.
-unittest
+@safe pure unittest
 {
     // 1 byte 
     {
-        enum data = cast(ubyte[])"A";
+        enum data = cast(immutable(ubyte)[])"A";
         assert(data.encodeBase64 == "QQ==");
     }
     // 2 bytes
     {
-        enum data = cast(ubyte[])"AB";
+        enum data = cast(immutable(ubyte)[])"AB";
         assert(data.encodeBase64 == "QUI=");
     }
+    // 2 bytes, all bits set (expected output contains the '/' character)
+    {
+        enum data = [0xFF, 0xFF];
+        assert(data.encodeBase64 == "//8=");
+    }
     // 4 bytes
     {
         enum data = [0xDE, 0xAD, 0xBA, 0xBE];
@@ -404,22 +412,43 @@ unittest
     }
     // 37 bytes
     {
-        enum data = cast(ubyte[])"A Very Very Very Very Large Test Blob";
+        enum data = cast(immutable(ubyte)[])"A Very Very Very Very Large Test Blob";
         assert(data.encodeBase64 == "QSBWZXJ5IFZlcnkgVmVyeSBWZXJ5IExhcmdlIFRlc3QgQmxvYg==");
     }
 }
 
+/// Test nogc encoding
+@safe pure @nogc unittest
+{
+    import mir.appender : scopedBuffer;
+
+    {
+        enum data = cast(immutable(ubyte)[])"A Very Very Very Very Large Test Blob";
+        auto appender = scopedBuffer!char();
+        data.encodeBase64(appender); 
+        assert(appender.data == "QSBWZXJ5IFZlcnkgVmVyeSBWZXJ5IExhcmdlIFRlc3QgQmxvYg==");     
+    }
+
+    {
+        enum data = cast(immutable(ubyte)[])"abc123!?$*&()'-=@~";
+        auto appender = scopedBuffer!char();
+        data.encodeBase64(appender);
+        assert(appender.data == "YWJjMTIzIT8kKiYoKSctPUB+");
+    }
+}
+
 /// Make sure we can decode what we encode.
-unittest
+@safe pure unittest
 {
     // Test an example string
     {
-        enum data = cast(ubyte[])"abc123!?$*&()'-=@~";
+        enum data = cast(immutable(ubyte)[])"abc123!?$*&()'-=@~";
         assert(data.encodeBase64.decodeBase64 == data);
     }
     // Test an example from Ion data
     {
-        enum data = cast(ubyte[])"a b c d e f g h i j k l m n o p q r s t u v w x y z";
+        enum data = cast(immutable(ubyte)[])"a b c d e f g h i j k l m n o p q r s t u v w x y z";
         assert(data.encodeBase64.decodeBase64 == data);
     }
-}
\ No newline at end of file
+}
+