Skip to content

Commit d0b7242

Browse files
committed
Add support for signed and unsigned LEB128 to pack/unpack.
This commit adds two new pack/unpack format directives, `R` and `r`, for unsigned and signed LEB128 encoding, respectively. The "r" mnemonic was chosen because this is a "vaRiable"-length encoding scheme. LEB128 is used in various formats including DWARF, WebAssembly, MQTT, and Protobuf. [Feature #21785]
1 parent 73e930f commit d0b7242

File tree

6 files changed

+223
-3
lines changed

6 files changed

+223
-3
lines changed

doc/language/packed_data.rdoc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ These tables summarize the directives for packing and unpacking.
5353

5454
U | UTF-8 character
5555
w | BER-compressed integer
56+
R | LEB128 encoded unsigned integer
57+
r | LEB128 encoded signed integer
5658

5759
=== For Floats
5860

pack.c

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -667,6 +667,56 @@ pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
667667
}
668668
break;
669669

670+
case 'r': /* r for SLEB128 encoding (signed) */
671+
case 'R': /* R for ULEB128 encoding (unsigned) */
672+
{
673+
int pack_flags = INTEGER_PACK_LITTLE_ENDIAN;
674+
675+
if (type == 'r') {
676+
pack_flags |= INTEGER_PACK_2COMP;
677+
}
678+
679+
while (len-- > 0) {
680+
size_t numbytes;
681+
int sign;
682+
char *cp;
683+
684+
from = NEXTFROM;
685+
from = rb_to_int(from);
686+
numbytes = rb_absint_numwords(from, 7, NULL);
687+
if (numbytes == 0)
688+
numbytes = 1;
689+
VALUE buf = rb_str_new(NULL, numbytes);
690+
691+
sign = rb_integer_pack(from, RSTRING_PTR(buf), RSTRING_LEN(buf), 1, 1, pack_flags);
692+
693+
if (sign < 0 && type == 'R') {
694+
rb_raise(rb_eArgError, "can't encode negative numbers in ULEB128");
695+
}
696+
697+
if (type == 'r') {
698+
/* Check if we need an extra byte for sign extension */
699+
unsigned char last_byte = (unsigned char)RSTRING_PTR(buf)[numbytes - 1];
700+
if ((sign >= 0 && (last_byte & 0x40)) || /* positive but sign bit set */
701+
(sign < 0 && !(last_byte & 0x40))) { /* negative but sign bit clear */
702+
/* Need an extra byte */
703+
rb_str_resize(buf, numbytes + 1);
704+
RSTRING_PTR(buf)[numbytes] = sign < 0 ? 0x7f : 0x00;
705+
numbytes++;
706+
}
707+
}
708+
709+
cp = RSTRING_PTR(buf);
710+
while (1 < numbytes) {
711+
*cp |= 0x80;
712+
cp++;
713+
numbytes--;
714+
}
715+
716+
rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf));
717+
}
718+
}
719+
break;
670720
case 'u': /* uuencoded string */
671721
case 'm': /* base64 encoded string */
672722
from = NEXTFROM;
@@ -1558,6 +1608,39 @@ pack_unpack_internal(VALUE str, VALUE fmt, enum unpack_mode mode, long offset)
15581608
}
15591609
break;
15601610

1611+
case 'r':
1612+
case 'R':
1613+
{
1614+
int pack_flags = INTEGER_PACK_LITTLE_ENDIAN;
1615+
1616+
if (type == 'r') {
1617+
pack_flags |= INTEGER_PACK_2COMP;
1618+
}
1619+
char *s0 = s;
1620+
while (len > 0 && s < send) {
1621+
if (*s & 0x80) {
1622+
s++;
1623+
}
1624+
else {
1625+
s++;
1626+
UNPACK_PUSH(rb_integer_unpack(s0, s-s0, 1, 1, pack_flags));
1627+
len--;
1628+
s0 = s;
1629+
}
1630+
}
1631+
/* Handle incomplete value and remaining expected values with nil (only if not using *) */
1632+
if (!star) {
1633+
if (s0 != s && len > 0) {
1634+
UNPACK_PUSH(Qnil);
1635+
len--;
1636+
}
1637+
while (len-- > 0) {
1638+
UNPACK_PUSH(Qnil);
1639+
}
1640+
}
1641+
}
1642+
break;
1643+
15611644
case 'w':
15621645
{
15631646
char *s0 = s;
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
require_relative '../../../spec_helper'
2+
require_relative '../fixtures/classes'
3+
require_relative 'shared/basic'
4+
require_relative 'shared/numeric_basic'
5+
require_relative 'shared/integer'
6+
7+
ruby_version_is "4.0" do
8+
describe "Array#pack with format 'R'" do
9+
it_behaves_like :array_pack_basic, 'R'
10+
it_behaves_like :array_pack_basic_non_float, 'R'
11+
it_behaves_like :array_pack_arguments, 'R'
12+
it_behaves_like :array_pack_numeric_basic, 'R'
13+
it_behaves_like :array_pack_integer, 'R'
14+
end
15+
16+
describe "Array#pack with format 'r'" do
17+
it_behaves_like :array_pack_basic, 'r'
18+
it_behaves_like :array_pack_basic_non_float, 'r'
19+
it_behaves_like :array_pack_arguments, 'r'
20+
it_behaves_like :array_pack_numeric_basic, 'r'
21+
it_behaves_like :array_pack_integer, 'r'
22+
end
23+
end

spec/ruby/core/array/pack/shared/basic.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
# NOTE: it's just a plan of the Ruby core team
3838
it "warns that a directive is unknown" do
3939
# additional directive ('a') is required for the X directive
40-
-> { [@obj, @obj].pack("a R" + pack_format) }.should complain(/unknown pack directive 'R'/)
40+
-> { [@obj, @obj].pack("a K" + pack_format) }.should complain(/unknown pack directive 'K'/)
4141
-> { [@obj, @obj].pack("a 0" + pack_format) }.should complain(/unknown pack directive '0'/)
4242
-> { [@obj, @obj].pack("a :" + pack_format) }.should complain(/unknown pack directive ':'/)
4343
end
@@ -48,7 +48,7 @@
4848
# NOTE: Added this case just to not forget about the decision in the ticket
4949
it "raise ArgumentError when a directive is unknown" do
5050
# additional directive ('a') is required for the X directive
51-
-> { [@obj, @obj].pack("a R" + pack_format) }.should raise_error(ArgumentError, /unknown pack directive 'R'/)
51+
-> { [@obj, @obj].pack("a K" + pack_format) }.should raise_error(ArgumentError, /unknown pack directive 'K'/)
5252
-> { [@obj, @obj].pack("a 0" + pack_format) }.should raise_error(ArgumentError, /unknown pack directive '0'/)
5353
-> { [@obj, @obj].pack("a :" + pack_format) }.should raise_error(ArgumentError, /unknown pack directive ':'/)
5454
end

spec/ruby/core/string/unpack/shared/basic.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
ruby_version_is "3.3" do
1313
# https://bugs.ruby-lang.org/issues/19150
1414
it 'raise ArgumentError when a directive is unknown' do
15-
-> { "abcdefgh".unpack("a R" + unpack_format) }.should raise_error(ArgumentError, /unknown unpack directive 'R'/)
15+
-> { "abcdefgh".unpack("a K" + unpack_format) }.should raise_error(ArgumentError, /unknown unpack directive 'K'/)
1616
-> { "abcdefgh".unpack("a 0" + unpack_format) }.should raise_error(ArgumentError, /unknown unpack directive '0'/)
1717
-> { "abcdefgh".unpack("a :" + unpack_format) }.should raise_error(ArgumentError, /unknown unpack directive ':'/)
1818
end

test/ruby/test_pack.rb

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -936,4 +936,116 @@ class Array
936936
assert_equal "oh no", v
937937
end;
938938
end
939+
940+
# Truncated LEB128 input: a lone "\xFF" has its continuation bit set but no
# following byte, so it never forms a complete value. With an explicit count
# each missing value is reported as nil; with "*" nothing is produced.
def test_unpack_broken_R
  truncated = "\xFF"

  %w[R r].each do |directive|
    # A single incomplete value unpacks to nil.
    assert_equal([nil], truncated.unpack(directive))
    assert_nil(truncated.unpack1(directive))

    # One complete value followed by an incomplete one: the remaining
    # requested slots are filled with nil.
    encoded = [256].pack(directive)
    assert_equal([256, nil, nil, nil], (encoded + truncated).unpack(directive * 4))

    # With "*" the incomplete trailing value is dropped.
    assert_equal([], truncated.unpack("#{directive}*"))
  end
end
955+
956+
# Exercises the 'R' (unsigned LEB128 / ULEB128) directive: packing, rejection
# of negative input, unpacking, and a pack/unpack round trip.
def test_pack_unpack_R
  # ULEB128 encoding (unsigned)
  assert_equal("\x00", [0].pack("R"))
  assert_equal("\x01", [1].pack("R"))
  assert_equal("\x7f", [127].pack("R"))
  assert_equal("\x80\x01", [128].pack("R"))
  assert_equal("\xff\x7f", [0x3fff].pack("R"))
  assert_equal("\x80\x80\x01", [0x4000].pack("R"))
  assert_equal("\xff\xff\xff\xff\x0f", [0xffffffff].pack("R"))
  assert_equal("\x80\x80\x80\x80\x10", [0x100000000].pack("R"))
  assert_equal("\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01", [0xffff_ffff_ffff_ffff].pack("R"))
  assert_equal("\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x1f", [0xffff_ffff_ffff_ffff_ffff_ffff].pack("R"))

  # Multiple values
  assert_equal("\x01\x02", [1, 2].pack("R*"))
  assert_equal("\x7f\x80\x01", [127, 128].pack("R*"))

  # Negative numbers should raise an error
  assert_raise(ArgumentError) { [-1].pack("R") }
  assert_raise(ArgumentError) { [-100].pack("R") }

  # Unpack tests
  assert_equal([0], "\x00".unpack("R"))
  assert_equal([1], "\x01".unpack("R"))
  assert_equal([127], "\x7f".unpack("R"))
  assert_equal([128], "\x80\x01".unpack("R"))
  assert_equal([0x3fff], "\xff\x7f".unpack("R"))
  assert_equal([0x4000], "\x80\x80\x01".unpack("R"))
  assert_equal([0xffffffff], "\xff\xff\xff\xff\x0f".unpack("R"))
  assert_equal([0x100000000], "\x80\x80\x80\x80\x10".unpack("R"))
  assert_equal([0xffff_ffff_ffff_ffff], "\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01".unpack("R"))
  # Decode the 96-bit value; this used to repeat the pack assertion with the
  # arguments swapped, so unpacking it was never checked.
  assert_equal([0xffff_ffff_ffff_ffff_ffff_ffff], "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x1f".unpack("R"))

  # Multiple values
  assert_equal([1, 2], "\x01\x02".unpack("R*"))
  assert_equal([127, 128], "\x7f\x80\x01".unpack("R*"))

  # Round-trip test (includes values wider than 64 bits)
  values = [0, 1, 127, 128, 0x3fff, 0x4000, 0xffffffff, 0x100000000,
            0xffff_ffff_ffff_ffff, 0xffff_ffff_ffff_ffff_ffff_ffff]
  assert_equal(values, values.pack("R*").unpack("R*"))
end
997+
998+
# Exercises the 'r' (signed LEB128 / SLEB128) directive: packing positive and
# negative values (including the sign-extension byte cases such as 127 and
# -128), unpacking, and a pack/unpack round trip.
def test_pack_unpack_r
  # SLEB128 encoding (signed)
  assert_equal("\x00", [0].pack("r"))
  assert_equal("\x01", [1].pack("r"))
  assert_equal("\x7f", [-1].pack("r"))
  assert_equal("\x7e", [-2].pack("r"))
  assert_equal("\xff\x00", [127].pack("r"))
  assert_equal("\x80\x01", [128].pack("r"))
  assert_equal("\x81\x7f", [-127].pack("r"))
  assert_equal("\x80\x7f", [-128].pack("r"))

  # Larger positive numbers
  assert_equal("\xff\xff\x00", [0x3fff].pack("r"))
  assert_equal("\x80\x80\x01", [0x4000].pack("r"))

  # Larger negative numbers
  assert_equal("\x81\x80\x7f", [-0x3fff].pack("r"))
  assert_equal("\x80\x80\x7f", [-0x4000].pack("r"))

  # Very large numbers
  assert_equal("\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x1F", [0xffff_ffff_ffff_ffff_ffff_ffff].pack("r"))
  assert_equal("\x81\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80`", [-0xffff_ffff_ffff_ffff_ffff_ffff].pack("r"))

  # Multiple values
  assert_equal("\x00\x01\x7f", [0, 1, -1].pack("r*"))

  # Unpack tests
  assert_equal([0], "\x00".unpack("r"))
  assert_equal([1], "\x01".unpack("r"))
  assert_equal([-1], "\x7f".unpack("r"))
  assert_equal([-2], "\x7e".unpack("r"))
  assert_equal([127], "\xff\x00".unpack("r"))
  assert_equal([128], "\x80\x01".unpack("r"))
  assert_equal([-127], "\x81\x7f".unpack("r"))
  assert_equal([-128], "\x80\x7f".unpack("r"))

  # Larger numbers
  assert_equal([0x3fff], "\xff\xff\x00".unpack("r"))
  assert_equal([0x4000], "\x80\x80\x01".unpack("r"))
  assert_equal([-0x3fff], "\x81\x80\x7f".unpack("r"))
  assert_equal([-0x4000], "\x80\x80\x7f".unpack("r"))

  # Very large numbers: decode the 96-bit values. These used to repeat the
  # pack assertions above, so unpacking them was never checked.
  assert_equal([0xffff_ffff_ffff_ffff_ffff_ffff], "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x1f".unpack("r"))
  assert_equal([-0xffff_ffff_ffff_ffff_ffff_ffff], "\x81\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80`".unpack("r"))

  # Multiple values
  assert_equal([0, 1, -1], "\x00\x01\x7f".unpack("r*"))

  # Round-trip test (includes values wider than 64 bits)
  values = [0, 1, -1, 127, -127, 128, -128, 0x3fff, -0x3fff, 0x4000, -0x4000,
            0xffff_ffff_ffff_ffff_ffff_ffff, -0xffff_ffff_ffff_ffff_ffff_ffff]
  assert_equal(values, values.pack("r*").unpack("r*"))
end
9391051
end

0 commit comments

Comments
 (0)