Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bin 8 bit support for Ruby ASCII-8BIT data type #45

Merged
merged 5 commits into from
May 18, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
*.bundle
*.gem
*.class
doc
.yardoc
.bundle
Gemfile*
pkg
Expand Down
47 changes: 46 additions & 1 deletion ext/msgpack/packer.h
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,10 @@ static inline void msgpack_packer_write_raw_header(msgpack_packer_t* pk, unsigne
msgpack_buffer_ensure_writable(PACKER_BUFFER_(pk), 1);
unsigned char h = 0xa0 | (uint8_t) n;
msgpack_buffer_write_1(PACKER_BUFFER_(pk), h);
} else if(n < 256) {
msgpack_buffer_ensure_writable(PACKER_BUFFER_(pk), 2);
unsigned char be = (uint8_t) n;
msgpack_buffer_write_byte_and_data(PACKER_BUFFER_(pk), 0xd9, (const void*)&be, 1);
} else if(n < 65536) {
msgpack_buffer_ensure_writable(PACKER_BUFFER_(pk), 3);
uint16_t be = _msgpack_be16(n);
Expand All @@ -282,6 +286,23 @@ static inline void msgpack_packer_write_raw_header(msgpack_packer_t* pk, unsigne
}
}

/*
 * Writes the header of a MessagePack "bin" value whose payload is n bytes.
 *
 * The smallest header that can hold n is chosen:
 *   n < 256    -> 0xc4 followed by a 1-byte length          (2 bytes total)
 *   n < 65536  -> 0xc5 followed by _msgpack_be16(n)         (3 bytes total)
 *   otherwise  -> 0xc6 followed by _msgpack_be32(n)         (5 bytes total)
 *
 * Only the header is written; the caller appends the payload afterwards.
 *
 * pk: packer whose buffer receives the header.
 * n:  payload length in bytes.
 */
static inline void msgpack_packer_write_bin_header(msgpack_packer_t* pk, unsigned int n)
{
    if(n < 256) {
        /* Type byte plus one length byte: reserve 2 bytes, matching the
         * amount actually written below (and the str 8 branch of
         * msgpack_packer_write_raw_header). */
        msgpack_buffer_ensure_writable(PACKER_BUFFER_(pk), 2);
        unsigned char be = (uint8_t) n;
        msgpack_buffer_write_byte_and_data(PACKER_BUFFER_(pk), 0xc4, (const void*)&be, 1);
    } else if(n < 65536) {
        msgpack_buffer_ensure_writable(PACKER_BUFFER_(pk), 3);
        uint16_t be = _msgpack_be16(n);
        msgpack_buffer_write_byte_and_data(PACKER_BUFFER_(pk), 0xc5, (const void*)&be, 2);
    } else {
        msgpack_buffer_ensure_writable(PACKER_BUFFER_(pk), 5);
        uint32_t be = _msgpack_be32(n);
        msgpack_buffer_write_byte_and_data(PACKER_BUFFER_(pk), 0xc6, (const void*)&be, 4);
    }
}

static inline void msgpack_packer_write_array_header(msgpack_packer_t* pk, unsigned int n)
{
if(n < 16) {
Expand Down Expand Up @@ -316,6 +337,25 @@ static inline void msgpack_packer_write_map_header(msgpack_packer_t* pk, unsigne
}
}

/*
 * Returns the decimal text of object's Ruby object_id, obtained by calling
 * object.object_id.to_s.
 *
 * NOTE(review): the returned pointer refers to the storage of a Ruby String
 * created inside this function and not retained anywhere visible here, so it
 * is presumably only valid until the GC is allowed to reclaim that String.
 * Confirm before holding the pointer across further Ruby allocations.
 */
static inline char *object_id_string(VALUE object)
{
    VALUE numeric_id = rb_funcall(object, rb_intern("object_id"), 0);
    VALUE id_text = rb_funcall(numeric_id, rb_intern("to_s"), 0);
    char *id_chars = StringValuePtr(id_text);
    return id_chars;
}

/*
 * Reports whether a Ruby String is tagged with the ASCII-8BIT (binary)
 * encoding.
 *
 * string: the value to inspect; it must respond to #encoding.
 *
 * Returns true when string.encoding is the very same object as
 * Encoding::ASCII_8BIT, false otherwise.
 */
static inline bool is_byte_array(VALUE string)
{
    VALUE ascii_8bit = rb_eval_string("Encoding::ASCII_8BIT");
    VALUE string_encoding = rb_funcall(string, rb_intern("encoding"), 0);

    /* Two references have equal object_ids exactly when they are the same
     * object, so comparing the VALUEs directly gives the same answer as
     * comparing stringified object ids -- without allocating temporary
     * Strings or reading C pointers into objects that nothing keeps alive. */
    return string_encoding == ascii_8bit;
}


void _msgpack_packer_write_string_to_io(msgpack_packer_t* pk, VALUE string);

Expand All @@ -328,7 +368,12 @@ static inline void msgpack_packer_write_string_value(msgpack_packer_t* pk, VALUE
// TODO rb_eArgError?
rb_raise(rb_eArgError, "size of string is too long to pack: %lu bytes should be <= %lu", len, 0xffffffffUL);
}
msgpack_packer_write_raw_header(pk, (unsigned int)len);

if(is_byte_array(v)) {
msgpack_packer_write_bin_header(pk, (unsigned int)len);
} else {
msgpack_packer_write_raw_header(pk, (unsigned int)len);
}
msgpack_buffer_append_string(PACKER_BUFFER_(pk), v);
}

Expand Down
28 changes: 21 additions & 7 deletions ext/msgpack/unpacker.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ static msgpack_rmem_t s_stack_rmem;

#ifdef COMPAT_HAVE_ENCODING /* see compat.h*/
static int s_enc_utf8;
static int s_enc_ascii_8bit;
#endif

void msgpack_unpacker_static_init()
Expand All @@ -40,6 +41,7 @@ void msgpack_unpacker_static_init()

#ifdef COMPAT_HAVE_ENCODING
s_enc_utf8 = rb_utf8_encindex();
s_enc_ascii_8bit = rb_ascii8bit_encindex();
#endif
}

Expand Down Expand Up @@ -150,12 +152,20 @@ static inline int object_complete(msgpack_unpacker_t* uk, VALUE object)
/*
 * Finishes decoding of a text string value.
 *
 * When encoding support is compiled in (COMPAT_HAVE_ENCODING), str is tagged
 * with the UTF-8 encoding index cached in s_enc_utf8. The string is then
 * handed to object_complete(), whose result is returned unchanged.
 */
static inline int object_complete_string(msgpack_unpacker_t* uk, VALUE str)
{
#ifdef COMPAT_HAVE_ENCODING
    ENCODING_SET(str, s_enc_utf8);
#endif
    int status = object_complete(uk, str);
    return status;
}

/*
 * Finishes decoding of a binary (byte array) value.
 *
 * When encoding support is compiled in (COMPAT_HAVE_ENCODING), str is tagged
 * with the ASCII-8BIT encoding index cached in s_enc_ascii_8bit. The string
 * is then handed to object_complete(), whose result is returned unchanged.
 */
static inline int object_complete_byte_array(msgpack_unpacker_t* uk, VALUE str)
{
#ifdef COMPAT_HAVE_ENCODING
    /* TODO ruby 2.0 has String#b method */
    ENCODING_SET(str, s_enc_ascii_8bit);
#endif
    int status = object_complete(uk, str);
    return status;
}

/* stack funcs */
static inline msgpack_unpacker_stack_t* _msgpack_unpacker_stack_top(msgpack_unpacker_t* uk)
{
Expand Down Expand Up @@ -258,7 +268,11 @@ static inline int read_raw_body_begin(msgpack_unpacker_t* uk, bool str)
* because rb_hash_aset freezes keys and it causes copying */
bool will_freeze = is_reading_map_key(uk);
VALUE string = msgpack_buffer_read_top_as_string(UNPACKER_BUFFER_(uk), length, will_freeze);
object_complete_string(uk, string);
if(str == true) {
object_complete_string(uk, string);
} else {
object_complete_byte_array(uk, string);
}
if(will_freeze) {
rb_obj_freeze(string);
}
Expand Down Expand Up @@ -427,7 +441,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
return read_raw_body_begin(uk, true);
}

case 0xdb: // raw 32 / str 16
case 0xdb: // raw 32 / str 32
{
READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 4);
uint32_t count = _msgpack_be32(cb->u32);
Expand All @@ -444,7 +458,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 1);
uint8_t count = cb->u8;
if(count == 0) {
return object_complete_string(uk, rb_str_buf_new(0));
return object_complete_byte_array(uk, rb_str_buf_new(0));
}
/* read_raw_body_begin sets uk->reading_raw */
uk->reading_raw_remaining = count;
Expand All @@ -456,7 +470,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 2);
uint16_t count = _msgpack_be16(cb->u16);
if(count == 0) {
return object_complete_string(uk, rb_str_buf_new(0));
return object_complete_byte_array(uk, rb_str_buf_new(0));
}
/* read_raw_body_begin sets uk->reading_raw */
uk->reading_raw_remaining = count;
Expand All @@ -468,7 +482,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 4);
uint32_t count = _msgpack_be32(cb->u32);
if(count == 0) {
return object_complete_string(uk, rb_str_buf_new(0));
return object_complete_byte_array(uk, rb_str_buf_new(0));
}
/* read_raw_body_begin sets uk->reading_raw */
uk->reading_raw_remaining = count;
Expand Down Expand Up @@ -581,7 +595,7 @@ int msgpack_unpacker_read_map_header(msgpack_unpacker_t* uk, uint32_t* result_si
return 0;
}

int msgpack_unpacker_read(msgpack_unpacker_t* uk, size_t target_stack_depth)
int msgpack_unpacker_read(msgpack_unpacker_t* uk, size_t target_stack_depth)
{
while(true) {
int r = read_primitive(uk);
Expand Down
35 changes: 33 additions & 2 deletions spec/format_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,11 @@
check_raw 1, (1<<5)-1
end

it "raw 8" do
check_raw 2, (1<<8)-1
end

it "raw 16" do
check_raw 3, (1<<5)
check_raw 3, (1<<16)-1
end

Expand All @@ -93,6 +96,26 @@
#check_raw 5, (1<<32)-1 # memory error
end

it "str encoding is UTF_8" do
pack_unpack('string'.force_encoding(Encoding::UTF_8)).encoding.should == Encoding::UTF_8
end

it "bin 8" do
check_bin 2, (1<<8)-1
end

it "bin 16" do
check_bin 3, (1<<16)-1
end

it "bin 32" do
check_bin 5, (1<<16)
end

it "bin encoding is ASCII_8BIT" do
pack_unpack('string'.force_encoding(Encoding::ASCII_8BIT)).encoding.should == Encoding::ASCII_8BIT
end

it "fixarray" do
check_array 1, 0
check_array 1, (1<<4)-1
Expand Down Expand Up @@ -210,7 +233,11 @@ def check(len, obj)
end

def check_raw(overhead, num)
check num+overhead, " "*num
check num+overhead, (" "*num).force_encoding(Encoding::UTF_8)
end

# Runs `check` on a string of +num+ spaces tagged as ASCII-8BIT, expecting
# +num+ payload bytes plus +overhead+ header bytes.
def check_bin(overhead, num)
  expected_size = num + overhead
  payload = (" " * num).force_encoding(Encoding::ASCII_8BIT)
  check(expected_size, payload)
end

def check_array(overhead, num)
Expand All @@ -221,5 +248,9 @@ def match(obj, buf)
raw = obj.to_msgpack.to_s
raw.should == buf
end

# Serializes +obj+ with #to_msgpack and returns the result of unpacking it
# again with MessagePack.unpack.
def pack_unpack(obj)
  packed = obj.to_msgpack
  MessagePack.unpack(packed)
end
end

6 changes: 6 additions & 0 deletions spec/unpacker_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -262,36 +262,42 @@

it "msgpack str 8 type" do
MessagePack.unpack([0xd9, 0x00].pack('C*')).should == ""
MessagePack.unpack([0xd9, 0x00].pack('C*')).encoding.should == Encoding::UTF_8
MessagePack.unpack([0xd9, 0x01].pack('C*') + 'a').should == "a"
MessagePack.unpack([0xd9, 0x02].pack('C*') + 'aa').should == "aa"
end

it "msgpack str 16 type" do
MessagePack.unpack([0xda, 0x00, 0x00].pack('C*')).should == ""
MessagePack.unpack([0xda, 0x00, 0x00].pack('C*')).encoding.should == Encoding::UTF_8
MessagePack.unpack([0xda, 0x00, 0x01].pack('C*') + 'a').should == "a"
MessagePack.unpack([0xda, 0x00, 0x02].pack('C*') + 'aa').should == "aa"
end

it "msgpack str 32 type" do
MessagePack.unpack([0xdb, 0x00, 0x00, 0x00, 0x00].pack('C*')).should == ""
MessagePack.unpack([0xdb, 0x00, 0x00, 0x00, 0x00].pack('C*')).encoding.should == Encoding::UTF_8
MessagePack.unpack([0xdb, 0x00, 0x00, 0x00, 0x01].pack('C*') + 'a').should == "a"
MessagePack.unpack([0xdb, 0x00, 0x00, 0x00, 0x02].pack('C*') + 'aa').should == "aa"
end

it "msgpack bin 8 type" do
MessagePack.unpack([0xc4, 0x00].pack('C*')).should == ""
MessagePack.unpack([0xc4, 0x00].pack('C*')).encoding.should == Encoding::ASCII_8BIT
MessagePack.unpack([0xc4, 0x01].pack('C*') + 'a').should == "a"
MessagePack.unpack([0xc4, 0x02].pack('C*') + 'aa').should == "aa"
end

it "msgpack bin 16 type" do
MessagePack.unpack([0xc5, 0x00, 0x00].pack('C*')).should == ""
MessagePack.unpack([0xc5, 0x00, 0x00].pack('C*')).encoding.should == Encoding::ASCII_8BIT
MessagePack.unpack([0xc5, 0x00, 0x01].pack('C*') + 'a').should == "a"
MessagePack.unpack([0xc5, 0x00, 0x02].pack('C*') + 'aa').should == "aa"
end

it "msgpack bin 32 type" do
MessagePack.unpack([0xc6, 0x00, 0x00, 0x00, 0x00].pack('C*')).should == ""
MessagePack.unpack([0xc6, 0x0, 0x00, 0x00, 0x000].pack('C*')).encoding.should == Encoding::ASCII_8BIT
MessagePack.unpack([0xc6, 0x00, 0x00, 0x00, 0x01].pack('C*') + 'a').should == "a"
MessagePack.unpack([0xc6, 0x00, 0x00, 0x00, 0x02].pack('C*') + 'aa').should == "aa"
end
Expand Down