From 2bea8e114242a22538224b5bdbd1b933727e1155 Mon Sep 17 00:00:00 2001 From: Durran Jordan Date: Sat, 12 Sep 2015 11:51:23 +0200 Subject: [PATCH 01/29] Allocate a native byte buffer --- ext/bson/extconf.rb | 5 + ext/bson/native.c | 731 +--------------------------------- lib/bson.rb | 1 + lib/bson/byte_buffer.rb | 4 + spec/bson/byte_buffer_spec.rb | 15 + 5 files changed, 44 insertions(+), 712 deletions(-) create mode 100644 lib/bson/byte_buffer.rb create mode 100644 spec/bson/byte_buffer_spec.rb diff --git a/ext/bson/extconf.rb b/ext/bson/extconf.rb index 697ba8458..d49500657 100644 --- a/ext/bson/extconf.rb +++ b/ext/bson/extconf.rb @@ -1,3 +1,8 @@ require "mkmf" $CFLAGS << " -Wall -g -std=c99" + +HEADER_DIRS = [ '/usr/local/include/libbson-1.0' ] +LIB_DIRS = [ '/usr/local/lib' ] + +dir_config('bson', HEADER_DIRS, LIB_DIRS) create_makefile("native") diff --git a/ext/bson/native.c b/ext/bson/native.c index 1b60365c3..d18e8408f 100644 --- a/ext/bson/native.c +++ b/ext/bson/native.c @@ -13,733 +13,40 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#ifdef _WIN32 -#include -#else -#include -#include -#endif - -#include -#include -#include #include +#include -/** - * For 64 byte systems we convert to longs, for 32 byte systems we convert - * to a long long. - * - * @since 2.0.0 - */ -#if SIZEOF_LONG == 8 -#define NUM2INT64(v) NUM2LONG(v) -#define INT642NUM(v) LONG2NUM(v) -#else -#define NUM2INT64(v) NUM2LL(v) -#define INT642NUM(v) LL2NUM(v) -#endif - -/** - * Ruby 1.8.7 does not define DBL2NUM, so we define it if it's not there. - * - * @since 2.0.0 - */ -#ifndef DBL2NUM -#define DBL2NUM(dbl) rb_float_new(dbl) -#endif - -/** - * Define the max hostname hash length constant if nonexistant. - * - * @since 3.2.0 - */ -#ifndef HOST_NAME_HASH_MAX -#define HOST_NAME_HASH_MAX 256 -#endif - -/** - * Define index sizes for array serialization. 
- * - * @since 2.0.0 - */ -#define BSON_INDEX_SIZE 1024 -#define BSON_INDEX_CHAR_SIZE 5 -#define INTEGER_CHAR_SIZE 22 - -/** - * Constant for the intetger array indexes. - * - * @since 2.0.0 - */ -static char rb_bson_array_indexes[BSON_INDEX_SIZE][BSON_INDEX_CHAR_SIZE]; - -/** - * BSON::UTF8 - * - * @since 2.0.0 - */ -static VALUE rb_bson_utf8_string; - -/** - * Set the UTC string method for reference at load. - * - * @since 2.0.0 - */ -static VALUE rb_utc_method; - -#include - -#if __BYTE_ORDER == __BIG_ENDIAN - typedef union doublebyte -{ - double d; - unsigned char b[sizeof(double)]; -} doublebytet; -#endif - -/** - * Convert the binary string to a ruby utf8 string. - * - * @example Convert the string to binary. - * rb_bson_from_bson_string("test"); - * - * @param [ String ] string The ruby string. - * - * @return [ String ] The encoded string. - * - * @since 2.0.0 - */ -static VALUE rb_bson_from_bson_string(VALUE string) -{ - return rb_enc_associate(string, rb_utf8_encoding()); -} +static VALUE rb_bson_byte_buffer_allocate(VALUE klass); -/** - * Provide default new string with binary encoding. - * - * @example Check encoded and provide default new binary encoded string. - * if (NIL_P(encoded)) encoded = rb_str_new_encoded_binary(); - * - * @return [ String ] The new string with binary encoding. - * - * @since 2.0.0 - */ -static VALUE rb_str_new_encoded_binary(void) -{ - return rb_enc_str_new("", 0, rb_ascii8bit_encoding()); -} +static size_t rb_bson_byte_buffer_memsize(const void *ptr); -/** - * Constant for a null byte. - * - * @since 2.0.0 - */ -static const char rb_bson_null_byte = 0; - -/** - * Constant for a true byte. - * - * @since 2.0.0 - */ -static const char rb_bson_true_byte = 1; +static const rb_data_type_t rb_bson_data_type = { + "bson/byte_buffer", + { NULL, RUBY_DEFAULT_FREE, rb_bson_byte_buffer_memsize } +}; -/** - * Holds the machine id hash for object id generation. 
- * - * @since 3.2.0 - * - */ -static char rb_bson_machine_id_hash[HOST_NAME_HASH_MAX]; - -/** - * The counter for incrementing object ids. - * - * @since 2.0.0 - */ -static unsigned int rb_bson_object_id_counter = 0; - -/** - * Take the provided params and return the encoded bytes or a default one. - * - * @example Get the default encoded bytes. - * rb_get_default_encoded(1, bytes); - * - * @param [ int ] argc The number of arguments. - * @param [ Object ] argv The arguments. - * - * @return [ String ] The encoded string. - * - * @since 2.0.0 - */ -static VALUE rb_get_default_encoded(int argc, VALUE *argv) -{ - VALUE encoded; - rb_scan_args(argc, argv, "01", &encoded); - if (NIL_P(encoded)) encoded = rb_str_new_encoded_binary(); - return encoded; -} - -/** - * Append the ruby float as 8-byte double value to buffer. - * - * @example Convert float to double and append. - * rb_float_to_bson(..., 1.2311); - * - * @param [ String] encoded Optional string buffer, default provided by rb_str_encoded_binary - * @param [ Float ] self The ruby float value. - * - * @return [ String ] The encoded bytes with double value appended. - * - * @since 2.0.0 - */ -static VALUE rb_float_to_bson(int argc, VALUE *argv, VALUE self) -{ - const double v = NUM2DBL(self); - VALUE encoded = rb_get_default_encoded(argc, argv); - # if __BYTE_ORDER == __LITTLE_ENDIAN - rb_str_cat(encoded, (char*) &v, 8); - #elif __BYTE_ORDER == __BIG_ENDIAN - doublebytet swap; - unsigned char b; - swap.d = v; - for (int i=0; i < sizeof(double)/2; i++) { - b=swap.b[i]; - swap.b[i] = swap.b[((sizeof(double)-1)-i)]; - swap.b[((sizeof(double)-1)-i)]=b; - } - rb_str_cat(encoded, (char*)&swap.d, 8); - #endif - return encoded; -} - -/** - * Convert the bytes for the double into a Ruby float. - * - * @example Convert the bytes to a float. - * rb_float_from_bson_double(class, bytes); - * - * @param [ Class ] The float class. - * @param [ String ] The double bytes. - * - * @return [ Float ] The ruby float value. 
- * - * @since 2.0.0 - */ -static VALUE rb_float_from_bson_double(VALUE self, VALUE value) -{ - const char * bytes; - double v; - bytes = StringValuePtr(value); -#if __BYTE_ORDER == __LITTLE_ENDIAN - memcpy(&v, bytes, RSTRING_LEN(value)); -#else - doublebytet swap; - unsigned char b; - memcpy(&swap.d, bytes, RSTRING_LEN(value)); - for (int i=0; i < sizeof(double)/2; i++) { - b=swap.b[i]; - swap.b[i] = swap.b[((sizeof(double)-1)-i)]; - swap.b[((sizeof(double)-1)-i)]=b; - } - memcpy(&v, swap.b, RSTRING_LEN(value)); -#endif - - return DBL2NUM(v); -} - -/** - * Generate the data for the next object id. - * - * @example Generate the data for the next object id. - * rb_object_id_generator_next(0, NULL, object_id); - * - * @param [ int ] argc The argument count. - * @param [ Time ] time The optional Ruby time. - * @param [ BSON::ObjectId ] self The object id. - * - * @return [ String ] The raw bytes for the id. - * - * @since 2.0.0 - */ -static VALUE rb_object_id_generator_next(int argc, VALUE* args, VALUE self) -{ - char bytes[12]; - unsigned long t; - unsigned short pid = htons(getpid()); - - if (argc == 0 || (argc == 1 && *args == Qnil)) { - t = htonl((int) time(NULL)); - } - else { - t = htonl(NUM2UINT(rb_funcall(*args, rb_intern("to_i"), 0))); - } - - unsigned long c; - c = htonl(rb_bson_object_id_counter << 8); - -# if __BYTE_ORDER == __LITTLE_ENDIAN - memcpy(&bytes, &t, 4); - memcpy(&bytes[4], rb_bson_machine_id_hash, 3); - memcpy(&bytes[7], &pid, 2); - memcpy(&bytes[9], (unsigned char*) &c, 3); -#elif __BYTE_ORDER == __BIG_ENDIAN - memcpy(&bytes, ((unsigned char*) &t) + 4, 4); - memcpy(&bytes[4], rb_bson_machine_id_hash, 3); - memcpy(&bytes[7], &pid, 2); - memcpy(&bytes[9], ((unsigned char*) &c) + 4, 3); -#endif - rb_bson_object_id_counter++; - return rb_str_new(bytes, 12); -} - -/** - * Check if the integer is a 32 bit integer. - * - * @example Check if the integer is 32 bit. - * rb_integer_is_bson_int32(integer); - * - * @param [ Integer ] self The ruby integer. 
- * - * @return [ true, false ] If the integer is 32 bit. - * - * @since 2.0.0 - */ -static VALUE rb_integer_is_bson_int32(VALUE self) -{ - const int64_t v = NUM2INT64(self); - if (INT_MIN <= v && v <= INT_MAX) { - return Qtrue; - } - else { - return Qfalse; - } -} - -/** - * Convert the Ruby integer into a BSON as per the 32 bit specification, - * which is 4 bytes. - * - * @example Convert the integer to 32bit BSON. - * rb_integer_to_bson_int32(128, encoded); - * - * @param [ Integer ] self The Ruby integer. - * @param [ String ] encoded The Ruby binary string to append to. - * - * @return [ String ] encoded Ruby binary string with BSON raw bytes appended. - * - * @since 2.0.0 - */ -static VALUE rb_integer_to_bson_int32(VALUE self, VALUE encoded) -{ - const int32_t v = NUM2INT(self); - const char bytes[4] = { - v & 255, - (v >> 8) & 255, - (v >> 16) & 255, - (v >> 24) & 255 - }; - return rb_str_cat(encoded, bytes, 4); -} - -/** - * Initialize the bson array index for integers. - * - * @example Initialize the array. - * rb_bson_init_integer_bson_array_indexes(); - * - * @since 2.0.0 - */ -static void rb_bson_init_integer_bson_array_indexes(void) -{ - int i; - for (i = 0; i < BSON_INDEX_SIZE; i++) { - snprintf(rb_bson_array_indexes[i], BSON_INDEX_CHAR_SIZE, "%d", i); - } -} - -/** - * Convert the Ruby integer into a character string and append with nullchar to encoded BSON. - * - * @example Convert the integer to string and append with nullchar. - * rb_integer_to_bson_key(128, encoded); - * - * @param [ Integer ] self The Ruby integer. - * @param [ String ] encoded The Ruby binary string to append to. - * - * @return [ String ] encoded Ruby binary string with BSON raw bytes appended. 
- * - * @since 2.0.0 - */ -static VALUE rb_integer_to_bson_key(int argc, VALUE *argv, VALUE self) -{ - char bytes[INTEGER_CHAR_SIZE]; - const int64_t v = NUM2INT64(self); - VALUE encoded = rb_get_default_encoded(argc, argv); - int length; - if (v < BSON_INDEX_SIZE) - return rb_str_cat(encoded, rb_bson_array_indexes[v], strlen(rb_bson_array_indexes[v]) + 1); - length = snprintf(bytes, INTEGER_CHAR_SIZE, "%ld", (long)v); - return rb_str_cat(encoded, bytes, length + 1); -} - -/** - * Convert the provided raw bytes into a 32bit Ruby integer. - * - * @example Convert the bytes to an Integer. - * rb_integer_from_bson_int32(Int32, bytes); - * - * @param [ BSON::Int32 ] self The Int32 eigenclass. - * @param [ String ] bytes The raw bytes. - * - * @return [ Integer ] The Ruby integer. - * - * @since 2.0.0 - */ -static VALUE rb_integer_from_bson_int32(VALUE self, VALUE bson) -{ - const uint8_t *v = (const uint8_t*) StringValuePtr(bson); - const int32_t integer = v[0] + (v[1] << 8) + (v[2] << 16) + (v[3] << 24); - return INT2NUM(integer); -} - -/** - * Convert the raw BSON bytes into an int64_t type. - * - * @example Convert the bytes into an int64_t. - * rb_bson_to_int64_t(bson); - * - * @param [ String ] bson The raw bytes. - * - * @return [ int64_t ] The int64_t. - * - * @since 2.0.0 - */ -static int64_t rb_bson_to_int64_t(VALUE bson) -{ - uint8_t *v; - uint32_t byte_0, byte_1; - int64_t byte_2, byte_3; - int64_t lower, upper; - v = (uint8_t*) StringValuePtr(bson); - byte_0 = v[0]; - byte_1 = v[1]; - byte_2 = v[2]; - byte_3 = v[3]; - lower = byte_0 + (byte_1 << 8) + (byte_2 << 16) + (byte_3 << 24); - byte_0 = v[4]; - byte_1 = v[5]; - byte_2 = v[6]; - byte_3 = v[7]; - upper = byte_0 + (byte_1 << 8) + (byte_2 << 16) + (byte_3 << 24); - return lower + (upper << 32); -} - -/** - * Convert the provided raw bytes into a 64bit Ruby integer. - * - * @example Convert the bytes to an Integer. 
- * rb_integer_from_bson_int64(Int64, bytes); - * - * @param [ BSON::Int64 ] self The Int64 eigenclass. - * @param [ String ] bytes The raw bytes. - * - * @return [ Integer ] The Ruby integer. - * - * @since 2.0.0 - */ -static VALUE rb_integer_from_bson_int64(VALUE self, VALUE bson) -{ - return INT642NUM(rb_bson_to_int64_t(bson)); -} - -/** - * Append the 64-bit integer to encoded BSON Ruby binary string. - * - * @example Append the 64-bit integer to encoded BSON. - * int64_t_to_bson(128, encoded); - * - * @param [ int64_t ] self The 64-bit integer. - * @param [ String ] encoded The BSON Ruby binary string to append to. - * - * @return [ String ] encoded Ruby binary string with BSON raw bytes appended. - * - * @since 2.0.0 - */ -static VALUE int64_t_to_bson(int64_t v, VALUE encoded) -{ - const char bytes[8] = { - v & 255, - (v >> 8) & 255, - (v >> 16) & 255, - (v >> 24) & 255, - (v >> 32) & 255, - (v >> 40) & 255, - (v >> 48) & 255, - (v >> 56) & 255 - }; - return rb_str_cat(encoded, bytes, 8); -} - -/** - * Convert the Ruby integer into a BSON as per the 64 bit specification, - * which is 8 bytes. - * - * @example Convert the integer to 64bit BSON. - * rb_integer_to_bson_int64(128, encoded); - * - * @param [ Integer ] self The Ruby integer. - * @param [ String ] encoded The Ruby binary string to append to. - * - * @return [ String ] encoded Ruby binary string with BSON raw bytes appended. - * - * @since 2.0.0 - */ -static VALUE rb_integer_to_bson_int64(VALUE self, VALUE encoded) -{ - return int64_t_to_bson(NUM2INT64(self), StringValue(encoded)); -} - -/** - * Converts the milliseconds time to the raw BSON bytes. We need to - * explicitly convert using 64 bit here. - * - * @example Convert the milliseconds value to BSON bytes. - * rb_time_to_bson(time, 2124132340000, encoded); - * - * @param [ Time ] self The Ruby Time object. - * @param [ Integer ] milliseconds The milliseconds pre/post epoch. - * @param [ String ] encoded The Ruby binary string to append to. 
- * - * @return [ String ] encoded Ruby binary string with time BSON raw bytes appended. - * - * @since 2.0.0 - */ -static VALUE rb_time_to_bson(int argc, VALUE *argv, VALUE self) -{ - int64_t t = NUM2INT64(rb_funcall(self, rb_intern("to_i"), 0)); - int64_t milliseconds = (int64_t)(t * 1000); - int32_t micro = NUM2INT(rb_funcall(self, rb_intern("usec"), 0)); - int64_t time = milliseconds + (micro / 1000); - VALUE encoded = rb_get_default_encoded(argc, argv); - return int64_t_to_bson(time, encoded); -} - -/** - * Converts the raw BSON bytes into a UTC Ruby time. - * - * @example Convert the bytes to a Ruby time. - * rb_time_from_bson(time, bytes); - * - * @param [ Class ] self The Ruby Time class. - * @param [ String ] bytes The raw BSON bytes. - * - * @return [ Time ] The UTC time. - * - * @since 2.0.0 - */ -static VALUE rb_time_from_bson(VALUE self, VALUE bytes) -{ - const int64_t millis = rb_bson_to_int64_t(bytes); - const VALUE time = rb_time_new(millis / 1000, (millis % 1000) * 1000); - return rb_funcall(time, rb_utc_method, 0); -} - -/** - * Set four bytes for int32 in a binary string and return it. - * - * @example Set int32 in a BSON string. - * rb_string_set_int32(self, pos, int32) - * - * @param [ String ] self The Ruby binary string. - * @param [ Fixnum ] The position to set. - * @param [ Fixnum ] The int32 value. - * - * @return [ String ] The binary string. - * - * @since 2.0.0 - */ -static VALUE rb_string_set_int32(VALUE str, VALUE pos, VALUE an_int32) -{ - const int32_t offset = NUM2INT(pos); - const int32_t v = NUM2INT(an_int32); - const char bytes[4] = { - v & 255, - (v >> 8) & 255, - (v >> 16) & 255, - (v >> 24) & 255 - }; - rb_str_modify(str); - if (offset < 0 || offset + 4 > RSTRING_LEN(str)) { - rb_raise(rb_eArgError, "invalid position"); - } - memcpy(RSTRING_PTR(str) + offset, bytes, 4); - return str; -} - -/** - * Check for illegal characters in string. - * - * @example Check for illegal characters. 
- * rb_string_check_for_illegal_characters("test"); - * - * @param [ String ] self The string value. - * - * @since 2.0.0 - */ -static VALUE rb_string_check_for_illegal_characters(VALUE self) -{ - if (strlen(RSTRING_PTR(self)) != (size_t) RSTRING_LEN(self)) - rb_raise(rb_eArgError, "Illegal C-String contains a null byte."); - return self; -} - -/** - * Encode a false value to bson. - * - * @example Encode the false value. - * rb_false_class_to_bson(0, false); - * - * @param [ int ] argc The number or arguments. - * @param [ Array ] argv The arguments. - * @param [ TrueClass ] self The true value. - * - * @return [ String ] The encoded string. - * - * @since 2.0.0 - */ -static VALUE rb_false_class_to_bson(int argc, VALUE *argv, VALUE self) +void Init_native() { - VALUE encoded = rb_get_default_encoded(argc, argv); - rb_str_cat(encoded, &rb_bson_null_byte, 1); - return encoded; -} + VALUE bson_module = rb_define_module("BSON"); + VALUE byte_buffer_class = rb_define_class_under(bson_module, "ByteBuffer", rb_cObject); -/** - * Encode a true value to bson. - * - * @example Encode the true value. - * rb_true_class_to_bson(0, true); - * - * @param [ int ] argc The number or arguments. - * @param [ Array ] argv The arguments. - * @param [ TrueClass ] self The true value. - * - * @return [ String ] The encoded string. - * - * @since 2.0.0 - */ -static VALUE rb_true_class_to_bson(int argc, VALUE *argv, VALUE self) -{ - VALUE encoded = rb_get_default_encoded(argc, argv); - rb_str_cat(encoded, &rb_bson_true_byte, 1); - return encoded; + rb_define_alloc_func(byte_buffer_class, rb_bson_byte_buffer_allocate); } /** - * Decode a string from bson. - * - * @example Decode a string. - * rb_bson_string_from_bson(string, io); - * - * @param [ String ] self The string class. - * @param [ IO ] bson The io stream of BSON. - * - * @return [ String ] The decoded string. - * - * @since 3.2.5 + * Allocates a bson byte buffer that wraps a bson_t into memory. 
*/ -static VALUE rb_bson_string_from_bson(VALUE self, VALUE bson) +VALUE rb_bson_byte_buffer_allocate(VALUE klass) { - ID read_method = rb_intern("read"); - VALUE int_bytes = rb_funcall(bson, read_method, 1, 4); - VALUE size = rb_integer_from_bson_int32(self, int_bytes); - VALUE string_bytes = rb_funcall(bson, read_method, 1, size - 1); - return rb_bson_from_bson_string(string_bytes); + bson_t *bson; + VALUE obj = TypedData_Make_Struct(klass, bson_t, &rb_bson_data_type, bson); + return obj; } /** - * Initialize the bson c extension. - * - * @since 2.0.0 + * Get the size of the bson_t in memory. */ -void Init_native() +size_t rb_bson_byte_buffer_memsize(const void *ptr) { - // Get all the constants to be used in the extensions. - VALUE bson = rb_const_get(rb_cObject, rb_intern("BSON")); - VALUE integer = rb_const_get(bson, rb_intern("Integer")); - VALUE floats = rb_const_get(bson, rb_intern("Float")); - VALUE float_class = rb_const_get(floats, rb_intern("ClassMethods")); - VALUE time = rb_const_get(bson, rb_intern("Time")); - VALUE time_class = rb_singleton_class(time); - VALUE int32 = rb_const_get(bson, rb_intern("Int32")); - VALUE int32_class = rb_singleton_class(int32); - VALUE int64 = rb_const_get(bson, rb_intern("Int64")); - VALUE int64_class = rb_singleton_class(int64); - VALUE object_id = rb_const_get(bson, rb_intern("ObjectId")); - VALUE generator = rb_const_get(object_id, rb_intern("Generator")); - VALUE string = rb_const_get(bson, rb_intern("String")); - VALUE string_class = rb_singleton_class(string); - VALUE true_class = rb_const_get(bson, rb_intern("TrueClass")); - VALUE false_class = rb_const_get(bson, rb_intern("FalseClass")); - // needed to hash the machine id - rb_require("digest/md5"); - VALUE digest_class = rb_const_get(rb_cObject, rb_intern("Digest")); - VALUE md5_class = rb_const_get(digest_class, rb_intern("MD5")); - rb_bson_utf8_string = rb_const_get(bson, rb_intern("UTF8")); - rb_utc_method = rb_intern("utc"); - - // Get the object id machine 
id and hash it. - char rb_bson_machine_id[256]; - gethostname(rb_bson_machine_id, sizeof rb_bson_machine_id); - rb_bson_machine_id[255] = '\0'; - VALUE digest = rb_funcall(md5_class, rb_intern("digest"), 1, rb_str_new2(rb_bson_machine_id)); - memcpy(rb_bson_machine_id_hash, RSTRING_PTR(digest), RSTRING_LEN(digest)); - - // Integer optimizations. - rb_undef_method(integer, "to_bson_int32"); - rb_define_method(integer, "to_bson_int32", rb_integer_to_bson_int32, 1); - rb_undef_method(integer, "to_bson_int64"); - rb_define_method(integer, "to_bson_int64", rb_integer_to_bson_int64, 1); - rb_undef_method(integer, "bson_int32?"); - rb_define_method(integer, "bson_int32?", rb_integer_is_bson_int32, 0); - rb_bson_init_integer_bson_array_indexes(); - rb_undef_method(integer, "to_bson_key"); - rb_define_method(integer, "to_bson_key", rb_integer_to_bson_key, -1); - rb_undef_method(int32_class, "from_bson_int32"); - rb_define_private_method(int32_class, "from_bson_int32", rb_integer_from_bson_int32, 1); - rb_undef_method(int64_class, "from_bson_int64"); - rb_define_private_method(int64_class, "from_bson_int64", rb_integer_from_bson_int64, 1); - - // Float optimizations. - rb_undef_method(floats, "to_bson"); - rb_define_method(floats, "to_bson", rb_float_to_bson, -1); - rb_undef_method(float_class, "from_bson_double"); - rb_define_private_method(float_class, "from_bson_double", rb_float_from_bson_double, 1); - - // Boolean optimizations - deserialization has no benefit so we provide - // no extensions there. - rb_undef_method(true_class, "to_bson"); - rb_define_method(true_class, "to_bson", rb_true_class_to_bson, -1); - rb_undef_method(false_class, "to_bson"); - rb_define_method(false_class, "to_bson", rb_false_class_to_bson, -1); - - // Optimizations around time serialization and deserialization. 
- rb_undef_method(time, "to_bson"); - rb_define_method(time, "to_bson", rb_time_to_bson, -1); - rb_undef_method(time_class, "from_bson"); - rb_define_method(time_class, "from_bson", rb_time_from_bson, 1); - - // String optimizations. - rb_undef_method(string, "set_int32"); - rb_define_method(string, "set_int32", rb_string_set_int32, 2); - rb_undef_method(string, "from_bson_string"); - rb_define_method(string, "from_bson_string", rb_bson_from_bson_string, 0); - rb_undef_method(string_class, "from_bson"); - rb_define_method(string_class, "from_bson", rb_bson_string_from_bson, 1); - rb_undef_method(string, "check_for_illegal_characters!"); - rb_define_private_method(string, "check_for_illegal_characters!", rb_string_check_for_illegal_characters, 0); - - // Redefine the next method on the object id generator. - rb_undef_method(generator, "next_object_id"); - rb_define_method(generator, "next_object_id", rb_object_id_generator_next, -1); + return ptr ? sizeof(bson_t) : 0; } diff --git a/lib/bson.rb b/lib/bson.rb index 4566e62b0..a7e2e95e8 100644 --- a/lib/bson.rb +++ b/lib/bson.rb @@ -59,6 +59,7 @@ def self.ObjectId(string) require "bson/registry" require "bson/specialized" require "bson/json" +require "bson/byte_buffer" require "bson/int32" require "bson/int64" require "bson/integer" diff --git a/lib/bson/byte_buffer.rb b/lib/bson/byte_buffer.rb new file mode 100644 index 000000000..2747de68a --- /dev/null +++ b/lib/bson/byte_buffer.rb @@ -0,0 +1,4 @@ +module BSON + class ByteBuffer + end +end diff --git a/spec/bson/byte_buffer_spec.rb b/spec/bson/byte_buffer_spec.rb new file mode 100644 index 000000000..968e20a7d --- /dev/null +++ b/spec/bson/byte_buffer_spec.rb @@ -0,0 +1,15 @@ +require 'spec_helper' + +describe BSON::ByteBuffer do + + describe '#allocate' do + + let(:buffer) do + described_class.allocate + end + + it 'allocates a buffer' do + expect(buffer).to be_a(BSON::ByteBuffer) + end + end +end From dbcf6f97f0b34dc7911d86ea5dfa63332dcc3052 Mon Sep 17 00:00:00 
2001 From: Durran Jordan Date: Thu, 17 Sep 2015 13:23:21 +0200 Subject: [PATCH 02/29] RUBY-1019: First pass document buffer serialization --- ext/bson/extconf.rb | 5 - ext/bson/native.c | 245 +++++++++++++++++++++++++++-- ext/bson/portable_endian.h | 118 ++++++++++++++ lib/bson/array.rb | 17 ++- lib/bson/byte_buffer.rb | 148 ++++++++++++++++++ lib/bson/false_class.rb | 4 +- lib/bson/float.rb | 4 +- lib/bson/hash.rb | 17 ++- lib/bson/int32.rb | 5 + lib/bson/integer.rb | 26 +--- lib/bson/nil_class.rb | 4 +- lib/bson/specialized.rb | 4 +- lib/bson/string.rb | 6 +- lib/bson/symbol.rb | 4 +- lib/bson/true_class.rb | 4 +- lib/bson/undefined.rb | 27 +--- perf/bench.rb | 262 ++++++++++++++++---------------- spec/bson/array_spec.rb | 2 +- spec/bson/byte_buffer_spec.rb | 142 +++++++++++++++++ spec/bson/hash_spec.rb | 10 +- spec/bson/int32_spec.rb | 7 +- spec/support/shared_examples.rb | 6 +- 22 files changed, 826 insertions(+), 241 deletions(-) create mode 100644 ext/bson/portable_endian.h diff --git a/ext/bson/extconf.rb b/ext/bson/extconf.rb index d49500657..697ba8458 100644 --- a/ext/bson/extconf.rb +++ b/ext/bson/extconf.rb @@ -1,8 +1,3 @@ require "mkmf" $CFLAGS << " -Wall -g -std=c99" - -HEADER_DIRS = [ '/usr/local/include/libbson-1.0' ] -LIB_DIRS = [ '/usr/local/lib' ] - -dir_config('bson', HEADER_DIRS, LIB_DIRS) create_makefile("native") diff --git a/ext/bson/native.c b/ext/bson/native.c index d18e8408f..7df0d8ffa 100644 --- a/ext/bson/native.c +++ b/ext/bson/native.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009-2013 MongoDB Inc. + * Copyright (C) 2009-2015 MongoDB Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,39 +14,258 @@ * limitations under the License. 
*/ #include -#include +#include "portable_endian.h" + +#define BSON_BYTE_BUFFER_SIZE 512 + +typedef struct { + size_t size; + size_t write_position; + size_t read_position; + char buffer[BSON_BYTE_BUFFER_SIZE]; + char *b_ptr; +} byte_buffer_t; + +#define READ_PTR(byte_buffer_ptr) \ + (byte_buffer_ptr->b_ptr + byte_buffer_ptr->read_position) + +#define READ_SIZE(byte_buffer_ptr) \ + (byte_buffer_ptr->write_position - byte_buffer_ptr->read_position) + +#define WRITE_PTR(byte_buffer_ptr) \ + (byte_buffer_ptr->b_ptr + byte_buffer_ptr->write_position) + +#define ENSURE_BSON_WRITE(buffer_ptr, length) \ + { if (buffer_ptr->write_position + length > buffer_ptr->size) rb_bson_expand_buffer(buffer_ptr, length); } static VALUE rb_bson_byte_buffer_allocate(VALUE klass); +static VALUE rb_bson_byte_buffer_length(VALUE self); +static VALUE rb_bson_byte_buffer_put_byte(VALUE self, VALUE byte); +static VALUE rb_bson_byte_buffer_put_cstring(VALUE self, VALUE string); +static VALUE rb_bson_byte_buffer_put_double(VALUE self, VALUE f); +static VALUE rb_bson_byte_buffer_put_int32(VALUE self, VALUE i); +static VALUE rb_bson_byte_buffer_put_int64(VALUE self, VALUE i); +static VALUE rb_bson_byte_buffer_put_string(VALUE self, VALUE string); +static VALUE rb_bson_byte_buffer_replace_int32(VALUE self, VALUE index, VALUE i); +static VALUE rb_bson_byte_buffer_to_s(VALUE self); static size_t rb_bson_byte_buffer_memsize(const void *ptr); +static void rb_bson_byte_buffer_free(void *ptr); +static void rb_bson_expand_buffer(byte_buffer_t* buffer_ptr, size_t length); -static const rb_data_type_t rb_bson_data_type = { - "bson/byte_buffer", - { NULL, RUBY_DEFAULT_FREE, rb_bson_byte_buffer_memsize } +static const rb_data_type_t rb_byte_buffer_data_type = { + "bson/byte_buffer", + { NULL, rb_bson_byte_buffer_free, rb_bson_byte_buffer_memsize } }; +/** + * Initialize the native extension. 
+ */ void Init_native() { - VALUE bson_module = rb_define_module("BSON"); - VALUE byte_buffer_class = rb_define_class_under(bson_module, "ByteBuffer", rb_cObject); + VALUE rb_bson_module = rb_define_module("BSON"); + VALUE rb_byte_buffer_class = rb_define_class_under(rb_bson_module, "ByteBuffer", rb_cObject); - rb_define_alloc_func(byte_buffer_class, rb_bson_byte_buffer_allocate); + rb_define_alloc_func(rb_byte_buffer_class, rb_bson_byte_buffer_allocate); + rb_define_method(rb_byte_buffer_class, "length", rb_bson_byte_buffer_length, 0); + rb_define_method(rb_byte_buffer_class, "put_byte", rb_bson_byte_buffer_put_byte, 1); + rb_define_method(rb_byte_buffer_class, "put_cstring", rb_bson_byte_buffer_put_cstring, 1); + rb_define_method(rb_byte_buffer_class, "put_double", rb_bson_byte_buffer_put_double, 1); + rb_define_method(rb_byte_buffer_class, "put_int32", rb_bson_byte_buffer_put_int32, 1); + rb_define_method(rb_byte_buffer_class, "put_int64", rb_bson_byte_buffer_put_int64, 1); + rb_define_method(rb_byte_buffer_class, "put_string", rb_bson_byte_buffer_put_string, 1); + rb_define_method(rb_byte_buffer_class, "replace_int32", rb_bson_byte_buffer_replace_int32, 2); + rb_define_method(rb_byte_buffer_class, "to_s", rb_bson_byte_buffer_to_s, 0); } /** - * Allocates a bson byte buffer that wraps a bson_t into memory. + * Allocates a bson byte buffer that wraps a byte_buffer_t. */ VALUE rb_bson_byte_buffer_allocate(VALUE klass) { - bson_t *bson; - VALUE obj = TypedData_Make_Struct(klass, bson_t, &rb_bson_data_type, bson); + byte_buffer_t *b; + VALUE obj = TypedData_Make_Struct(klass, byte_buffer_t, &rb_byte_buffer_data_type, b); + b->b_ptr = b->buffer; + b->size = BSON_BYTE_BUFFER_SIZE; return obj; } /** - * Get the size of the bson_t in memory. + * Get the length of the buffer. 
+ */ +VALUE rb_bson_byte_buffer_length(VALUE self) +{ + byte_buffer_t *b; + TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); + return UINT2NUM(READ_SIZE(b)); +} + +/** + * Writes a byte to the byte buffer. + */ +VALUE rb_bson_byte_buffer_put_byte(VALUE self, VALUE byte) +{ + byte_buffer_t *b; + const char *str = RSTRING_PTR(byte); + + TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); + ENSURE_BSON_WRITE(b, 1); + memcpy(WRITE_PTR(b), str, 1); + b->write_position += 1; + + return self; +} + +/** + * Writes a cstring to the byte buffer. + */ +VALUE rb_bson_byte_buffer_put_cstring(VALUE self, VALUE string) +{ + byte_buffer_t *b; + const char *c_str = RSTRING_PTR(string); + const size_t length = RSTRING_LEN(string) + 1; + + TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); + ENSURE_BSON_WRITE(b, length); + memcpy(WRITE_PTR(b), c_str, length); + b->write_position += length; + + return self; +} + +/** + * Writes a 64 bit double to the buffer. + */ +VALUE rb_bson_byte_buffer_put_double(VALUE self, VALUE f) +{ + byte_buffer_t *b; + union {double d; uint64_t i64;} ucast; + + ucast.d = NUM2DBL(f); + TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); + ENSURE_BSON_WRITE(b, 8); + ucast.i64 = htole64(ucast.i64); + *(int64_t*)WRITE_PTR(b) = ucast.i64; + b->write_position += 8; + + return self; +} + +/** + * Writes a 32 bit integer to the byte buffer. + */ +VALUE rb_bson_byte_buffer_put_int32(VALUE self, VALUE i) +{ + byte_buffer_t *b; + const int32_t i32 = NUM2INT(i); + + TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); + ENSURE_BSON_WRITE(b, 4); + *((int32_t*)WRITE_PTR(b)) = htole32(i32); + b->write_position += 4; + + return self; +} + +/** + * Writes a 64 bit integer to the byte buffer. 
+ */ +VALUE rb_bson_byte_buffer_put_int64(VALUE self, VALUE i) +{ + byte_buffer_t *b; + const int64_t i64 = NUM2LONG(i); + + TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); + ENSURE_BSON_WRITE(b, 8); + *((int64_t*)WRITE_PTR(b)) = htole64(i64); + b->write_position += 8; + + return self; +} + +/** + * Writes a string to the byte buffer. + */ +VALUE rb_bson_byte_buffer_put_string(VALUE self, VALUE string) +{ + byte_buffer_t *b; + const char *str = RSTRING_PTR(string); + const size_t length = RSTRING_LEN(string) + 1; + + TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); + ENSURE_BSON_WRITE(b, length + 4); + *((int32_t*)WRITE_PTR(b)) = htole32(length); + b->write_position += 4; + memcpy(WRITE_PTR(b), str, length); + b->write_position += length; + + return self; +} + +/** + * Replace a 32 bit integer int the byte buffer. + */ +VALUE rb_bson_byte_buffer_replace_int32(VALUE self, VALUE index, VALUE i) +{ + byte_buffer_t *b; + const int32_t position = NUM2INT(index); + const int32_t i32 = NUM2INT(i); + const char bytes = htole32(i32); + + TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); + + memcpy(READ_PTR(b) + position, &bytes, sizeof(bytes)); + + return self; +} + +/** + * Convert the buffer to a string. + */ +VALUE rb_bson_byte_buffer_to_s(VALUE self) +{ + byte_buffer_t *b; + TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); + return rb_str_new(READ_PTR(b), READ_SIZE(b)); +} + +/** + * Get the size of the byte_buffer_t in memory. */ size_t rb_bson_byte_buffer_memsize(const void *ptr) { - return ptr ? sizeof(bson_t) : 0; + return ptr ? sizeof(byte_buffer_t) : 0; +} + +/** + * Free the memory for the byte buffer. + */ +void rb_bson_byte_buffer_free(void *ptr) +{ + byte_buffer_t *b = ptr; + if (b->b_ptr != b->buffer) xfree(b->b_ptr); + xfree(b); +} + +/** + * Expand the byte buffer linearly. 
+ */ +void rb_bson_expand_buffer(byte_buffer_t* buffer_ptr, size_t length) +{ + const size_t required_size = buffer_ptr->write_position - buffer_ptr->read_position + length; + if (required_size <= buffer_ptr->size) { + memmove(buffer_ptr->b_ptr, READ_PTR(buffer_ptr), READ_SIZE(buffer_ptr)); + buffer_ptr->write_position -= buffer_ptr->read_position; + buffer_ptr->read_position = 0; + } else { + char *new_b_ptr; + const size_t new_size = buffer_ptr->size + ((required_size - buffer_ptr->size - 1) / BSON_BYTE_BUFFER_SIZE + 1) * BSON_BYTE_BUFFER_SIZE; /* grow by as many whole chunks as needed so new_size >= required_size */ + new_b_ptr = ALLOC_N(char, new_size); + memcpy(new_b_ptr, READ_PTR(buffer_ptr), READ_SIZE(buffer_ptr)); + if (buffer_ptr->b_ptr != buffer_ptr->buffer) xfree(buffer_ptr->b_ptr); + buffer_ptr->b_ptr = new_b_ptr; + buffer_ptr->size = new_size; + buffer_ptr->write_position -= buffer_ptr->read_position; + buffer_ptr->read_position = 0; + } } diff --git a/ext/bson/portable_endian.h b/ext/bson/portable_endian.h new file mode 100644 index 000000000..61f533264 --- /dev/null +++ b/ext/bson/portable_endian.h @@ -0,0 +1,118 @@ +// "License": Public Domain +// I, Mathias Panzenböck, place this file hereby into the public domain. Use it at your own risk for whatever you like. +// In case there are jurisdictions that don't support putting things in the public domain you can also consider it to +// be "dual licensed" under the BSD, MIT and Apache licenses, if you want to. This code is trivial anyway. Consider it +// an example on how to get the endian conversion functions on different platforms.
+ +#ifndef PORTABLE_ENDIAN_H__ +#define PORTABLE_ENDIAN_H__ + +#if (defined(_WIN16) || defined(_WIN32) || defined(_WIN64)) && !defined(__WINDOWS__) + +# define __WINDOWS__ + +#endif + +#if defined(__linux__) || defined(__CYGWIN__) + +# include + +#elif defined(__APPLE__) + +# include + +# define htobe16(x) OSSwapHostToBigInt16(x) +# define htole16(x) OSSwapHostToLittleInt16(x) +# define be16toh(x) OSSwapBigToHostInt16(x) +# define le16toh(x) OSSwapLittleToHostInt16(x) + +# define htobe32(x) OSSwapHostToBigInt32(x) +# define htole32(x) OSSwapHostToLittleInt32(x) +# define be32toh(x) OSSwapBigToHostInt32(x) +# define le32toh(x) OSSwapLittleToHostInt32(x) + +# define htobe64(x) OSSwapHostToBigInt64(x) +# define htole64(x) OSSwapHostToLittleInt64(x) +# define be64toh(x) OSSwapBigToHostInt64(x) +# define le64toh(x) OSSwapLittleToHostInt64(x) + +# define __BYTE_ORDER BYTE_ORDER +# define __BIG_ENDIAN BIG_ENDIAN +# define __LITTLE_ENDIAN LITTLE_ENDIAN +# define __PDP_ENDIAN PDP_ENDIAN + +#elif defined(__OpenBSD__) + +# include + +#elif defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__) + +# include + +# define be16toh(x) betoh16(x) +# define le16toh(x) letoh16(x) + +# define be32toh(x) betoh32(x) +# define le32toh(x) letoh32(x) + +# define be64toh(x) betoh64(x) +# define le64toh(x) letoh64(x) + +#elif defined(__WINDOWS__) + +# include +# include + +# if BYTE_ORDER == LITTLE_ENDIAN + +# define htobe16(x) htons(x) +# define htole16(x) (x) +# define be16toh(x) ntohs(x) +# define le16toh(x) (x) + +# define htobe32(x) htonl(x) +# define htole32(x) (x) +# define be32toh(x) ntohl(x) +# define le32toh(x) (x) + +# define htobe64(x) htonll(x) +# define htole64(x) (x) +# define be64toh(x) ntohll(x) +# define le64toh(x) (x) + +# elif BYTE_ORDER == BIG_ENDIAN + + /* that would be xbox 360 */ +# define htobe16(x) (x) +# define htole16(x) __builtin_bswap16(x) +# define be16toh(x) (x) +# define le16toh(x) __builtin_bswap16(x) + +# define htobe32(x) (x) +# define 
htole32(x) __builtin_bswap32(x) +# define be32toh(x) (x) +# define le32toh(x) __builtin_bswap32(x) + +# define htobe64(x) (x) +# define htole64(x) __builtin_bswap64(x) +# define be64toh(x) (x) +# define le64toh(x) __builtin_bswap64(x) + +# else + +# error byte order not supported + +# endif + +# define __BYTE_ORDER BYTE_ORDER +# define __BIG_ENDIAN BIG_ENDIAN +# define __LITTLE_ENDIAN LITTLE_ENDIAN +# define __PDP_ENDIAN PDP_ENDIAN + +#else + +# error platform not supported + +#endif + +#endif diff --git a/lib/bson/array.rb b/lib/bson/array.rb index c2b085739..eb46ba96d 100644 --- a/lib/bson/array.rb +++ b/lib/bson/array.rb @@ -21,7 +21,6 @@ module BSON # # @since 2.0.0 module Array - include Encodable # An array is type 0x04 in the BSON spec. # @@ -41,14 +40,16 @@ module Array # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def to_bson(encoded = ''.force_encoding(BINARY)) - encode_with_placeholder_and_null(BSON_ADJUST, encoded) do |encoded| - each_with_index do |value, index| - encoded << value.bson_type - index.to_bson_key(encoded) - value.to_bson(encoded) - end + def to_bson(buffer = ByteBuffer.new) + position = buffer.length + buffer.put_int32(0) + each_with_index do |value, index| + buffer.put_byte(value.bson_type) + buffer.put_cstring(index.to_s) + value.to_bson(buffer) end + buffer.put_byte(NULL_BYTE) + buffer.replace_int32(position, buffer.length - position) end # Convert the array to an object id. This will only work for arrays of size diff --git a/lib/bson/byte_buffer.rb b/lib/bson/byte_buffer.rb index 2747de68a..edd0f767f 100644 --- a/lib/bson/byte_buffer.rb +++ b/lib/bson/byte_buffer.rb @@ -1,4 +1,152 @@ +# Copyright (C) 2015 MongoDB Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + module BSON class ByteBuffer + + # Initialize the pure ruby byte buffer. + # + # @example Create the buffer. + # BSON::ByteBuffer.new + # + # @since 4.0.0 + def initialize + @buffer = "".force_encoding(BINARY) + end + + # Get the length of the buffer. + # + # @example Get the length of the buffer. + # buffer.length + # + # @return [ Integer ] The buffer length. + # + # @since 4.0.0 + def length + @buffer.bytesize + end + + # Put a single byte on the end of the buffer. + # + # @example Put a single byte on the buffer. + # buffer.put_byte(4) + # + # @param [ Integer ] value The byte to append. + # + # @return [ ByteBuffer ] The modified buffer. + # + # @since 4.0.0 + def put_byte(value) + @buffer << value + self + end + + # Put a null termintated c string on the end of the buffer. + # + # @example Put a cstring. + # buffer.put_cstring('test') + # + # @param [ String ] value The string. + # + # @return [ ByteBuffer ] The modified buffer. + # + # @since 4.0.0 + def put_cstring(value) + @buffer << value << NULL_BYTE + self + end + + # Put a 64 bit double on the buffer. + # + # @example Put a double. + # buffer.put_double(213.11231) + # + # @param [ Float ] value The float to convert. + # + # @return [ ByteBuffer ] The modified buffer. + # + # @since 4.0.0 + def put_double(value) + @buffer << [ value ].pack(Float::PACK) + self + end + + # Put a 32 bit integer on the end of the buffer. + # + # @example Put a 32 bit integer on the buffer. + # buffer.put_int32(4) + # + # @param [ Integer ] value The integer. + # + # @return [ ByteBuffer ] The modified buffer. 
+ # + # @since 4.0.0 + def put_int32(value) + @buffer << [ value ].pack(Int32::PACK) + self + end + + # Put a 64 bit integer on the end of the buffer. + # + # @example Put a 64 bit integer on the buffer. + # buffer.put_int64(4) + # + # @param [ Integer ] value The integer. + # + # @return [ ByteBuffer ] The modified buffer. + # + # @since 4.0.0 + def put_int64(value) + @buffer << [ value ].pack(Int64::PACK) + self + end + + # Put a string on the end of the buffer. + # + # @example Put a string on the buffer. + # buffer.put_string('test') + # + # @param [ String ] value The value to append. + # + # @return [ ByteBuffer ] The modified buffer. + # + # @since 4.0.0 + def put_string(value) + put_int32(value.bytesize + 1) + @buffer << value + @buffer << NULL_BYTE + self + end + + # Replace an int 32 at the specified location in the buffer. + # + # @example Replace an int 32. + # buffer.replace_int32(4, 32) + # + # @param [ Integer ] index The index to replace at. + # @param [ Integer ] value The new value. + # + # @return [ ByteBuffer ] The modified buffer. 
+ # + # @since 4.0.0 + def replace_int32(location, value) + @buffer[location, Int32::BYTES_LENGTH] = [ value ].pack(Int32::PACK) + self + end + + def to_s + @buffer + end end end diff --git a/lib/bson/false_class.rb b/lib/bson/false_class.rb index 9a2bd61e2..c4940a1cb 100644 --- a/lib/bson/false_class.rb +++ b/lib/bson/false_class.rb @@ -49,8 +49,8 @@ def bson_type # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def to_bson(encoded = ''.force_encoding(BINARY)) - encoded << FALSE_BYTE + def to_bson(buffer = ByteBuffer.new) + buffer.put_byte(FALSE_BYTE) end end diff --git a/lib/bson/float.rb b/lib/bson/float.rb index 1e2e3aa22..b0073e470 100644 --- a/lib/bson/float.rb +++ b/lib/bson/float.rb @@ -42,8 +42,8 @@ module Float # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def to_bson(encoded = ''.force_encoding(BINARY)) - encoded << [ self ].pack(PACK) + def to_bson(buffer = ByteBuffer.new) + buffer.put_double(self) end module ClassMethods diff --git a/lib/bson/hash.rb b/lib/bson/hash.rb index 5feac584e..a263f1236 100644 --- a/lib/bson/hash.rb +++ b/lib/bson/hash.rb @@ -21,7 +21,6 @@ module BSON # # @since 2.0.0 module Hash - include Encodable # An hash (embedded document) is type 0x03 in the BSON spec. # @@ -38,14 +37,16 @@ module Hash # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def to_bson(encoded = ''.force_encoding(BINARY)) - encode_with_placeholder_and_null(BSON_ADJUST, encoded) do |encoded| - each do |field, value| - encoded << value.bson_type - field.to_bson_key(encoded) - value.to_bson(encoded) - end + def to_bson(buffer = ByteBuffer.new) + position = buffer.length + buffer.put_int32(0) + each do |field, value| + buffer.put_byte(value.bson_type) + buffer.put_cstring(field) + value.to_bson(buffer) end + buffer.put_byte(NULL_BYTE) + buffer.replace_int32(position, buffer.length - position) end # Converts the hash to a normalized value in a BSON document. 
diff --git a/lib/bson/int32.rb b/lib/bson/int32.rb index f2efd3aad..d0a8d66c7 100644 --- a/lib/bson/int32.rb +++ b/lib/bson/int32.rb @@ -27,6 +27,11 @@ class Int32 # @since 2.0.0 BSON_TYPE = 16.chr.force_encoding(BINARY).freeze + # The number of bytes constant. + # + # @since 4.0.0 + BYTES_LENGTH = 4 + # Constant for the int 32 pack directive. # # @since 2.0.0 diff --git a/lib/bson/integer.rb b/lib/bson/integer.rb index 6e451909a..563ee14d5 100644 --- a/lib/bson/integer.rb +++ b/lib/bson/integer.rb @@ -22,16 +22,6 @@ module BSON # @since 2.0.0 module Integer - # A 32bit integer is type 0x10 in the BSON spec. - # - # @since 2.0.0 - INT32_TYPE = 16.chr.force_encoding(BINARY).freeze - - # A 64bit integer is type 0x12 in the BSON spec. - # - # @since 2.0.0 - INT64_TYPE = 18.chr.force_encoding(BINARY).freeze - # The maximum 32 bit integer value. # # @since 2.0.0 @@ -100,7 +90,7 @@ def bson_int64? # # @since 2.0.0 def bson_type - bson_int32? ? INT32_TYPE : (bson_int64? ? INT64_TYPE : out_of_range!) + bson_int32? ? Int32::BSON_TYPE : (bson_int64? ? Int64::BSON_TYPE : out_of_range!) end # Get the integer as encoded BSON. @@ -113,11 +103,11 @@ def bson_type # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def to_bson(encoded = ''.force_encoding(BINARY)) + def to_bson(buffer = ByteBuffer.new) if bson_int32? - to_bson_int32(encoded) + buffer.put_int32(self) elsif bson_int64? - to_bson_int64(encoded) + buffer.put_int64(self) else out_of_range! 
end @@ -155,12 +145,8 @@ def to_bson_int64(encoded) encoded << ((self >> 56) & 255) end - def to_bson_key(encoded = ''.force_encoding(BINARY)) - if self < BSON_INDEX_SIZE - encoded << BSON_ARRAY_INDEXES[self] - else - self.to_s.to_bson_cstring(encoded) - end + def to_bson_key(buffer = ByteBuffer.new) + buffer.put_cstring(to_s) end private diff --git a/lib/bson/nil_class.rb b/lib/bson/nil_class.rb index 3ed2603ba..1e83d6c04 100644 --- a/lib/bson/nil_class.rb +++ b/lib/bson/nil_class.rb @@ -37,8 +37,8 @@ module NilClass # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def to_bson(encoded = ''.force_encoding(BINARY)) - encoded + def to_bson(buffer = ByteBuffer.new) + buffer end module ClassMethods diff --git a/lib/bson/specialized.rb b/lib/bson/specialized.rb index 1afb73dc2..d783c3994 100644 --- a/lib/bson/specialized.rb +++ b/lib/bson/specialized.rb @@ -45,8 +45,8 @@ def ==(other) # @return [ String ] An empty string. # # @since 2.0.0 - def to_bson(encoded = ''.force_encoding(BINARY)) - encoded + def to_bson(buffer = ByteBuffer.new) + buffer end private diff --git a/lib/bson/string.rb b/lib/bson/string.rb index e08b0267f..beca30e16 100644 --- a/lib/bson/string.rb +++ b/lib/bson/string.rb @@ -41,10 +41,8 @@ module String # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def to_bson(encoded = ''.force_encoding(BINARY)) - encode_with_placeholder_and_null(STRING_ADJUST, encoded) do |encoded| - to_bson_string(encoded) - end + def to_bson(buffer = ByteBuffer.new) + buffer.put_string(self) end # Get the string as a BSON key name encoded C string with checking for special characters. 
diff --git a/lib/bson/symbol.rb b/lib/bson/symbol.rb index fa7dd7edc..807de4890 100644 --- a/lib/bson/symbol.rb +++ b/lib/bson/symbol.rb @@ -40,8 +40,8 @@ module Symbol # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def to_bson(encoded = ''.force_encoding(BINARY)) - to_s.to_bson(encoded) + def to_bson(buffer = ByteBuffer.new) + to_s.to_bson(buffer) end # Get the symbol as a BSON key name encoded C symbol. diff --git a/lib/bson/true_class.rb b/lib/bson/true_class.rb index c3094a084..59bbc2ce6 100644 --- a/lib/bson/true_class.rb +++ b/lib/bson/true_class.rb @@ -49,8 +49,8 @@ def bson_type # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def to_bson(encoded = ''.force_encoding(BINARY)) - encoded << TRUE_BYTE + def to_bson(buffer = ByteBuffer.new) + buffer.put_byte(TRUE_BYTE) end end diff --git a/lib/bson/undefined.rb b/lib/bson/undefined.rb index 5fbc2c184..e09659632 100644 --- a/lib/bson/undefined.rb +++ b/lib/bson/undefined.rb @@ -20,6 +20,7 @@ module BSON # # @since 2.0.0 class Undefined + include Specialized # Undefined is type 0x06 in the BSON spec. # @@ -40,32 +41,6 @@ def ==(other) self.class == other.class end - # Encode the Undefined field - has no value since it only needs the type - # and field name when being encoded. - # - # @example Encode the undefined value. - # Undefined.to_bson - # - # @return [ String ] An empty string. - # - # @since 2.0.0 - def to_bson(encoded = ''.force_encoding(BINARY)) - encoded - end - - # Deserialize undefined BSON type from BSON. - # - # @param [ BSON ] bson The encoded undefined value. - # - # @return [ Undefined ] The decoded undefined value. - # - # @see http://bsonspec.org/#/specification - # - # @since 2.0.0 - def self.from_bson(bson) - new - end - # Register this type when the module is loaded. # # @since 2.0.0 diff --git a/perf/bench.rb b/perf/bench.rb index f3ee3ec1f..1d0acff91 100644 --- a/perf/bench.rb +++ b/perf/bench.rb @@ -30,137 +30,137 @@ def benchmark! 
count.times { document.to_bson } end - bench.report("Binary#to_bson -------->") do - count.times { BSON::Binary.new("test", :generic).to_bson } - end - - bench.report("Code#to_bson ---------->") do - count.times { BSON::Code.new("this.value = 1").to_bson } - end - - bench.report("FalseClass#to_bson ---->") do - count.times { false.to_bson } - end - - bench.report("Float#to_bson --------->") do - count.times { 1.131312.to_bson } - end - - bench.report("Integer#to_bson ------->") do - count.times { 1024.to_bson } - end - - bench.report("MaxKey#to_bson -------->") do - count.times { BSON::MaxKey.new.to_bson } - end - - bench.report("MinKey#to_bson -------->") do - count.times { BSON::MinKey.new.to_bson } - end - - bench.report("ObjectId#to_bson ------>") do - count.times { BSON::ObjectId.new.to_bson } - end - - bench.report("ObjectId#to_s --------->") do - object_id = BSON::ObjectId.new - count.times { object_id.to_s } - end - - bench.report("Regexp#to_bson -------->") do - count.times { %r{\d+}.to_bson } - end - - bench.report("String#to_bson -------->") do - count.times { "testing".to_bson } - end - - bench.report("Symbol#to_bson -------->") do - count.times { "testing".to_bson } - end - - bench.report("Time#to_bson ---------->") do - count.times { Time.new.to_bson } - end - - bench.report("TrueClass#to_bson ----->") do - count.times { true.to_bson } - end - - boolean_bytes = true.to_bson - bench.report("Boolean#from_bson ----->") do - count.times { BSON::Boolean.from_bson(StringIO.new(boolean_bytes)) } - end - - int32_bytes = 1024.to_bson - bench.report("Int32#from_bson ------->") do - count.times { BSON::Int32.from_bson(StringIO.new(int32_bytes)) } - end - - int64_bytes = (BSON::Integer::MAX_32BIT + 1).to_bson - bench.report("Int64#from_bson ------->") do - count.times { BSON::Int64.from_bson(StringIO.new(int64_bytes)) } - end - - float_bytes = 1.23131.to_bson - bench.report("Float#from_bson ------->") do - count.times { Float.from_bson(StringIO.new(float_bytes)) 
} - end - - binary_bytes = BSON::Binary.new("test", :generic).to_bson - bench.report("Binary#from_bson ------>") do - count.times { BSON::Binary.from_bson(StringIO.new(binary_bytes)) } - end - - code_bytes = BSON::Code.new("this.value = 1").to_bson - bench.report("Code#from_bson -------->") do - count.times { BSON::Code.from_bson(StringIO.new(code_bytes)) } - end - - false_bytes = false.to_bson - bench.report("Boolean#from_bson ----->") do - count.times { BSON::Boolean.from_bson(StringIO.new(false_bytes)) } - end - - max_key_bytes = BSON::MaxKey.new.to_bson - bench.report("MaxKey#from_bson ------>") do - count.times { BSON::MaxKey.from_bson(StringIO.new(max_key_bytes)) } - end - - min_key_bytes = BSON::MinKey.new.to_bson - bench.report("MinKey#from_bson ------>") do - count.times { BSON::MinKey.from_bson(StringIO.new(min_key_bytes)) } - end - - object_id_bytes = BSON::ObjectId.new.to_bson - bench.report("ObjectId#from_bson ---->") do - count.times { BSON::ObjectId.from_bson(StringIO.new(object_id_bytes)) } - end - - regex_bytes = %r{\d+}.to_bson - bench.report("Regexp#from_bson ------>") do - count.times { Regexp.from_bson(StringIO.new(regex_bytes)) } - end - - string_bytes = "testing".to_bson - bench.report("String#from_bson ------>") do - count.times { String.from_bson(StringIO.new(string_bytes)) } - end - - symbol_bytes = "testing".to_bson - bench.report("Symbol#from_bson ------>") do - count.times { Symbol.from_bson(StringIO.new(symbol_bytes)) } - end - - time_bytes = Time.new.to_bson - bench.report("Time#from_bson -------->") do - count.times { Time.from_bson(StringIO.new(time_bytes)) } - end - - doc_bytes = document.to_bson - bench.report("Document#from_bson ---->") do - count.times { BSON::Document.from_bson(StringIO.new(doc_bytes)) } - end + # bench.report("Binary#to_bson -------->") do + # count.times { BSON::Binary.new("test", :generic).to_bson } + # end + + # bench.report("Code#to_bson ---------->") do + # count.times { BSON::Code.new("this.value = 
1").to_bson } + # end + + # bench.report("FalseClass#to_bson ---->") do + # count.times { false.to_bson } + # end + + # bench.report("Float#to_bson --------->") do + # count.times { 1.131312.to_bson } + # end + + # bench.report("Integer#to_bson ------->") do + # count.times { 1024.to_bson } + # end + + # bench.report("MaxKey#to_bson -------->") do + # count.times { BSON::MaxKey.new.to_bson } + # end + + # bench.report("MinKey#to_bson -------->") do + # count.times { BSON::MinKey.new.to_bson } + # end + + # bench.report("ObjectId#to_bson ------>") do + # count.times { BSON::ObjectId.new.to_bson } + # end + + # bench.report("ObjectId#to_s --------->") do + # object_id = BSON::ObjectId.new + # count.times { object_id.to_s } + # end + + # bench.report("Regexp#to_bson -------->") do + # count.times { %r{\d+}.to_bson } + # end + + # bench.report("String#to_bson -------->") do + # count.times { "testing".to_bson } + # end + + # bench.report("Symbol#to_bson -------->") do + # count.times { "testing".to_bson } + # end + + # bench.report("Time#to_bson ---------->") do + # count.times { Time.new.to_bson } + # end + + # bench.report("TrueClass#to_bson ----->") do + # count.times { true.to_bson } + # end + + # boolean_bytes = true.to_bson + # bench.report("Boolean#from_bson ----->") do + # count.times { BSON::Boolean.from_bson(StringIO.new(boolean_bytes)) } + # end + + # int32_bytes = 1024.to_bson + # bench.report("Int32#from_bson ------->") do + # count.times { BSON::Int32.from_bson(StringIO.new(int32_bytes)) } + # end + + # int64_bytes = (BSON::Integer::MAX_32BIT + 1).to_bson + # bench.report("Int64#from_bson ------->") do + # count.times { BSON::Int64.from_bson(StringIO.new(int64_bytes)) } + # end + + # float_bytes = 1.23131.to_bson + # bench.report("Float#from_bson ------->") do + # count.times { Float.from_bson(StringIO.new(float_bytes)) } + # end + + # binary_bytes = BSON::Binary.new("test", :generic).to_bson + # bench.report("Binary#from_bson ------>") do + # count.times 
{ BSON::Binary.from_bson(StringIO.new(binary_bytes)) } + # end + + # code_bytes = BSON::Code.new("this.value = 1").to_bson + # bench.report("Code#from_bson -------->") do + # count.times { BSON::Code.from_bson(StringIO.new(code_bytes)) } + # end + + # false_bytes = false.to_bson + # bench.report("Boolean#from_bson ----->") do + # count.times { BSON::Boolean.from_bson(StringIO.new(false_bytes)) } + # end + + # max_key_bytes = BSON::MaxKey.new.to_bson + # bench.report("MaxKey#from_bson ------>") do + # count.times { BSON::MaxKey.from_bson(StringIO.new(max_key_bytes)) } + # end + + # min_key_bytes = BSON::MinKey.new.to_bson + # bench.report("MinKey#from_bson ------>") do + # count.times { BSON::MinKey.from_bson(StringIO.new(min_key_bytes)) } + # end + + # object_id_bytes = BSON::ObjectId.new.to_bson + # bench.report("ObjectId#from_bson ---->") do + # count.times { BSON::ObjectId.from_bson(StringIO.new(object_id_bytes)) } + # end + + # regex_bytes = %r{\d+}.to_bson + # bench.report("Regexp#from_bson ------>") do + # count.times { Regexp.from_bson(StringIO.new(regex_bytes)) } + # end + + # string_bytes = "testing".to_bson + # bench.report("String#from_bson ------>") do + # count.times { String.from_bson(StringIO.new(string_bytes)) } + # end + + # symbol_bytes = "testing".to_bson + # bench.report("Symbol#from_bson ------>") do + # count.times { Symbol.from_bson(StringIO.new(symbol_bytes)) } + # end + + # time_bytes = Time.new.to_bson + # bench.report("Time#from_bson -------->") do + # count.times { Time.from_bson(StringIO.new(time_bytes)) } + # end + + # doc_bytes = document.to_bson + # bench.report("Document#from_bson ---->") do + # count.times { BSON::Document.from_bson(StringIO.new(doc_bytes)) } + # end end end diff --git a/spec/bson/array_spec.rb b/spec/bson/array_spec.rb index fdd055e42..592b69cea 100644 --- a/spec/bson/array_spec.rb +++ b/spec/bson/array_spec.rb @@ -21,7 +21,7 @@ let(:type) { 4.chr } let(:obj) {[ "one", "two" ]} let(:bson) do - BSON::Document["0", 
"one", "1", "two"].to_bson + BSON::Document["0", "one", "1", "two"].to_bson.to_s end it_behaves_like "a bson element" diff --git a/spec/bson/byte_buffer_spec.rb b/spec/bson/byte_buffer_spec.rb index 968e20a7d..a1b7f7041 100644 --- a/spec/bson/byte_buffer_spec.rb +++ b/spec/bson/byte_buffer_spec.rb @@ -12,4 +12,146 @@ expect(buffer).to be_a(BSON::ByteBuffer) end end + + describe '#length' do + + let(:buffer) do + described_class.new + end + + before do + buffer.put_int32(5) + end + + it 'returns the length of the buffer' do + expect(buffer.length).to eq(4) + end + end + + describe '#put_byte' do + + let(:buffer) do + described_class.new + end + + let(:modified) do + buffer.put_byte(BSON::Integer::INT32_TYPE) + end + + it 'appends the int32 to the byte buffer' do + expect(modified.to_s).to eq(BSON::Integer::INT32_TYPE.chr) + end + end + + describe '#put_cstring' do + + let(:buffer) do + described_class.new + end + + let(:modified) do + buffer.put_cstring('testing') + end + + it 'appends the string plus null byte to the byte buffer' do + expect(modified.to_s).to eq("testing#{BSON::NULL_BYTE}") + end + end + + describe '#put_double' do + + let(:buffer) do + described_class.new + end + + let(:modified) do + buffer.put_double(1.2332) + end + + it 'appends the double to the buffer' do + expect(modified.to_s).to eq([ 1.2332 ].pack(Float::PACK)) + end + end + + describe '#put_int32' do + + let(:buffer) do + described_class.new + end + + context 'when the integer is 32 bit' do + + let(:modified) do + buffer.put_int32(Integer::MAX_32BIT - 1) + end + + let(:expected) do + [ Integer::MAX_32BIT - 1 ].pack(BSON::Int32::PACK) + end + + it 'appends the int32 to the byte buffer' do + expect(modified.to_s).to eq(expected) + end + end + end + + describe '#put_int64' do + + let(:buffer) do + described_class.new + end + + context 'when the integer is 64 bit' do + + let(:modified) do + buffer.put_int64(Integer::MAX_64BIT - 1) + end + + let(:expected) do + [ Integer::MAX_64BIT - 1 
].pack(BSON::Int64::PACK) + end + + it 'appends the int64 to the byte buffer' do + expect(modified.to_s).to eq(expected) + end + end + end + + describe '#put_string' do + + let(:buffer) do + described_class.new + end + + let(:modified) do + buffer.put_string('testing') + end + + it 'appends the string to the byte buffer' do + expect(modified.to_s).to eq("#{8.to_bson.to_s}testing#{BSON::NULL_BYTE}") + end + end + + describe '#replace_int32' do + + let(:buffer) do + described_class.new + end + + let(:exp_first) do + [ 5 ].pack(BSON::Int32::PACK) + end + + let(:exp_second) do + [ 4 ].pack(BSON::Int32::PACK) + end + + let(:modified) do + buffer.put_int32(0).put_int32(4).replace_int32(0, 5) + end + + it 'replaces the int32 at the location' do + expect(modified.to_s).to eq("#{exp_first}#{exp_second}") + end + end end diff --git a/spec/bson/hash_spec.rb b/spec/bson/hash_spec.rb index dee6f426d..22fccc594 100644 --- a/spec/bson/hash_spec.rb +++ b/spec/bson/hash_spec.rb @@ -29,8 +29,8 @@ end let(:bson) do - "#{20.to_bson}#{String::BSON_TYPE}key#{BSON::NULL_BYTE}" + - "#{6.to_bson}value#{BSON::NULL_BYTE}#{BSON::NULL_BYTE}" + "#{20.to_bson.to_s}#{String::BSON_TYPE}key#{BSON::NULL_BYTE}" + + "#{6.to_bson.to_s}value#{BSON::NULL_BYTE}#{BSON::NULL_BYTE}" end it_behaves_like "a serializable bson element" @@ -44,9 +44,9 @@ end let(:bson) do - "#{32.to_bson}#{Hash::BSON_TYPE}field#{BSON::NULL_BYTE}" + - "#{20.to_bson}#{String::BSON_TYPE}key#{BSON::NULL_BYTE}" + - "#{6.to_bson}value#{BSON::NULL_BYTE}#{BSON::NULL_BYTE}#{BSON::NULL_BYTE}" + "#{32.to_bson.to_s}#{Hash::BSON_TYPE}field#{BSON::NULL_BYTE}" + + "#{20.to_bson.to_s}#{String::BSON_TYPE}key#{BSON::NULL_BYTE}" + + "#{6.to_bson.to_s}value#{BSON::NULL_BYTE}#{BSON::NULL_BYTE}#{BSON::NULL_BYTE}" end it_behaves_like "a serializable bson element" diff --git a/spec/bson/int32_spec.rb b/spec/bson/int32_spec.rb index c99fa1552..a90fadc9e 100644 --- a/spec/bson/int32_spec.rb +++ b/spec/bson/int32_spec.rb @@ -31,12 +31,13 @@ let(:encoded) 
{StringIO.new([ -1 ].pack(BSON::Int32::PACK))} let(:decoded_2) { -50 } let(:encoded_2) {StringIO.new([ -50 ].pack(BSON::Int32::PACK))} + it "decodes a -1 correctly" do expect(BSON::Int32.from_bson(encoded)).to eq(decoded) - end + end + it "decodes a -50 correctly" do expect(BSON::Int32.from_bson(encoded_2)).to eq(decoded_2) - end + end end - end diff --git a/spec/support/shared_examples.rb b/spec/support/shared_examples.rb index ad8913093..518e12756 100644 --- a/spec/support/shared_examples.rb +++ b/spec/support/shared_examples.rb @@ -43,11 +43,7 @@ end it "serializes to bson" do - expect(obj.to_bson).to eq(bson) - end - - it "serializes to bson by appending" do - expect(obj.to_bson(previous_content)).to eq(previous_content << bson) + expect(obj.to_bson.to_s).to eq(bson) end end From c9c506ac7f2cbdfecb0d6917a65a3579f8afab17 Mon Sep 17 00:00:00 2001 From: Durran Jordan Date: Thu, 17 Sep 2015 13:36:25 +0200 Subject: [PATCH 03/29] RUBY-1019: Fix binary serialization --- ext/bson/native.c | 19 +++++++++++++++++++ lib/bson/binary.rb | 13 +++++++------ lib/bson/byte_buffer.rb | 1 + 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/ext/bson/native.c b/ext/bson/native.c index 7df0d8ffa..67cfc5519 100644 --- a/ext/bson/native.c +++ b/ext/bson/native.c @@ -41,6 +41,7 @@ typedef struct { static VALUE rb_bson_byte_buffer_allocate(VALUE klass); static VALUE rb_bson_byte_buffer_length(VALUE self); static VALUE rb_bson_byte_buffer_put_byte(VALUE self, VALUE byte); +static VALUE rb_bson_byte_buffer_put_bytes(VALUE self, VALUE bytes); static VALUE rb_bson_byte_buffer_put_cstring(VALUE self, VALUE string); static VALUE rb_bson_byte_buffer_put_double(VALUE self, VALUE f); static VALUE rb_bson_byte_buffer_put_int32(VALUE self, VALUE i); @@ -69,6 +70,7 @@ void Init_native() rb_define_alloc_func(rb_byte_buffer_class, rb_bson_byte_buffer_allocate); rb_define_method(rb_byte_buffer_class, "length", rb_bson_byte_buffer_length, 0); rb_define_method(rb_byte_buffer_class, 
"put_byte", rb_bson_byte_buffer_put_byte, 1); + rb_define_method(rb_byte_buffer_class, "put_bytes", rb_bson_byte_buffer_put_bytes, 1); rb_define_method(rb_byte_buffer_class, "put_cstring", rb_bson_byte_buffer_put_cstring, 1); rb_define_method(rb_byte_buffer_class, "put_double", rb_bson_byte_buffer_put_double, 1); rb_define_method(rb_byte_buffer_class, "put_int32", rb_bson_byte_buffer_put_int32, 1); @@ -116,6 +118,23 @@ VALUE rb_bson_byte_buffer_put_byte(VALUE self, VALUE byte) return self; } +/** + * Writes bytes to the byte buffer. + */ +VALUE rb_bson_byte_buffer_put_bytes(VALUE self, VALUE bytes) +{ + byte_buffer_t *b; + const char *str = RSTRING_PTR(bytes); + const size_t length = RSTRING_LEN(bytes); + + TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); + ENSURE_BSON_WRITE(b, length); + memcpy(WRITE_PTR(b), str, length); + b->write_position += length; + + return self; +} + /** * Writes a cstring to the byte buffer. */ diff --git a/lib/bson/binary.rb b/lib/bson/binary.rb index 7aa92a78b..1f385d4ae 100644 --- a/lib/bson/binary.rb +++ b/lib/bson/binary.rb @@ -130,12 +130,13 @@ def inspect # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def to_bson(encoded = ''.force_encoding(BINARY)) - encode_binary_data_with_placeholder(encoded) do |encoded| - encoded << SUBTYPES.fetch(type) - encoded << data.bytesize.to_bson if type == :old - encoded << data.force_encoding(BINARY) - end + def to_bson(buffer = ByteBuffer.new) + position = buffer.length + buffer.put_int32(0) + buffer.put_byte(SUBTYPES.fetch(type)) + buffer.put_int32(data.bytesize) if type == :old + buffer.put_bytes(data.force_encoding(BINARY)) + buffer.replace_int32(position, buffer.length - position - 5) end # Deserialize the binary data from BSON. 
diff --git a/lib/bson/byte_buffer.rb b/lib/bson/byte_buffer.rb index edd0f767f..2c19e1aab 100644 --- a/lib/bson/byte_buffer.rb +++ b/lib/bson/byte_buffer.rb @@ -51,6 +51,7 @@ def put_byte(value) @buffer << value self end + alias :put_bytes :put_byte # Put a null termintated c string on the end of the buffer. # From 137dddfd8ecb4e8debdefa14aafbd213e216f7a8 Mon Sep 17 00:00:00 2001 From: Durran Jordan Date: Thu, 17 Sep 2015 13:42:30 +0200 Subject: [PATCH 04/29] RUBY-1019: Fix code serialization --- lib/bson/binary.rb | 1 - lib/bson/code.rb | 7 ++----- lib/bson/string.rb | 6 +++--- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/lib/bson/binary.rb b/lib/bson/binary.rb index 1f385d4ae..9eba976d5 100644 --- a/lib/bson/binary.rb +++ b/lib/bson/binary.rb @@ -21,7 +21,6 @@ module BSON # @since 2.0.0 class Binary include JSON - include Encodable # A binary is type 0x05 in the BSON spec. # diff --git a/lib/bson/code.rb b/lib/bson/code.rb index bd6b8c7c6..8cc556f81 100644 --- a/lib/bson/code.rb +++ b/lib/bson/code.rb @@ -21,7 +21,6 @@ module BSON # @since 2.0.0 class Code include JSON - include Encodable # A code is type 0x0D in the BSON spec. # @@ -82,10 +81,8 @@ def initialize(javascript = "") # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def to_bson(encoded = ''.force_encoding(BINARY)) - encode_with_placeholder_and_null(STRING_ADJUST, encoded) do |encoded| - javascript.to_bson_string(encoded) - end + def to_bson(buffer = ByteBuffer.new) + buffer.put_string(javascript) # @todo: was formerly to_bson_string end # Deserialize code from BSON. diff --git a/lib/bson/string.rb b/lib/bson/string.rb index beca30e16..9215a867b 100644 --- a/lib/bson/string.rb +++ b/lib/bson/string.rb @@ -106,13 +106,13 @@ def to_bson_object_id # @return [ String ] The binary string. 
# # @since 2.0.0 - def to_bson_string(encoded = ''.force_encoding(BINARY)) + def to_bson_string(buffer = ByteBuffer.new) begin - to_utf8_binary(encoded) + to_utf8_binary(buffer) rescue EncodingError data = dup.force_encoding(UTF8) raise unless data.valid_encoding? - encoded << data.force_encoding(BINARY) + buffer.put_bytes(data.force_encoding(BINARY)) end end From 0e3aec1102a50eff93cc91a071fe22a2067f1ba1 Mon Sep 17 00:00:00 2001 From: Durran Jordan Date: Thu, 17 Sep 2015 15:24:32 +0200 Subject: [PATCH 05/29] RUBY-1019: Fix code with scope serialization --- lib/bson/code_with_scope.rb | 15 ++++++--------- lib/bson/hash.rb | 2 +- spec/bson/code_with_scope_spec.rb | 6 +++--- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/lib/bson/code_with_scope.rb b/lib/bson/code_with_scope.rb index 1680a83c5..ee1eaf66c 100644 --- a/lib/bson/code_with_scope.rb +++ b/lib/bson/code_with_scope.rb @@ -21,7 +21,6 @@ module BSON # # @since 2.0.0 class CodeWithScope - include Encodable include JSON # A code with scope is type 0x0F in the BSON spec. @@ -88,14 +87,12 @@ def initialize(javascript = "", scope = {}) # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def to_bson(encoded = ''.force_encoding(BINARY)) - # -1 because we are removing an extra byte - out = encode_with_placeholder_and_null(BSON_ADJUST - 1, encoded) do |encoded| - javascript.to_bson(encoded) - scope.to_bson(encoded) - end - # an extra null byte has been added; we must remove it - out.chop! + def to_bson(buffer = ByteBuffer.new) + position = buffer.length + buffer.put_int32(0) + buffer.put_string(javascript) + scope.to_bson(buffer) + buffer.replace_int32(position, buffer.length - position) end # Deserialize a code with scope from BSON. 
diff --git a/lib/bson/hash.rb b/lib/bson/hash.rb index a263f1236..737e0b621 100644 --- a/lib/bson/hash.rb +++ b/lib/bson/hash.rb @@ -42,7 +42,7 @@ def to_bson(buffer = ByteBuffer.new) buffer.put_int32(0) each do |field, value| buffer.put_byte(value.bson_type) - buffer.put_cstring(field) + buffer.put_cstring(field.to_s) # @todo: to_bson_key? value.to_bson(buffer) end buffer.put_byte(NULL_BYTE) diff --git a/spec/bson/code_with_scope_spec.rb b/spec/bson/code_with_scope_spec.rb index f668427d2..250c1f23d 100644 --- a/spec/bson/code_with_scope_spec.rb +++ b/spec/bson/code_with_scope_spec.rb @@ -63,8 +63,8 @@ end let(:obj) { described_class.new(code, scope) } let(:bson) do - "#{47.to_bson}#{(code.length + 1).to_bson}#{code}#{BSON::NULL_BYTE}" + - "#{scope.to_bson}" + "#{47.to_bson.to_s}#{(code.length + 1).to_bson.to_s}#{code}#{BSON::NULL_BYTE}" + + "#{scope.to_bson.to_s}" end it_behaves_like "a bson element" @@ -79,7 +79,7 @@ { "name" => "test" } end let(:obj) { described_class.new(code, scope) } - let(:bson) { StringIO.new(obj.to_bson) } + let(:bson) { StringIO.new(obj.to_bson.to_s) } let!(:deserialized) { described_class.from_bson(bson) } it "deserializes the javascript" do From 39a6aac6d1ace8120d179ab3b6609b137a9f65e7 Mon Sep 17 00:00:00 2001 From: Durran Jordan Date: Thu, 17 Sep 2015 15:27:06 +0200 Subject: [PATCH 06/29] RUBY-1019: Fix date/date-time/time serialization --- lib/bson/date.rb | 4 ++-- lib/bson/date_time.rb | 4 ++-- lib/bson/time.rb | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/bson/date.rb b/lib/bson/date.rb index 232ea6eb5..782368c2b 100644 --- a/lib/bson/date.rb +++ b/lib/bson/date.rb @@ -34,8 +34,8 @@ module Date # @see http://bsonspec.org/#/specification # # @since 2.1.0 - def to_bson(encoded = ''.force_encoding(BINARY)) - ::Time.utc(year, month, day).to_bson(encoded) + def to_bson(buffer = ByteBuffer.new) + ::Time.utc(year, month, day).to_bson(buffer) end # Get the BSON type for the date. 
diff --git a/lib/bson/date_time.rb b/lib/bson/date_time.rb index 45a3fa222..ee01e2229 100644 --- a/lib/bson/date_time.rb +++ b/lib/bson/date_time.rb @@ -34,8 +34,8 @@ module DateTime # @see http://bsonspec.org/#/specification # # @since 2.1.0 - def to_bson(encoded = ''.force_encoding(BINARY)) - to_time.to_bson(encoded) + def to_bson(buffer = ByteBuffer.new) + to_time.to_bson(buffer) end end diff --git a/lib/bson/time.rb b/lib/bson/time.rb index 8bd8fc439..2982258bf 100644 --- a/lib/bson/time.rb +++ b/lib/bson/time.rb @@ -37,8 +37,8 @@ module Time # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def to_bson(encoded = ''.force_encoding(BINARY)) - encoded << [ (to_i * 1000) + (usec / 1000) ].pack(Int64::PACK) + def to_bson(buffer = ByteBuffer.new) + buffer.put_int64((to_i * 1000) + (usec / 1000)) end module ClassMethods From cdd81bba28fb6eac5420bb3623a90f230842b881 Mon Sep 17 00:00:00 2001 From: Durran Jordan Date: Thu, 17 Sep 2015 15:36:14 +0200 Subject: [PATCH 07/29] RUBY-1019: Fix nil/object id/int serialization --- lib/bson/nil_class.rb | 16 ++-------------- lib/bson/object.rb | 2 +- lib/bson/object_id.rb | 16 ++++++++-------- spec/bson/integer_spec.rb | 7 +------ spec/bson/object_id_spec.rb | 6 +++--- 5 files changed, 15 insertions(+), 32 deletions(-) diff --git a/lib/bson/nil_class.rb b/lib/bson/nil_class.rb index 1e83d6c04..0821ec422 100644 --- a/lib/bson/nil_class.rb +++ b/lib/bson/nil_class.rb @@ -21,27 +21,15 @@ module BSON # # @since 2.0.0 module NilClass + include Specialized # A nil is type 0x0A in the BSON spec. # # @since 2.0.0 BSON_TYPE = 10.chr.force_encoding(BINARY).freeze - # Get the nil as encoded BSON. - # - # @example Get the nil as encoded BSON. - # nil.to_bson - # - # @return [ String ] An empty string. - # - # @see http://bsonspec.org/#/specification - # - # @since 2.0.0 - def to_bson(buffer = ByteBuffer.new) - buffer - end - module ClassMethods + # Deserialize NilClass from BSON. # # @param [ BSON ] bson The encoded Null value. 
diff --git a/lib/bson/object.rb b/lib/bson/object.rb index 7d6874c5e..bde31f223 100644 --- a/lib/bson/object.rb +++ b/lib/bson/object.rb @@ -31,7 +31,7 @@ module Object # @see http://bsonspec.org/#/specification # # @since 2.2.4 - def to_bson_key(encoded = ''.force_encoding(BINARY)) + def to_bson_key(buffer = ByteBuffer.new) raise InvalidKey.new(self) end diff --git a/lib/bson/object_id.rb b/lib/bson/object_id.rb index 0b8c041e7..4b3248166 100644 --- a/lib/bson/object_id.rb +++ b/lib/bson/object_id.rb @@ -44,7 +44,7 @@ class ObjectId # @since 2.0.0 def ==(other) return false unless other.is_a?(ObjectId) - to_bson == other.to_bson + to_bson.to_s == other.to_bson.to_s end alias :eql? :== @@ -86,7 +86,7 @@ def as_json(*args) # # @since 2.0.0 def <=>(other) - to_bson <=> other.to_bson + to_bson.to_s <=> other.to_bson.to_s end # Return the UTC time at which this ObjectId was generated. This may @@ -100,7 +100,7 @@ def <=>(other) # # @since 2.0.0 def generation_time - ::Time.at(to_bson.unpack("N")[0]).utc + ::Time.at(to_bson.to_s.unpack("N")[0]).utc end # Get the hash value for the object id. @@ -112,7 +112,7 @@ def generation_time # # @since 2.0.0 def hash - to_bson.hash + to_bson.to_s.hash end # Get a nice string for use with object inspection. @@ -136,7 +136,7 @@ def inspect # # @since 2.0.0 def marshal_dump - to_bson + to_bson.to_s end # Unmarshal the data into an object id. @@ -168,10 +168,10 @@ def marshal_load(data) # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def to_bson(encoded = ''.force_encoding(BINARY)) + def to_bson(buffer = ByteBuffer.new) repair if defined?(@data) @raw_data ||= @@generator.next_object_id - encoded << @raw_data + buffer.put_bytes(@raw_data) end # Get the string representation of the object id. 
@@ -183,7 +183,7 @@ def to_bson(encoded = ''.force_encoding(BINARY)) # # @since 2.0.0 def to_s - to_bson.to_hex_string.force_encoding(UTF8) + to_bson.to_s.to_hex_string.force_encoding(UTF8) end alias :to_str :to_s diff --git a/spec/bson/integer_spec.rb b/spec/bson/integer_spec.rb index 21fa6fa04..aeb28c6e9 100644 --- a/spec/bson/integer_spec.rb +++ b/spec/bson/integer_spec.rb @@ -63,14 +63,9 @@ let(:obj) { Integer::MAX_32BIT - 1 } let(:encoded) { obj.to_s + BSON::NULL_BYTE } - let(:previous_content) { 'previous_content'.force_encoding(BSON::BINARY) } it "returns the encoded string" do - expect(obj.to_bson_key).to eq(encoded) - end - - it "appends to optional previous content" do - expect(obj.to_bson_key(previous_content)).to eq(previous_content << encoded) + expect(obj.to_bson_key.to_s).to eq(encoded) end end end diff --git a/spec/bson/object_id_spec.rb b/spec/bson/object_id_spec.rb index 9adffed53..0a5d7e8e3 100644 --- a/spec/bson/object_id_spec.rb +++ b/spec/bson/object_id_spec.rb @@ -379,7 +379,7 @@ end it "returns a hash of the raw bytes" do - expect(object_id.hash).to eq(object_id.to_bson.hash) + expect(object_id.hash).to eq(object_id.to_bson.to_s.hash) end end @@ -488,7 +488,7 @@ let(:time) { Time.utc(2013, 1, 1) } let(:type) { 7.chr } let(:obj) { described_class.from_time(time) } - let(:bson) { obj.to_bson } + let(:bson) { obj.to_bson.to_s } it_behaves_like "a bson element" it_behaves_like "a serializable bson element" @@ -543,7 +543,7 @@ { object_id => 1 } end - it "raises an exception on serialization" do + pending "raises an exception on serialization" do expect { hash.to_bson }.to raise_error(BSON::InvalidKey) From 855d3d384590ddb57d538ec7e266717227da8ecb Mon Sep 17 00:00:00 2001 From: Durran Jordan Date: Thu, 17 Sep 2015 15:50:51 +0200 Subject: [PATCH 08/29] RUBY-1019: Fix string serialization --- lib/bson/byte_buffer.rb | 1 + lib/bson/regexp.rb | 6 +- lib/bson/string.rb | 32 ++------- lib/bson/symbol.rb | 4 +- spec/bson/byte_buffer_spec.rb | 92 
++++++++++++++++++++++--- spec/bson/document_spec.rb | 12 ++-- spec/bson/string_spec.rb | 118 ++------------------------------ spec/bson/symbol_spec.rb | 10 +-- spec/support/shared_examples.rb | 2 +- 9 files changed, 108 insertions(+), 169 deletions(-) diff --git a/lib/bson/byte_buffer.rb b/lib/bson/byte_buffer.rb index 2c19e1aab..24f929323 100644 --- a/lib/bson/byte_buffer.rb +++ b/lib/bson/byte_buffer.rb @@ -64,6 +64,7 @@ def put_byte(value) # # @since 4.0.0 def put_cstring(value) + # @todo: check_for_illegal_characters! @buffer << value << NULL_BYTE self end diff --git a/lib/bson/regexp.rb b/lib/bson/regexp.rb index bf7f742ae..e28fa3bb7 100644 --- a/lib/bson/regexp.rb +++ b/lib/bson/regexp.rb @@ -84,9 +84,9 @@ def as_json(*args) # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def to_bson(encoded = ''.force_encoding(BINARY)) - source.to_bson_cstring(encoded) - bson_options.to_bson_cstring(encoded) + def to_bson(buffer = ByteBuffer.new) + buffer.put_cstring(source) + buffer.put_cstring(bson_options) end private diff --git a/lib/bson/string.rb b/lib/bson/string.rb index 9215a867b..af5bb5722 100644 --- a/lib/bson/string.rb +++ b/lib/bson/string.rb @@ -22,7 +22,6 @@ module BSON # # @since 2.0.0 module String - include Encodable # A string is type 0x02 in the BSON spec. # @@ -57,26 +56,7 @@ def to_bson(buffer = ByteBuffer.new) # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def to_bson_key(encoded = ''.force_encoding(BINARY)) - to_bson_cstring(encoded) - end - - # Get the string as an encoded C string. - # - # @example Get the string as an encoded C string. - # "test".to_bson_cstring - # - # @raise [ EncodingError ] If the string is not UTF-8. - # - # @return [ String ] The encoded string. - # - # @see http://bsonspec.org/#/specification - # - # @since 2.0.0 - def to_bson_cstring(encoded = ''.force_encoding(BINARY)) - check_for_illegal_characters! 
- to_bson_string(encoded) << NULL_BYTE - end + def to_bson_key; self; end # Convert the string to an object id. This will only work for strings of size # 12. @@ -106,13 +86,13 @@ def to_bson_object_id # @return [ String ] The binary string. # # @since 2.0.0 - def to_bson_string(buffer = ByteBuffer.new) + def to_bson_string begin - to_utf8_binary(buffer) + to_utf8_binary rescue EncodingError data = dup.force_encoding(UTF8) raise unless data.valid_encoding? - buffer.put_bytes(data.force_encoding(BINARY)) + data.force_encoding(BINARY) end end @@ -159,8 +139,8 @@ def set_int32(pos, int32) private - def to_utf8_binary(encoded) - encoded << encode(UTF8).force_encoding(BINARY) + def to_utf8_binary + encode(UTF8).force_encoding(BINARY) end module ClassMethods diff --git a/lib/bson/symbol.rb b/lib/bson/symbol.rb index 807de4890..4d4268173 100644 --- a/lib/bson/symbol.rb +++ b/lib/bson/symbol.rb @@ -54,8 +54,8 @@ def to_bson(buffer = ByteBuffer.new) # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def to_bson_key(encoded = ''.force_encoding(BINARY)) - to_s.to_bson_key(encoded) + def to_bson_key + to_s.to_bson_key end # Converts the symbol to a normalized key in a BSON document. 
diff --git a/spec/bson/byte_buffer_spec.rb b/spec/bson/byte_buffer_spec.rb index a1b7f7041..3a12ba8eb 100644 --- a/spec/bson/byte_buffer_spec.rb +++ b/spec/bson/byte_buffer_spec.rb @@ -35,27 +35,99 @@ end let(:modified) do - buffer.put_byte(BSON::Integer::INT32_TYPE) + buffer.put_byte(BSON::Int32::BSON_TYPE) end it 'appends the int32 to the byte buffer' do - expect(modified.to_s).to eq(BSON::Integer::INT32_TYPE.chr) + expect(modified.to_s).to eq(BSON::Int32::BSON_TYPE.chr) end end describe '#put_cstring' do - let(:buffer) do - described_class.new - end + context 'when the string is valid' do - let(:modified) do - buffer.put_cstring('testing') - end + let(:buffer) do + described_class.new + end + + let(:modified) do + buffer.put_cstring('testing') + end + + it 'appends the string plus null byte to the byte buffer' do + expect(modified.to_s).to eq("testing#{BSON::NULL_BYTE}") + end - it 'appends the string plus null byte to the byte buffer' do - expect(modified.to_s).to eq("testing#{BSON::NULL_BYTE}") end + + # context "when the string contains a null byte" do + + # let(:string) do + # "test#{BSON::NULL_BYTE}ing" + # end + + # it "raises an error" do + # expect { + # string.to_bson_cstring + # }.to raise_error(ArgumentError) + # end + # end + + # context "when the string contains utf-8 characters" do + + # let(:string) do + # "Straße" + # end + + # let(:encoded) do + # string.to_bson_cstring + # end + + # let(:char) do + # "ß".chr.force_encoding(BSON::BINARY) + # end + + # it "returns the encoded string" do + # expect(encoded).to eq("Stra#{char}e#{BSON::NULL_BYTE}") + # end + + # it_behaves_like "a binary encoded string" + # end + + # context "when the string is encoded in non utf-8" do + + # let(:string) do + # "Straße".encode("iso-8859-1") + # end + + # let(:encoded) do + # string.to_bson_cstring + # end + + # let(:char) do + # "ß".chr.force_encoding(BSON::BINARY) + # end + + # it "returns the encoded string" do + # expect(encoded).to 
eq("Stra#{char}e#{BSON::NULL_BYTE}") + # end + + # it_behaves_like "a binary encoded string" + # end + + # context "when the string contains non utf-8 characters" do + + # let(:string) do + # 255.chr + # end + + # it "raises an error" do + # expect { + # string.to_bson_cstring + # }.to raise_error(EncodingError) + # end + # end end describe '#put_double' do diff --git a/spec/bson/document_spec.rb b/spec/bson/document_spec.rb index ec314066f..cf08fc7ec 100644 --- a/spec/bson/document_spec.rb +++ b/spec/bson/document_spec.rb @@ -653,7 +653,7 @@ end let(:serialized) do - document.to_bson + document.to_bson.to_s end let(:deserialized) do @@ -688,7 +688,7 @@ end it "properly serializes the symbol" do - expect(obj.to_bson).to eq(bson) + expect(obj.to_bson.to_s).to eq(bson) end end @@ -768,7 +768,7 @@ end let(:deserialized) do - described_class.from_bson(StringIO.new(document.to_bson)) + described_class.from_bson(StringIO.new(document.to_bson.to_s)) end it "serializes and deserializes properly" do @@ -819,9 +819,9 @@ described_class["type", string.encode("iso-8859-1")] end - it "encodes and decodes the document properly" do + pending "encodes and decodes the document properly" do expect( - BSON::Document.from_bson(StringIO.new(document.to_bson)) + BSON::Document.from_bson(StringIO.new(document.to_bson.to_s)) ).to eq({ "type" => string }) end end @@ -835,7 +835,7 @@ it "encodes and decodes the document properly" do expect( - BSON::Document.from_bson(StringIO.new(document.to_bson)) + BSON::Document.from_bson(StringIO.new(document.to_bson.to_s)) ).to eq({ "type" => string }) end end diff --git a/spec/bson/string_spec.rb b/spec/bson/string_spec.rb index 56e78efd8..aa27e4dea 100644 --- a/spec/bson/string_spec.rb +++ b/spec/bson/string_spec.rb @@ -22,109 +22,13 @@ let(:type) { 2.chr } let(:obj) { "test" } - let(:bson) { "#{5.to_bson}test#{BSON::NULL_BYTE}" } + let(:bson) { "#{5.to_bson.to_s}test#{BSON::NULL_BYTE}" } it_behaves_like "a bson element" it_behaves_like "a 
serializable bson element" it_behaves_like "a deserializable bson element" end - describe "#to_bson_cstring" do - - context "when the string is valid" do - - let(:string) do - "test" - end - - let(:encoded) do - string.to_bson_cstring - end - - let(:previous_content) do - 'previous_content'.force_encoding(BSON::BINARY) - end - - it "returns the encoded string" do - expect(encoded).to eq("test#{BSON::NULL_BYTE}") - end - - it_behaves_like "a binary encoded string" - - it "appends to optional previous content" do - expect(string.to_bson_cstring(previous_content)).to eq(previous_content << encoded) - end - end - - context "when the string contains a null byte" do - - let(:string) do - "test#{BSON::NULL_BYTE}ing" - end - - it "raises an error" do - expect { - string.to_bson_cstring - }.to raise_error(ArgumentError) - end - end - - context "when the string contains utf-8 characters" do - - let(:string) do - "Straße" - end - - let(:encoded) do - string.to_bson_cstring - end - - let(:char) do - "ß".chr.force_encoding(BSON::BINARY) - end - - it "returns the encoded string" do - expect(encoded).to eq("Stra#{char}e#{BSON::NULL_BYTE}") - end - - it_behaves_like "a binary encoded string" - end - - context "when the string is encoded in non utf-8" do - - let(:string) do - "Straße".encode("iso-8859-1") - end - - let(:encoded) do - string.to_bson_cstring - end - - let(:char) do - "ß".chr.force_encoding(BSON::BINARY) - end - - it "returns the encoded string" do - expect(encoded).to eq("Stra#{char}e#{BSON::NULL_BYTE}") - end - - it_behaves_like "a binary encoded string" - end - - context "when the string contains non utf-8 characters" do - - let(:string) do - 255.chr - end - - it "raises an error" do - expect { - string.to_bson_cstring - }.to raise_error(EncodingError) - end - end - end - describe "#to_bson_object_id" do context "when the string has 12 characters" do @@ -164,20 +68,11 @@ string.to_bson_string end - let(:previous_content) do - 
'previous_content'.force_encoding(BSON::BINARY) - end - it "returns the string" do expect(encoded).to eq(string) end it_behaves_like "a binary encoded string" - - it "appends to optional previous content" do - expect(string.to_bson_string(previous_content)).to eq(previous_content << encoded) - end - end context "when the string contains a null byte" do @@ -204,7 +99,7 @@ end let(:encoded) do - string.to_bson_string + string.to_bson_string.to_s end let(:char) do @@ -225,7 +120,7 @@ end let(:encoded) do - string.to_bson_string + string.to_bson_string.to_s end let(:char) do @@ -267,16 +162,11 @@ describe "#to_bson_key" do let(:string) { "test" } - let(:encoded) { string.to_s + BSON::NULL_BYTE } - let(:previous_content) { 'previous_content'.force_encoding(BSON::BINARY) } + let(:encoded) { string.to_s } it "returns the encoded string" do expect(string.to_bson_key).to eq(encoded) end - - it "appends to optional previous content" do - expect(string.to_bson_key(previous_content)).to eq(previous_content << encoded) - end end describe "#to_hex_string" do diff --git a/spec/bson/symbol_spec.rb b/spec/bson/symbol_spec.rb index 33632197b..8489b09fa 100644 --- a/spec/bson/symbol_spec.rb +++ b/spec/bson/symbol_spec.rb @@ -20,7 +20,7 @@ let(:type) { 14.chr } let(:obj) { :test } - let(:bson) { "#{5.to_bson}test#{BSON::NULL_BYTE}" } + let(:bson) { "#{5.to_bson.to_s}test#{BSON::NULL_BYTE}" } it_behaves_like "a bson element" it_behaves_like "a serializable bson element" @@ -31,18 +31,14 @@ describe "#to_bson_key" do let(:symbol) { :test } - let(:encoded) { symbol.to_s + BSON::NULL_BYTE } - let(:previous_content) { 'previous_content'.force_encoding(BSON::BINARY) } + let(:encoded) { symbol.to_s } it "returns the encoded string" do expect(symbol.to_bson_key).to eq(encoded) end - it "appends to optional previous content" do - expect(symbol.to_bson_key(previous_content)).to eq(previous_content << encoded) - end + pending 'when the symbol contains a null byte' do - context 'when the symbol 
contains a null byte' do let(:symbol) { :"test#{BSON::NULL_BYTE}ing" } it 'raises an error' do diff --git a/spec/support/shared_examples.rb b/spec/support/shared_examples.rb index 518e12756..014055cc7 100644 --- a/spec/support/shared_examples.rb +++ b/spec/support/shared_examples.rb @@ -104,7 +104,7 @@ it "serializes and deserializes properly" do expect( - BSON::Document.from_bson(StringIO.new(document.to_bson)) + BSON::Document.from_bson(StringIO.new(document.to_bson.to_s)) ).to eq(document) end end From df49d9fac11815bcb6217fbaa8c50872a5cf7cc9 Mon Sep 17 00:00:00 2001 From: Durran Jordan Date: Thu, 17 Sep 2015 16:39:02 +0200 Subject: [PATCH 09/29] Remove profiling --- Rakefile | 6 ------ perf/bench.rb | 23 ----------------------- 2 files changed, 29 deletions(-) diff --git a/Rakefile b/Rakefile index 2c7010479..cc33964dd 100644 --- a/Rakefile +++ b/Rakefile @@ -110,12 +110,6 @@ namespace :benchmark do require "bson" benchmark! end - - task :profile => :compile do - puts "Profiling with native extensions..." - require "bson" - profile! - end end task :default => [ :clean_all, :spec, :ext_spec ] diff --git a/perf/bench.rb b/perf/bench.rb index 1d0acff91..de2a3b480 100644 --- a/perf/bench.rb +++ b/perf/bench.rb @@ -14,7 +14,6 @@ $:.unshift File.join(File.dirname(__FILE__), "..", "lib") require "benchmark" -require "ruby-prof" def benchmark! count = 1_000_000 @@ -163,25 +162,3 @@ def benchmark! # end end end - -def profile! 
- count = 1_000 - - document = BSON::Document.new(field1: 'testing', field2: 'testing') - embedded = 5.times.map do |i| - BSON::Document.new(field1: 10, field2: 'test') - end - document[:embedded] = embedded - - document_serialization = RubyProf.profile do - count.times { document.to_bson } - end - - doc_bytes = document.to_bson - document_deserialization = RubyProf.profile do - count.times { BSON::Document.from_bson(StringIO.new(doc_bytes)) } - end - - RubyProf::GraphPrinter.new(document_serialization).print($stdout) - RubyProf::GraphPrinter.new(document_deserialization).print($stdout) -end From f869fee7765e19cb372f8fb422f0ab56eb64f12d Mon Sep 17 00:00:00 2001 From: Durran Jordan Date: Thu, 17 Sep 2015 16:59:39 +0200 Subject: [PATCH 10/29] RUBY-1019: Fix cstring appending --- ext/bson/native.c | 2 + lib/bson/byte_buffer.rb | 10 ++++- lib/bson/hash.rb | 2 +- lib/bson/string.rb | 29 ++---------- spec/bson/byte_buffer_spec.rb | 83 ++++++----------------------------- spec/bson/document_spec.rb | 2 +- spec/bson/object_id_spec.rb | 2 +- spec/bson/symbol_spec.rb | 11 ----- 8 files changed, 32 insertions(+), 109 deletions(-) diff --git a/ext/bson/native.c b/ext/bson/native.c index 67cfc5519..f2c38c9ef 100644 --- a/ext/bson/native.c +++ b/ext/bson/native.c @@ -143,6 +143,8 @@ VALUE rb_bson_byte_buffer_put_cstring(VALUE self, VALUE string) byte_buffer_t *b; const char *c_str = RSTRING_PTR(string); const size_t length = RSTRING_LEN(string) + 1; + if (strlen(c_str) < length - 1) + rb_raise(rb_eArgError, "Illegal C-String %s contains a null byte.", c_str); TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); ENSURE_BSON_WRITE(b, length); diff --git a/lib/bson/byte_buffer.rb b/lib/bson/byte_buffer.rb index 24f929323..7af7568c3 100644 --- a/lib/bson/byte_buffer.rb +++ b/lib/bson/byte_buffer.rb @@ -64,7 +64,7 @@ def put_byte(value) # # @since 4.0.0 def put_cstring(value) - # @todo: check_for_illegal_characters! 
+ check_for_illegal_characters!(value) @buffer << value << NULL_BYTE self end @@ -150,5 +150,13 @@ def replace_int32(location, value) def to_s @buffer end + + private + + def check_for_illegal_characters!(value) + if value.include?(NULL_BYTE) + raise(ArgumentError, "Illegal C-String '#{value}' contains a null byte.") + end + end end end diff --git a/lib/bson/hash.rb b/lib/bson/hash.rb index 737e0b621..834fb581b 100644 --- a/lib/bson/hash.rb +++ b/lib/bson/hash.rb @@ -42,7 +42,7 @@ def to_bson(buffer = ByteBuffer.new) buffer.put_int32(0) each do |field, value| buffer.put_byte(value.bson_type) - buffer.put_cstring(field.to_s) # @todo: to_bson_key? + buffer.put_cstring(field.to_bson_key) value.to_bson(buffer) end buffer.put_byte(NULL_BYTE) diff --git a/lib/bson/string.rb b/lib/bson/string.rb index af5bb5722..0825ace21 100644 --- a/lib/bson/string.rb +++ b/lib/bson/string.rb @@ -41,7 +41,7 @@ module String # # @since 2.0.0 def to_bson(buffer = ByteBuffer.new) - buffer.put_string(self) + buffer.put_string(to_bson_string) end # Get the string as a BSON key name encoded C string with checking for special characters. @@ -56,7 +56,9 @@ def to_bson(buffer = ByteBuffer.new) # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def to_bson_key; self; end + def to_bson_key + to_bson_string + end # Convert the string to an object id. This will only work for strings of size # 12. @@ -122,21 +124,6 @@ def from_bson_string force_encoding(UTF8) end - # Set four bytes for int32 in a binary string and return it. - # - # @example Set int32 in a BSON string. - # "".set_int32(pos, int32) - # - # @param [ Fixnum ] The position to set. - # @param [ Fixnum ] The int32 value. - # - # @return [ String ] The binary string. - # - # @since 2.0.0 - def set_int32(pos, int32) - self[pos, 4] = [ int32 ].pack(Int32::PACK) - end - private def to_utf8_binary @@ -159,14 +146,6 @@ def from_bson(bson) end end - private - - def check_for_illegal_characters! 
- if include?(NULL_BYTE) - raise(ArgumentError, "Illegal C-String '#{self}' contains a null byte.") - end - end - # Register this type when the module is loaded. # # @since 2.0.0 diff --git a/spec/bson/byte_buffer_spec.rb b/spec/bson/byte_buffer_spec.rb index 3a12ba8eb..25a4845fc 100644 --- a/spec/bson/byte_buffer_spec.rb +++ b/spec/bson/byte_buffer_spec.rb @@ -45,11 +45,11 @@ describe '#put_cstring' do - context 'when the string is valid' do + let(:buffer) do + described_class.new + end - let(:buffer) do - described_class.new - end + context 'when the string is valid' do let(:modified) do buffer.put_cstring('testing') @@ -61,73 +61,18 @@ end - # context "when the string contains a null byte" do - - # let(:string) do - # "test#{BSON::NULL_BYTE}ing" - # end - - # it "raises an error" do - # expect { - # string.to_bson_cstring - # }.to raise_error(ArgumentError) - # end - # end - - # context "when the string contains utf-8 characters" do - - # let(:string) do - # "Straße" - # end - - # let(:encoded) do - # string.to_bson_cstring - # end - - # let(:char) do - # "ß".chr.force_encoding(BSON::BINARY) - # end + context "when the string contains a null byte" do - # it "returns the encoded string" do - # expect(encoded).to eq("Stra#{char}e#{BSON::NULL_BYTE}") - # end - - # it_behaves_like "a binary encoded string" - # end - - # context "when the string is encoded in non utf-8" do - - # let(:string) do - # "Straße".encode("iso-8859-1") - # end - - # let(:encoded) do - # string.to_bson_cstring - # end - - # let(:char) do - # "ß".chr.force_encoding(BSON::BINARY) - # end - - # it "returns the encoded string" do - # expect(encoded).to eq("Stra#{char}e#{BSON::NULL_BYTE}") - # end - - # it_behaves_like "a binary encoded string" - # end - - # context "when the string contains non utf-8 characters" do - - # let(:string) do - # 255.chr - # end + let(:string) do + "test#{BSON::NULL_BYTE}ing" + end - # it "raises an error" do - # expect { - # string.to_bson_cstring - # }.to 
raise_error(EncodingError) - # end - # end + it "raises an error" do + expect { + buffer.put_cstring(string) + }.to raise_error(ArgumentError) + end + end end describe '#put_double' do diff --git a/spec/bson/document_spec.rb b/spec/bson/document_spec.rb index cf08fc7ec..0fef159eb 100644 --- a/spec/bson/document_spec.rb +++ b/spec/bson/document_spec.rb @@ -819,7 +819,7 @@ described_class["type", string.encode("iso-8859-1")] end - pending "encodes and decodes the document properly" do + it "encodes and decodes the document properly" do expect( BSON::Document.from_bson(StringIO.new(document.to_bson.to_s)) ).to eq({ "type" => string }) diff --git a/spec/bson/object_id_spec.rb b/spec/bson/object_id_spec.rb index 0a5d7e8e3..f2b5235be 100644 --- a/spec/bson/object_id_spec.rb +++ b/spec/bson/object_id_spec.rb @@ -543,7 +543,7 @@ { object_id => 1 } end - pending "raises an exception on serialization" do + it "raises an exception on serialization" do expect { hash.to_bson }.to raise_error(BSON::InvalidKey) diff --git a/spec/bson/symbol_spec.rb b/spec/bson/symbol_spec.rb index 8489b09fa..c315dd01d 100644 --- a/spec/bson/symbol_spec.rb +++ b/spec/bson/symbol_spec.rb @@ -36,16 +36,5 @@ it "returns the encoded string" do expect(symbol.to_bson_key).to eq(encoded) end - - pending 'when the symbol contains a null byte' do - - let(:symbol) { :"test#{BSON::NULL_BYTE}ing" } - - it 'raises an error' do - expect { - symbol.to_bson_key - }.to raise_error(ArgumentError) - end - end end end From 6e4f69dcb46bd5d43bb18306dc3b57bbab3669c9 Mon Sep 17 00:00:00 2001 From: Durran Jordan Date: Fri, 18 Sep 2015 18:31:15 +0200 Subject: [PATCH 11/29] RUBY-1019: Validate UTF-8 inside buffer --- ext/bson/native.c | 186 +++++++++++++++++++++++++++++++++- lib/bson.rb | 1 - lib/bson/byte_buffer.rb | 16 ++- lib/bson/encodable.rb | 86 ---------------- lib/bson/string.rb | 31 +----- spec/bson/byte_buffer_spec.rb | 92 +++++++++++++++++ spec/bson/document_spec.rb | 12 +-- spec/bson/string_spec.rb | 92 
----------------- 8 files changed, 294 insertions(+), 222 deletions(-) delete mode 100644 lib/bson/encodable.rb diff --git a/ext/bson/native.c b/ext/bson/native.c index f2c38c9ef..146318b65 100644 --- a/ext/bson/native.c +++ b/ext/bson/native.c @@ -14,6 +14,8 @@ * limitations under the License. */ #include +#include +#include #include "portable_endian.h" #define BSON_BYTE_BUFFER_SIZE 512 @@ -53,6 +55,7 @@ static VALUE rb_bson_byte_buffer_to_s(VALUE self); static size_t rb_bson_byte_buffer_memsize(const void *ptr); static void rb_bson_byte_buffer_free(void *ptr); static void rb_bson_expand_buffer(byte_buffer_t* buffer_ptr, size_t length); +static bool rb_bson_utf8_validate(const char *utf8, size_t utf8_len, bool allow_null); static const rb_data_type_t rb_byte_buffer_data_type = { "bson/byte_buffer", @@ -141,10 +144,12 @@ VALUE rb_bson_byte_buffer_put_bytes(VALUE self, VALUE bytes) VALUE rb_bson_byte_buffer_put_cstring(VALUE self, VALUE string) { byte_buffer_t *b; - const char *c_str = RSTRING_PTR(string); - const size_t length = RSTRING_LEN(string) + 1; - if (strlen(c_str) < length - 1) - rb_raise(rb_eArgError, "Illegal C-String %s contains a null byte.", c_str); + char *c_str = RSTRING_PTR(string); + size_t length = RSTRING_LEN(string) + 1; + + if (!rb_bson_utf8_validate(c_str, length - 1, false)) { + rb_raise(rb_eArgError, "String %s is not a valid UTF-8 CString.", c_str); + } TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); ENSURE_BSON_WRITE(b, length); @@ -210,9 +215,14 @@ VALUE rb_bson_byte_buffer_put_int64(VALUE self, VALUE i) VALUE rb_bson_byte_buffer_put_string(VALUE self, VALUE string) { byte_buffer_t *b; - const char *str = RSTRING_PTR(string); + + char *str = RSTRING_PTR(string); const size_t length = RSTRING_LEN(string) + 1; + if (!rb_bson_utf8_validate(str, length - 1, true)) { + rb_raise(rb_eArgError, "String %s is not valid UTF-8.", str); + } + TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); 
ENSURE_BSON_WRITE(b, length + 4); *((int32_t*)WRITE_PTR(b)) = htole32(length); @@ -290,3 +300,169 @@ void rb_bson_expand_buffer(byte_buffer_t* buffer_ptr, size_t length) buffer_ptr->read_position = 0; } } + +/** + * Taken from libbson. + */ +static void _bson_utf8_get_sequence(const char *utf8, uint8_t *seq_length, uint8_t *first_mask) +{ + unsigned char c = *(const unsigned char *)utf8; + uint8_t m; + uint8_t n; + + /* + * See the following[1] for a description of what the given multi-byte + * sequences will be based on the bits set of the first byte. We also need + * to mask the first byte based on that. All subsequent bytes are masked + * against 0x3F. + * + * [1] http://www.joelonsoftware.com/articles/Unicode.html + */ + + if ((c & 0x80) == 0) { + n = 1; + m = 0x7F; + } else if ((c & 0xE0) == 0xC0) { + n = 2; + m = 0x1F; + } else if ((c & 0xF0) == 0xE0) { + n = 3; + m = 0x0F; + } else if ((c & 0xF8) == 0xF0) { + n = 4; + m = 0x07; + } else if ((c & 0xFC) == 0xF8) { + n = 5; + m = 0x03; + } else if ((c & 0xFE) == 0xFC) { + n = 6; + m = 0x01; + } else { + n = 0; + m = 0; + } + + *seq_length = n; + *first_mask = m; +} + +/** + * Taken from libbson. + */ +bool rb_bson_utf8_validate(const char *utf8, size_t utf8_len, bool allow_null) +{ + uint32_t c; + uint8_t first_mask; + uint8_t seq_length; + unsigned i; + unsigned j; + + if (!utf8) { + return false; + } + + for (i = 0; i < utf8_len; i += seq_length) { + _bson_utf8_get_sequence(&utf8[i], &seq_length, &first_mask); + + /* + * Ensure we have a valid multi-byte sequence length. + */ + if (!seq_length) { + return false; + } + + /* + * Ensure we have enough bytes left. + */ + if ((utf8_len - i) < seq_length) { + return false; + } + + /* + * Also calculate the next char as a unichar so we can + * check code ranges for non-shortest form. + */ + c = utf8 [i] & first_mask; + + /* + * Check the high-bits for each additional sequence byte. 
+ */ + for (j = i + 1; j < (i + seq_length); j++) { + c = (c << 6) | (utf8 [j] & 0x3F); + if ((utf8[j] & 0xC0) != 0x80) { + return false; + } + } + + /* + * Check for NULL bytes afterwards. + * + * Hint: if you want to optimize this function, starting here to do + * this in the same pass as the data above would probably be a good + * idea. You would add a branch into the inner loop, but save possibly + * on cache-line bouncing on larger strings. Just a thought. + */ + if (!allow_null) { + for (j = 0; j < seq_length; j++) { + if (((i + j) > utf8_len) || !utf8[i + j]) { + return false; + } + } + } + + /* + * Code point wont fit in utf-16, not allowed. + */ + if (c > 0x0010FFFF) { + return false; + } + + /* + * Byte is in reserved range for UTF-16 high-marks + * for surrogate pairs. + */ + if ((c & 0xFFFFF800) == 0xD800) { + return false; + } + + /* + * Check non-shortest form unicode. + */ + switch (seq_length) { + case 1: + if (c <= 0x007F) { + continue; + } + return false; + + case 2: + if ((c >= 0x0080) && (c <= 0x07FF)) { + continue; + } else if (c == 0) { + /* Two-byte representation for NULL. 
*/ + continue; + } + return false; + + case 3: + if (((c >= 0x0800) && (c <= 0x0FFF)) || + ((c >= 0x1000) && (c <= 0xFFFF))) { + continue; + } + return false; + + case 4: + if (((c >= 0x10000) && (c <= 0x3FFFF)) || + ((c >= 0x40000) && (c <= 0xFFFFF)) || + ((c >= 0x100000) && (c <= 0x10FFFF))) { + continue; + } + return false; + + default: + return false; + } + } + + return true; +} diff --git a/lib/bson.rb b/lib/bson.rb index a7e2e95e8..5f08c3b10 100644 --- a/lib/bson.rb +++ b/lib/bson.rb @@ -63,7 +63,6 @@ def self.ObjectId(string) require "bson/int32" require "bson/int64" require "bson/integer" -require "bson/encodable" require "bson/array" require "bson/binary" require "bson/boolean" diff --git a/lib/bson/byte_buffer.rb b/lib/bson/byte_buffer.rb index 7af7568c3..b6aa3a152 100644 --- a/lib/bson/byte_buffer.rb +++ b/lib/bson/byte_buffer.rb @@ -64,7 +64,8 @@ def put_byte(value) # # @since 4.0.0 def put_cstring(value) - check_for_illegal_characters!(value) + validate_cstring(value) + validate_utf8(value) @buffer << value << NULL_BYTE self end @@ -125,6 +126,7 @@ def put_int64(value) # # @since 4.0.0 def put_string(value) + validate_utf8(value) put_int32(value.bytesize + 1) @buffer << value @buffer << NULL_BYTE @@ -153,9 +155,17 @@ def to_s private - def check_for_illegal_characters!(value) + def validate_cstring(value) if value.include?(NULL_BYTE) - raise(ArgumentError, "Illegal C-String '#{value}' contains a null byte.") + raise ArgumentError, "Illegal CString #{value.inspect} contains a null byte." + end + end + + def validate_utf8(value) + begin + value.unpack("U*") + rescue + raise ArgumentError, "String #{value.inspect} is not valid UTF-8." end end end diff --git a/lib/bson/encodable.rb b/lib/bson/encodable.rb deleted file mode 100644 index bf727552b..000000000 --- a/lib/bson/encodable.rb +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (C) 2009-2014 MongoDB Inc. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -module BSON - - # Defines behaviour around objects that can be encoded. - # - # @since 2.0.0 - module Encodable - - # A 4 byte placeholder that would be replaced by a length at a later point. - # - # @since 2.0.0 - PLACEHOLDER = 0.to_bson.freeze - - # Adjustment value for total number of document bytes. - # - # @since 2.0.0 - BSON_ADJUST = 0.freeze - - # Adjustment value for total number of string bytes. - # - # @since 2.0.0 - STRING_ADJUST = -4.freeze - - # Encodes BSON to raw bytes, for types that require the length of the - # entire bytes to be present as the first word of the encoded string. This - # includes Hash, CodeWithScope. - # - # @example Encode the BSON with placeholder bytes. - # hash.encode_with_placeholder_and_null(BSON_ADJUST, encoded) do |encoded| - # each do |field, value| - # value.to_bson(encoded) - # end - # end - # - # @param [ Integer ] adjust The number of bytes to adjust with. - # @param [ String ] encoded The string to encode. - # - # @return [ String ] The encoded string. - # - # @since 2.0.0 - def encode_with_placeholder_and_null(adjust, encoded = ''.force_encoding(BINARY)) - pos = encoded.bytesize - encoded << PLACEHOLDER - yield(encoded) - encoded << NULL_BYTE - encoded.set_int32(pos, encoded.bytesize - pos + adjust) - encoded - end - - # Encodes binary data with a generic placeholder value to be written later - # once all bytes have been written. 
- # - # @example Encode the BSON with placeholder bytes. - # string.encode_binary_data_with_placeholder(encoded) do |encoded| - # each do |field, value| - # value.to_bson(encoded) - # end - # end - # - # @param [ String ] encoded The string to encode. - # - # @return [ String ] The encoded string. - # - # @since 2.0.0 - def encode_binary_data_with_placeholder(encoded = ''.force_encoding(BINARY)) - pos = encoded.bytesize - encoded << PLACEHOLDER - yield(encoded) - encoded.set_int32(pos, encoded.bytesize - pos - 5) - encoded - end - end -end diff --git a/lib/bson/string.rb b/lib/bson/string.rb index 0825ace21..899a88eb6 100644 --- a/lib/bson/string.rb +++ b/lib/bson/string.rb @@ -41,7 +41,7 @@ module String # # @since 2.0.0 def to_bson(buffer = ByteBuffer.new) - buffer.put_string(to_bson_string) + buffer.put_string(self) end # Get the string as a BSON key name encoded C string with checking for special characters. @@ -57,7 +57,7 @@ def to_bson(buffer = ByteBuffer.new) # # @since 2.0.0 def to_bson_key - to_bson_string + self end # Convert the string to an object id. This will only work for strings of size @@ -77,27 +77,6 @@ def to_bson_object_id ObjectId.repair(self) end - # Convert the string to a UTF-8 string then force to binary. This is so - # we get errors for strings that are not UTF-8 encoded. - # - # @example Convert to valid BSON string. - # "Straße".to_bson_string - # - # @raise [ EncodingError ] If the string is not UTF-8. - # - # @return [ String ] The binary string. - # - # @since 2.0.0 - def to_bson_string - begin - to_utf8_binary - rescue EncodingError - data = dup.force_encoding(UTF8) - raise unless data.valid_encoding? - data.force_encoding(BINARY) - end - end - # Convert the string to a hexidecimal representation. # # @example Convert the string to hex. @@ -124,12 +103,6 @@ def from_bson_string force_encoding(UTF8) end - private - - def to_utf8_binary - encode(UTF8).force_encoding(BINARY) - end - module ClassMethods # Deserialize a string from BSON. 
diff --git a/spec/bson/byte_buffer_spec.rb b/spec/bson/byte_buffer_spec.rb index 25a4845fc..f7ace8e8d 100644 --- a/spec/bson/byte_buffer_spec.rb +++ b/spec/bson/byte_buffer_spec.rb @@ -171,4 +171,96 @@ expect(modified.to_s).to eq("#{exp_first}#{exp_second}") end end + + # describe "#to_bson_string" do + + # context "when the string is valid" do + + # let(:string) do + # "test" + # end + + # let(:encoded) do + # string.to_bson_string + # end + + # it "returns the string" do + # expect(encoded).to eq(string) + # end + + # it_behaves_like "a binary encoded string" + # end + + # context "when the string contains a null byte" do + + # let(:string) do + # "test#{BSON::NULL_BYTE}ing" + # end + + # let(:encoded) do + # string.to_bson_string + # end + + # it "retains the null byte" do + # expect(encoded).to eq(string) + # end + + # it_behaves_like "a binary encoded string" + # end + + # context "when the string contains utf-8 characters" do + + # let(:string) do + # "Straße" + # end + + # let(:encoded) do + # string.to_bson_string.to_s + # end + + # let(:char) do + # "ß".chr.force_encoding(BSON::BINARY) + # end + + # it "returns the encoded string" do + # expect(encoded).to eq("Stra#{char}e") + # end + + # it_behaves_like "a binary encoded string" + # end + + # context "when the string is encoded in non utf-8" do + + # let(:string) do + # "Straße".encode("iso-8859-1") + # end + + # let(:encoded) do + # string.to_bson_string.to_s + # end + + # let(:char) do + # "ß".chr.force_encoding(BSON::BINARY) + # end + + # it "returns the encoded string" do + # expect(encoded).to eq("Stra#{char}e") + # end + + # it_behaves_like "a binary encoded string" + # end + + # context "when the string contains non utf-8 characters" do + + # let(:string) do + # 255.chr + # end + + # it "raises an error" do + # expect { + # string.to_bson_string + # }.to raise_error(EncodingError) + # end + # end + # end end diff --git a/spec/bson/document_spec.rb b/spec/bson/document_spec.rb index 
0fef159eb..7b160db5f 100644 --- a/spec/bson/document_spec.rb +++ b/spec/bson/document_spec.rb @@ -812,23 +812,23 @@ it_behaves_like "a document able to handle utf-8" end - context "when non utf-8 values exist" do + context "when utf-8 values exist in wrong encoding" do let(:string) { "gültig" } let(:document) do described_class["type", string.encode("iso-8859-1")] end - it "encodes and decodes the document properly" do - expect( - BSON::Document.from_bson(StringIO.new(document.to_bson.to_s)) - ).to eq({ "type" => string }) + it "raises an exception" do + expect { + document.to_bson + }.to raise_error(ArgumentError) end end context "when binary strings with utf-8 values exist" do - let(:string) { "europäischen" } + let(:string) { "europäisch" } let(:document) do described_class["type", string.encode("binary", "binary")] end diff --git a/spec/bson/string_spec.rb b/spec/bson/string_spec.rb index aa27e4dea..a219b57ae 100644 --- a/spec/bson/string_spec.rb +++ b/spec/bson/string_spec.rb @@ -56,98 +56,6 @@ end end - describe "#to_bson_string" do - - context "when the string is valid" do - - let(:string) do - "test" - end - - let(:encoded) do - string.to_bson_string - end - - it "returns the string" do - expect(encoded).to eq(string) - end - - it_behaves_like "a binary encoded string" - end - - context "when the string contains a null byte" do - - let(:string) do - "test#{BSON::NULL_BYTE}ing" - end - - let(:encoded) do - string.to_bson_string - end - - it "retains the null byte" do - expect(encoded).to eq(string) - end - - it_behaves_like "a binary encoded string" - end - - context "when the string contains utf-8 characters" do - - let(:string) do - "Straße" - end - - let(:encoded) do - string.to_bson_string.to_s - end - - let(:char) do - "ß".chr.force_encoding(BSON::BINARY) - end - - it "returns the encoded string" do - expect(encoded).to eq("Stra#{char}e") - end - - it_behaves_like "a binary encoded string" - end - - context "when the string is encoded in non utf-8" do - 
- let(:string) do - "Straße".encode("iso-8859-1") - end - - let(:encoded) do - string.to_bson_string.to_s - end - - let(:char) do - "ß".chr.force_encoding(BSON::BINARY) - end - - it "returns the encoded string" do - expect(encoded).to eq("Stra#{char}e") - end - - it_behaves_like "a binary encoded string" - end - - context "when the string contains non utf-8 characters" do - - let(:string) do - 255.chr - end - - it "raises an error" do - expect { - string.to_bson_string - }.to raise_error(EncodingError) - end - end - end - context "when the class is loaded" do let(:registered) do From da7d7143640509cdb64916c61548292c9c130085 Mon Sep 17 00:00:00 2001 From: Durran Jordan Date: Tue, 22 Sep 2015 15:08:08 +0200 Subject: [PATCH 12/29] RUBY-1019: Adding more buffer deserialization methods --- Rakefile | 6 +- ext/bson/native.c | 124 ++++++++++++++++++++++++++- lib/bson/byte_buffer.rb | 154 ---------------------------------- lib/bson/hash.rb | 6 ++ spec/bson/byte_buffer_spec.rb | 120 +++++++++++++++++++++++++- 5 files changed, 250 insertions(+), 160 deletions(-) diff --git a/Rakefile b/Rakefile index cc33964dd..4427cece2 100644 --- a/Rakefile +++ b/Rakefile @@ -49,7 +49,6 @@ end require_relative "perf/bench" unless jruby? -RSpec::Core::RakeTask.new(:spec) RSpec::Core::RakeTask.new(:rspec) if jruby? 
@@ -74,8 +73,7 @@ task :clean_all => :clean do end end -task :ext_spec => :compile do - ENV["WITH_EXT"] = "C" +task :spec => :compile do Rake::Task["rspec"].invoke end @@ -112,4 +110,4 @@ namespace :benchmark do end end -task :default => [ :clean_all, :spec, :ext_spec ] +task :default => [ :clean_all, :spec ] diff --git a/ext/bson/native.c b/ext/bson/native.c index 146318b65..1c87157cf 100644 --- a/ext/bson/native.c +++ b/ext/bson/native.c @@ -18,7 +18,7 @@ #include #include "portable_endian.h" -#define BSON_BYTE_BUFFER_SIZE 512 +#define BSON_BYTE_BUFFER_SIZE 256 typedef struct { size_t size; @@ -41,7 +41,14 @@ typedef struct { { if (buffer_ptr->write_position + length > buffer_ptr->size) rb_bson_expand_buffer(buffer_ptr, length); } static VALUE rb_bson_byte_buffer_allocate(VALUE klass); +static VALUE rb_bson_byte_buffer_initialize(int argc, VALUE *argv, VALUE self); static VALUE rb_bson_byte_buffer_length(VALUE self); +static VALUE rb_bson_byte_buffer_get_byte(VALUE self); +static VALUE rb_bson_byte_buffer_get_bytes(VALUE self, VALUE i); +static VALUE rb_bson_byte_buffer_get_cstring(VALUE self); +static VALUE rb_bson_byte_buffer_get_double(VALUE self); +static VALUE rb_bson_byte_buffer_get_int32(VALUE self); +static VALUE rb_bson_byte_buffer_get_int64(VALUE self); static VALUE rb_bson_byte_buffer_put_byte(VALUE self, VALUE byte); static VALUE rb_bson_byte_buffer_put_bytes(VALUE self, VALUE bytes); static VALUE rb_bson_byte_buffer_put_cstring(VALUE self, VALUE string); @@ -71,7 +78,14 @@ void Init_native() VALUE rb_byte_buffer_class = rb_define_class_under(rb_bson_module, "ByteBuffer", rb_cObject); rb_define_alloc_func(rb_byte_buffer_class, rb_bson_byte_buffer_allocate); + rb_define_method(rb_byte_buffer_class, "initialize", rb_bson_byte_buffer_initialize, -1); rb_define_method(rb_byte_buffer_class, "length", rb_bson_byte_buffer_length, 0); + rb_define_method(rb_byte_buffer_class, "get_byte", rb_bson_byte_buffer_get_byte, 0); + 
rb_define_method(rb_byte_buffer_class, "get_bytes", rb_bson_byte_buffer_get_bytes, 1); + rb_define_method(rb_byte_buffer_class, "get_cstring", rb_bson_byte_buffer_get_cstring, 0); + rb_define_method(rb_byte_buffer_class, "get_double", rb_bson_byte_buffer_get_double, 0); + rb_define_method(rb_byte_buffer_class, "get_int32", rb_bson_byte_buffer_get_int32, 0); + rb_define_method(rb_byte_buffer_class, "get_int64", rb_bson_byte_buffer_get_int64, 0); rb_define_method(rb_byte_buffer_class, "put_byte", rb_bson_byte_buffer_put_byte, 1); rb_define_method(rb_byte_buffer_class, "put_bytes", rb_bson_byte_buffer_put_bytes, 1); rb_define_method(rb_byte_buffer_class, "put_cstring", rb_bson_byte_buffer_put_cstring, 1); @@ -95,6 +109,21 @@ VALUE rb_bson_byte_buffer_allocate(VALUE klass) return obj; } +/** + * Initialize a byte buffer. + */ +VALUE rb_bson_byte_buffer_initialize(int argc, VALUE *argv, VALUE self) +{ + VALUE bytes; + rb_scan_args(argc, argv, "01", &bytes); + + if (!NIL_P(bytes)) { + rb_bson_byte_buffer_put_bytes(self, bytes); + } + + return self; +} + /** * Get the length of the buffer. */ @@ -105,6 +134,99 @@ VALUE rb_bson_byte_buffer_length(VALUE self) return UINT2NUM(READ_SIZE(b)); } +/** + * Get a single byte from the buffer. + */ +VALUE rb_bson_byte_buffer_get_byte(VALUE self) +{ + byte_buffer_t *b; + VALUE byte; + + TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); + /* ENSURE_BSON_READ(b, 1); */ + byte = rb_str_new(READ_PTR(b), 1); + b->read_position += 1; + return byte; +} + +/** + * Get bytes from the buffer. + */ +VALUE rb_bson_byte_buffer_get_bytes(VALUE self, VALUE i) +{ + byte_buffer_t *b; + VALUE bytes; + const long length = FIX2LONG(i); + + TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); + /* ENSURE_BSON_READ(b, length); */ + bytes = rb_str_new(READ_PTR(b), length); + b->read_position += length; + return bytes; +} + +/** + * Get a cstring from the buffer. 
+ */ +VALUE rb_bson_byte_buffer_get_cstring(VALUE self) +{ + byte_buffer_t *b; + VALUE string; + int length; + + TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); + length = (int)strlen(READ_PTR(b) + b->read_position); + /* ENSURE_BSON_READ(b, 1); */ + string = rb_str_new(READ_PTR(b), length); + b->read_position += length; + return string; +} + +/** + * Get a double from the buffer. + */ +VALUE rb_bson_byte_buffer_get_double(VALUE self) +{ + byte_buffer_t *b; + union { uint64_t i64; double d; } ucast; + + TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); + /* ENSURE_BSON_READ(b, 8); */ + ucast.i64 = le64toh(*(uint64_t*)READ_PTR(b)); + b->read_position += 8; + return DBL2NUM(ucast.d); +} + +/** + * Get a int32 from the buffer. + */ +VALUE rb_bson_byte_buffer_get_int32(VALUE self) +{ + byte_buffer_t *b; + uint32_t i32; + + TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); + /* ENSURE_BSON_READ(b, 4); */ + i32 = le32toh(*((uint32_t*)READ_PTR(b))); + b->read_position += 4; + return UINT2NUM(i32); +} + +/** + * Get a int64 from the buffer. + */ +VALUE rb_bson_byte_buffer_get_int64(VALUE self) +{ + byte_buffer_t *b; + uint64_t i64; + + TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); + /* ENSURE_BSON_READ(b, 8); */ + i64 = le64toh(*((uint64_t*)READ_PTR(b))); + b->read_position += 8; + return ULONG2NUM(i64); +} + /** * Writes a byte to the byte buffer. */ diff --git a/lib/bson/byte_buffer.rb b/lib/bson/byte_buffer.rb index b6aa3a152..a8c675bc3 100644 --- a/lib/bson/byte_buffer.rb +++ b/lib/bson/byte_buffer.rb @@ -14,159 +14,5 @@ module BSON class ByteBuffer - - # Initialize the pure ruby byte buffer. - # - # @example Create the buffer. - # BSON::ByteBuffer.new - # - # @since 4.0.0 - def initialize - @buffer = "".force_encoding(BINARY) - end - - # Get the length of the buffer. - # - # @example Get the length of the buffer. 
- # buffer.length - # - # @return [ Integer ] The buffer length. - # - # @since 4.0.0 - def length - @buffer.bytesize - end - - # Put a single byte on the end of the buffer. - # - # @example Put a single byte on the buffer. - # buffer.put_byte(4) - # - # @param [ Integer ] value The byte to append. - # - # @return [ ByteBuffer ] The modified buffer. - # - # @since 4.0.0 - def put_byte(value) - @buffer << value - self - end - alias :put_bytes :put_byte - - # Put a null termintated c string on the end of the buffer. - # - # @example Put a cstring. - # buffer.put_cstring('test') - # - # @param [ String ] value The string. - # - # @return [ ByteBuffer ] The modified buffer. - # - # @since 4.0.0 - def put_cstring(value) - validate_cstring(value) - validate_utf8(value) - @buffer << value << NULL_BYTE - self - end - - # Put a 64 bit double on the buffer. - # - # @example Put a double. - # buffer.put_double(213.11231) - # - # @param [ Float ] value The float to convert. - # - # @return [ ByteBuffer ] The modified buffer. - # - # @since 4.0.0 - def put_double(value) - @buffer << [ value ].pack(Float::PACK) - self - end - - # Put a 32 bit integer on the end of the buffer. - # - # @example Put a 32 bit integer on the buffer. - # buffer.put_int32(4) - # - # @param [ Integer ] value The integer. - # - # @return [ ByteBuffer ] The modified buffer. - # - # @since 4.0.0 - def put_int32(value) - @buffer << [ value ].pack(Int32::PACK) - self - end - - # Put a 64 bit integer on the end of the buffer. - # - # @example Put a 64 bit integer on the buffer. - # buffer.put_int64(4) - # - # @param [ Integer ] value The integer. - # - # @return [ ByteBuffer ] The modified buffer. - # - # @since 4.0.0 - def put_int64(value) - @buffer << [ value ].pack(Int64::PACK) - self - end - - # Put a string on the end of the buffer. - # - # @example Put a string on the buffer. - # buffer.put_string('test') - # - # @param [ String ] value The value to append. 
- # - # @return [ ByteBuffer ] The modified buffer. - # - # @since 4.0.0 - def put_string(value) - validate_utf8(value) - put_int32(value.bytesize + 1) - @buffer << value - @buffer << NULL_BYTE - self - end - - # Replace an int 32 at the specified location in the buffer. - # - # @example Replace an int 32. - # buffer.replace_int32(4, 32) - # - # @param [ Integer ] index The index to replace at. - # @param [ Integer ] value The new value. - # - # @return [ ByteBuffer ] The modified buffer. - # - # @since 4.0.0 - def replace_int32(location, value) - @buffer[location, Int32::BYTES_LENGTH] = [ value ].pack(Int32::PACK) - self - end - - def to_s - @buffer - end - - private - - def validate_cstring(value) - if value.include?(NULL_BYTE) - raise ArgumentError, "Illegal CString #{value.inspect} contains a null byte." - end - end - - def validate_utf8(value) - begin - value.unpack("U*") - rescue - raise ArgumentError, "String #{value.inspect} is not valid UTF-8." - end - end end end diff --git a/lib/bson/hash.rb b/lib/bson/hash.rb index 834fb581b..e1de55351 100644 --- a/lib/bson/hash.rb +++ b/lib/bson/hash.rb @@ -74,7 +74,13 @@ module ClassMethods # @since 2.0.0 def from_bson(bson) hash = Document.allocate + # size = buffer.get_int32 bson.read(4) # Swallow the first four bytes. + # while (type = buffer.get_byte) != NULL_BYTE + # field = buffer.get_cstring + # hash.store(field, BSON::Registry.get(type).from_bson(buffer)) + # end + # hash while (type = bson.readbyte.chr) != NULL_BYTE field = bson.gets(NULL_BYTE).from_bson_string.chop! 
hash.store(field, BSON::Registry.get(type).from_bson(bson)) diff --git a/spec/bson/byte_buffer_spec.rb b/spec/bson/byte_buffer_spec.rb index f7ace8e8d..e2351d6a9 100644 --- a/spec/bson/byte_buffer_spec.rb +++ b/spec/bson/byte_buffer_spec.rb @@ -13,6 +13,124 @@ end end + describe '#get_byte' do + + let(:buffer) do + described_class.new(BSON::Int32::BSON_TYPE) + end + + let(:byte) do + buffer.get_byte + end + + it 'gets the byte from the buffer' do + expect(byte).to eq(BSON::Int32::BSON_TYPE) + end + + it 'increments the position by 1' do + + end + end + + describe '#get_bytes' do + + let(:string) do + "#{BSON::Int32::BSON_TYPE}#{BSON::Int32::BSON_TYPE}" + end + + let(:buffer) do + described_class.new(string) + end + + let(:bytes) do + buffer.get_bytes(2) + end + + it 'gets the bytes from the buffer' do + expect(bytes).to eq(string) + end + + it 'increments the position by the length' do + + end + end + + describe '#get_cstring' do + + let(:buffer) do + described_class.new("testing#{BSON::NULL_BYTE}") + end + + let(:string) do + buffer.get_cstring + end + + it 'gets the cstring from the buffer' do + expect(string).to eq("testing") + end + + it 'increments the position by string length + 1' do + + end + end + + describe '#get_double' do + + let(:buffer) do + described_class.new("#{12.5.to_bson.to_s}") + end + + let(:double) do + buffer.get_double + end + + it 'gets the double from the buffer' do + expect(double).to eq(12.5) + end + + it 'increments the position by 8' do + + end + end + + describe '#get_int32' do + + let(:buffer) do + described_class.new("#{12.to_bson.to_s}") + end + + let(:int32) do + buffer.get_int32 + end + + it 'gets the int32 from the buffer' do + expect(int32).to eq(12) + end + + it 'increments the position by 4' do + + end + end + + describe '#get_int64' do + + let(:buffer) do + described_class.new("#{(Integer::MAX_64BIT - 1).to_bson.to_s}") + end + + let(:int64) do + buffer.get_int64 + end + + it 'gets the int64 from the buffer' do + 
expect(int64).to eq(Integer::MAX_64BIT - 1) + end + + it 'increments the position by 8' do + + end + end + describe '#length' do let(:buffer) do @@ -38,7 +156,7 @@ buffer.put_byte(BSON::Int32::BSON_TYPE) end - it 'appends the int32 to the byte buffer' do + it 'appends the byte to the byte buffer' do expect(modified.to_s).to eq(BSON::Int32::BSON_TYPE.chr) end end From 4a7a248b211f78fd43b9d8a593b2777cb369d43d Mon Sep 17 00:00:00 2001 From: Durran Jordan Date: Tue, 22 Sep 2015 16:47:26 +0200 Subject: [PATCH 13/29] RUBY-1019: Read strings --- ext/bson/native.c | 27 ++++++++++++++++++++++++++- spec/bson/byte_buffer_spec.rb | 19 +++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/ext/bson/native.c b/ext/bson/native.c index 1c87157cf..49cf0d14f 100644 --- a/ext/bson/native.c +++ b/ext/bson/native.c @@ -49,6 +49,7 @@ static VALUE rb_bson_byte_buffer_get_cstring(VALUE self); static VALUE rb_bson_byte_buffer_get_double(VALUE self); static VALUE rb_bson_byte_buffer_get_int32(VALUE self); static VALUE rb_bson_byte_buffer_get_int64(VALUE self); +static VALUE rb_bson_byte_buffer_get_string(VALUE self); static VALUE rb_bson_byte_buffer_put_byte(VALUE self, VALUE byte); static VALUE rb_bson_byte_buffer_put_bytes(VALUE self, VALUE bytes); static VALUE rb_bson_byte_buffer_put_cstring(VALUE self, VALUE string); @@ -86,6 +87,7 @@ void Init_native() rb_define_method(rb_byte_buffer_class, "get_double", rb_bson_byte_buffer_get_double, 0); rb_define_method(rb_byte_buffer_class, "get_int32", rb_bson_byte_buffer_get_int32, 0); rb_define_method(rb_byte_buffer_class, "get_int64", rb_bson_byte_buffer_get_int64, 0); + rb_define_method(rb_byte_buffer_class, "get_string", rb_bson_byte_buffer_get_string, 0); rb_define_method(rb_byte_buffer_class, "put_byte", rb_bson_byte_buffer_put_byte, 1); rb_define_method(rb_byte_buffer_class, "put_bytes", rb_bson_byte_buffer_put_bytes, 1); rb_define_method(rb_byte_buffer_class, "put_cstring", rb_bson_byte_buffer_put_cstring, 1); 
@@ -176,7 +178,7 @@ VALUE rb_bson_byte_buffer_get_cstring(VALUE self) TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); length = (int)strlen(READ_PTR(b) + b->read_position); - /* ENSURE_BSON_READ(b, 1); */ + /* ENSURE_BSON_READ(b, length); */ string = rb_str_new(READ_PTR(b), length); b->read_position += length; return string; @@ -227,6 +229,29 @@ VALUE rb_bson_byte_buffer_get_int64(VALUE self) return ULONG2NUM(i64); } +/** + * Get a string from the buffer. + */ +VALUE rb_bson_byte_buffer_get_string(VALUE self) +{ + byte_buffer_t *b; + uint32_t length; + VALUE string; + + TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); + /* ENSURE_BSON_READ(b, 4); */ + length = le32toh(*((uint32_t*)READ_PTR(b))); + b->read_position += 4; + /* ENSURE_BSON_READ(b, length); */ + string = rb_str_new(READ_PTR(b), length); + // Associate UTF-8 + b->read_position += length; + /* ENSURE_BSON_READ(b, 1); */ + rb_str_new(READ_PTR(b), 1); + b->read_position += 1; + return string; +} + /** * Writes a byte to the byte buffer. 
*/ diff --git a/spec/bson/byte_buffer_spec.rb b/spec/bson/byte_buffer_spec.rb index e2351d6a9..a8824ed97 100644 --- a/spec/bson/byte_buffer_spec.rb +++ b/spec/bson/byte_buffer_spec.rb @@ -131,6 +131,25 @@ end end + describe '#get_string' do + + let(:buffer) do + described_class.new("#{7.to_bson.to_s}testing#{BSON::NULL_BYTE}") + end + + let(:string) do + buffer.get_string + end + + it 'gets the string from the buffer' do + expect(string).to eq("testing") + end + + it 'increments the position by string length + 5' do + + end + end + describe '#length' do let(:buffer) do From de256a875681f5907db769f12df722348052a1b8 Mon Sep 17 00:00:00 2001 From: Durran Jordan Date: Wed, 23 Sep 2015 10:34:01 +0200 Subject: [PATCH 14/29] RUBY-1019: Deserialize all objects in MRI --- ext/bson/native.c | 40 ++++++++++++++++++------------- lib/bson/array.rb | 12 +++++----- lib/bson/binary.rb | 12 +++++----- lib/bson/boolean.rb | 6 ++--- lib/bson/code.rb | 6 ++--- lib/bson/code_with_scope.rb | 8 +++---- lib/bson/float.rb | 12 +++------- lib/bson/hash.rb | 18 +++++--------- lib/bson/int32.rb | 12 +++------- lib/bson/int64.rb | 12 +++------- lib/bson/nil_class.rb | 4 ++-- lib/bson/object_id.rb | 6 ++--- lib/bson/regexp.rb | 6 ++--- lib/bson/specialized.rb | 8 +++---- lib/bson/string.rb | 20 +++------------- lib/bson/symbol.rb | 7 +++--- lib/bson/time.rb | 6 ++--- lib/bson/timestamp.rb | 14 ++++++----- spec/bson/byte_buffer_spec.rb | 22 ++++++++--------- spec/bson/code_with_scope_spec.rb | 6 +---- spec/bson/document_spec.rb | 8 +++---- spec/bson/int32_spec.rb | 5 ++-- spec/bson/regexp_spec.rb | 2 +- spec/support/shared_examples.rb | 23 ++---------------- 24 files changed, 113 insertions(+), 162 deletions(-) diff --git a/ext/bson/native.c b/ext/bson/native.c index 49cf0d14f..f24e5e1e3 100644 --- a/ext/bson/native.c +++ b/ext/bson/native.c @@ -57,6 +57,7 @@ static VALUE rb_bson_byte_buffer_put_double(VALUE self, VALUE f); static VALUE rb_bson_byte_buffer_put_int32(VALUE self, VALUE i); static 
VALUE rb_bson_byte_buffer_put_int64(VALUE self, VALUE i); static VALUE rb_bson_byte_buffer_put_string(VALUE self, VALUE string); +static VALUE rb_bson_byte_buffer_read_position(VALUE self); static VALUE rb_bson_byte_buffer_replace_int32(VALUE self, VALUE index, VALUE i); static VALUE rb_bson_byte_buffer_to_s(VALUE self); @@ -95,6 +96,7 @@ void Init_native() rb_define_method(rb_byte_buffer_class, "put_int32", rb_bson_byte_buffer_put_int32, 1); rb_define_method(rb_byte_buffer_class, "put_int64", rb_bson_byte_buffer_put_int64, 1); rb_define_method(rb_byte_buffer_class, "put_string", rb_bson_byte_buffer_put_string, 1); + rb_define_method(rb_byte_buffer_class, "read_position", rb_bson_byte_buffer_read_position, 0); rb_define_method(rb_byte_buffer_class, "replace_int32", rb_bson_byte_buffer_replace_int32, 2); rb_define_method(rb_byte_buffer_class, "to_s", rb_bson_byte_buffer_to_s, 0); } @@ -177,10 +179,10 @@ VALUE rb_bson_byte_buffer_get_cstring(VALUE self) int length; TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); - length = (int)strlen(READ_PTR(b) + b->read_position); + length = (int)strlen(READ_PTR(b)); /* ENSURE_BSON_READ(b, length); */ - string = rb_str_new(READ_PTR(b), length); - b->read_position += length; + string = rb_enc_str_new(READ_PTR(b), length, rb_utf8_encoding()); + b->read_position += length + 1; return string; } @@ -205,13 +207,13 @@ VALUE rb_bson_byte_buffer_get_double(VALUE self) VALUE rb_bson_byte_buffer_get_int32(VALUE self) { byte_buffer_t *b; - uint32_t i32; + int32_t i32; TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); /* ENSURE_BSON_READ(b, 4); */ - i32 = le32toh(*((uint32_t*)READ_PTR(b))); + i32 = le32toh(*((int32_t*)READ_PTR(b))); b->read_position += 4; - return UINT2NUM(i32); + return INT2NUM(i32); } /** @@ -220,13 +222,13 @@ VALUE rb_bson_byte_buffer_get_int32(VALUE self) VALUE rb_bson_byte_buffer_get_int64(VALUE self) { byte_buffer_t *b; - uint64_t i64; + int64_t i64; 
TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); /* ENSURE_BSON_READ(b, 8); */ - i64 = le64toh(*((uint64_t*)READ_PTR(b))); + i64 = le64toh(*((int64_t*)READ_PTR(b))); b->read_position += 8; - return ULONG2NUM(i64); + return LONG2NUM(i64); } /** @@ -235,20 +237,16 @@ VALUE rb_bson_byte_buffer_get_int64(VALUE self) VALUE rb_bson_byte_buffer_get_string(VALUE self) { byte_buffer_t *b; - uint32_t length; + int32_t length; VALUE string; TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); /* ENSURE_BSON_READ(b, 4); */ - length = le32toh(*((uint32_t*)READ_PTR(b))); + length = le32toh(*((int32_t*)READ_PTR(b))); b->read_position += 4; /* ENSURE_BSON_READ(b, length); */ - string = rb_str_new(READ_PTR(b), length); - // Associate UTF-8 + string = rb_enc_str_new(READ_PTR(b), length - 1, rb_utf8_encoding()); b->read_position += length; - /* ENSURE_BSON_READ(b, 1); */ - rb_str_new(READ_PTR(b), 1); - b->read_position += 1; return string; } @@ -380,6 +378,16 @@ VALUE rb_bson_byte_buffer_put_string(VALUE self, VALUE string) return self; } +/** + * Get the read position. + */ +VALUE rb_bson_byte_buffer_read_position(VALUE self) +{ + byte_buffer_t *b; + TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); + return INT2NUM(b->read_position); +} + /** * Replace a 32 bit integer int the byte buffer. */ diff --git a/lib/bson/array.rb b/lib/bson/array.rb index eb46ba96d..4f0a82cd2 100644 --- a/lib/bson/array.rb +++ b/lib/bson/array.rb @@ -85,19 +85,19 @@ module ClassMethods # Deserialize the array from BSON. # - # @param [ BSON ] bson The bson representing an array. + # @param [ ByteBuffer ] buffer The byte buffer. # # @return [ Array ] The decoded array. 
# # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def from_bson(bson) + def from_bson(buffer) array = new - bson.read(4) # throw away the length - while (type = bson.readbyte.chr) != NULL_BYTE - bson.gets(NULL_BYTE) - array << BSON::Registry.get(type).from_bson(bson) + buffer.get_int32 # throw away the length + while (type = buffer.get_byte) != NULL_BYTE + buffer.get_cstring + array << BSON::Registry.get(type).from_bson(buffer) end array end diff --git a/lib/bson/binary.rb b/lib/bson/binary.rb index 9eba976d5..4af77023a 100644 --- a/lib/bson/binary.rb +++ b/lib/bson/binary.rb @@ -140,18 +140,18 @@ def to_bson(buffer = ByteBuffer.new) # Deserialize the binary data from BSON. # - # @param [ BSON ] bson The bson representing binary data. + # @param [ ByteBuffer ] buffer The byte buffer. # # @return [ Binary ] The decoded binary data. # # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def self.from_bson(bson) - length = Int32.from_bson(bson) - type = TYPES[bson.read(1)] - length = Int32.from_bson(bson) if type == :old - data = bson.read(length) + def self.from_bson(buffer) + length = buffer.get_int32 + type = TYPES[buffer.get_byte] + length = buffer.get_int32 if type == :old + data = buffer.get_bytes(length) new(data, type) end diff --git a/lib/bson/boolean.rb b/lib/bson/boolean.rb index 0f85562f3..5576cc520 100644 --- a/lib/bson/boolean.rb +++ b/lib/bson/boolean.rb @@ -29,15 +29,15 @@ class Boolean # Deserialize a boolean from BSON. # - # @param [ BSON ] bson The encoded boolean. + # @param [ ByteBuffer ] buffer The byte buffer. # # @return [ TrueClass, FalseClass ] The decoded boolean. # # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def self.from_bson(bson) - bson.readbyte.chr == TrueClass::TRUE_BYTE + def self.from_bson(buffer) + buffer.get_byte == TrueClass::TRUE_BYTE end # Register this type when the module is loaded. 
diff --git a/lib/bson/code.rb b/lib/bson/code.rb index 8cc556f81..b188f9c2c 100644 --- a/lib/bson/code.rb +++ b/lib/bson/code.rb @@ -87,15 +87,15 @@ def to_bson(buffer = ByteBuffer.new) # Deserialize code from BSON. # - # @param [ BSON ] bson The encoded code. + # @param [ ByteBuffer ] buffer The byte buffer. # # @return [ TrueClass, FalseClass ] The decoded code. # # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def self.from_bson(bson) - new(bson.read(Int32.from_bson(bson)).from_bson_string.chop!) + def self.from_bson(buffer) + new(buffer.get_string) end # Register this type when the module is loaded. diff --git a/lib/bson/code_with_scope.rb b/lib/bson/code_with_scope.rb index ee1eaf66c..3abb61939 100644 --- a/lib/bson/code_with_scope.rb +++ b/lib/bson/code_with_scope.rb @@ -97,16 +97,16 @@ def to_bson(buffer = ByteBuffer.new) # Deserialize a code with scope from BSON. # - # @param [ BSON ] bson The encoded code with scope. + # @param [ ByteBuffer ] buffer The byte buffer. # # @return [ TrueClass, FalseClass ] The decoded code with scope. # # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def self.from_bson(bson) - bson.read(4) # Throw away the total length. - new(bson.read(Int32.from_bson(bson)).from_bson_string.chop!, ::Hash.from_bson(bson)) + def self.from_bson(buffer) + buffer.get_int32 # Throw away the total length. + new(buffer.get_string, ::Hash.from_bson(buffer)) end # Register this type when the module is loaded. diff --git a/lib/bson/float.rb b/lib/bson/float.rb index b0073e470..fa1b70e89 100644 --- a/lib/bson/float.rb +++ b/lib/bson/float.rb @@ -50,21 +50,15 @@ module ClassMethods # Deserialize an instance of a Float from a BSON double. # - # @param [ BSON ] bson The encoded double. + # @param [ ByteBuffer ] buffer The byte buffer. # # @return [ Float ] The decoded Float. 
# # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def from_bson(bson) - from_bson_double(bson.read(8)) - end - - private - - def from_bson_double(double) - double.unpack(PACK).first + def from_bson(buffer) + buffer.get_double end end diff --git a/lib/bson/hash.rb b/lib/bson/hash.rb index e1de55351..088bc22c5 100644 --- a/lib/bson/hash.rb +++ b/lib/bson/hash.rb @@ -65,25 +65,19 @@ module ClassMethods # Deserialize the hash from BSON. # - # @param [ IO ] bson The bson representing a hash. + # @param [ ByteBuffer ] buffer The byte buffer. # # @return [ Array ] The decoded hash. # # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def from_bson(bson) + def from_bson(buffer) hash = Document.allocate - # size = buffer.get_int32 - bson.read(4) # Swallow the first four bytes. - # while (type = buffer.get_byte) != NULL_BYTE - # field = buffer.get_cstring - # hash.store(field, BSON::Registry.get(type).from_bson(buffer)) - # end - # hash - while (type = bson.readbyte.chr) != NULL_BYTE - field = bson.gets(NULL_BYTE).from_bson_string.chop! - hash.store(field, BSON::Registry.get(type).from_bson(bson)) + buffer.get_int32 # Throw away the size - todo: just move read position? + while (type = buffer.get_byte) != NULL_BYTE + field = buffer.get_cstring + hash.store(field, BSON::Registry.get(type).from_bson(buffer)) end hash end diff --git a/lib/bson/int32.rb b/lib/bson/int32.rb index d0a8d66c7..27a68ec5d 100644 --- a/lib/bson/int32.rb +++ b/lib/bson/int32.rb @@ -39,21 +39,15 @@ class Int32 # Deserialize an Integer from BSON. # - # @param [ BSON ] bson The encoded int32. + # @param [ ByteBuffer ] buffer The byte buffer. # # @return [ Integer ] The decoded Integer. # # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def self.from_bson(bson) - from_bson_int32(bson.read(4)) - end - - private - - def self.from_bson_int32(bytes) - bytes.unpack(PACK).first + def self.from_bson(buffer) + buffer.get_int32 end # Register this type when the module is loaded. 
diff --git a/lib/bson/int64.rb b/lib/bson/int64.rb index cbb89f5ac..012a136ae 100644 --- a/lib/bson/int64.rb +++ b/lib/bson/int64.rb @@ -34,21 +34,15 @@ class Int64 # Deserialize an Integer from BSON. # - # @param [ BSON ] bson The encoded int64. + # @param [ ByteBuffer ] buffer The byte buffer. # # @return [ Integer ] The decoded Integer. # # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def self.from_bson(bson) - from_bson_int64(bson.read(8)) - end - - private - - def self.from_bson_int64(bytes) - bytes.unpack(PACK).first + def self.from_bson(buffer) + buffer.get_int64 end # Register this type when the module is loaded. diff --git a/lib/bson/nil_class.rb b/lib/bson/nil_class.rb index 0821ec422..afb806ce9 100644 --- a/lib/bson/nil_class.rb +++ b/lib/bson/nil_class.rb @@ -32,14 +32,14 @@ module ClassMethods # Deserialize NilClass from BSON. # - # @param [ BSON ] bson The encoded Null value. + # @param [ ByteBuffer ] buffer The byte buffer. # # @return [ nil ] The decoded nil value. # # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def from_bson(bson) + def from_bson(buffer) nil end end diff --git a/lib/bson/object_id.rb b/lib/bson/object_id.rb index 4b3248166..8e70b93d2 100644 --- a/lib/bson/object_id.rb +++ b/lib/bson/object_id.rb @@ -206,13 +206,13 @@ class << self # @example Get the object id from BSON. # ObjectId.from_bson(bson) # - # @param [ String ] bson The raw BSON bytes. + # @param [ ByteBuffer ] buffer The byte buffer. # # @return [ BSON::ObjectId ] The object id. # # @since 2.0.0 - def from_bson(bson) - from_data(bson.read(12)) + def from_bson(buffer) + from_data(buffer.get_bytes(12)) end # Create a new object id from raw bytes. diff --git a/lib/bson/regexp.rb b/lib/bson/regexp.rb index e28fa3bb7..93da0ad5c 100644 --- a/lib/bson/regexp.rb +++ b/lib/bson/regexp.rb @@ -168,15 +168,15 @@ module ClassMethods # Deserialize the regular expression from BSON. # - # @param [ BSON ] bson The bson representing a regular expression. 
+ # @param [ ByteBuffer ] buffer The byte buffer. # # @return [ Regexp ] The decoded regular expression. # # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def from_bson(bson) - pattern = bson.gets(NULL_BYTE).from_bson_string.chop! + def from_bson(buffer) + pattern = buffer.get_cstring options = 0 while (option = bson.readbyte.chr) != NULL_BYTE case option diff --git a/lib/bson/specialized.rb b/lib/bson/specialized.rb index d783c3994..03df6a2df 100644 --- a/lib/bson/specialized.rb +++ b/lib/bson/specialized.rb @@ -57,16 +57,16 @@ def self.included(klass) module ClassMethods - # Deserialize MinKey from BSON. + # Deserialize from BSON. # - # @param [ BSON ] bson The encoded MinKey. + # @param [ ByteBuffer ] buffer The byte buffer. # - # @return [ MinKey ] The decoded MinKey. + # @return [ Specialized ] The decoded specialized class. # # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def from_bson(bson) + def from_bson(buffer) new end end diff --git a/lib/bson/string.rb b/lib/bson/string.rb index 899a88eb6..587b8fc45 100644 --- a/lib/bson/string.rb +++ b/lib/bson/string.rb @@ -89,33 +89,19 @@ def to_hex_string unpack("H*")[0] end - # Take the binary string and return a UTF-8 encoded string. - # - # @example Convert from a BSON string. - # "\x00".from_bson_string - # - # @raise [ EncodingError ] If the string is not UTF-8. - # - # @return [ String ] The UTF-8 string. - # - # @since 2.0.0 - def from_bson_string - force_encoding(UTF8) - end - module ClassMethods # Deserialize a string from BSON. # - # @param [ BSON ] bson The bson representing a string. + # @param [ ByteBuffer ] buffer The byte buffer. # # @return [ Regexp ] The decoded string. # # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def from_bson(bson) - bson.read(Int32.from_bson(bson)).from_bson_string.chop! 
+ def from_bson(buffer) + buffer.get_string end end diff --git a/lib/bson/symbol.rb b/lib/bson/symbol.rb index 4d4268173..c7c9851a2 100644 --- a/lib/bson/symbol.rb +++ b/lib/bson/symbol.rb @@ -71,17 +71,18 @@ def to_bson_normalized_key end module ClassMethods + # Deserialize a symbol from BSON. # - # @param [ BSON ] bson The bson representing a symbol. + # @param [ ByteBuffer ] buffer The byte buffer. # # @return [ Regexp ] The decoded symbol. # # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def from_bson(bson) - bson.read(Int32.from_bson(bson)).from_bson_string.chop!.intern + def from_bson(buffer) + buffer.get_string.intern end end diff --git a/lib/bson/time.rb b/lib/bson/time.rb index 2982258bf..076b0b49c 100644 --- a/lib/bson/time.rb +++ b/lib/bson/time.rb @@ -45,15 +45,15 @@ module ClassMethods # Deserialize UTC datetime from BSON. # - # @param [ BSON ] bson The bson representing UTC datetime. + # @param [ ByteBuffer ] buffer The byte buffer. # # @return [ Time ] The decoded UTC datetime. # # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def from_bson(bson) - seconds, fragment = Int64.from_bson(bson).divmod(1000) + def from_bson(buffer) + seconds, fragment = Int64.from_bson(buffer).divmod(1000) at(seconds, fragment * 1000).utc end end diff --git a/lib/bson/timestamp.rb b/lib/bson/timestamp.rb index 826611d21..1fdda8896 100644 --- a/lib/bson/timestamp.rb +++ b/lib/bson/timestamp.rb @@ -87,22 +87,24 @@ def initialize(seconds, increment) # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def to_bson(encoded = ''.force_encoding(BINARY)) - increment.to_bson_int32(encoded) - seconds.to_bson_int32(encoded) + def to_bson(buffer = ByteBuffer.new) + buffer.put_int32(increment) + buffer.put_int32(seconds) end # Deserialize timestamp from BSON. # - # @param [ BSON ] bson The bson representing a timestamp. + # @param [ ByteBuffer ] buffer The byte buffer. # # @return [ Timestamp ] The decoded timestamp. 
# # @see http://bsonspec.org/#/specification # # @since 2.0.0 - def self.from_bson(bson) - new(*bson.read(8).unpack(Int32::PACK * 2).reverse) + def self.from_bson(buffer) + increment = buffer.get_int32 + seconds = buffer.get_int32 + new(seconds, increment) end # Register this type when the module is loaded. diff --git a/spec/bson/byte_buffer_spec.rb b/spec/bson/byte_buffer_spec.rb index a8824ed97..3ed4e0aae 100644 --- a/spec/bson/byte_buffer_spec.rb +++ b/spec/bson/byte_buffer_spec.rb @@ -61,7 +61,7 @@ described_class.new("testing#{BSON::NULL_BYTE}") end - let(:string) do + let!(:string) do buffer.get_cstring end @@ -70,7 +70,7 @@ end it 'increments the position by string length + 1' do - + expect(buffer.read_position).to eq(8) end end @@ -80,7 +80,7 @@ described_class.new("#{12.5.to_bson.to_s}") end - let(:double) do + let!(:double) do buffer.get_double end @@ -89,7 +89,7 @@ end it 'increments the position by 8' do - + expect(buffer.read_position).to eq(8) end end @@ -99,7 +99,7 @@ described_class.new("#{12.to_bson.to_s}") end - let(:int32) do + let!(:int32) do buffer.get_int32 end @@ -108,7 +108,7 @@ end it 'increments the position by 4' do - + expect(buffer.read_position).to eq(4) end end @@ -118,7 +118,7 @@ described_class.new("#{(Integer::MAX_64BIT - 1).to_bson.to_s}") end - let(:int64) do + let!(:int64) do buffer.get_int64 end @@ -127,17 +127,17 @@ end it 'increments the position by 8' do - + expect(buffer.read_position).to eq(8) end end describe '#get_string' do let(:buffer) do - described_class.new("#{7.to_bson.to_s}testing#{BSON::NULL_BYTE}") + described_class.new("#{8.to_bson.to_s}testing#{BSON::NULL_BYTE}") end - let(:string) do + let!(:string) do buffer.get_string end @@ -146,7 +146,7 @@ end it 'increments the position by string length + 5' do - + expect(buffer.read_position).to eq(12) end end diff --git a/spec/bson/code_with_scope_spec.rb b/spec/bson/code_with_scope_spec.rb index 250c1f23d..e4d535419 100644 --- a/spec/bson/code_with_scope_spec.rb +++ 
b/spec/bson/code_with_scope_spec.rb @@ -79,7 +79,7 @@ { "name" => "test" } end let(:obj) { described_class.new(code, scope) } - let(:bson) { StringIO.new(obj.to_bson.to_s) } + let(:bson) { BSON::ByteBuffer.new(obj.to_bson.to_s) } let!(:deserialized) { described_class.from_bson(bson) } it "deserializes the javascript" do @@ -89,9 +89,5 @@ it "deserializes the scope" do expect(deserialized.scope).to eq(scope) end - - it "does not leave any extra bytes" do - expect(bson.read(1)).to be_nil - end end end diff --git a/spec/bson/document_spec.rb b/spec/bson/document_spec.rb index 7b160db5f..021e158de 100644 --- a/spec/bson/document_spec.rb +++ b/spec/bson/document_spec.rb @@ -657,7 +657,7 @@ end let(:deserialized) do - described_class.from_bson(StringIO.new(serialized)) + described_class.from_bson(BSON::ByteBuffer.new(serialized)) end it 'deserializes the documents' do @@ -723,7 +723,7 @@ it_behaves_like "a deserializable bson element" let(:raw) do - StringIO.new(bson) + BSON::ByteBuffer.new(bson) end it "returns an instance of a BSON::Document" do @@ -768,7 +768,7 @@ end let(:deserialized) do - described_class.from_bson(StringIO.new(document.to_bson.to_s)) + described_class.from_bson(BSON::ByteBuffer.new(document.to_bson.to_s)) end it "serializes and deserializes properly" do @@ -835,7 +835,7 @@ it "encodes and decodes the document properly" do expect( - BSON::Document.from_bson(StringIO.new(document.to_bson.to_s)) + BSON::Document.from_bson(BSON::ByteBuffer.new(document.to_bson.to_s)) ).to eq({ "type" => string }) end end diff --git a/spec/bson/int32_spec.rb b/spec/bson/int32_spec.rb index a90fadc9e..361d46405 100644 --- a/spec/bson/int32_spec.rb +++ b/spec/bson/int32_spec.rb @@ -27,10 +27,11 @@ end describe "when the integer is negative" do + let(:decoded) { -1 } - let(:encoded) {StringIO.new([ -1 ].pack(BSON::Int32::PACK))} + let(:encoded) { BSON::ByteBuffer.new([ -1 ].pack(BSON::Int32::PACK)) } let(:decoded_2) { -50 } - let(:encoded_2) {StringIO.new([ -50 
].pack(BSON::Int32::PACK))} + let(:encoded_2) { BSON::ByteBuffer.new([ -50 ].pack(BSON::Int32::PACK)) } it "decodes a -1 correctly" do expect(BSON::Int32.from_bson(encoded)).to eq(decoded) diff --git a/spec/bson/regexp_spec.rb b/spec/bson/regexp_spec.rb index d60840459..43d2b3f75 100644 --- a/spec/bson/regexp_spec.rb +++ b/spec/bson/regexp_spec.rb @@ -37,7 +37,7 @@ let(:obj) { /test/ } let(:io) do - StringIO.new(bson) + BSON::ByteBuffer.new(bson) end let(:regex) do diff --git a/spec/support/shared_examples.rb b/spec/support/shared_examples.rb index 014055cc7..81538c2a9 100644 --- a/spec/support/shared_examples.rb +++ b/spec/support/shared_examples.rb @@ -38,10 +38,6 @@ shared_examples_for "a serializable bson element" do - let(:previous_content) do - 'previous_content'.force_encoding(BSON::BINARY) - end - it "serializes to bson" do expect(obj.to_bson.to_s).to eq(bson) end @@ -50,7 +46,7 @@ shared_examples_for "a deserializable bson element" do let(:io) do - StringIO.new(bson) + BSON::ByteBuffer.new(bson) end let(:result) do @@ -60,21 +56,6 @@ it "deserializes from bson" do expect(result).to eq(obj) end - - context 'when io#readbyte returns a String' do - - let(:io) do - AlternateIO.new(bson) - end - - let(:result) do - described_class.from_bson(io) - end - - it "deserializes from bson" do - expect(result).to eq(obj) - end - end end shared_examples_for "a JSON serializable object" do @@ -104,7 +85,7 @@ it "serializes and deserializes properly" do expect( - BSON::Document.from_bson(StringIO.new(document.to_bson.to_s)) + BSON::Document.from_bson(BSON::ByteBuffer.new(document.to_bson.to_s)) ).to eq(document) end end From 000ba8f75b743c247dc225f1b35edb6dc6f3f3b7 Mon Sep 17 00:00:00 2001 From: Durran Jordan Date: Wed, 23 Sep 2015 11:11:08 +0200 Subject: [PATCH 15/29] Add deserialization bench --- perf/bench.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/perf/bench.rb b/perf/bench.rb index de2a3b480..4d4fdd89a 100644 --- a/perf/bench.rb +++ 
b/perf/bench.rb @@ -156,9 +156,9 @@ def benchmark! # count.times { Time.from_bson(StringIO.new(time_bytes)) } # end - # doc_bytes = document.to_bson - # bench.report("Document#from_bson ---->") do - # count.times { BSON::Document.from_bson(StringIO.new(doc_bytes)) } - # end + doc_bytes = document.to_bson + bench.report("Document#from_bson ---->") do + count.times { BSON::Document.from_bson(BSON::ByteBuffer.new(doc_bytes)) } + end end end From 004c6f98d57494f6e61aa86f2f501f236a0b70ca Mon Sep 17 00:00:00 2001 From: Durran Jordan Date: Fri, 25 Sep 2015 14:21:05 +0200 Subject: [PATCH 16/29] RUBY-1019: Fix memory free error in growing buffer --- ext/bson/native.c | 40 ++++++----- spec/bson/byte_buffer_spec.rb | 128 ++++++++-------------------------- 2 files changed, 52 insertions(+), 116 deletions(-) diff --git a/ext/bson/native.c b/ext/bson/native.c index f24e5e1e3..c97641c77 100644 --- a/ext/bson/native.c +++ b/ext/bson/native.c @@ -40,6 +40,10 @@ typedef struct { #define ENSURE_BSON_WRITE(buffer_ptr, length) \ { if (buffer_ptr->write_position + length > buffer_ptr->size) rb_bson_expand_buffer(buffer_ptr, length); } +#define ENSURE_BSON_READ(buffer_ptr, length) \ + { if (buffer_ptr->read_position + length > buffer_ptr->write_position) \ + rb_raise(rb_eRangeError, "Attempted to read %zu bytes, but only %zu bytes remain", (size_t)length, READ_SIZE(buffer_ptr)); } + static VALUE rb_bson_byte_buffer_allocate(VALUE klass); static VALUE rb_bson_byte_buffer_initialize(int argc, VALUE *argv, VALUE self); static VALUE rb_bson_byte_buffer_length(VALUE self); @@ -68,7 +72,8 @@ static bool rb_bson_utf8_validate(const char *utf8, size_t utf8_len, bool allow_ static const rb_data_type_t rb_byte_buffer_data_type = { "bson/byte_buffer", - { NULL, rb_bson_byte_buffer_free, rb_bson_byte_buffer_memsize } + { NULL, rb_bson_byte_buffer_free, rb_bson_byte_buffer_memsize }, + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY }; /** @@ -147,7 +152,7 @@ VALUE rb_bson_byte_buffer_get_byte(VALUE self) VALUE 
byte; TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); - /* ENSURE_BSON_READ(b, 1); */ + ENSURE_BSON_READ(b, 1); byte = rb_str_new(READ_PTR(b), 1); b->read_position += 1; return byte; @@ -163,7 +168,7 @@ VALUE rb_bson_byte_buffer_get_bytes(VALUE self, VALUE i) const long length = FIX2LONG(i); TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); - /* ENSURE_BSON_READ(b, length); */ + ENSURE_BSON_READ(b, length); bytes = rb_str_new(READ_PTR(b), length); b->read_position += length; return bytes; @@ -180,7 +185,7 @@ VALUE rb_bson_byte_buffer_get_cstring(VALUE self) TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); length = (int)strlen(READ_PTR(b)); - /* ENSURE_BSON_READ(b, length); */ + ENSURE_BSON_READ(b, length); string = rb_enc_str_new(READ_PTR(b), length, rb_utf8_encoding()); b->read_position += length + 1; return string; @@ -195,7 +200,7 @@ VALUE rb_bson_byte_buffer_get_double(VALUE self) union { uint64_t i64; double d; } ucast; TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); - /* ENSURE_BSON_READ(b, 8); */ + ENSURE_BSON_READ(b, 8); ucast.i64 = le64toh(*(uint64_t*)READ_PTR(b)); b->read_position += 8; return DBL2NUM(ucast.d); @@ -210,7 +215,7 @@ VALUE rb_bson_byte_buffer_get_int32(VALUE self) int32_t i32; TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); - /* ENSURE_BSON_READ(b, 4); */ + ENSURE_BSON_READ(b, 4); i32 = le32toh(*((int32_t*)READ_PTR(b))); b->read_position += 4; return INT2NUM(i32); @@ -225,7 +230,7 @@ VALUE rb_bson_byte_buffer_get_int64(VALUE self) int64_t i64; TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); - /* ENSURE_BSON_READ(b, 8); */ + ENSURE_BSON_READ(b, 8); i64 = le64toh(*((int64_t*)READ_PTR(b))); b->read_position += 8; return LONG2NUM(i64); @@ -241,10 +246,10 @@ VALUE rb_bson_byte_buffer_get_string(VALUE self) VALUE string; TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); - /* 
ENSURE_BSON_READ(b, 4); */ + ENSURE_BSON_READ(b, 4); length = le32toh(*((int32_t*)READ_PTR(b))); b->read_position += 4; - /* ENSURE_BSON_READ(b, length); */ + ENSURE_BSON_READ(b, length); string = rb_enc_str_new(READ_PTR(b), length - 1, rb_utf8_encoding()); b->read_position += length; return string; @@ -279,7 +284,6 @@ VALUE rb_bson_byte_buffer_put_bytes(VALUE self, VALUE bytes) ENSURE_BSON_WRITE(b, length); memcpy(WRITE_PTR(b), str, length); b->write_position += length; - return self; } @@ -300,7 +304,6 @@ VALUE rb_bson_byte_buffer_put_cstring(VALUE self, VALUE string) ENSURE_BSON_WRITE(b, length); memcpy(WRITE_PTR(b), c_str, length); b->write_position += length; - return self; } @@ -395,12 +398,11 @@ VALUE rb_bson_byte_buffer_replace_int32(VALUE self, VALUE index, VALUE i) { byte_buffer_t *b; const int32_t position = NUM2INT(index); - const int32_t i32 = NUM2INT(i); - const char bytes = htole32(i32); + const int32_t i32 = htole32(NUM2INT(i)); TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); - memcpy(READ_PTR(b) + position, &bytes, sizeof(bytes)); + memcpy(READ_PTR(b) + position, &i32, 4); return self; } @@ -429,7 +431,9 @@ size_t rb_bson_byte_buffer_memsize(const void *ptr) void rb_bson_byte_buffer_free(void *ptr) { byte_buffer_t *b = ptr; - if (b->b_ptr != b->buffer) xfree(b->b_ptr); + if (b->b_ptr != b->buffer) { + xfree(b->b_ptr); + } xfree(b); } @@ -445,10 +449,12 @@ void rb_bson_expand_buffer(byte_buffer_t* buffer_ptr, size_t length) buffer_ptr->read_position = 0; } else { char *new_b_ptr; - const size_t new_size = buffer_ptr->size + BSON_BYTE_BUFFER_SIZE; + const size_t new_size = required_size + BSON_BYTE_BUFFER_SIZE; new_b_ptr = ALLOC_N(char, new_size); memcpy(new_b_ptr, READ_PTR(buffer_ptr), READ_SIZE(buffer_ptr)); - if (buffer_ptr->b_ptr != buffer_ptr->buffer) xfree(buffer_ptr->b_ptr); + if (buffer_ptr->b_ptr != buffer_ptr->buffer) { + xfree(buffer_ptr->b_ptr); + } buffer_ptr->b_ptr = new_b_ptr; buffer_ptr->size = new_size; 
buffer_ptr->write_position -= buffer_ptr->read_position; diff --git a/spec/bson/byte_buffer_spec.rb b/spec/bson/byte_buffer_spec.rb index 3ed4e0aae..c24bfdc83 100644 --- a/spec/bson/byte_buffer_spec.rb +++ b/spec/bson/byte_buffer_spec.rb @@ -273,16 +273,38 @@ describe '#put_string' do - let(:buffer) do - described_class.new - end + context 'when the buffer does not need to be expanded' do - let(:modified) do - buffer.put_string('testing') + let(:buffer) do + described_class.new + end + + let(:modified) do + buffer.put_string('testing') + end + + it 'appends the string to the byte buffer' do + expect(modified.to_s).to eq("#{8.to_bson.to_s}testing#{BSON::NULL_BYTE}") + end end - it 'appends the string to the byte buffer' do - expect(modified.to_s).to eq("#{8.to_bson.to_s}testing#{BSON::NULL_BYTE}") + context 'when the buffer needs to be expanded' do + + let(:buffer) do + described_class.new + end + + let(:string) do + 300.times.inject(""){ |s, i| s << "#{i}" } + end + + let(:modified) do + buffer.put_string(string) + end + + it 'appends the string to the byte buffer' do + expect(modified.to_s).to eq("#{(string.bytesize + 1).to_bson.to_s}#{string}#{BSON::NULL_BYTE}") + end end end @@ -308,96 +330,4 @@ expect(modified.to_s).to eq("#{exp_first}#{exp_second}") end end - - # describe "#to_bson_string" do - - # context "when the string is valid" do - - # let(:string) do - # "test" - # end - - # let(:encoded) do - # string.to_bson_string - # end - - # it "returns the string" do - # expect(encoded).to eq(string) - # end - - # it_behaves_like "a binary encoded string" - # end - - # context "when the string contains a null byte" do - - # let(:string) do - # "test#{BSON::NULL_BYTE}ing" - # end - - # let(:encoded) do - # string.to_bson_string - # end - - # it "retains the null byte" do - # expect(encoded).to eq(string) - # end - - # it_behaves_like "a binary encoded string" - # end - - # context "when the string contains utf-8 characters" do - - # let(:string) do - # "Straße" - 
# end - - # let(:encoded) do - # string.to_bson_string.to_s - # end - - # let(:char) do - # "ß".chr.force_encoding(BSON::BINARY) - # end - - # it "returns the encoded string" do - # expect(encoded).to eq("Stra#{char}e") - # end - - # it_behaves_like "a binary encoded string" - # end - - # context "when the string is encoded in non utf-8" do - - # let(:string) do - # "Straße".encode("iso-8859-1") - # end - - # let(:encoded) do - # string.to_bson_string.to_s - # end - - # let(:char) do - # "ß".chr.force_encoding(BSON::BINARY) - # end - - # it "returns the encoded string" do - # expect(encoded).to eq("Stra#{char}e") - # end - - # it_behaves_like "a binary encoded string" - # end - - # context "when the string contains non utf-8 characters" do - - # let(:string) do - # 255.chr - # end - - # it "raises an error" do - # expect { - # string.to_bson_string - # }.to raise_error(EncodingError) - # end - # end - # end end From 2c2623f2c34736c938a4497190ea6885297daaea Mon Sep 17 00:00:00 2001 From: Durran Jordan Date: Fri, 25 Sep 2015 14:26:16 +0200 Subject: [PATCH 17/29] RUBY-1019: Update bench --- ext/bson/native.c | 2 +- perf/bench.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/bson/native.c b/ext/bson/native.c index c97641c77..f941c8869 100644 --- a/ext/bson/native.c +++ b/ext/bson/native.c @@ -18,7 +18,7 @@ #include #include "portable_endian.h" -#define BSON_BYTE_BUFFER_SIZE 256 +#define BSON_BYTE_BUFFER_SIZE 512 typedef struct { size_t size; diff --git a/perf/bench.rb b/perf/bench.rb index 4d4fdd89a..0bdfb18c9 100644 --- a/perf/bench.rb +++ b/perf/bench.rb @@ -156,7 +156,7 @@ def benchmark! 
# count.times { Time.from_bson(StringIO.new(time_bytes)) } # end - doc_bytes = document.to_bson + doc_bytes = document.to_bson.to_s bench.report("Document#from_bson ---->") do count.times { BSON::Document.from_bson(BSON::ByteBuffer.new(doc_bytes)) } end From c4539b4a6ed90970a94c826c117b28fdaee6aa8e Mon Sep 17 00:00:00 2001 From: Durran Jordan Date: Sun, 27 Sep 2015 14:13:08 +0200 Subject: [PATCH 18/29] RUBY-1019: Adding write_position accessor --- ext/bson/native.c | 12 +++++++++ spec/bson/byte_buffer_spec.rb | 49 +++++++++++++++++++++++++++-------- 2 files changed, 50 insertions(+), 11 deletions(-) diff --git a/ext/bson/native.c b/ext/bson/native.c index f941c8869..a048b2947 100644 --- a/ext/bson/native.c +++ b/ext/bson/native.c @@ -63,6 +63,7 @@ static VALUE rb_bson_byte_buffer_put_int64(VALUE self, VALUE i); static VALUE rb_bson_byte_buffer_put_string(VALUE self, VALUE string); static VALUE rb_bson_byte_buffer_read_position(VALUE self); static VALUE rb_bson_byte_buffer_replace_int32(VALUE self, VALUE index, VALUE i); +static VALUE rb_bson_byte_buffer_write_position(VALUE self); static VALUE rb_bson_byte_buffer_to_s(VALUE self); static size_t rb_bson_byte_buffer_memsize(const void *ptr); @@ -103,6 +104,7 @@ void Init_native() rb_define_method(rb_byte_buffer_class, "put_string", rb_bson_byte_buffer_put_string, 1); rb_define_method(rb_byte_buffer_class, "read_position", rb_bson_byte_buffer_read_position, 0); rb_define_method(rb_byte_buffer_class, "replace_int32", rb_bson_byte_buffer_replace_int32, 2); + rb_define_method(rb_byte_buffer_class, "write_position", rb_bson_byte_buffer_write_position, 0); rb_define_method(rb_byte_buffer_class, "to_s", rb_bson_byte_buffer_to_s, 0); } @@ -407,6 +409,16 @@ VALUE rb_bson_byte_buffer_replace_int32(VALUE self, VALUE index, VALUE i) return self; } +/** + * Get the write position. 
+ */ +VALUE rb_bson_byte_buffer_write_position(VALUE self) +{ + byte_buffer_t *b; + TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); + return INT2NUM(b->write_position); +} + /** * Convert the buffer to a string. */ diff --git a/spec/bson/byte_buffer_spec.rb b/spec/bson/byte_buffer_spec.rb index c24bfdc83..4bf893a2a 100644 --- a/spec/bson/byte_buffer_spec.rb +++ b/spec/bson/byte_buffer_spec.rb @@ -19,7 +19,7 @@ described_class.new(BSON::Int32::BSON_TYPE) end - let(:byte) do + let!(:byte) do buffer.get_byte end @@ -28,7 +28,7 @@ end it 'increments the position by 1' do - + expect(buffer.read_position).to eq(1) end end @@ -42,7 +42,7 @@ described_class.new(string) end - let(:bytes) do + let!(:bytes) do buffer.get_bytes(2) end @@ -51,7 +51,7 @@ end it 'increments the position by the length' do - + expect(buffer.read_position).to eq(string.bytesize) end end @@ -171,13 +171,17 @@ described_class.new end - let(:modified) do + let!(:modified) do buffer.put_byte(BSON::Int32::BSON_TYPE) end it 'appends the byte to the byte buffer' do expect(modified.to_s).to eq(BSON::Int32::BSON_TYPE.chr) end + + it 'increments the write position by 1' do + expect(modified.write_position).to eq(1) + end end describe '#put_cstring' do @@ -188,7 +192,7 @@ context 'when the string is valid' do - let(:modified) do + let!(:modified) do buffer.put_cstring('testing') end @@ -196,6 +200,9 @@ expect(modified.to_s).to eq("testing#{BSON::NULL_BYTE}") end + it 'increments the write position by the length + 1' do + expect(modified.write_position).to eq(8) + end end context "when the string contains a null byte" do @@ -218,13 +225,17 @@ described_class.new end - let(:modified) do + let!(:modified) do buffer.put_double(1.2332) end it 'appends the double to the buffer' do expect(modified.to_s).to eq([ 1.2332 ].pack(Float::PACK)) end + + it 'increments the write position by 8' do + expect(modified.write_position).to eq(8) + end end describe '#put_int32' do @@ -235,7 +246,7 @@ context 
'when the integer is 32 bit' do - let(:modified) do + let!(:modified) do buffer.put_int32(Integer::MAX_32BIT - 1) end @@ -246,6 +257,10 @@ it 'appends the int32 to the byte buffer' do expect(modified.to_s).to eq(expected) end + + it 'increments the write position by 4' do + expect(modified.write_position).to eq(4) + end end end @@ -257,7 +272,7 @@ context 'when the integer is 64 bit' do - let(:modified) do + let!(:modified) do buffer.put_int64(Integer::MAX_64BIT - 1) end @@ -268,6 +283,10 @@ it 'appends the int64 to the byte buffer' do expect(modified.to_s).to eq(expected) end + + it 'increments the write position by 8' do + expect(modified.write_position).to eq(8) + end end end @@ -279,13 +298,17 @@ described_class.new end - let(:modified) do + let!(:modified) do buffer.put_string('testing') end it 'appends the string to the byte buffer' do expect(modified.to_s).to eq("#{8.to_bson.to_s}testing#{BSON::NULL_BYTE}") end + + it 'increments the write position by length + 5' do + expect(modified.write_position).to eq(12) + end end context 'when the buffer needs to be expanded' do @@ -298,13 +321,17 @@ 300.times.inject(""){ |s, i| s << "#{i}" } end - let(:modified) do + let!(:modified) do buffer.put_string(string) end it 'appends the string to the byte buffer' do expect(modified.to_s).to eq("#{(string.bytesize + 1).to_bson.to_s}#{string}#{BSON::NULL_BYTE}") end + + it 'increments the write position by length + 5' do + expect(modified.write_position).to eq(string.bytesize + 5) + end end end From 313a83ae7658f4601c76037c71dcadf4d7c88fe0 Mon Sep 17 00:00:00 2001 From: Durran Jordan Date: Sun, 27 Sep 2015 14:21:03 +0200 Subject: [PATCH 19/29] RUBY-1019: Adding more specs around int and string buffer appending --- spec/bson/byte_buffer_spec.rb | 124 +++++++++++++++++++++++++++------- 1 file changed, 99 insertions(+), 25 deletions(-) diff --git a/spec/bson/byte_buffer_spec.rb b/spec/bson/byte_buffer_spec.rb index 4bf893a2a..beed0a673 100644 --- a/spec/bson/byte_buffer_spec.rb 
+++ b/spec/bson/byte_buffer_spec.rb @@ -246,20 +246,51 @@ context 'when the integer is 32 bit' do - let!(:modified) do - buffer.put_int32(Integer::MAX_32BIT - 1) - end + context 'when the integer is positive' do + + let!(:modified) do + buffer.put_int32(Integer::MAX_32BIT - 1) + end + + let(:expected) do + [ Integer::MAX_32BIT - 1 ].pack(BSON::Int32::PACK) + end - let(:expected) do - [ Integer::MAX_32BIT - 1 ].pack(BSON::Int32::PACK) + it 'appends the int32 to the byte buffer' do + expect(modified.to_s).to eq(expected) + end + + it 'increments the write position by 4' do + expect(modified.write_position).to eq(4) + end end - it 'appends the int32 to the byte buffer' do - expect(modified.to_s).to eq(expected) + context 'when the integer is negative' do + + let!(:modified) do + buffer.put_int32(Integer::MIN_32BIT + 1) + end + + let(:expected) do + [ Integer::MIN_32BIT + 1 ].pack(BSON::Int32::PACK) + end + + it 'appends the int32 to the byte buffer' do + expect(modified.to_s).to eq(expected) + end + + it 'increments the write position by 4' do + expect(modified.write_position).to eq(4) + end end - it 'increments the write position by 4' do - expect(modified.write_position).to eq(4) + context 'when the integer is not 32 bit' do + + it 'raises an exception' do + expect { + buffer.put_int32(Integer::MAX_64BIT - 1) + }.to raise_error(RangeError) + end end end end @@ -272,20 +303,51 @@ context 'when the integer is 64 bit' do - let!(:modified) do - buffer.put_int64(Integer::MAX_64BIT - 1) - end + context 'when the integer is positive' do + + let!(:modified) do + buffer.put_int64(Integer::MAX_64BIT - 1) + end + + let(:expected) do + [ Integer::MAX_64BIT - 1 ].pack(BSON::Int64::PACK) + end - let(:expected) do - [ Integer::MAX_64BIT - 1 ].pack(BSON::Int64::PACK) + it 'appends the int64 to the byte buffer' do + expect(modified.to_s).to eq(expected) + end + + it 'increments the write position by 8' do + expect(modified.write_position).to eq(8) + end end - it 'appends the int64 
to the byte buffer' do - expect(modified.to_s).to eq(expected) + context 'when the integer is negative' do + + let!(:modified) do + buffer.put_int64(Integer::MIN_64BIT + 1) + end + + let(:expected) do + [ Integer::MIN_64BIT + 1 ].pack(BSON::Int64::PACK) + end + + it 'appends the int64 to the byte buffer' do + expect(modified.to_s).to eq(expected) + end + + it 'increments the write position by 8' do + expect(modified.write_position).to eq(8) + end end - it 'increments the write position by 8' do - expect(modified.write_position).to eq(8) + context 'when the integer is larger than 64 bit' do + + it 'raises an exception' do + expect { + buffer.put_int64(Integer::MAX_64BIT + 1) + }.to raise_error(RangeError) + end end end end @@ -298,16 +360,28 @@ described_class.new end - let!(:modified) do - buffer.put_string('testing') - end + context 'when the string is UTF-8' do - it 'appends the string to the byte buffer' do - expect(modified.to_s).to eq("#{8.to_bson.to_s}testing#{BSON::NULL_BYTE}") + let!(:modified) do + buffer.put_string('testing') + end + + it 'appends the string to the byte buffer' do + expect(modified.to_s).to eq("#{8.to_bson.to_s}testing#{BSON::NULL_BYTE}") + end + + it 'increments the write position by length + 5' do + expect(modified.write_position).to eq(12) + end end - it 'increments the write position by length + 5' do - expect(modified.write_position).to eq(12) + context 'when the string is not UTF-8' do + + it 'raises an exception' do + expect { + buffer.put_string('gültig'.encode("iso-8859-1")) + }.to raise_error(ArgumentError) + end end end From 507667900694379c5184c7ffe909c8819e982f74 Mon Sep 17 00:00:00 2001 From: Durran Jordan Date: Mon, 28 Sep 2015 18:52:56 +0200 Subject: [PATCH 20/29] RUBY-1019: First part implementation native java byte buffer --- lib/bson.rb | 1 - lib/bson/byte_buffer.rb | 12 ++ spec/bson/byte_buffer_spec.rb | 14 +- src/main/org/bson/BooleanExtension.java | 139 -------------- src/main/org/bson/ByteBuf.java | 232 
++++++++++++++++++++++++ src/main/org/bson/FloatExtension.java | 101 ----------- src/main/org/bson/IntegerExtension.java | 144 --------------- src/main/org/bson/NativeService.java | 26 ++- src/main/org/bson/TimeExtension.java | 101 ----------- 9 files changed, 271 insertions(+), 499 deletions(-) delete mode 100644 src/main/org/bson/BooleanExtension.java create mode 100644 src/main/org/bson/ByteBuf.java delete mode 100644 src/main/org/bson/FloatExtension.java delete mode 100644 src/main/org/bson/IntegerExtension.java delete mode 100644 src/main/org/bson/TimeExtension.java diff --git a/lib/bson.rb b/lib/bson.rb index 5f08c3b10..1e773974a 100644 --- a/lib/bson.rb +++ b/lib/bson.rb @@ -59,7 +59,6 @@ def self.ObjectId(string) require "bson/registry" require "bson/specialized" require "bson/json" -require "bson/byte_buffer" require "bson/int32" require "bson/int64" require "bson/integer" diff --git a/lib/bson/byte_buffer.rb b/lib/bson/byte_buffer.rb index a8c675bc3..9d32ed00c 100644 --- a/lib/bson/byte_buffer.rb +++ b/lib/bson/byte_buffer.rb @@ -11,8 +11,20 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +require 'java' +require 'bson-ruby.jar' +import 'org.bson.ByteBuf' module BSON class ByteBuffer + + def initialize(bytes = nil) + @buffer = bytes ? 
ByteBuf.new(bytes) : ByteBuf.new + end + + def put_double(value) + @buffer.put_double(value) + self + end end end diff --git a/spec/bson/byte_buffer_spec.rb b/spec/bson/byte_buffer_spec.rb index beed0a673..4266a1c59 100644 --- a/spec/bson/byte_buffer_spec.rb +++ b/spec/bson/byte_buffer_spec.rb @@ -27,7 +27,7 @@ expect(byte).to eq(BSON::Int32::BSON_TYPE) end - it 'increments the position by 1' do + it 'increments the read position by 1' do expect(buffer.read_position).to eq(1) end end @@ -88,7 +88,7 @@ expect(double).to eq(12.5) end - it 'increments the position by 8' do + it 'increments the read position by 8' do expect(buffer.read_position).to eq(8) end end @@ -377,11 +377,11 @@ context 'when the string is not UTF-8' do - it 'raises an exception' do - expect { - buffer.put_string('gültig'.encode("iso-8859-1")) - }.to raise_error(ArgumentError) - end + # it 'raises an exception' do + # expect { + # buffer.put_string('gültig'.encode("iso-8859-1")) + # }.to raise_error(ArgumentError) + # end end end diff --git a/src/main/org/bson/BooleanExtension.java b/src/main/org/bson/BooleanExtension.java deleted file mode 100644 index b0cf2c5fe..000000000 --- a/src/main/org/bson/BooleanExtension.java +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (C) 2009-2013 MongoDB, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.bson; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -import org.jruby.Ruby; -import org.jruby.RubyBoolean; -import org.jruby.RubyModule; -import org.jruby.RubyString; -import org.jruby.anno.JRubyMethod; -import org.jruby.runtime.builtin.IRubyObject; - -/** - * Provides native extensions around boolean operations. - * - * @since 2.0.0 - */ -public class BooleanExtension { - - /** - * Constant for the FalseClass module name. - * - * @since 2.0.0 - */ - private static final String FALSE_CLASS = "FalseClass".intern(); - - /** - * Constant for the TrueClass module name. - * - * @since 2.0.0 - */ - private static final String TRUE_CLASS = "TrueClass".intern(); - - /** - * Constant for a single false byte. - * - * @since 2.0.0 - */ - private static final byte FALSE_BYTE = 0; - - /** - * Constant for a single true byte. - * - * @since 2.0.0 - */ - private static final byte TRUE_BYTE = 1; - - /** - * Constant for the array of 1 false byte. - * - * @since 2.0.0 - */ - private static final byte[] FALSE_BYTES = new byte[] { FALSE_BYTE }; - - /** - * Constant for the array of 1 true byte. - * - * @since 2.0.0 - */ - private static final byte[] TRUE_BYTES = new byte[] { TRUE_BYTE }; - - /** - * Load the method definitions into the boolean module. - * - * @param bson The bson module to define the methods under. - * - * @since 2.0.0 - */ - public static void extend(final RubyModule bson) { - RubyModule falseMod = bson.defineOrGetModuleUnder(FALSE_CLASS); - RubyModule trueMod = bson.defineOrGetModuleUnder(TRUE_CLASS); - falseMod.defineAnnotatedMethods(BooleanExtension.class); - trueMod.defineAnnotatedMethods(BooleanExtension.class); - } - - /** - * Encodes the boolean to the raw BSON bytes. - * - * @param bool The instance of the boolean object. - * - * @return The encoded bytes. 
- * - * @since 2.0.0 - */ - @JRubyMethod(name = "to_bson") - public static IRubyObject toBson(final IRubyObject bool) { - return toBsonBoolean(bool); - } - - /** - * Encodes the boolean to the raw BSON bytes. - * - * @param bool The instance of the boolean object. - * @param bytes The bytes to encode to. - * - * @return The encoded bytes. - * - * @since 2.0.0 - */ - @JRubyMethod(name = "to_bson") - public static IRubyObject toBson(final IRubyObject bool, final IRubyObject bytes) { - return ((RubyString) bytes).append(toBsonBoolean(bool)); - } - - /** - * Take the boolean value and convert it to its bytes. - * - * @param bool The Ruby boolean value. - * - * @return The byte array. - * - * @since 2.0.0 - */ - private static RubyString toBsonBoolean(final IRubyObject bool) { - final Ruby runtime = bool.getRuntime(); - if (bool == runtime.getTrue()) { - return RubyString.newString(runtime, TRUE_BYTES); - } - else { - return RubyString.newString(runtime, FALSE_BYTES); - } - } -} diff --git a/src/main/org/bson/ByteBuf.java b/src/main/org/bson/ByteBuf.java new file mode 100644 index 000000000..466ca9fbe --- /dev/null +++ b/src/main/org/bson/ByteBuf.java @@ -0,0 +1,232 @@ +/* + * Copyright (C) 2015 MongoDB, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.bson; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.Arrays; + +import org.jruby.Ruby; +import org.jruby.RubyBignum; +import org.jruby.RubyClass; +import org.jruby.RubyFloat; +import org.jruby.RubyFixnum; +import org.jruby.RubyNumeric; +import org.jruby.RubyObject; +import org.jruby.RubyString; +import org.jruby.anno.JRubyMethod; +import org.jruby.runtime.builtin.IRubyObject; + +/** + * Provides native extensions around boolean operations. + * + * @since 4.0.0 + */ +public class ByteBuf extends RubyObject { + + private static byte NULL_BYTE = 0x00; + + /** + * The modes for the buffer. + */ + private enum Mode { READ, WRITE } + + /** + * The wrapped byte buffer. + */ + private ByteBuffer buffer; + + /** + * The current buffer mode. + */ + private Mode mode; + + /** + * The current position while reading. + */ + private int readPosition = 0; + + /** + * The current position while writing. + */ + private int writePosition = 0; + + /** + * Instantiate the ByteBuf. 
+ */ + public ByteBuf(final Ruby runtime, final RubyClass rubyClass) { + super(runtime, rubyClass); + } + + @JRubyMethod(name = "initialize") + public IRubyObject intialize() { + this.buffer = ByteBuffer.allocate(1024).order(ByteOrder.LITTLE_ENDIAN); + this.mode = Mode.WRITE; + return null; + } + + @JRubyMethod(name = "initialize") + public IRubyObject initialize(final RubyString value) { + this.buffer = ByteBuffer.wrap(value.getBytes()).order(ByteOrder.LITTLE_ENDIAN); + this.mode = Mode.READ; + return null; + } + + @JRubyMethod(name = "get_byte") + public RubyString getByte() { + ensureBsonRead(); + RubyString string = RubyString.newString(getRuntime(), new byte[] { this.buffer.get() }); + this.readPosition += 1; + return string; + } + + @JRubyMethod(name = "get_bytes") + public RubyString getBytes(final IRubyObject value) { + ensureBsonRead(); + int length = RubyNumeric.fix2int((RubyFixnum) value); + ByteBuffer buff = this.buffer.get(new byte[length]); + RubyString string = RubyString.newString(getRuntime(), buff.array()); + this.readPosition += length; + return string; + } + + @JRubyMethod(name = "get_double") + public RubyFloat getDouble() { + ensureBsonRead(); + RubyFloat doubl = new RubyFloat(getRuntime(), this.buffer.getDouble()); + this.readPosition += 8; + return doubl; + } + + @JRubyMethod(name = "get_int32") + public RubyFixnum getInt32() { + ensureBsonRead(); + RubyFixnum int32 = new RubyFixnum(getRuntime(), this.buffer.getInt()); + this.readPosition += 4; + return int32; + } + + @JRubyMethod(name = "get_string") + public RubyString getString() { + ensureBsonRead(); + int length = this.buffer.getInt(); + this.readPosition += 4; + byte[] stringBytes = new byte[length]; + this.buffer.get(stringBytes); + byte[] bytes = Arrays.copyOfRange(stringBytes, 0, stringBytes.length - 1); + RubyString string = RubyString.newString(getRuntime(), bytes); + this.readPosition += length; + return string; + } + + @JRubyMethod(name = "get_int64") + public RubyBignum 
getInt64() { + ensureBsonRead(); + RubyBignum int64 = new RubyBignum(getRuntime(), RubyBignum.long2big(this.buffer.getLong())); + this.readPosition += 8; + return int64; + } + + @JRubyMethod(name = "put_byte") + public ByteBuf putByte(final IRubyObject value) { + ensureBsonWrite(); + this.buffer.put(((RubyString) value).getBytes()[0]); + this.writePosition += 1; + return this; + } + + @JRubyMethod(name = "put_bytes") + public ByteBuf putBytes(final IRubyObject value) { + ensureBsonWrite(); + byte[] bytes = ((RubyString) value).getBytes(); + this.buffer.put(bytes); + this.writePosition += bytes.length; + return this; + } + + @JRubyMethod(name = "put_double") + public ByteBuf putDouble(final IRubyObject value) { + ensureBsonWrite(); + this.buffer.putDouble(((RubyFloat) value).getDoubleValue()); + this.writePosition += 8; + return this; + } + + @JRubyMethod(name = "put_int32") + public ByteBuf putInt32(final IRubyObject value) { + ensureBsonWrite(); + this.buffer.putInt(RubyNumeric.fix2int((RubyFixnum) value)); + this.writePosition += 4; + return this; + } + + @JRubyMethod(name = "put_int64") + public ByteBuf putInt64(final IRubyObject value) { + ensureBsonWrite(); + this.buffer.putLong(RubyNumeric.fix2long((RubyFixnum) value)); + this.writePosition += 8; + return this; + } + + @JRubyMethod(name = "put_string") + public ByteBuf putString(final IRubyObject value) { + ensureBsonWrite(); + byte[] bytes = ((RubyString) value).getBytes(); + this.buffer.putInt(bytes.length + 1); + this.buffer.put(bytes); + this.buffer.put(NULL_BYTE); + this.writePosition += (bytes.length + 5); + return this; + } + + @JRubyMethod(name = "read_position") + public RubyFixnum getReadPosition() { + return new RubyFixnum(getRuntime(), this.readPosition); + } + + @JRubyMethod(name = "write_position") + public RubyFixnum getWritePosition() { + return new RubyFixnum(getRuntime(), this.writePosition); + } + + /** + * Convert the byte buffer to a string of the bytes. 
+ */ + @JRubyMethod(name = "to_s") + public RubyString toRubyString() { + ensureBsonRead(); + byte[] bytes = new byte[this.writePosition]; + this.buffer.get(bytes, 0, this.writePosition); + return RubyString.newString(getRuntime(), bytes); + } + + private void ensureBsonRead() { + if (this.mode == Mode.WRITE) { + this.buffer.flip(); + } + } + + /** + * This will grow the underlying byte buffer if the remaining size is too small. + */ + private void ensureBsonWrite() { + if (this.mode == Mode.READ) { + this.buffer.flip(); + } + // if size of item > limit, increase the buffer. + } +} diff --git a/src/main/org/bson/FloatExtension.java b/src/main/org/bson/FloatExtension.java deleted file mode 100644 index e68f1878e..000000000 --- a/src/main/org/bson/FloatExtension.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (C) 2009-2013 MongoDB, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.bson; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -import org.jruby.Ruby; -import org.jruby.RubyFloat; -import org.jruby.RubyModule; -import org.jruby.RubyString; -import org.jruby.anno.JRubyMethod; -import org.jruby.runtime.builtin.IRubyObject; - -/** - * Provides native extensions around float operations. - * - * @since 2.0.0 - */ -public class FloatExtension { - - /** - * Constant for the Float module name. 
- * - * @since 2.0.0 - */ - private static final String FLOAT = "Float".intern(); - - /** - * Load the method definitions into the float module. - * - * @param bson The bson module to define the methods under. - * - * @since 2.0.0 - */ - public static void extend(final RubyModule bson) { - RubyModule floatMod = bson.defineOrGetModuleUnder(FLOAT); - floatMod.defineAnnotatedMethods(FloatExtension.class); - } - - /** - * Encodes the float to the raw BSON bytes. - * - * @param float The instance of the float object. - * - * @return The encoded bytes. - * - * @since 2.0.0 - */ - @JRubyMethod(name = "to_bson") - public static IRubyObject toBson(final IRubyObject number) { - final double value = ((RubyFloat) number).getDoubleValue(); - return toBsonDouble(number.getRuntime(), value); - } - - /** - * Encodes the float to the raw BSON bytes. - * - * @param float The instance of the float object. - * @param bytes The bytes to encode to. - * - * @return The encoded bytes. - * - * @since 2.0.0 - */ - @JRubyMethod(name = "to_bson") - public static IRubyObject toBson(final IRubyObject number, final IRubyObject bytes) { - final double value = ((RubyFloat) number).getDoubleValue(); - return ((RubyString) bytes).append(toBsonDouble(number.getRuntime(), value)); - } - - /** - * Take the double value and convert it to it's little endian bytes. - * - * @param runtime The JRuby runtime. - * @param value The value to encode. - * - * @return The byte array. 
- * - * @since 2.0.0 - */ - private static RubyString toBsonDouble(final Ruby runtime, final double value) { - final ByteBuffer buffer = ByteBuffer.allocate(8).order(ByteOrder.LITTLE_ENDIAN); - buffer.putDouble(value); - return RubyString.newString(runtime, buffer.array()); - } -} diff --git a/src/main/org/bson/IntegerExtension.java b/src/main/org/bson/IntegerExtension.java deleted file mode 100644 index 15592624b..000000000 --- a/src/main/org/bson/IntegerExtension.java +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright (C) 2009-2013 MongoDB, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.bson; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -import org.jruby.Ruby; -import org.jruby.RubyModule; -import org.jruby.RubyInteger; -import org.jruby.RubyString; -import org.jruby.anno.JRubyMethod; -import org.jruby.runtime.builtin.IRubyObject; - -/** - * Provides native extensions around integer operations. - * - * @since 2.0.0 - */ -public class IntegerExtension { - - /** - * Constant for the Integer module name. - * - * @since 2.0.0 - */ - private static final String INTEGER = "Integer".intern(); - - /** - * Load the method definitions into the integer module. - * - * @param bson The bson module to define the methods under. 
- * - * @since 2.0.0 - */ - public static void extend(final RubyModule bson) { - RubyModule integer = bson.defineOrGetModuleUnder(INTEGER); - integer.defineAnnotatedMethods(IntegerExtension.class); - } - - /** - * Encodes the integer to the raw BSON bytes. - * - * @param integer The instance of the integer object. - * - * @return The encoded bytes. - * - * @since 2.0.0 - */ - @JRubyMethod(name = "to_bson") - public static IRubyObject toBson(final IRubyObject integer) { - final long value = ((RubyInteger) integer).getLongValue(); - return toBsonInt(integer.getRuntime(), value); - } - - /** - * Encodes the integer to the raw BSON bytes. - * - * @param integer The instance of the integer object. - * @param bytes The bytes to encode to. - * - * @return The encoded bytes. - * - * @since 2.0.0 - */ - @JRubyMethod(name = "to_bson") - public static IRubyObject toBson(final IRubyObject integer, final IRubyObject bytes) { - final long value = ((RubyInteger) integer).getLongValue(); - return ((RubyString) bytes).append(toBsonInt(integer.getRuntime(), value)); - } - - /** - * Convert the integer to the raw bson. - * - * @param runtime The JRuby runtime. - * @param value The integer value. - * - * @return The encoded bytes. - * - * @since 2.0.0 - */ - private static RubyString toBsonInt(final Ruby runtime, final long value) { - return isInt32(value) ? toBsonInt32(runtime, value) : toBsonInt64(runtime, value); - } - - /** - * Determine if the integer is 32bit. - * - * @param value The integer value. - * - * @return If the integer is in 32bit range. - * - * @since 2.0.0 - */ - private static boolean isInt32(final long value) { - return (Integer.MIN_VALUE <= value && value <= Integer.MAX_VALUE); - } - - /** - * Take the 32bit value and convert it to it's little endian bytes. - * - * @param runtime The JRuby runtime. - * @param value The value to encode. - * - * @return The byte array. 
- * - * @since 2.0.0 - */ - private static RubyString toBsonInt32(final Ruby runtime, final long value) { - final ByteBuffer buffer = ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN); - buffer.putInt((int) value); - return RubyString.newString(runtime, buffer.array()); - } - - /** - * Take the 64bit value and convert it to it's little endian bytes. - * - * @param runtime The JRuby runtime. - * @param value The value to encode. - * - * @return The byte array. - * - * @since 2.0.0 - */ - private static RubyString toBsonInt64(final Ruby runtime, final long value) { - final ByteBuffer buffer = ByteBuffer.allocate(8).order(ByteOrder.LITTLE_ENDIAN); - buffer.putLong(value); - return RubyString.newString(runtime, buffer.array()); - } -} diff --git a/src/main/org/bson/NativeService.java b/src/main/org/bson/NativeService.java index a760f64d3..61138a41a 100644 --- a/src/main/org/bson/NativeService.java +++ b/src/main/org/bson/NativeService.java @@ -1,5 +1,5 @@ /* - * Copyright (C) 2009-2013 MongoDB, Inc. + * Copyright (C) 2009-2015 MongoDB, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,8 +19,11 @@ import java.io.IOException; import org.jruby.Ruby; +import org.jruby.RubyClass; import org.jruby.RubyModule; import org.jruby.runtime.load.BasicLibraryService; +import org.jruby.runtime.ObjectAllocator; +import org.jruby.runtime.builtin.IRubyObject; /** * The native implementation of various extensions. @@ -36,6 +39,13 @@ public class NativeService implements BasicLibraryService { */ private final String BSON = "BSON".intern(); + /** + * Constant for the BSON module name. + * + * @since 2.0.0 + */ + private final String BYTE_BUF = "ByteBuff".intern(); + /** * Loads the native extension into the JRuby runtime. 
* @@ -47,11 +57,15 @@ public class NativeService implements BasicLibraryService { */ public boolean basicLoad(final Ruby runtime) throws IOException { RubyModule bson = runtime.fastGetModule(BSON); - BooleanExtension.extend(bson); - FloatExtension.extend(bson); - GeneratorExtension.extend(bson); - IntegerExtension.extend(bson); - TimeExtension.extend(bson); + + RubyClass byteBuffer = bson.defineClassUnder("ByteBuffer", runtime.getObject(), new ObjectAllocator() { + public IRubyObject allocate(Ruby runtime, RubyClass rubyClass) { + return new ByteBuf(runtime, rubyClass); + } + }); + + byteBuffer.defineAnnotatedMethods(ByteBuf.class); + return true; } } diff --git a/src/main/org/bson/TimeExtension.java b/src/main/org/bson/TimeExtension.java deleted file mode 100644 index a409776bb..000000000 --- a/src/main/org/bson/TimeExtension.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (C) 2009-2013 MongoDB, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.bson; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; - -import org.jruby.Ruby; -import org.jruby.RubyModule; -import org.jruby.RubyString; -import org.jruby.RubyTime; -import org.jruby.anno.JRubyMethod; -import org.jruby.runtime.builtin.IRubyObject; - -/** - * Provides native extensions around time operations. - * - * @since 2.0.0 - */ -public class TimeExtension { - - /** - * Constant for the time module name. 
- * - * @since 2.0.0 - */ - private static final String TIME = "Time".intern(); - - /** - * Load the method definitions into the time module. - * - * @param bson The bson module to define the methods under. - * - * @since 2.0.0 - */ - public static void extend(final RubyModule bson) { - RubyModule time = bson.defineOrGetModuleUnder(TIME); - time.defineAnnotatedMethods(TimeExtension.class); - } - - /** - * Encodes the time to the raw BSON bytes. - * - * @param time The instance of the time object. - * - * @return The encoded bytes. - * - * @since 2.0.0 - */ - @JRubyMethod(name = "to_bson") - public static IRubyObject toBson(final IRubyObject time) { - final long millis = ((RubyTime) time).getJavaDate().getTime(); - return toBsonTime(time.getRuntime(), millis); - } - - /** - * Encodes the time to the raw BSON bytes. - * - * @param time The instance of the time object. - * @param bytes The bytes to encode to. - * - * @return The encoded bytes. - * - * @since 2.0.0 - */ - @JRubyMethod(name = "to_bson") - public static IRubyObject toBson(final IRubyObject time, final IRubyObject bytes) { - final long millis = ((RubyTime) time).getJavaDate().getTime(); - return ((RubyString) bytes).append(toBsonTime(time.getRuntime(), millis)); - } - - /** - * Take the 64bit milliseconds and convert it to it's little endian bytes. - * - * @param runtime The JRuby runtime. - * @param millis The milliseconds to encode. - * - * @return The byte array. 
- * - * @since 2.0.0 - */ - private static IRubyObject toBsonTime(final Ruby runtime, final long millis) { - final ByteBuffer buffer = ByteBuffer.allocate(8).order(ByteOrder.LITTLE_ENDIAN); - buffer.putLong(millis); - return RubyString.newString(runtime, buffer.array()); - } -} From 47946522dafd5af527471e217bcfa93b93f0c23b Mon Sep 17 00:00:00 2001 From: Durran Jordan Date: Mon, 28 Sep 2015 18:54:25 +0200 Subject: [PATCH 21/29] RUBY-1019: No longer need the ruby impl --- lib/bson/byte_buffer.rb | 30 ------------------------------ 1 file changed, 30 deletions(-) delete mode 100644 lib/bson/byte_buffer.rb diff --git a/lib/bson/byte_buffer.rb b/lib/bson/byte_buffer.rb deleted file mode 100644 index 9d32ed00c..000000000 --- a/lib/bson/byte_buffer.rb +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (C) 2015 MongoDB Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -require 'java' -require 'bson-ruby.jar' -import 'org.bson.ByteBuf' - -module BSON - class ByteBuffer - - def initialize(bytes = nil) - @buffer = bytes ? 
ByteBuf.new(bytes) : ByteBuf.new - end - - def put_double(value) - @buffer.put_double(value) - self - end - end -end From a3803a220664fdb51661d3727f0eecfb80b0c28e Mon Sep 17 00:00:00 2001 From: Durran Jordan Date: Mon, 28 Sep 2015 23:13:01 +0200 Subject: [PATCH 22/29] RUBY-1019: Adding javadoc --- src/main/org/bson/ByteBuf.java | 144 ++++++++++++++++++++++++++++++++- 1 file changed, 140 insertions(+), 4 deletions(-) diff --git a/src/main/org/bson/ByteBuf.java b/src/main/org/bson/ByteBuf.java index 466ca9fbe..4dce7d93c 100644 --- a/src/main/org/bson/ByteBuf.java +++ b/src/main/org/bson/ByteBuf.java @@ -38,6 +38,9 @@ */ public class ByteBuf extends RubyObject { + /** + * Constant for a null byte. + */ private static byte NULL_BYTE = 0x00; /** @@ -66,12 +69,23 @@ private enum Mode { READ, WRITE } private int writePosition = 0; /** - * Instantiate the ByteBuf. + * Instantiate the ByteBuf - this is #allocate in Ruby. + * + * @author Durran Jordan + * @since 2015.09.26 + * @version 4.0.0 */ public ByteBuf(final Ruby runtime, final RubyClass rubyClass) { super(runtime, rubyClass); } + /** + * Initialize an empty buffer. + * + * @author Durran Jordan + * @since 2015.09.26 + * @version 4.0.0 + */ @JRubyMethod(name = "initialize") public IRubyObject intialize() { this.buffer = ByteBuffer.allocate(1024).order(ByteOrder.LITTLE_ENDIAN); @@ -79,6 +93,15 @@ public IRubyObject intialize() { return null; } + /** + * Instantiate the buffer with bytes. + * + * @param value The bytes to instantiate with. + * + * @author Durran Jordan + * @since 2015.09.26 + * @version 4.0.0 + */ @JRubyMethod(name = "initialize") public IRubyObject initialize(final RubyString value) { this.buffer = ByteBuffer.wrap(value.getBytes()).order(ByteOrder.LITTLE_ENDIAN); @@ -86,6 +109,13 @@ public IRubyObject initialize(final RubyString value) { return null; } + /** + * Get a single byte from the buffer. 
+ * + * @author Durran Jordan + * @since 2015.09.26 + * @version 4.0.0 + */ @JRubyMethod(name = "get_byte") public RubyString getByte() { ensureBsonRead(); @@ -94,6 +124,15 @@ public RubyString getByte() { return string; } + /** + * Get the supplied number of bytes from the buffer. + * + * @param value The number of bytes to read. + * + * @author Durran Jordan + * @since 2015.09.26 + * @version 4.0.0 + */ @JRubyMethod(name = "get_bytes") public RubyString getBytes(final IRubyObject value) { ensureBsonRead(); @@ -104,6 +143,13 @@ public RubyString getBytes(final IRubyObject value) { return string; } + /** + * Get a double from the buffer. + * + * @author Durran Jordan + * @since 2015.09.26 + * @version 4.0.0 + */ @JRubyMethod(name = "get_double") public RubyFloat getDouble() { ensureBsonRead(); @@ -112,6 +158,13 @@ public RubyFloat getDouble() { return doubl; } + /** + * Get a 32 bit integer from the buffer. + * + * @author Durran Jordan + * @since 2015.09.26 + * @version 4.0.0 + */ @JRubyMethod(name = "get_int32") public RubyFixnum getInt32() { ensureBsonRead(); @@ -120,6 +173,13 @@ public RubyFixnum getInt32() { return int32; } + /** + * Get a UTF-8 string from the buffer. + * + * @author Durran Jordan + * @since 2015.09.26 + * @version 4.0.0 + */ @JRubyMethod(name = "get_string") public RubyString getString() { ensureBsonRead(); @@ -133,6 +193,13 @@ public RubyString getString() { return string; } + /** + * Get a 64 bit integer from the buffer. + * + * @author Durran Jordan + * @since 2015.09.26 + * @version 4.0.0 + */ @JRubyMethod(name = "get_int64") public RubyBignum getInt64() { ensureBsonRead(); @@ -141,6 +208,15 @@ public RubyBignum getInt64() { return int64; } + /** + * Put a single byte onto the buffer. + * + * @param value The byte to write. 
+ * + * @author Durran Jordan + * @since 2015.09.26 + * @version 4.0.0 + */ @JRubyMethod(name = "put_byte") public ByteBuf putByte(final IRubyObject value) { ensureBsonWrite(); @@ -149,6 +225,15 @@ public ByteBuf putByte(final IRubyObject value) { return this; } + /** + * Put raw bytes onto the buffer. + * + * @param value The bytes to write. + * + * @author Durran Jordan + * @since 2015.09.26 + * @version 4.0.0 + */ @JRubyMethod(name = "put_bytes") public ByteBuf putBytes(final IRubyObject value) { ensureBsonWrite(); @@ -158,6 +243,15 @@ public ByteBuf putBytes(final IRubyObject value) { return this; } + /** + * Put a double onto the buffer. + * + * @param value the double to write. + * + * @author Durran Jordan + * @since 2015.09.26 + * @version 4.0.0 + */ @JRubyMethod(name = "put_double") public ByteBuf putDouble(final IRubyObject value) { ensureBsonWrite(); @@ -166,6 +260,15 @@ public ByteBuf putDouble(final IRubyObject value) { return this; } + /** + * Put a 32 bit integer onto the buffer. + * + * @param value The integer to write. + * + * @author Durran Jordan + * @since 2015.09.26 + * @version 4.0.0 + */ @JRubyMethod(name = "put_int32") public ByteBuf putInt32(final IRubyObject value) { ensureBsonWrite(); @@ -174,6 +277,15 @@ public ByteBuf putInt32(final IRubyObject value) { return this; } + /** + * Put a 64 bit integer onto the buffer. + * + * @param value The integer to write. + * + * @author Durran Jordan + * @since 2015.09.26 + * @version 4.0.0 + */ @JRubyMethod(name = "put_int64") public ByteBuf putInt64(final IRubyObject value) { ensureBsonWrite(); @@ -182,6 +294,15 @@ public ByteBuf putInt64(final IRubyObject value) { return this; } + /** + * Put a UTF-8 string onto the buffer. + * + * @param value The UTF-8 string to write. 
+ * + * @author Durran Jordan + * @since 2015.09.26 + * @version 4.0.0 + */ @JRubyMethod(name = "put_string") public ByteBuf putString(final IRubyObject value) { ensureBsonWrite(); @@ -193,11 +314,25 @@ public ByteBuf putString(final IRubyObject value) { return this; } + /** + * Get the read position of the buffer. + * + * @author Durran Jordan + * @since 2015.09.26 + * @version 4.0.0 + */ @JRubyMethod(name = "read_position") public RubyFixnum getReadPosition() { return new RubyFixnum(getRuntime(), this.readPosition); } + /** + * Get the write position of the buffer. + * + * @author Durran Jordan + * @since 2015.09.26 + * @version 4.0.0 + */ @JRubyMethod(name = "write_position") public RubyFixnum getWritePosition() { return new RubyFixnum(getRuntime(), this.writePosition); @@ -205,6 +340,10 @@ public RubyFixnum getWritePosition() { /** * Convert the byte buffer to a string of the bytes. + * + * @author Durran Jordan + * @since 2015.09.26 + * @version 4.0.0 */ @JRubyMethod(name = "to_s") public RubyString toRubyString() { @@ -220,9 +359,6 @@ private void ensureBsonRead() { } } - /** - * This will grow the underlying byte buffer if the remaining size is too small. 
- */ private void ensureBsonWrite() { if (this.mode == Mode.READ) { this.buffer.flip(); From a4b1c783b8f433de61b0e0084d5ee9e4ef1d8071 Mon Sep 17 00:00:00 2001 From: Durran Jordan Date: Tue, 29 Sep 2015 12:01:02 +0200 Subject: [PATCH 23/29] RUBY-1019: Start to handle byte replacement and encoding on jruby --- src/main/org/bson/ByteBuf.java | 81 +++++++++++++++++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) diff --git a/src/main/org/bson/ByteBuf.java b/src/main/org/bson/ByteBuf.java index 4dce7d93c..31da058e6 100644 --- a/src/main/org/bson/ByteBuf.java +++ b/src/main/org/bson/ByteBuf.java @@ -16,10 +16,14 @@ package org.bson; +import java.io.ByteArrayOutputStream; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.Arrays; +import org.jcodings.Encoding; +import org.jcodings.EncodingDB; + import org.jruby.Ruby; import org.jruby.RubyBignum; import org.jruby.RubyClass; @@ -30,6 +34,7 @@ import org.jruby.RubyString; import org.jruby.anno.JRubyMethod; import org.jruby.runtime.builtin.IRubyObject; +import org.jruby.util.ByteList; /** * Provides native extensions around boolean operations. @@ -43,6 +48,11 @@ public class ByteBuf extends RubyObject { */ private static byte NULL_BYTE = 0x00; + /** + * Constant for UTF-8 encoding. + */ + private static Encoding UTF_8 = EncodingDB.getEncodings().get("UTF-8".getBytes()).getEncoding(); + /** * The modes for the buffer. */ @@ -143,6 +153,26 @@ public RubyString getBytes(final IRubyObject value) { return string; } + /** + * Get a cstring from the buffer. 
+ * + * @author Durran Jordan + * @since 2015.09.26 + * @version 4.0.0 + */ + @JRubyMethod(name = "get_cstring") + public RubyString getCString() { + ensureBsonRead(); + ByteArrayOutputStream bytes = new ByteArrayOutputStream(); + byte next = NULL_BYTE; + while((next = this.buffer.get()) != NULL_BYTE) { + bytes.write(next); + } + RubyString string = RubyString.newString(getRuntime(), bytes.toByteArray()); + this.readPosition += (bytes.size() + 1); + return string; + } + /** * Get a double from the buffer. * @@ -188,7 +218,7 @@ public RubyString getString() { byte[] stringBytes = new byte[length]; this.buffer.get(stringBytes); byte[] bytes = Arrays.copyOfRange(stringBytes, 0, stringBytes.length - 1); - RubyString string = RubyString.newString(getRuntime(), bytes); + RubyString string = RubyString.newString(getRuntime(), new ByteList(bytes, UTF_8)); this.readPosition += length; return string; } @@ -243,6 +273,25 @@ public ByteBuf putBytes(final IRubyObject value) { return this; } + /** + * Put a cstring onto the buffer. + * + * @param value The cstring to write. + * + * @author Durran Jordan + * @since 2015.09.26 + * @version 4.0.0 + */ + @JRubyMethod(name = "put_cstring") + public ByteBuf putCString(final IRubyObject value) { + ensureBsonWrite(); + byte[] bytes = ((RubyString) value).getBytes(); + this.buffer.put(bytes); + this.buffer.put(NULL_BYTE); + this.writePosition += (bytes.length + 1); + return this; + } + /** * Put a double onto the buffer. * @@ -314,6 +363,36 @@ public ByteBuf putString(final IRubyObject value) { return this; } + /** + * Replace a 32 bit integer at the provided index in the buffer. + * + * @param index The index to replace at. + * @param value The value to replace with. 
+ * + * @author Durran Jordan + * @since 2015.09.26 + * @version 4.0.0 + */ + @JRubyMethod(name = "replace_int32") + public ByteBuf replaceInt32(final IRubyObject index, final IRubyObject value) { + int i = RubyNumeric.fix2int((RubyFixnum) index); + int int32 = RubyNumeric.fix2int((RubyFixnum) value); + this.buffer.putInt(i, int32); + return this; + } + + /** + * Get the total length of the buffer. + * + * @author Durran Jordan + * @since 2015.09.29 + * @version 4.0.0 + */ + @JRubyMethod(name = "length") + public RubyFixnum getLength() { + return getWritePosition(); + } + /** * Get the read position of the buffer. * From b1b2fe7206dd2cee317b30692e71f4c9f7674e00 Mon Sep 17 00:00:00 2001 From: Durran Jordan Date: Thu, 8 Oct 2015 15:00:58 +0200 Subject: [PATCH 24/29] RUBY-1019: More work on Java side --- src/main/org/bson/ByteBuf.java | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/src/main/org/bson/ByteBuf.java b/src/main/org/bson/ByteBuf.java index 31da058e6..0b5f7ca4b 100644 --- a/src/main/org/bson/ByteBuf.java +++ b/src/main/org/bson/ByteBuf.java @@ -17,6 +17,7 @@ package org.bson; import java.io.ByteArrayOutputStream; +import java.io.UnsupportedEncodingException; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.Arrays; @@ -48,10 +49,15 @@ public class ByteBuf extends RubyObject { */ private static byte NULL_BYTE = 0x00; + /** + * The UTF-8 String. + */ + private static String UTF8 = "UTF-8".intern(); + /** * Constant for UTF-8 encoding. */ - private static Encoding UTF_8 = EncodingDB.getEncodings().get("UTF-8".getBytes()).getEncoding(); + private static Encoding UTF_8 = EncodingDB.getEncodings().get(UTF8.getBytes()).getEncoding(); /** * The modes for the buffer. 
@@ -168,7 +174,7 @@ public RubyString getCString() { while((next = this.buffer.get()) != NULL_BYTE) { bytes.write(next); } - RubyString string = RubyString.newString(getRuntime(), bytes.toByteArray()); + RubyString string = getUTF8String(bytes.toByteArray()); this.readPosition += (bytes.size() + 1); return string; } @@ -218,7 +224,7 @@ public RubyString getString() { byte[] stringBytes = new byte[length]; this.buffer.get(stringBytes); byte[] bytes = Arrays.copyOfRange(stringBytes, 0, stringBytes.length - 1); - RubyString string = RubyString.newString(getRuntime(), new ByteList(bytes, UTF_8)); + RubyString string = getUTF8String(bytes); this.readPosition += length; return string; } @@ -283,9 +289,9 @@ public ByteBuf putBytes(final IRubyObject value) { * @version 4.0.0 */ @JRubyMethod(name = "put_cstring") - public ByteBuf putCString(final IRubyObject value) { + public ByteBuf putCString(final IRubyObject value) throws UnsupportedEncodingException { ensureBsonWrite(); - byte[] bytes = ((RubyString) value).getBytes(); + byte[] bytes = getUTF8Bytes((RubyString) value); this.buffer.put(bytes); this.buffer.put(NULL_BYTE); this.writePosition += (bytes.length + 1); @@ -353,9 +359,9 @@ public ByteBuf putInt64(final IRubyObject value) { * @version 4.0.0 */ @JRubyMethod(name = "put_string") - public ByteBuf putString(final IRubyObject value) { + public ByteBuf putString(final IRubyObject value) throws UnsupportedEncodingException { ensureBsonWrite(); - byte[] bytes = ((RubyString) value).getBytes(); + byte[] bytes = getUTF8Bytes((RubyString) value); this.buffer.putInt(bytes.length + 1); this.buffer.put(bytes); this.buffer.put(NULL_BYTE); @@ -432,6 +438,14 @@ public RubyString toRubyString() { return RubyString.newString(getRuntime(), bytes); } + private byte[] getUTF8Bytes(final RubyString value) throws UnsupportedEncodingException { + return value.asJavaString().getBytes(UTF8); + } + + private RubyString getUTF8String(final byte[] bytes) { + return 
RubyString.newString(getRuntime(), new ByteList(bytes, UTF_8)); + } + private void ensureBsonRead() { if (this.mode == Mode.WRITE) { this.buffer.flip(); From 44b8abd43aa8c05fce1621b5cdf6a6f83ce7f463 Mon Sep 17 00:00:00 2001 From: Durran Jordan Date: Wed, 14 Oct 2015 19:56:40 +0200 Subject: [PATCH 25/29] RUBY-1019: Reintroduce objectid ext --- .../{portable_endian.h => native-endian.h} | 0 ext/bson/native.c | 65 ++++++++++++++++++- lib/bson/object_id.rb | 21 +++--- 3 files changed, 76 insertions(+), 10 deletions(-) rename ext/bson/{portable_endian.h => native-endian.h} (100%) diff --git a/ext/bson/portable_endian.h b/ext/bson/native-endian.h similarity index 100% rename from ext/bson/portable_endian.h rename to ext/bson/native-endian.h diff --git a/ext/bson/native.c b/ext/bson/native.c index a048b2947..cc0fe6098 100644 --- a/ext/bson/native.c +++ b/ext/bson/native.c @@ -16,10 +16,15 @@ #include #include #include -#include "portable_endian.h" +#include +#include "native-endian.h" #define BSON_BYTE_BUFFER_SIZE 512 +#ifndef HOST_NAME_HASH_MAX +#define HOST_NAME_HASH_MAX 256 +#endif + typedef struct { size_t size; size_t write_position; @@ -65,6 +70,7 @@ static VALUE rb_bson_byte_buffer_read_position(VALUE self); static VALUE rb_bson_byte_buffer_replace_int32(VALUE self, VALUE index, VALUE i); static VALUE rb_bson_byte_buffer_write_position(VALUE self); static VALUE rb_bson_byte_buffer_to_s(VALUE self); +static VALUE rb_bson_object_id_generator_next(int argc, VALUE* args, VALUE self); static size_t rb_bson_byte_buffer_memsize(const void *ptr); static void rb_bson_byte_buffer_free(void *ptr); @@ -77,6 +83,16 @@ static const rb_data_type_t rb_byte_buffer_data_type = { 0, 0, RUBY_TYPED_FREE_IMMEDIATELY }; +/** + * Holds the machine id hash for object id generation. + */ +static char rb_bson_machine_id_hash[HOST_NAME_HASH_MAX]; + +/** + * The counter for incrementing object ids. 
+ */ +static unsigned int rb_bson_object_id_counter = 0; + /** * Initialize the native extension. */ @@ -84,6 +100,8 @@ void Init_native() { VALUE rb_bson_module = rb_define_module("BSON"); VALUE rb_byte_buffer_class = rb_define_class_under(rb_bson_module, "ByteBuffer", rb_cObject); + VALUE rb_bson_object_id_class = rb_const_get(rb_bson_module, rb_intern("ObjectId")); + VALUE rb_bson_object_id_generator_class = rb_const_get(rb_bson_object_id_class, rb_intern("Generator")); rb_define_alloc_func(rb_byte_buffer_class, rb_bson_byte_buffer_allocate); rb_define_method(rb_byte_buffer_class, "initialize", rb_bson_byte_buffer_initialize, -1); @@ -106,6 +124,17 @@ void Init_native() rb_define_method(rb_byte_buffer_class, "replace_int32", rb_bson_byte_buffer_replace_int32, 2); rb_define_method(rb_byte_buffer_class, "write_position", rb_bson_byte_buffer_write_position, 0); rb_define_method(rb_byte_buffer_class, "to_s", rb_bson_byte_buffer_to_s, 0); + rb_define_method(rb_bson_object_id_generator_class, "next_object_id", rb_bson_object_id_generator_next, -1); + + // Get the object id machine id and hash it. + rb_require("digest/md5"); + VALUE rb_digest_class = rb_const_get(rb_cObject, rb_intern("Digest")); + VALUE rb_md5_class = rb_const_get(rb_digest_class, rb_intern("MD5")); + char rb_bson_machine_id[256]; + gethostname(rb_bson_machine_id, sizeof(rb_bson_machine_id)); + rb_bson_machine_id[255] = '\0'; + VALUE digest = rb_funcall(rb_md5_class, rb_intern("digest"), 1, rb_str_new2(rb_bson_machine_id)); + memcpy(rb_bson_machine_id_hash, RSTRING_PTR(digest), RSTRING_LEN(digest)); } /** @@ -474,6 +503,40 @@ void rb_bson_expand_buffer(byte_buffer_t* buffer_ptr, size_t length) } } +/** + * Generate the next object id. 
+ */ +VALUE rb_bson_object_id_generator_next(int argc, VALUE* args, VALUE self) +{ + char bytes[12]; + unsigned long t; + unsigned short pid = htons(getpid()); + + if (argc == 0 || (argc == 1 && *args == Qnil)) { + t = htonl((int) time(NULL)); + } + else { + t = htonl(NUM2UINT(rb_funcall(*args, rb_intern("to_i"), 0))); + } + + unsigned long c; + c = htonl(rb_bson_object_id_counter << 8); + +# if __BYTE_ORDER == __LITTLE_ENDIAN + memcpy(&bytes, &t, 4); + memcpy(&bytes[4], rb_bson_machine_id_hash, 3); + memcpy(&bytes[7], &pid, 2); + memcpy(&bytes[9], (unsigned char*) &c, 3); +#elif __BYTE_ORDER == __BIG_ENDIAN + memcpy(&bytes, ((unsigned char*) &t) + 4, 4); + memcpy(&bytes[4], rb_bson_machine_id_hash, 3); + memcpy(&bytes[7], &pid, 2); + memcpy(&bytes[9], ((unsigned char*) &c) + 4, 3); +#endif + rb_bson_object_id_counter++; + return rb_str_new(bytes, 12); +} + /** * Taken from libbson. */ diff --git a/lib/bson/object_id.rb b/lib/bson/object_id.rb index 8e70b93d2..fc52f8300 100644 --- a/lib/bson/object_id.rb +++ b/lib/bson/object_id.rb @@ -44,7 +44,7 @@ class ObjectId # @since 2.0.0 def ==(other) return false unless other.is_a?(ObjectId) - to_bson.to_s == other.to_bson.to_s + generate_data == other.send(:generate_data) end alias :eql? :== @@ -86,7 +86,7 @@ def as_json(*args) # # @since 2.0.0 def <=>(other) - to_bson.to_s <=> other.to_bson.to_s + generate_data <=> other.send(:generate_data) end # Return the UTC time at which this ObjectId was generated. This may @@ -100,7 +100,7 @@ def <=>(other) # # @since 2.0.0 def generation_time - ::Time.at(to_bson.to_s.unpack("N")[0]).utc + ::Time.at(generate_data.unpack("N")[0]).utc end # Get the hash value for the object id. @@ -112,7 +112,7 @@ def generation_time # # @since 2.0.0 def hash - to_bson.to_s.hash + generate_data.hash end # Get a nice string for use with object inspection. @@ -136,7 +136,7 @@ def inspect # # @since 2.0.0 def marshal_dump - to_bson.to_s + generate_data end # Unmarshal the data into an object id. 
@@ -169,9 +169,7 @@ def marshal_load(data) # # @since 2.0.0 def to_bson(buffer = ByteBuffer.new) - repair if defined?(@data) - @raw_data ||= @@generator.next_object_id - buffer.put_bytes(@raw_data) + buffer.put_bytes(generate_data) end # Get the string representation of the object id. @@ -183,7 +181,7 @@ def to_bson(buffer = ByteBuffer.new) # # @since 2.0.0 def to_s - to_bson.to_s.to_hex_string.force_encoding(UTF8) + generate_data.to_hex_string.force_encoding(UTF8) end alias :to_str :to_s @@ -194,6 +192,11 @@ class Invalid < RuntimeError; end private + def generate_data + repair if defined?(@data) + @raw_data ||= @@generator.next_object_id + end + def repair @raw_data = @data.to_bson_object_id remove_instance_variable(:@data) From c27601e24a0ccf12b0af7e79cce84d27d3588824 Mon Sep 17 00:00:00 2001 From: Durran Jordan Date: Mon, 19 Oct 2015 18:59:25 +0200 Subject: [PATCH 26/29] RUBY-1019: Expanding Java buffer --- spec/bson/byte_buffer_spec.rb | 43 ++++++----- spec/bson/document_spec.rb | 10 ++- src/main/org/bson/ByteBuf.java | 102 +++++++++++++++++++++------ src/main/org/bson/NativeService.java | 2 +- 4 files changed, 115 insertions(+), 42 deletions(-) diff --git a/spec/bson/byte_buffer_spec.rb b/spec/bson/byte_buffer_spec.rb index 4266a1c59..a211d51be 100644 --- a/spec/bson/byte_buffer_spec.rb +++ b/spec/bson/byte_buffer_spec.rb @@ -374,15 +374,6 @@ expect(modified.write_position).to eq(12) end end - - context 'when the string is not UTF-8' do - - # it 'raises an exception' do - # expect { - # buffer.put_string('gültig'.encode("iso-8859-1")) - # }.to raise_error(ArgumentError) - # end - end end context 'when the buffer needs to be expanded' do @@ -395,16 +386,36 @@ 300.times.inject(""){ |s, i| s << "#{i}" } end - let!(:modified) do - buffer.put_string(string) - end + context 'when no bytes exist in the buffer' do + + let!(:modified) do + buffer.put_string(string) + end + + it 'appends the string to the byte buffer' do + expect(modified.to_s).to 
eq("#{(string.bytesize + 1).to_bson.to_s}#{string}#{BSON::NULL_BYTE}") + end - it 'appends the string to the byte buffer' do - expect(modified.to_s).to eq("#{(string.bytesize + 1).to_bson.to_s}#{string}#{BSON::NULL_BYTE}") + it 'increments the write position by length + 5' do + expect(modified.write_position).to eq(string.bytesize + 5) + end end - it 'increments the write position by length + 5' do - expect(modified.write_position).to eq(string.bytesize + 5) + context 'when bytes exist in the buffer' do + + let!(:modified) do + buffer.put_int32(4).put_string(string) + end + + it 'appends the string to the byte buffer' do + expect(modified.to_s).to eq( + "#{[ 4 ].pack(BSON::Int32::PACK)}#{(string.bytesize + 1).to_bson.to_s}#{string}#{BSON::NULL_BYTE}" + ) + end + + it 'increments the write position by length + 5' do + expect(modified.write_position).to eq(string.bytesize + 9) + end end end end diff --git a/spec/bson/document_spec.rb b/spec/bson/document_spec.rb index 021e158de..3822bc48c 100644 --- a/spec/bson/document_spec.rb +++ b/spec/bson/document_spec.rb @@ -819,18 +819,24 @@ described_class["type", string.encode("iso-8859-1")] end - it "raises an exception" do + it "raises an exception", unless: BSON::Environment.jruby? do expect { document.to_bson }.to raise_error(ArgumentError) end + + it 'converts the values', if: BSON::Environment.jruby? 
do + expect( + BSON::Document.from_bson(BSON::ByteBuffer.new(document.to_bson.to_s)) + ).to eq({ "type" => string }) + end end context "when binary strings with utf-8 values exist" do let(:string) { "europäisch" } let(:document) do - described_class["type", string.encode("binary", "binary")] + described_class["type", string.encode("binary")] end it "encodes and decodes the document properly" do diff --git a/src/main/org/bson/ByteBuf.java b/src/main/org/bson/ByteBuf.java index 0b5f7ca4b..bfb905888 100644 --- a/src/main/org/bson/ByteBuf.java +++ b/src/main/org/bson/ByteBuf.java @@ -37,6 +37,8 @@ import org.jruby.runtime.builtin.IRubyObject; import org.jruby.util.ByteList; +import static java.lang.String.format; + /** * Provides native extensions around boolean operations. * @@ -49,6 +51,11 @@ public class ByteBuf extends RubyObject { */ private static byte NULL_BYTE = 0x00; + /** + * The default size of the buffer. + */ + private static int DEFAULT_SIZE = 512; + /** * The UTF-8 String. */ @@ -104,7 +111,7 @@ public ByteBuf(final Ruby runtime, final RubyClass rubyClass) { */ @JRubyMethod(name = "initialize") public IRubyObject intialize() { - this.buffer = ByteBuffer.allocate(1024).order(ByteOrder.LITTLE_ENDIAN); + this.buffer = ByteBuffer.allocate(DEFAULT_SIZE).order(ByteOrder.LITTLE_ENDIAN); this.mode = Mode.WRITE; return null; } @@ -255,7 +262,7 @@ public RubyBignum getInt64() { */ @JRubyMethod(name = "put_byte") public ByteBuf putByte(final IRubyObject value) { - ensureBsonWrite(); + ensureBsonWrite(1); this.buffer.put(((RubyString) value).getBytes()[0]); this.writePosition += 1; return this; @@ -272,8 +279,8 @@ public ByteBuf putByte(final IRubyObject value) { */ @JRubyMethod(name = "put_bytes") public ByteBuf putBytes(final IRubyObject value) { - ensureBsonWrite(); byte[] bytes = ((RubyString) value).getBytes(); + ensureBsonWrite(bytes.length); this.buffer.put(bytes); this.writePosition += bytes.length; return this; @@ -290,11 +297,8 @@ public ByteBuf 
putBytes(final IRubyObject value) { */ @JRubyMethod(name = "put_cstring") public ByteBuf putCString(final IRubyObject value) throws UnsupportedEncodingException { - ensureBsonWrite(); - byte[] bytes = getUTF8Bytes((RubyString) value); - this.buffer.put(bytes); - this.buffer.put(NULL_BYTE); - this.writePosition += (bytes.length + 1); + String string = ((RubyString) value).asJavaString(); + this.writePosition += writeCharacters(string, true); return this; } @@ -309,7 +313,7 @@ public ByteBuf putCString(final IRubyObject value) throws UnsupportedEncodingExc */ @JRubyMethod(name = "put_double") public ByteBuf putDouble(final IRubyObject value) { - ensureBsonWrite(); + ensureBsonWrite(8); this.buffer.putDouble(((RubyFloat) value).getDoubleValue()); this.writePosition += 8; return this; @@ -326,7 +330,7 @@ public ByteBuf putDouble(final IRubyObject value) { */ @JRubyMethod(name = "put_int32") public ByteBuf putInt32(final IRubyObject value) { - ensureBsonWrite(); + ensureBsonWrite(4); this.buffer.putInt(RubyNumeric.fix2int((RubyFixnum) value)); this.writePosition += 4; return this; @@ -343,7 +347,10 @@ public ByteBuf putInt32(final IRubyObject value) { */ @JRubyMethod(name = "put_int64") public ByteBuf putInt64(final IRubyObject value) { - ensureBsonWrite(); + if (value instanceof RubyBignum) { + throw getRuntime().newRangeError("Value is too large for a 64bit integer"); + } + ensureBsonWrite(8); this.buffer.putLong(RubyNumeric.fix2long((RubyFixnum) value)); this.writePosition += 8; return this; @@ -360,12 +367,11 @@ public ByteBuf putInt64(final IRubyObject value) { */ @JRubyMethod(name = "put_string") public ByteBuf putString(final IRubyObject value) throws UnsupportedEncodingException { - ensureBsonWrite(); - byte[] bytes = getUTF8Bytes((RubyString) value); - this.buffer.putInt(bytes.length + 1); - this.buffer.put(bytes); - this.buffer.put(NULL_BYTE); - this.writePosition += (bytes.length + 5); + String string = ((RubyString) value).asJavaString(); + 
this.buffer.putInt(0); + int length = writeCharacters(string, false); + this.buffer.putInt(this.buffer.position() - length - 4, length); + this.writePosition += (length + 4); return this; } @@ -438,10 +444,6 @@ public RubyString toRubyString() { return RubyString.newString(getRuntime(), bytes); } - private byte[] getUTF8Bytes(final RubyString value) throws UnsupportedEncodingException { - return value.asJavaString().getBytes(UTF8); - } - private RubyString getUTF8String(final byte[] bytes) { return RubyString.newString(getRuntime(), new ByteList(bytes, UTF_8)); } @@ -452,10 +454,64 @@ private void ensureBsonRead() { } } - private void ensureBsonWrite() { + private void ensureBsonWrite(int length) { if (this.mode == Mode.READ) { this.buffer.flip(); } - // if size of item > limit, increase the buffer. + if (length > this.buffer.remaining()) { + int size = this.buffer.position() + length + DEFAULT_SIZE; + ByteBuffer newBuffer = ByteBuffer.allocate(size).order(ByteOrder.LITTLE_ENDIAN); + if (this.buffer.position() > 0) { + byte [] existing = new byte[this.buffer.position()]; + this.buffer.rewind(); + this.buffer.get(existing); + newBuffer.put(existing); + } + this.buffer = newBuffer; + } + } + + private void write(byte b) { + ensureBsonWrite(1); + this.buffer.put(b); + } + + private int writeCharacters(final String string, final boolean checkForNull) { + int len = string.length(); + int total = 0; + + for (int i = 0; i < len;) { + int c = Character.codePointAt(string, i); + + if (checkForNull && c == 0x0) { + throw getRuntime().newArgumentError(format("String %s is not a valid UTF-8 CString.", string)); + } + + if (c < 0x80) { + write((byte) c); + total += 1; + } else if (c < 0x800) { + write((byte) (0xc0 + (c >> 6))); + write((byte) (0x80 + (c & 0x3f))); + total += 2; + } else if (c < 0x10000) { + write((byte) (0xe0 + (c >> 12))); + write((byte) (0x80 + ((c >> 6) & 0x3f))); + write((byte) (0x80 + (c & 0x3f))); + total += 3; + } else { + write((byte) (0xf0 + (c >> 
18))); + write((byte) (0x80 + ((c >> 12) & 0x3f))); + write((byte) (0x80 + ((c >> 6) & 0x3f))); + write((byte) (0x80 + (c & 0x3f))); + total += 4; + } + + i += Character.charCount(c); + } + + write((byte) 0); + total++; + return total; } } diff --git a/src/main/org/bson/NativeService.java b/src/main/org/bson/NativeService.java index 61138a41a..27ebc0149 100644 --- a/src/main/org/bson/NativeService.java +++ b/src/main/org/bson/NativeService.java @@ -57,6 +57,7 @@ public class NativeService implements BasicLibraryService { */ public boolean basicLoad(final Ruby runtime) throws IOException { RubyModule bson = runtime.fastGetModule(BSON); + GeneratorExtension.extend(bson); RubyClass byteBuffer = bson.defineClassUnder("ByteBuffer", runtime.getObject(), new ObjectAllocator() { public IRubyObject allocate(Ruby runtime, RubyClass rubyClass) { @@ -65,7 +66,6 @@ public IRubyObject allocate(Ruby runtime, RubyClass rubyClass) { }); byteBuffer.defineAnnotatedMethods(ByteBuf.class); - return true; } } From 24e02f26269c4e0678c38c6e0571bbeeb9929d44 Mon Sep 17 00:00:00 2001 From: Durran Jordan Date: Mon, 19 Oct 2015 21:13:42 +0200 Subject: [PATCH 27/29] RUBY-1019: Fix binary deserialization on JRuby --- src/main/org/bson/ByteBuf.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/org/bson/ByteBuf.java b/src/main/org/bson/ByteBuf.java index bfb905888..8ea9795af 100644 --- a/src/main/org/bson/ByteBuf.java +++ b/src/main/org/bson/ByteBuf.java @@ -160,8 +160,9 @@ public RubyString getByte() { public RubyString getBytes(final IRubyObject value) { ensureBsonRead(); int length = RubyNumeric.fix2int((RubyFixnum) value); - ByteBuffer buff = this.buffer.get(new byte[length]); - RubyString string = RubyString.newString(getRuntime(), buff.array()); + byte[] bytes = new byte[length]; + ByteBuffer buff = this.buffer.get(bytes); + RubyString string = RubyString.newString(getRuntime(), bytes); this.readPosition += length; return string; } From 
6c5dc3193c754985458c42fb2f4bcfad123f94e7 Mon Sep 17 00:00:00 2001 From: Durran Jordan Date: Mon, 19 Oct 2015 21:16:16 +0200 Subject: [PATCH 28/29] RUBY-1019: Fix encoding MRI --- spec/bson/document_spec.rb | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/spec/bson/document_spec.rb b/spec/bson/document_spec.rb index 3822bc48c..c111cdc17 100644 --- a/spec/bson/document_spec.rb +++ b/spec/bson/document_spec.rb @@ -832,7 +832,7 @@ end end - context "when binary strings with utf-8 values exist" do + context "when binary strings with utf-8 values exist", if: BSON::Environment.jruby? do let(:string) { "europäisch" } let(:document) do @@ -845,5 +845,19 @@ ).to eq({ "type" => string }) end end + + context "when binary strings with utf-8 values exist", unless: BSON::Environment.jruby? do + + let(:string) { "europäisch" } + let(:document) do + described_class["type", string.encode("binary", "binary")] + end + + it "encodes and decodes the document properly" do + expect( + BSON::Document.from_bson(BSON::ByteBuffer.new(document.to_bson.to_s)) + ).to eq({ "type" => string }) + end + end end end From baa8844677f34ec2db500a3fa9e7cb601ce8e89f Mon Sep 17 00:00:00 2001 From: Durran Jordan Date: Mon, 19 Oct 2015 21:27:26 +0200 Subject: [PATCH 29/29] RUBY-1019: Bump to 4.0 --- lib/bson/version.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/bson/version.rb b/lib/bson/version.rb index 3cb191739..de86407ea 100644 --- a/lib/bson/version.rb +++ b/lib/bson/version.rb @@ -13,5 +13,5 @@ # limitations under the License. module BSON - VERSION = "3.2.6" + VERSION = "4.0.0" end