Skip to content
Permalink
Browse files

Optimized away the creation of empty string objects.

Prior to this CL, creating an empty message object would create
two empty string objects for every declared field.  First we
created a unique string object for the field's default.  Then
we created yet another string object when we assigned the
default value into the message: we called #encode to ensure
that the string would have the correct encoding and be frozen.

I optimized these unnecessary objects away with two fixes:

1. Memoize the empty string so that we don't create a new empty
   string for every field's default.
2. If we are assigning a string to a message object, avoid creating
   a new string if the assigned string has the correct encoding and
   is already frozen.
  • Loading branch information...
haberman committed Aug 13, 2019
1 parent c132a4a commit 1e37a94bb5efcdd6d8adc3d0aeb907b4f85c52ad
Showing with 49 additions and 15 deletions.
  1. +31 −0 ruby/ext/google/protobuf_c/protobuf.c
  2. +5 −0 ruby/ext/google/protobuf_c/protobuf.h
  3. +13 −15 ruby/ext/google/protobuf_c/storage.c
@@ -51,6 +51,32 @@ VALUE get_def_obj(const void* def) {
return rb_hash_aref(upb_def_to_ruby_obj_map, ULL2NUM((intptr_t)def));
}

static VALUE cached_empty_string = Qnil;
static VALUE cached_empty_bytes = Qnil;

static VALUE create_frozen_string(const char* str, size_t size, bool binary) {
VALUE str_rb = rb_str_new(str, size);

rb_enc_associate(str_rb,
binary ? kRubyString8bitEncoding : kRubyStringUtf8Encoding);
rb_obj_freeze(str_rb);
return str_rb;
}

VALUE get_frozen_string(const char* str, size_t size, bool binary) {
if (size == 0) {
return binary ? cached_empty_bytes : cached_empty_string;
} else {
// It is harder to memoize non-empty strings. The obvious approach would be
// to use a Ruby hash keyed by string as memo table, but looking up in such a table
// requires constructing a string (the very thing we're trying to avoid).
//
// Since few fields have defaults, we will just optimize the empty string
// case for now.
return create_frozen_string(str, size, binary);
}
}

// -----------------------------------------------------------------------------
// Utilities.
// -----------------------------------------------------------------------------
@@ -118,4 +144,9 @@ void Init_protobuf_c() {

rb_gc_register_address(&upb_def_to_ruby_obj_map);
upb_def_to_ruby_obj_map = rb_hash_new();

rb_gc_register_address(&cached_empty_string);
rb_gc_register_address(&cached_empty_bytes);
cached_empty_string = create_frozen_string("", 0, false);
cached_empty_bytes = create_frozen_string("", 0, true);
}
@@ -591,6 +591,11 @@ const upb_pbdecodermethod *new_fillmsg_decodermethod(
// cycles.
#define ENCODE_MAX_NESTING 63

// -----------------------------------------------------------------------------
// A cache of frozen string objects to use as field defaults.
// -----------------------------------------------------------------------------
VALUE get_frozen_string(const char* data, size_t size, bool binary);

// -----------------------------------------------------------------------------
// Global map from upb {msg,enum}defs to wrapper Descriptor/EnumDescriptor
// instances.
@@ -96,17 +96,19 @@ VALUE native_slot_encode_and_freeze_string(upb_fieldtype_t type, VALUE value) {
kRubyStringUtf8Encoding : kRubyString8bitEncoding;
VALUE desired_encoding_value = rb_enc_from_encoding(desired_encoding);

// Note: this will not duplicate underlying string data unless necessary.
value = rb_str_encode(value, desired_encoding_value, 0, Qnil);
if (rb_obj_encoding(value) != desired_encoding_value || !OBJ_FROZEN(value)) {
// Note: this will not duplicate underlying string data unless necessary.
value = rb_str_encode(value, desired_encoding_value, 0, Qnil);

if (type == UPB_TYPE_STRING &&
rb_enc_str_coderange(value) == ENC_CODERANGE_BROKEN) {
rb_raise(rb_eEncodingError, "String is invalid UTF-8");
}
if (type == UPB_TYPE_STRING &&
rb_enc_str_coderange(value) == ENC_CODERANGE_BROKEN) {
rb_raise(rb_eEncodingError, "String is invalid UTF-8");
}

// Ensure the data remains valid. Since we called #encode a moment ago,
// this does not freeze the string the user assigned.
rb_obj_freeze(value);
// Ensure the data remains valid. Since we called #encode a moment ago,
// this does not freeze the string the user assigned.
rb_obj_freeze(value);
}

return value;
}
@@ -729,12 +731,8 @@ VALUE layout_get_default(const upb_fielddef *field) {
case UPB_TYPE_BYTES: {
size_t size;
const char *str = upb_fielddef_defaultstr(field, &size);
VALUE str_rb = rb_str_new(str, size);

rb_enc_associate(str_rb, (upb_fielddef_type(field) == UPB_TYPE_BYTES) ?
kRubyString8bitEncoding : kRubyStringUtf8Encoding);
rb_obj_freeze(str_rb);
return str_rb;
return get_frozen_string(str, size,
upb_fielddef_type(field) == UPB_TYPE_BYTES);
}
default: return Qnil;
}

0 comments on commit 1e37a94

Please sign in to comment.
You can’t perform that action at this time.