diff --git a/build_defs/upb.patch b/build_defs/upb.patch index ec1f28b50040..809fb7054966 100644 --- a/build_defs/upb.patch +++ b/build_defs/upb.patch @@ -9,22 +9,4 @@ # end:github_only def _upbc(stage): - ---- cmake/build_defs.bzl -+++ cmake/build_defs.bzl -@@ -25,7 +25,7 @@ - - """Bazel support functions related to CMake support.""" - --def staleness_test(name, outs, generated_pattern, target_files = None, **kwargs): -+def staleness_test(name, outs, generated_pattern, target_files = None, tags = [], **kwargs): - """Tests that checked-in file(s) match the contents of generated file(s). - - The resulting test will verify that all output files exist and have the -@@ -72,5 +72,6 @@ def staleness_test(name, outs, generated_pattern, target_files = None, **kwargs) - deps = [ - Label("//cmake:staleness_test_lib"), - ], -+ tags = ["staleness_test"] + tags, - **kwargs - ) + \ No newline at end of file diff --git a/php/ext/google/protobuf/php-upb.c b/php/ext/google/protobuf/php-upb.c index ef9572417e46..2dac3ad0cb94 100644 --- a/php/ext/google/protobuf/php-upb.c +++ b/php/ext/google/protobuf/php-upb.c @@ -641,14 +641,12 @@ static int (*const compar[kUpb_FieldType_SizeOf])(const void*, const void*) = { [kUpb_FieldType_Bytes] = _upb_mapsorter_cmpstr, }; -bool _upb_mapsorter_pushmap(_upb_mapsorter* s, upb_FieldType key_type, - const upb_Map* map, _upb_sortedmap* sorted) { - int map_size = _upb_Map_Size(map); +static bool _upb_mapsorter_resize(_upb_mapsorter* s, _upb_sortedmap* sorted, + int size) { sorted->start = s->size; sorted->pos = sorted->start; - sorted->end = sorted->start + map_size; + sorted->end = sorted->start + size; - // Grow s->entries if necessary. if (sorted->end > s->cap) { s->cap = upb_Log2CeilingSize(sorted->end); s->entries = realloc(s->entries, s->cap * sizeof(*s->entries)); @@ -656,9 +654,17 @@ bool _upb_mapsorter_pushmap(_upb_mapsorter* s, upb_FieldType key_type, } s->size = sorted->end; + return true; +} + +bool _upb_mapsorter_pushmap(_upb_mapsorter* s, upb_FieldType key_type, + const upb_Map* map, _upb_sortedmap* sorted) { + int map_size = _upb_Map_Size(map); + + if (!_upb_mapsorter_resize(s, sorted, map_size)) return false; // Copy non-empty entries from the table to s->entries. - upb_tabent const** dst = &s->entries[sorted->start]; + const void** dst = &s->entries[sorted->start]; const upb_tabent* src = map->table.t.entries; const upb_tabent* end = src + upb_table_size(&map->table.t); for (; src < end; src++) { @@ -674,6 +680,29 @@ bool _upb_mapsorter_pushmap(_upb_mapsorter* s, upb_FieldType key_type, compar[key_type]); return true; } + +static int _upb_mapsorter_cmpext(const void* _a, const void* _b) { + const upb_Message_Extension* const* a = _a; + const upb_Message_Extension* const* b = _b; + uint32_t a_num = (*a)->ext->field.number; + uint32_t b_num = (*b)->ext->field.number; + assert(a_num != b_num); + return a_num < b_num ? -1 : 1; +} + +bool _upb_mapsorter_pushexts(_upb_mapsorter* s, + const upb_Message_Extension* exts, size_t count, + _upb_sortedmap* sorted) { + if (!_upb_mapsorter_resize(s, sorted, count)) return false; + + for (size_t i = 0; i < count; i++) { + s->entries[sorted->start + i] = &exts[i]; + } + + qsort(&s->entries[sorted->start], count, sizeof(*s->entries), + _upb_mapsorter_cmpext); + return true; +} /* This file was generated by upbc (the upb compiler) from the input * file: * @@ -2225,7 +2254,7 @@ char* upb_strdup2(const char* s, size_t len, upb_Arena* a) { n = len + 1; p = upb_Arena_Malloc(a, n); if (p) { - memcpy(p, s, len); + if (len != 0) memcpy(p, s, len); p[len] = 0; } return p; @@ -7561,9 +7590,27 @@ void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx) { _upb_DefBuilder_FailJmp(ctx); } +// Verify a relative identifier string. The loop is branchless for speed. +static void _upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder* ctx, + upb_StringView name) { + bool good = name.size > 0; + + for (size_t i = 0; i < name.size; i++) { + const char c = name.data[i]; + const char d = c | 0x20; // force lowercase + const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_'); + const bool is_numer = ('0' <= c) & (c <= '9') & (i != 0); + + good &= is_alpha | is_numer; + } + + if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, false); +} + const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx, const char* prefix, upb_StringView name) { + _upb_DefBuilder_CheckIdentNotFull(ctx, name); if (prefix) { // ret = prefix + '.' + name; size_t n = strlen(prefix); @@ -7679,7 +7726,7 @@ static bool TryGetChar(const char** src, const char* end, char* ch) { return true; } -static char TryGetHexDigit(const char** src, const char* end) { +static int TryGetHexDigit(const char** src, const char* end) { char ch; if (!TryGetChar(src, end, &ch)) return -1; if ('0' <= ch && ch <= '9') { @@ -7696,10 +7743,10 @@ static char TryGetHexDigit(const char** src, const char* end) { static char upb_DefBuilder_ParseHexEscape(upb_DefBuilder* ctx, const upb_FieldDef* f, const char** src, const char* end) { - char hex_digit = TryGetHexDigit(src, end); + int hex_digit = TryGetHexDigit(src, end); if (hex_digit < 0) { _upb_DefBuilder_Errf( - ctx, "\\x cannot be followed by non-hex digit in field '%s' default", + ctx, "\\x must be followed by at least one hex digit (field='%s')", upb_FieldDef_FullName(f)); return 0; } @@ -7875,7 +7922,7 @@ upb_DefPool* upb_DefPool_New(void) { } bool _upb_DefPool_InsertExt(upb_DefPool* s, const upb_MiniTableExtension* ext, - upb_FieldDef* f) { + const upb_FieldDef* f) { return upb_inttable_insert(&s->exts, (uintptr_t)ext, upb_value_constptr(f), s->arena); } @@ -8107,12 +8154,6 @@ static const upb_FileDef* _upb_DefPool_AddFile( const upb_MiniTableFile* layout, upb_Status* status) { const upb_StringView name = UPB_DESC(FileDescriptorProto_name)(file_proto); - if (name.size == 0) { - upb_Status_SetErrorFormat(status, - "missing name in google_protobuf_FileDescriptorProto"); - return NULL; - } - // Determine whether we already know about this file. { upb_value v; @@ -8504,7 +8545,6 @@ static void create_enumdef(upb_DefBuilder* ctx, const char* prefix, e->file = _upb_DefBuilder_File(ctx); name = UPB_DESC(EnumDescriptorProto_name)(enum_proto); - _upb_DefBuilder_CheckIdentNotFull(ctx, name); e->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); _upb_DefBuilder_Add(ctx, e->full_name, @@ -9314,7 +9354,14 @@ static void _upb_FieldDef_Create(upb_DefBuilder* ctx, const char* prefix, } const upb_StringView name = UPB_DESC(FieldDescriptorProto_name)(field_proto); - _upb_DefBuilder_CheckIdentNotFull(ctx, name); + + f->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); + f->label_ = (int)UPB_DESC(FieldDescriptorProto_label)(field_proto); + f->number_ = UPB_DESC(FieldDescriptorProto_number)(field_proto); + f->is_proto3_optional = + UPB_DESC(FieldDescriptorProto_proto3_optional)(field_proto); + f->msgdef = m; + f->scope.oneof = NULL; f->has_json_name = UPB_DESC(FieldDescriptorProto_has_json_name)(field_proto); if (f->has_json_name) { @@ -9326,14 +9373,6 @@ static void _upb_FieldDef_Create(upb_DefBuilder* ctx, const char* prefix, } if (!f->json_name) _upb_DefBuilder_OomErr(ctx); - f->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); - f->label_ = (int)UPB_DESC(FieldDescriptorProto_label)(field_proto); - f->number_ = UPB_DESC(FieldDescriptorProto_number)(field_proto); - f->is_proto3_optional = - UPB_DESC(FieldDescriptorProto_proto3_optional)(field_proto); - f->msgdef = m; - f->scope.oneof = NULL; - const bool has_type = UPB_DESC(FieldDescriptorProto_has_type)(field_proto); const bool has_type_name = UPB_DESC(FieldDescriptorProto_has_type_name)(field_proto); @@ -9463,19 +9502,24 @@ static void _upb_FieldDef_CreateNotExt(upb_DefBuilder* ctx, const char* prefix, } _upb_MessageDef_InsertField(ctx, m, f); +} - if (!ctx->layout) return; +upb_FieldDef* _upb_Extensions_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(FieldDescriptorProto) * const* protos, const char* prefix, + upb_MessageDef* m) { + _upb_DefType_CheckPadding(sizeof(upb_FieldDef)); + upb_FieldDef* defs = + (upb_FieldDef*)_upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef) * n); - const upb_MiniTable* mt = upb_MessageDef_MiniTable(m); - const upb_MiniTableField* fields = mt->fields; - for (int i = 0; i < mt->field_count; i++) { - if (fields[i].number == f->number_) { - f->layout_index = i; - return; - } + for (int i = 0; i < n; i++) { + upb_FieldDef* f = &defs[i]; + + _upb_FieldDef_CreateExt(ctx, prefix, protos[i], m, f); + f->index_ = i; } - UPB_ASSERT(false); // It should be impossible to reach this point. + return defs; } upb_FieldDef* _upb_FieldDefs_New( @@ -9486,28 +9530,23 @@ upb_FieldDef* _upb_FieldDefs_New( upb_FieldDef* defs = (upb_FieldDef*)_upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef) * n); - // If we are creating extensions then is_sorted will be NULL. - // If we are not creating extensions then is_sorted will be non-NULL. - if (is_sorted) { - uint32_t previous = 0; - for (int i = 0; i < n; i++) { - upb_FieldDef* f = &defs[i]; - - _upb_FieldDef_CreateNotExt(ctx, prefix, protos[i], m, f); - f->index_ = i; - if (!ctx->layout) f->layout_index = i; - - const uint32_t current = f->number_; - if (previous > current) *is_sorted = false; - previous = current; + uint32_t previous = 0; + for (int i = 0; i < n; i++) { + upb_FieldDef* f = &defs[i]; + + _upb_FieldDef_CreateNotExt(ctx, prefix, protos[i], m, f); + f->index_ = i; + if (!ctx->layout) { + // Speculate that the def fields are sorted. We will always sort the + // MiniTable fields, so if defs are sorted then indices will match. + // + // If this is incorrect, we will overwrite later. + f->layout_index = i; } - } else { - for (int i = 0; i < n; i++) { - upb_FieldDef* f = &defs[i]; - _upb_FieldDef_CreateExt(ctx, prefix, protos[i], m, f); - f->index_ = i; - } + const uint32_t current = f->number_; + if (previous > current) *is_sorted = false; + previous = current; } return defs; @@ -9563,6 +9602,9 @@ static int _upb_FieldDef_Compare(const void* p1, const void* p2) { return (v1 < v2) ? -1 : (v1 > v2); } +// _upb_FieldDefs_Sorted() is mostly a pure function of its inputs, but has one +// critical side effect that we depend on: it sets layout_index appropriately +// for non-sorted lists of fields. const upb_FieldDef** _upb_FieldDefs_Sorted(const upb_FieldDef* f, int n, upb_Arena* a) { // TODO(salo): Replace this arena alloc with a persistent scratch buffer. @@ -9620,7 +9662,10 @@ static void resolve_extension(upb_DefBuilder* ctx, const char* prefix, "field number %u in extension %s has no extension range in message %s", (unsigned)f->number_, f->full_name, upb_MessageDef_FullName(m)); } +} +void _upb_FieldDef_BuildMiniTableExtension(upb_DefBuilder* ctx, + const upb_FieldDef* f) { const upb_MiniTableExtension* ext = _upb_FieldDef_ExtensionMiniTable(f); if (ctx->layout) { @@ -9639,8 +9684,8 @@ static void resolve_extension(upb_DefBuilder* ctx, const char* prefix, sub.subenum = _upb_EnumDef_MiniTable(f->sub.enumdef); } bool ok2 = upb_MiniTableExtension_Build(desc.data, desc.size, mut_ext, - upb_MessageDef_MiniTable(m), sub, - ctx->status); + upb_MessageDef_MiniTable(f->msgdef), + sub, ctx->status); if (!ok2) _upb_DefBuilder_Errf(ctx, "Could not build extension mini table"); } @@ -9697,6 +9742,7 @@ struct upb_FileDef { const UPB_DESC(FileOptions) * opts; const char* name; const char* package; + const char* edition; const upb_FileDef** deps; const int32_t* public_deps; @@ -9733,6 +9779,10 @@ const char* upb_FileDef_Package(const upb_FileDef* f) { return f->package ? f->package : ""; } +const char* upb_FileDef_Edition(const upb_FileDef* f) { + return f->edition ? f->edition : ""; +} + const char* _upb_FileDef_RawPackage(const upb_FileDef* f) { return f->package; } upb_Syntax upb_FileDef_Syntax(const upb_FileDef* f) { return f->syntax; } @@ -9880,13 +9930,14 @@ void _upb_FileDef_Create(upb_DefBuilder* ctx, } } - if (!UPB_DESC(FileDescriptorProto_has_name)(file_proto)) { - _upb_DefBuilder_Errf(ctx, "File has no name"); + upb_StringView name = UPB_DESC(FileDescriptorProto_name)(file_proto); + file->name = strviewdup(ctx, name); + if (strlen(file->name) != name.size) { + _upb_DefBuilder_Errf(ctx, "File name contained embedded NULL"); } - file->name = strviewdup(ctx, UPB_DESC(FileDescriptorProto_name)(file_proto)); - upb_StringView package = UPB_DESC(FileDescriptorProto_package)(file_proto); + if (package.size) { _upb_DefBuilder_CheckIdentFull(ctx, package); file->package = strviewdup(ctx, package); @@ -9894,6 +9945,18 @@ void _upb_FileDef_Create(upb_DefBuilder* ctx, file->package = NULL; } + upb_StringView edition = UPB_DESC(FileDescriptorProto_edition)(file_proto); + + if (edition.size == 0) { + file->edition = NULL; + } else { + // TODO(b/267770604): How should we validate this? + file->edition = strviewdup(ctx, edition); + if (strlen(file->edition) != edition.size) { + _upb_DefBuilder_Errf(ctx, "Edition name contained embedded NULL"); + } + } + if (UPB_DESC(FileDescriptorProto_has_syntax)(file_proto)) { upb_StringView syntax = UPB_DESC(FileDescriptorProto_syntax)(file_proto); @@ -9962,8 +10025,7 @@ void _upb_FileDef_Create(upb_DefBuilder* ctx, // Create extensions. exts = UPB_DESC(FileDescriptorProto_extension)(file_proto, &n); file->top_lvl_ext_count = n; - file->top_lvl_exts = - _upb_FieldDefs_New(ctx, n, exts, file->package, NULL, NULL); + file->top_lvl_exts = _upb_Extensions_New(ctx, n, exts, file->package, NULL); // Create messages. msgs = UPB_DESC(FileDescriptorProto_message_type)(file_proto, &n); @@ -9987,11 +10049,19 @@ void _upb_FileDef_Create(upb_DefBuilder* ctx, _upb_FieldDef_Resolve(ctx, file->package, f); } - if (!ctx->layout) { - for (int i = 0; i < file->top_lvl_msg_count; i++) { - upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); - _upb_MessageDef_LinkMiniTable(ctx, m); - } + for (int i = 0; i < file->top_lvl_msg_count; i++) { + upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); + _upb_MessageDef_CreateMiniTable(ctx, (upb_MessageDef*)m); + } + + for (int i = 0; i < file->top_lvl_ext_count; i++) { + upb_FieldDef* f = (upb_FieldDef*)upb_FileDef_TopLevelExtension(file, i); + _upb_FieldDef_BuildMiniTableExtension(ctx, f); + } + + for (int i = 0; i < file->top_lvl_msg_count; i++) { + upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); + _upb_MessageDef_LinkMiniTable(ctx, m); } if (file->ext_count) { @@ -10504,6 +10574,8 @@ bool upb_MessageDef_IsMessageSet(const upb_MessageDef* m) { static upb_MiniTable* _upb_MessageDef_MakeMiniTable(upb_DefBuilder* ctx, const upb_MessageDef* m) { upb_StringView desc; + // Note: this will assign layout_index for fields, so upb_FieldDef_MiniTable() + // is safe to call only after this call. bool ok = upb_MessageDef_MiniDescriptorEncode(m, ctx->tmp_arena, &desc); if (!ok) _upb_DefBuilder_OomErr(ctx); @@ -10523,23 +10595,6 @@ void _upb_MessageDef_Resolve(upb_DefBuilder* ctx, upb_MessageDef* m) { _upb_FieldDef_Resolve(ctx, m->full_name, f); } - if (!ctx->layout) { - m->layout = _upb_MessageDef_MakeMiniTable(ctx, m); - if (!m->layout) _upb_DefBuilder_OomErr(ctx); - } - -#ifndef NDEBUG - for (int i = 0; i < m->field_count; i++) { - const upb_FieldDef* f = upb_MessageDef_Field(m, i); - const int layout_index = _upb_FieldDef_LayoutIndex(f); - UPB_ASSERT(layout_index < m->layout->field_count); - const upb_MiniTableField* mt_f = &m->layout->fields[layout_index]; - UPB_ASSERT(upb_FieldDef_Type(f) == upb_MiniTableField_Type(mt_f)); - UPB_ASSERT(upb_FieldDef_HasPresence(f) == - upb_MiniTableField_HasPresence(mt_f)); - } -#endif - m->in_message_set = false; for (int i = 0; i < upb_MessageDef_NestedExtensionCount(m); i++) { upb_FieldDef* ext = (upb_FieldDef*)upb_MessageDef_NestedExtension(m, i); @@ -10602,8 +10657,39 @@ void _upb_MessageDef_InsertField(upb_DefBuilder* ctx, upb_MessageDef* m, if (!ok) _upb_DefBuilder_OomErr(ctx); } +void _upb_MessageDef_CreateMiniTable(upb_DefBuilder* ctx, upb_MessageDef* m) { + if (ctx->layout == NULL) { + m->layout = _upb_MessageDef_MakeMiniTable(ctx, m); + } else { + UPB_ASSERT(ctx->msg_count < ctx->layout->msg_count); + m->layout = ctx->layout->msgs[ctx->msg_count++]; + UPB_ASSERT(m->field_count == m->layout->field_count); + + // We don't need the result of this call, but it will assign layout_index + // for all the fields in O(n lg n) time. + _upb_FieldDefs_Sorted(m->fields, m->field_count, ctx->tmp_arena); + } + + for (int i = 0; i < m->nested_msg_count; i++) { + upb_MessageDef* nested = + (upb_MessageDef*)upb_MessageDef_NestedMessage(m, i); + _upb_MessageDef_CreateMiniTable(ctx, nested); + } +} + void _upb_MessageDef_LinkMiniTable(upb_DefBuilder* ctx, const upb_MessageDef* m) { + for (int i = 0; i < upb_MessageDef_NestedExtensionCount(m); i++) { + const upb_FieldDef* ext = upb_MessageDef_NestedExtension(m, i); + _upb_FieldDef_BuildMiniTableExtension(ctx, ext); + } + + for (int i = 0; i < m->nested_msg_count; i++) { + _upb_MessageDef_LinkMiniTable(ctx, upb_MessageDef_NestedMessage(m, i)); + } + + if (ctx->layout) return; + for (int i = 0; i < m->field_count; i++) { const upb_FieldDef* f = upb_MessageDef_Field(m, i); const upb_MessageDef* sub_m = upb_FieldDef_MessageSubDef(f); @@ -10631,9 +10717,17 @@ void _upb_MessageDef_LinkMiniTable(upb_DefBuilder* ctx, } } - for (int i = 0; i < m->nested_msg_count; i++) { - _upb_MessageDef_LinkMiniTable(ctx, upb_MessageDef_NestedMessage(m, i)); +#ifndef NDEBUG + for (int i = 0; i < m->field_count; i++) { + const upb_FieldDef* f = upb_MessageDef_Field(m, i); + const int layout_index = _upb_FieldDef_LayoutIndex(f); + UPB_ASSERT(layout_index < m->layout->field_count); + const upb_MiniTableField* mt_f = &m->layout->fields[layout_index]; + UPB_ASSERT(upb_FieldDef_Type(f) == upb_MiniTableField_Type(mt_f)); + UPB_ASSERT(upb_FieldDef_HasPresence(f) == + upb_MiniTableField_HasPresence(mt_f)); } +#endif } static uint64_t _upb_MessageDef_Modifiers(const upb_MessageDef* m) { @@ -10766,7 +10860,6 @@ static void create_msgdef(upb_DefBuilder* ctx, const char* prefix, m->is_sorted = true; name = UPB_DESC(DescriptorProto_name)(msg_proto); - _upb_DefBuilder_CheckIdentNotFull(ctx, name); m->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); _upb_DefBuilder_Add(ctx, m->full_name, _upb_DefType_Pack(m, UPB_DEFTYPE_MSG)); @@ -10785,17 +10878,6 @@ static void create_msgdef(upb_DefBuilder* ctx, const char* prefix, ok = upb_strtable_init(&m->ntof, n_oneof + n_field, ctx->arena); if (!ok) _upb_DefBuilder_OomErr(ctx); - if (ctx->layout) { - /* create_fielddef() below depends on this being set. */ - UPB_ASSERT(ctx->msg_count < ctx->layout->msg_count); - m->layout = ctx->layout->msgs[ctx->msg_count++]; - UPB_ASSERT(n_field == m->layout->field_count); - } else { - /* Allocate now (to allow cross-linking), populate later. */ - m->layout = _upb_DefBuilder_Alloc( - ctx, sizeof(*m->layout) + sizeof(_upb_FastTable_Entry)); - } - UPB_DEF_SET_OPTIONS(m->opts, DescriptorProto, MessageOptions, msg_proto); m->oneof_count = n_oneof; @@ -10836,7 +10918,7 @@ static void create_msgdef(upb_DefBuilder* ctx, const char* prefix, const UPB_DESC(FieldDescriptorProto)* const* exts = UPB_DESC(DescriptorProto_extension)(msg_proto, &n_ext); m->nested_ext_count = n_ext; - m->nested_exts = _upb_FieldDefs_New(ctx, n_ext, exts, m->full_name, m, NULL); + m->nested_exts = _upb_Extensions_New(ctx, n_ext, exts, m->full_name, m); const UPB_DESC(DescriptorProto)* const* msgs = UPB_DESC(DescriptorProto_nested_type)(msg_proto, &n_msg); @@ -11243,7 +11325,6 @@ static void create_service(upb_DefBuilder* ctx, s->file = _upb_DefBuilder_File(ctx); name = UPB_DESC(ServiceDescriptorProto_name)(svc_proto); - _upb_DefBuilder_CheckIdentNotFull(ctx, name); const char* package = _upb_FileDef_RawPackage(s->file); s->full_name = _upb_DefBuilder_MakeFullName(ctx, package, name); _upb_DefBuilder_Add(ctx, s->full_name, @@ -14011,6 +14092,15 @@ static void encode_msgset_item(upb_encstate* e, encode_tag(e, kUpb_MsgSet_Item, kUpb_WireType_StartGroup); } +static void encode_ext(upb_encstate* e, const upb_Message_Extension* ext, + bool is_message_set) { + if (UPB_UNLIKELY(is_message_set)) { + encode_msgset_item(e, ext); + } else { + encode_field(e, &ext->data, &ext->ext->sub, &ext->ext->field); + } +} + static void encode_message(upb_encstate* e, const upb_Message* msg, const upb_MiniTable* m, size_t* size) { size_t pre_len = e->limit - e->ptr; @@ -14040,12 +14130,17 @@ static void encode_message(upb_encstate* e, const upb_Message* msg, size_t ext_count; const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &ext_count); if (ext_count) { - const upb_Message_Extension* end = ext + ext_count; - for (; ext != end; ext++) { - if (UPB_UNLIKELY(m->ext == kUpb_ExtMode_IsMessageSet)) { - encode_msgset_item(e, ext); - } else { - encode_field(e, &ext->data, &ext->ext->sub, &ext->ext->field); + if (e->options & kUpb_EncodeOption_Deterministic) { + _upb_sortedmap sorted; + _upb_mapsorter_pushexts(&e->sorter, ext, ext_count, &sorted); + while (_upb_sortedmap_nextext(&e->sorter, &sorted, &ext)) { + encode_ext(e, ext, m->ext == kUpb_ExtMode_IsMessageSet); + } + _upb_mapsorter_popmap(&e->sorter, &sorted); + } else { + const upb_Message_Extension* end = ext + ext_count; + for (; ext != end; ext++) { + encode_ext(e, ext, m->ext == kUpb_ExtMode_IsMessageSet); } } } diff --git a/php/ext/google/protobuf/php-upb.h b/php/ext/google/protobuf/php-upb.h index 6b554cc3427e..3de3e6d34c00 100644 --- a/php/ext/google/protobuf/php-upb.h +++ b/php/ext/google/protobuf/php-upb.h @@ -1422,179 +1422,6 @@ UPB_INLINE int upb_Log2CeilingSize(int x) { return 1 << upb_Log2Ceiling(x); } #include -#ifndef UPB_MINI_TABLE_MESSAGE_INTERNAL_H_ -#define UPB_MINI_TABLE_MESSAGE_INTERNAL_H_ - - -// Must be last. - -struct upb_Decoder; -typedef const char* _upb_FieldParser(struct upb_Decoder* d, const char* ptr, - upb_Message* msg, intptr_t table, - uint64_t hasbits, uint64_t data); -typedef struct { - uint64_t field_data; - _upb_FieldParser* field_parser; -} _upb_FastTable_Entry; - -typedef enum { - kUpb_ExtMode_NonExtendable = 0, // Non-extendable message. - kUpb_ExtMode_Extendable = 1, // Normal extendable message. - kUpb_ExtMode_IsMessageSet = 2, // MessageSet message. - kUpb_ExtMode_IsMessageSet_ITEM = - 3, // MessageSet item (temporary only, see decode.c) - - // During table building we steal a bit to indicate that the message is a map - // entry. *Only* used during table building! - kUpb_ExtMode_IsMapEntry = 4, -} upb_ExtMode; - -// upb_MiniTable represents the memory layout of a given upb_MessageDef. -// The members are public so generated code can initialize them, -// but users MUST NOT directly read or write any of its members. -struct upb_MiniTable { - const upb_MiniTableSub* subs; - const upb_MiniTableField* fields; - - // Must be aligned to sizeof(void*). Doesn't include internal members like - // unknown fields, extension dict, pointer to msglayout, etc. - uint16_t size; - - uint16_t field_count; - uint8_t ext; // upb_ExtMode, declared as uint8_t so sizeof(ext) == 1 - uint8_t dense_below; - uint8_t table_mask; - uint8_t required_count; // Required fields have the lowest hasbits. - - // To statically initialize the tables of variable length, we need a flexible - // array member, and we need to compile in gnu99 mode (constant initialization - // of flexible array members is a GNU extension, not in C99 unfortunately. - _upb_FastTable_Entry fasttable[]; -}; - -// Map entries aren't actually stored for map fields, they are only used during -// parsing. For parsing, it helps a lot if all map entry messages have the same -// layout. The layout code in mini_table/decode.c will ensure that all map -// entries have this layout. -// -// Note that users can and do create map entries directly, which will also use -// this layout. -// -// NOTE: sync with mini_table/decode.c. -typedef struct { - // We only need 2 hasbits max, but due to alignment we'll use 8 bytes here, - // and the uint64_t helps make this clear. - uint64_t hasbits; - union { - upb_StringView str; // For str/bytes. - upb_value val; // For all other types. - } k; - union { - upb_StringView str; // For str/bytes. - upb_value val; // For all other types. - } v; -} upb_MapEntryData; - -typedef struct { - void* internal_data; - upb_MapEntryData data; -} upb_MapEntry; - -#ifdef __cplusplus -extern "C" { -#endif - -// Computes a bitmask in which the |l->required_count| lowest bits are set, -// except that we skip the lowest bit (because upb never uses hasbit 0). -// -// Sample output: -// requiredmask(1) => 0b10 (0x2) -// requiredmask(5) => 0b111110 (0x3e) -UPB_INLINE uint64_t upb_MiniTable_requiredmask(const upb_MiniTable* l) { - int n = l->required_count; - assert(0 < n && n <= 63); - return ((1ULL << n) - 1) << 1; -} - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -#endif /* UPB_MINI_TABLE_MESSAGE_INTERNAL_H_ */ - -// Must be last. - -#ifdef __cplusplus -extern "C" { -#endif - -// _upb_mapsorter sorts maps and provides ordered iteration over the entries. -// Since maps can be recursive (map values can be messages which contain other -// maps), _upb_mapsorter can contain a stack of maps. - -typedef struct { - upb_tabent const** entries; - int size; - int cap; -} _upb_mapsorter; - -typedef struct { - int start; - int pos; - int end; -} _upb_sortedmap; - -UPB_INLINE void _upb_mapsorter_init(_upb_mapsorter* s) { - s->entries = NULL; - s->size = 0; - s->cap = 0; -} - -UPB_INLINE void _upb_mapsorter_destroy(_upb_mapsorter* s) { - if (s->entries) free(s->entries); -} - -UPB_INLINE bool _upb_sortedmap_next(_upb_mapsorter* s, const upb_Map* map, - _upb_sortedmap* sorted, upb_MapEntry* ent) { - if (sorted->pos == sorted->end) return false; - const upb_tabent* tabent = s->entries[sorted->pos++]; - upb_StringView key = upb_tabstrview(tabent->key); - _upb_map_fromkey(key, &ent->data.k, map->key_size); - upb_value val = {tabent->val.val}; - _upb_map_fromvalue(val, &ent->data.v, map->val_size); - return true; -} - -UPB_INLINE void _upb_mapsorter_popmap(_upb_mapsorter* s, - _upb_sortedmap* sorted) { - s->size = sorted->start; -} - -bool _upb_mapsorter_pushmap(_upb_mapsorter* s, upb_FieldType key_type, - const upb_Map* map, _upb_sortedmap* sorted); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -#endif /* UPB_COLLECTIONS_MAP_SORTER_INTERNAL_H_ */ - -/* -** Our memory representation for parsing tables and messages themselves. -** Functions in this file are used by generated code and possibly reflection. -** -** The definitions in this file are internal to upb. -**/ - -#ifndef UPB_MESSAGE_INTERNAL_H_ -#define UPB_MESSAGE_INTERNAL_H_ - -#include -#include - - #ifndef UPB_MESSAGE_EXTENSION_INTERNAL_H_ #define UPB_MESSAGE_EXTENSION_INTERNAL_H_ @@ -1867,6 +1694,191 @@ const upb_Message_Extension* _upb_Message_Getext( #endif /* UPB_MESSAGE_EXTENSION_INTERNAL_H_ */ +#ifndef UPB_MINI_TABLE_MESSAGE_INTERNAL_H_ +#define UPB_MINI_TABLE_MESSAGE_INTERNAL_H_ + + +// Must be last. + +struct upb_Decoder; +typedef const char* _upb_FieldParser(struct upb_Decoder* d, const char* ptr, + upb_Message* msg, intptr_t table, + uint64_t hasbits, uint64_t data); +typedef struct { + uint64_t field_data; + _upb_FieldParser* field_parser; +} _upb_FastTable_Entry; + +typedef enum { + kUpb_ExtMode_NonExtendable = 0, // Non-extendable message. + kUpb_ExtMode_Extendable = 1, // Normal extendable message. + kUpb_ExtMode_IsMessageSet = 2, // MessageSet message. + kUpb_ExtMode_IsMessageSet_ITEM = + 3, // MessageSet item (temporary only, see decode.c) + + // During table building we steal a bit to indicate that the message is a map + // entry. *Only* used during table building! + kUpb_ExtMode_IsMapEntry = 4, +} upb_ExtMode; + +// upb_MiniTable represents the memory layout of a given upb_MessageDef. +// The members are public so generated code can initialize them, +// but users MUST NOT directly read or write any of its members. +struct upb_MiniTable { + const upb_MiniTableSub* subs; + const upb_MiniTableField* fields; + + // Must be aligned to sizeof(void*). Doesn't include internal members like + // unknown fields, extension dict, pointer to msglayout, etc. + uint16_t size; + + uint16_t field_count; + uint8_t ext; // upb_ExtMode, declared as uint8_t so sizeof(ext) == 1 + uint8_t dense_below; + uint8_t table_mask; + uint8_t required_count; // Required fields have the lowest hasbits. + + // To statically initialize the tables of variable length, we need a flexible + // array member, and we need to compile in gnu99 mode (constant initialization + // of flexible array members is a GNU extension, not in C99 unfortunately. + _upb_FastTable_Entry fasttable[]; +}; + +// Map entries aren't actually stored for map fields, they are only used during +// parsing. For parsing, it helps a lot if all map entry messages have the same +// layout. The layout code in mini_table/decode.c will ensure that all map +// entries have this layout. +// +// Note that users can and do create map entries directly, which will also use +// this layout. +// +// NOTE: sync with mini_table/decode.c. +typedef struct { + // We only need 2 hasbits max, but due to alignment we'll use 8 bytes here, + // and the uint64_t helps make this clear. + uint64_t hasbits; + union { + upb_StringView str; // For str/bytes. + upb_value val; // For all other types. + } k; + union { + upb_StringView str; // For str/bytes. + upb_value val; // For all other types. + } v; +} upb_MapEntryData; + +typedef struct { + void* internal_data; + upb_MapEntryData data; +} upb_MapEntry; + +#ifdef __cplusplus +extern "C" { +#endif + +// Computes a bitmask in which the |l->required_count| lowest bits are set, +// except that we skip the lowest bit (because upb never uses hasbit 0). +// +// Sample output: +// requiredmask(1) => 0b10 (0x2) +// requiredmask(5) => 0b111110 (0x3e) +UPB_INLINE uint64_t upb_MiniTable_requiredmask(const upb_MiniTable* l) { + int n = l->required_count; + assert(0 < n && n <= 63); + return ((1ULL << n) - 1) << 1; +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* UPB_MINI_TABLE_MESSAGE_INTERNAL_H_ */ + +// Must be last. + +#ifdef __cplusplus +extern "C" { +#endif + +// _upb_mapsorter sorts maps and provides ordered iteration over the entries. +// Since maps can be recursive (map values can be messages which contain other +// maps), _upb_mapsorter can contain a stack of maps. + +typedef struct { + void const** entries; + int size; + int cap; +} _upb_mapsorter; + +typedef struct { + int start; + int pos; + int end; +} _upb_sortedmap; + +UPB_INLINE void _upb_mapsorter_init(_upb_mapsorter* s) { + s->entries = NULL; + s->size = 0; + s->cap = 0; +} + +UPB_INLINE void _upb_mapsorter_destroy(_upb_mapsorter* s) { + if (s->entries) free(s->entries); +} + +UPB_INLINE bool _upb_sortedmap_next(_upb_mapsorter* s, const upb_Map* map, + _upb_sortedmap* sorted, upb_MapEntry* ent) { + if (sorted->pos == sorted->end) return false; + const upb_tabent* tabent = (const upb_tabent*)s->entries[sorted->pos++]; + upb_StringView key = upb_tabstrview(tabent->key); + _upb_map_fromkey(key, &ent->data.k, map->key_size); + upb_value val = {tabent->val.val}; + _upb_map_fromvalue(val, &ent->data.v, map->val_size); + return true; +} + +UPB_INLINE bool _upb_sortedmap_nextext(_upb_mapsorter* s, + _upb_sortedmap* sorted, + const upb_Message_Extension** ext) { + if (sorted->pos == sorted->end) return false; + *ext = (const upb_Message_Extension*)s->entries[sorted->pos++]; + return true; +} + +UPB_INLINE void _upb_mapsorter_popmap(_upb_mapsorter* s, + _upb_sortedmap* sorted) { + s->size = sorted->start; +} + +bool _upb_mapsorter_pushmap(_upb_mapsorter* s, upb_FieldType key_type, + const upb_Map* map, _upb_sortedmap* sorted); + +bool _upb_mapsorter_pushexts(_upb_mapsorter* s, + const upb_Message_Extension* exts, size_t count, + _upb_sortedmap* sorted); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* UPB_COLLECTIONS_MAP_SORTER_INTERNAL_H_ */ + +/* +** Our memory representation for parsing tables and messages themselves. +** Functions in this file are used by generated code and possibly reflection. +** +** The definitions in this file are internal to upb. +**/ + +#ifndef UPB_MESSAGE_INTERNAL_H_ +#define UPB_MESSAGE_INTERNAL_H_ + +#include +#include + + #ifndef UPB_MINI_TABLE_EXTENSION_REGISTRY_H_ #define UPB_MINI_TABLE_EXTENSION_REGISTRY_H_ @@ -8326,6 +8338,7 @@ bool upb_FileDef_HasOptions(const upb_FileDef* f); const char* upb_FileDef_Name(const upb_FileDef* f); const UPB_DESC(FileOptions) * upb_FileDef_Options(const upb_FileDef* f); const char* upb_FileDef_Package(const upb_FileDef* f); +const char* upb_FileDef_Edition(const upb_FileDef* f); const upb_DefPool* upb_FileDef_Pool(const upb_FileDef* f); const upb_FileDef* upb_FileDef_PublicDependency(const upb_FileDef* f, int i); @@ -8711,7 +8724,7 @@ size_t _upb_DefPool_BytesLoaded(const upb_DefPool* s); upb_ExtensionRegistry* _upb_DefPool_ExtReg(const upb_DefPool* s); bool _upb_DefPool_InsertExt(upb_DefPool* s, const upb_MiniTableExtension* ext, - upb_FieldDef* f); + const upb_FieldDef* f); bool _upb_DefPool_InsertSym(upb_DefPool* s, upb_StringView sym, upb_value v, upb_Status* status); bool _upb_DefPool_LookupSym(const upb_DefPool* s, const char* sym, size_t size, @@ -10197,23 +10210,6 @@ UPB_INLINE upb_FileDef* _upb_DefBuilder_File(const upb_DefBuilder* ctx) { void _upb_DefBuilder_CheckIdentSlow(upb_DefBuilder* ctx, upb_StringView name, bool full); -// Verify a relative identifier string. The loop is branchless for speed. -UPB_INLINE void _upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder* ctx, - upb_StringView name) { - bool good = name.size > 0; - - for (size_t i = 0; i < name.size; i++) { - const char c = name.data[i]; - const char d = c | 0x20; // force lowercase - const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_'); - const bool is_numer = ('0' <= c) & (c <= '9') & (i != 0); - - good &= is_alpha | is_numer; - } - - if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, false); -} - // Verify a full identifier string. This is slightly more complicated than // verifying a relative identifier string because we must track '.' chars. UPB_INLINE void _upb_DefBuilder_CheckIdentFull(upb_DefBuilder* ctx, @@ -10317,6 +10313,14 @@ int _upb_FieldDef_LayoutIndex(const upb_FieldDef* f); uint64_t _upb_FieldDef_Modifiers(const upb_FieldDef* f); void _upb_FieldDef_Resolve(upb_DefBuilder* ctx, const char* prefix, upb_FieldDef* f); +void _upb_FieldDef_BuildMiniTableExtension(upb_DefBuilder* ctx, + const upb_FieldDef* f); + +// Allocate and initialize an array of |n| extensions (field defs). +upb_FieldDef* _upb_Extensions_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(FieldDescriptorProto) * const* protos, const char* prefix, + upb_MessageDef* m); // Allocate and initialize an array of |n| field defs. upb_FieldDef* _upb_FieldDefs_New( @@ -10381,6 +10385,7 @@ bool _upb_MessageDef_Insert(upb_MessageDef* m, const char* name, size_t size, void _upb_MessageDef_InsertField(upb_DefBuilder* ctx, upb_MessageDef* m, const upb_FieldDef* f); bool _upb_MessageDef_IsValidExtensionNumber(const upb_MessageDef* m, int n); +void _upb_MessageDef_CreateMiniTable(upb_DefBuilder* ctx, upb_MessageDef* m); void _upb_MessageDef_LinkMiniTable(upb_DefBuilder* ctx, const upb_MessageDef* m); void _upb_MessageDef_Resolve(upb_DefBuilder* ctx, upb_MessageDef* m); diff --git a/protobuf_deps.bzl b/protobuf_deps.bzl index fee1e9c1b15e..97b1dd38d34e 100644 --- a/protobuf_deps.bzl +++ b/protobuf_deps.bzl @@ -149,7 +149,7 @@ def protobuf_deps(): _github_archive( name = "upb", repo = "https://github.com/protocolbuffers/upb", - commit = "662497f1d3dcced2bba1620cea9aae8b484bd3cd", - sha256 = "57c87ca4145d2cbc162a6c613b114b9325b577f4f6525bd78747a34b3d03627c", + commit = "499a83042edf9b66cf9cd6c36a96ac3e2c98bdbe", + sha256 = "fe3c65d67e20025c12feb667af77a61f1548824fb025f3e2a6c4a9840ac19888", patches = ["@com_google_protobuf//build_defs:upb.patch"], ) diff --git a/ruby/ext/google/protobuf_c/ruby-upb.c b/ruby/ext/google/protobuf_c/ruby-upb.c index 6fbf78bc0282..bda70b21c33e 100644 --- a/ruby/ext/google/protobuf_c/ruby-upb.c +++ b/ruby/ext/google/protobuf_c/ruby-upb.c @@ -641,14 +641,12 @@ static int (*const compar[kUpb_FieldType_SizeOf])(const void*, const void*) = { [kUpb_FieldType_Bytes] = _upb_mapsorter_cmpstr, }; -bool _upb_mapsorter_pushmap(_upb_mapsorter* s, upb_FieldType key_type, - const upb_Map* map, _upb_sortedmap* sorted) { - int map_size = _upb_Map_Size(map); +static bool _upb_mapsorter_resize(_upb_mapsorter* s, _upb_sortedmap* sorted, + int size) { sorted->start = s->size; sorted->pos = sorted->start; - sorted->end = sorted->start + map_size; + sorted->end = sorted->start + size; - // Grow s->entries if necessary. if (sorted->end > s->cap) { s->cap = upb_Log2CeilingSize(sorted->end); s->entries = realloc(s->entries, s->cap * sizeof(*s->entries)); @@ -656,9 +654,17 @@ bool _upb_mapsorter_pushmap(_upb_mapsorter* s, upb_FieldType key_type, } s->size = sorted->end; + return true; +} + +bool _upb_mapsorter_pushmap(_upb_mapsorter* s, upb_FieldType key_type, + const upb_Map* map, _upb_sortedmap* sorted) { + int map_size = _upb_Map_Size(map); + + if (!_upb_mapsorter_resize(s, sorted, map_size)) return false; // Copy non-empty entries from the table to s->entries. - upb_tabent const** dst = &s->entries[sorted->start]; + const void** dst = &s->entries[sorted->start]; const upb_tabent* src = map->table.t.entries; const upb_tabent* end = src + upb_table_size(&map->table.t); for (; src < end; src++) { @@ -674,6 +680,29 @@ bool _upb_mapsorter_pushmap(_upb_mapsorter* s, upb_FieldType key_type, compar[key_type]); return true; } + +static int _upb_mapsorter_cmpext(const void* _a, const void* _b) { + const upb_Message_Extension* const* a = _a; + const upb_Message_Extension* const* b = _b; + uint32_t a_num = (*a)->ext->field.number; + uint32_t b_num = (*b)->ext->field.number; + assert(a_num != b_num); + return a_num < b_num ? -1 : 1; +} + +bool _upb_mapsorter_pushexts(_upb_mapsorter* s, + const upb_Message_Extension* exts, size_t count, + _upb_sortedmap* sorted) { + if (!_upb_mapsorter_resize(s, sorted, count)) return false; + + for (size_t i = 0; i < count; i++) { + s->entries[sorted->start + i] = &exts[i]; + } + + qsort(&s->entries[sorted->start], count, sizeof(*s->entries), + _upb_mapsorter_cmpext); + return true; +} /* This file was generated by upbc (the upb compiler) from the input * file: * @@ -1862,7 +1891,7 @@ char* upb_strdup2(const char* s, size_t len, upb_Arena* a) { n = len + 1; p = upb_Arena_Malloc(a, n); if (p) { - memcpy(p, s, len); + if (len != 0) memcpy(p, s, len); p[len] = 0; } return p; @@ -7198,9 +7227,27 @@ void _upb_DefBuilder_OomErr(upb_DefBuilder* ctx) { _upb_DefBuilder_FailJmp(ctx); } +// Verify a relative identifier string. The loop is branchless for speed. +static void _upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder* ctx, + upb_StringView name) { + bool good = name.size > 0; + + for (size_t i = 0; i < name.size; i++) { + const char c = name.data[i]; + const char d = c | 0x20; // force lowercase + const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_'); + const bool is_numer = ('0' <= c) & (c <= '9') & (i != 0); + + good &= is_alpha | is_numer; + } + + if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, false); +} + const char* _upb_DefBuilder_MakeFullName(upb_DefBuilder* ctx, const char* prefix, upb_StringView name) { + _upb_DefBuilder_CheckIdentNotFull(ctx, name); if (prefix) { // ret = prefix + '.' + name; size_t n = strlen(prefix); @@ -7316,7 +7363,7 @@ static bool TryGetChar(const char** src, const char* end, char* ch) { return true; } -static char TryGetHexDigit(const char** src, const char* end) { +static int TryGetHexDigit(const char** src, const char* end) { char ch; if (!TryGetChar(src, end, &ch)) return -1; if ('0' <= ch && ch <= '9') { @@ -7333,10 +7380,10 @@ static char TryGetHexDigit(const char** src, const char* end) { static char upb_DefBuilder_ParseHexEscape(upb_DefBuilder* ctx, const upb_FieldDef* f, const char** src, const char* end) { - char hex_digit = TryGetHexDigit(src, end); + int hex_digit = TryGetHexDigit(src, end); if (hex_digit < 0) { _upb_DefBuilder_Errf( - ctx, "\\x cannot be followed by non-hex digit in field '%s' default", + ctx, "\\x must be followed by at least one hex digit (field='%s')", upb_FieldDef_FullName(f)); return 0; } @@ -7512,7 +7559,7 @@ upb_DefPool* upb_DefPool_New(void) { } bool _upb_DefPool_InsertExt(upb_DefPool* s, const upb_MiniTableExtension* ext, - upb_FieldDef* f) { + const upb_FieldDef* f) { return upb_inttable_insert(&s->exts, (uintptr_t)ext, upb_value_constptr(f), s->arena); } @@ -7744,12 +7791,6 @@ static const upb_FileDef* _upb_DefPool_AddFile( const upb_MiniTableFile* layout, upb_Status* status) { const upb_StringView name = UPB_DESC(FileDescriptorProto_name)(file_proto); - if (name.size == 0) { - upb_Status_SetErrorFormat(status, - "missing name in google_protobuf_FileDescriptorProto"); - return NULL; - } - // Determine whether we already know about this file. { upb_value v; @@ -8141,7 +8182,6 @@ static void create_enumdef(upb_DefBuilder* ctx, const char* prefix, e->file = _upb_DefBuilder_File(ctx); name = UPB_DESC(EnumDescriptorProto_name)(enum_proto); - _upb_DefBuilder_CheckIdentNotFull(ctx, name); e->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); _upb_DefBuilder_Add(ctx, e->full_name, @@ -8951,7 +8991,14 @@ static void _upb_FieldDef_Create(upb_DefBuilder* ctx, const char* prefix, } const upb_StringView name = UPB_DESC(FieldDescriptorProto_name)(field_proto); - _upb_DefBuilder_CheckIdentNotFull(ctx, name); + + f->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); + f->label_ = (int)UPB_DESC(FieldDescriptorProto_label)(field_proto); + f->number_ = UPB_DESC(FieldDescriptorProto_number)(field_proto); + f->is_proto3_optional = + UPB_DESC(FieldDescriptorProto_proto3_optional)(field_proto); + f->msgdef = m; + f->scope.oneof = NULL; f->has_json_name = UPB_DESC(FieldDescriptorProto_has_json_name)(field_proto); if (f->has_json_name) { @@ -8963,14 +9010,6 @@ static void _upb_FieldDef_Create(upb_DefBuilder* ctx, const char* prefix, } if (!f->json_name) _upb_DefBuilder_OomErr(ctx); - f->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); - f->label_ = (int)UPB_DESC(FieldDescriptorProto_label)(field_proto); - f->number_ = UPB_DESC(FieldDescriptorProto_number)(field_proto); - f->is_proto3_optional = - UPB_DESC(FieldDescriptorProto_proto3_optional)(field_proto); - f->msgdef = m; - f->scope.oneof = NULL; - const bool has_type = UPB_DESC(FieldDescriptorProto_has_type)(field_proto); const bool has_type_name = UPB_DESC(FieldDescriptorProto_has_type_name)(field_proto); @@ -9100,19 +9139,24 @@ static void _upb_FieldDef_CreateNotExt(upb_DefBuilder* ctx, const char* prefix, } _upb_MessageDef_InsertField(ctx, m, f); +} - if (!ctx->layout) return; +upb_FieldDef* _upb_Extensions_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(FieldDescriptorProto) * const* protos, const char* prefix, + upb_MessageDef* m) { + _upb_DefType_CheckPadding(sizeof(upb_FieldDef)); + upb_FieldDef* defs = + (upb_FieldDef*)_upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef) * n); - const upb_MiniTable* mt = upb_MessageDef_MiniTable(m); - const upb_MiniTableField* fields = mt->fields; - for (int i = 0; i < mt->field_count; i++) { - if (fields[i].number == f->number_) { - f->layout_index = i; - return; - } + for (int i = 0; i < n; i++) { + upb_FieldDef* f = &defs[i]; + + _upb_FieldDef_CreateExt(ctx, prefix, protos[i], m, f); + f->index_ = i; } - UPB_ASSERT(false); // It should be impossible to reach this point. + return defs; } upb_FieldDef* _upb_FieldDefs_New( @@ -9123,28 +9167,23 @@ upb_FieldDef* _upb_FieldDefs_New( upb_FieldDef* defs = (upb_FieldDef*)_upb_DefBuilder_Alloc(ctx, sizeof(upb_FieldDef) * n); - // If we are creating extensions then is_sorted will be NULL. - // If we are not creating extensions then is_sorted will be non-NULL. - if (is_sorted) { - uint32_t previous = 0; - for (int i = 0; i < n; i++) { - upb_FieldDef* f = &defs[i]; - - _upb_FieldDef_CreateNotExt(ctx, prefix, protos[i], m, f); - f->index_ = i; - if (!ctx->layout) f->layout_index = i; - - const uint32_t current = f->number_; - if (previous > current) *is_sorted = false; - previous = current; + uint32_t previous = 0; + for (int i = 0; i < n; i++) { + upb_FieldDef* f = &defs[i]; + + _upb_FieldDef_CreateNotExt(ctx, prefix, protos[i], m, f); + f->index_ = i; + if (!ctx->layout) { + // Speculate that the def fields are sorted. We will always sort the + // MiniTable fields, so if defs are sorted then indices will match. + // + // If this is incorrect, we will overwrite later. + f->layout_index = i; } - } else { - for (int i = 0; i < n; i++) { - upb_FieldDef* f = &defs[i]; - _upb_FieldDef_CreateExt(ctx, prefix, protos[i], m, f); - f->index_ = i; - } + const uint32_t current = f->number_; + if (previous > current) *is_sorted = false; + previous = current; } return defs; @@ -9200,6 +9239,9 @@ static int _upb_FieldDef_Compare(const void* p1, const void* p2) { return (v1 < v2) ? -1 : (v1 > v2); } +// _upb_FieldDefs_Sorted() is mostly a pure function of its inputs, but has one +// critical side effect that we depend on: it sets layout_index appropriately +// for non-sorted lists of fields. const upb_FieldDef** _upb_FieldDefs_Sorted(const upb_FieldDef* f, int n, upb_Arena* a) { // TODO(salo): Replace this arena alloc with a persistent scratch buffer. @@ -9257,7 +9299,10 @@ static void resolve_extension(upb_DefBuilder* ctx, const char* prefix, "field number %u in extension %s has no extension range in message %s", (unsigned)f->number_, f->full_name, upb_MessageDef_FullName(m)); } +} +void _upb_FieldDef_BuildMiniTableExtension(upb_DefBuilder* ctx, + const upb_FieldDef* f) { const upb_MiniTableExtension* ext = _upb_FieldDef_ExtensionMiniTable(f); if (ctx->layout) { @@ -9276,8 +9321,8 @@ static void resolve_extension(upb_DefBuilder* ctx, const char* prefix, sub.subenum = _upb_EnumDef_MiniTable(f->sub.enumdef); } bool ok2 = upb_MiniTableExtension_Build(desc.data, desc.size, mut_ext, - upb_MessageDef_MiniTable(m), sub, - ctx->status); + upb_MessageDef_MiniTable(f->msgdef), + sub, ctx->status); if (!ok2) _upb_DefBuilder_Errf(ctx, "Could not build extension mini table"); } @@ -9334,6 +9379,7 @@ struct upb_FileDef { const UPB_DESC(FileOptions) * opts; const char* name; const char* package; + const char* edition; const upb_FileDef** deps; const int32_t* public_deps; @@ -9370,6 +9416,10 @@ const char* upb_FileDef_Package(const upb_FileDef* f) { return f->package ? f->package : ""; } +const char* upb_FileDef_Edition(const upb_FileDef* f) { + return f->edition ? f->edition : ""; +} + const char* _upb_FileDef_RawPackage(const upb_FileDef* f) { return f->package; } upb_Syntax upb_FileDef_Syntax(const upb_FileDef* f) { return f->syntax; } @@ -9517,13 +9567,14 @@ void _upb_FileDef_Create(upb_DefBuilder* ctx, } } - if (!UPB_DESC(FileDescriptorProto_has_name)(file_proto)) { - _upb_DefBuilder_Errf(ctx, "File has no name"); + upb_StringView name = UPB_DESC(FileDescriptorProto_name)(file_proto); + file->name = strviewdup(ctx, name); + if (strlen(file->name) != name.size) { + _upb_DefBuilder_Errf(ctx, "File name contained embedded NULL"); } - file->name = strviewdup(ctx, UPB_DESC(FileDescriptorProto_name)(file_proto)); - upb_StringView package = UPB_DESC(FileDescriptorProto_package)(file_proto); + if (package.size) { _upb_DefBuilder_CheckIdentFull(ctx, package); file->package = strviewdup(ctx, package); @@ -9531,6 +9582,18 @@ void _upb_FileDef_Create(upb_DefBuilder* ctx, file->package = NULL; } + upb_StringView edition = UPB_DESC(FileDescriptorProto_edition)(file_proto); + + if (edition.size == 0) { + file->edition = NULL; + } else { + // TODO(b/267770604): How should we validate this? + file->edition = strviewdup(ctx, edition); + if (strlen(file->edition) != edition.size) { + _upb_DefBuilder_Errf(ctx, "Edition name contained embedded NULL"); + } + } + if (UPB_DESC(FileDescriptorProto_has_syntax)(file_proto)) { upb_StringView syntax = UPB_DESC(FileDescriptorProto_syntax)(file_proto); @@ -9599,8 +9662,7 @@ void _upb_FileDef_Create(upb_DefBuilder* ctx, // Create extensions. exts = UPB_DESC(FileDescriptorProto_extension)(file_proto, &n); file->top_lvl_ext_count = n; - file->top_lvl_exts = - _upb_FieldDefs_New(ctx, n, exts, file->package, NULL, NULL); + file->top_lvl_exts = _upb_Extensions_New(ctx, n, exts, file->package, NULL); // Create messages. msgs = UPB_DESC(FileDescriptorProto_message_type)(file_proto, &n); @@ -9624,11 +9686,19 @@ void _upb_FileDef_Create(upb_DefBuilder* ctx, _upb_FieldDef_Resolve(ctx, file->package, f); } - if (!ctx->layout) { - for (int i = 0; i < file->top_lvl_msg_count; i++) { - upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); - _upb_MessageDef_LinkMiniTable(ctx, m); - } + for (int i = 0; i < file->top_lvl_msg_count; i++) { + upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); + _upb_MessageDef_CreateMiniTable(ctx, (upb_MessageDef*)m); + } + + for (int i = 0; i < file->top_lvl_ext_count; i++) { + upb_FieldDef* f = (upb_FieldDef*)upb_FileDef_TopLevelExtension(file, i); + _upb_FieldDef_BuildMiniTableExtension(ctx, f); + } + + for (int i = 0; i < file->top_lvl_msg_count; i++) { + upb_MessageDef* m = (upb_MessageDef*)upb_FileDef_TopLevelMessage(file, i); + _upb_MessageDef_LinkMiniTable(ctx, m); } if (file->ext_count) { @@ -10141,6 +10211,8 @@ bool upb_MessageDef_IsMessageSet(const upb_MessageDef* m) { static upb_MiniTable* _upb_MessageDef_MakeMiniTable(upb_DefBuilder* ctx, const upb_MessageDef* m) { upb_StringView desc; + // Note: this will assign layout_index for fields, so upb_FieldDef_MiniTable() + // is safe to call only after this call. bool ok = upb_MessageDef_MiniDescriptorEncode(m, ctx->tmp_arena, &desc); if (!ok) _upb_DefBuilder_OomErr(ctx); @@ -10160,23 +10232,6 @@ void _upb_MessageDef_Resolve(upb_DefBuilder* ctx, upb_MessageDef* m) { _upb_FieldDef_Resolve(ctx, m->full_name, f); } - if (!ctx->layout) { - m->layout = _upb_MessageDef_MakeMiniTable(ctx, m); - if (!m->layout) _upb_DefBuilder_OomErr(ctx); - } - -#ifndef NDEBUG - for (int i = 0; i < m->field_count; i++) { - const upb_FieldDef* f = upb_MessageDef_Field(m, i); - const int layout_index = _upb_FieldDef_LayoutIndex(f); - UPB_ASSERT(layout_index < m->layout->field_count); - const upb_MiniTableField* mt_f = &m->layout->fields[layout_index]; - UPB_ASSERT(upb_FieldDef_Type(f) == upb_MiniTableField_Type(mt_f)); - UPB_ASSERT(upb_FieldDef_HasPresence(f) == - upb_MiniTableField_HasPresence(mt_f)); - } -#endif - m->in_message_set = false; for (int i = 0; i < upb_MessageDef_NestedExtensionCount(m); i++) { upb_FieldDef* ext = (upb_FieldDef*)upb_MessageDef_NestedExtension(m, i); @@ -10239,8 +10294,39 @@ void _upb_MessageDef_InsertField(upb_DefBuilder* ctx, upb_MessageDef* m, if (!ok) _upb_DefBuilder_OomErr(ctx); } +void _upb_MessageDef_CreateMiniTable(upb_DefBuilder* ctx, upb_MessageDef* m) { + if (ctx->layout == NULL) { + m->layout = _upb_MessageDef_MakeMiniTable(ctx, m); + } else { + UPB_ASSERT(ctx->msg_count < ctx->layout->msg_count); + m->layout = ctx->layout->msgs[ctx->msg_count++]; + UPB_ASSERT(m->field_count == m->layout->field_count); + + // We don't need the result of this call, but it will assign layout_index + // for all the fields in O(n lg n) time. + _upb_FieldDefs_Sorted(m->fields, m->field_count, ctx->tmp_arena); + } + + for (int i = 0; i < m->nested_msg_count; i++) { + upb_MessageDef* nested = + (upb_MessageDef*)upb_MessageDef_NestedMessage(m, i); + _upb_MessageDef_CreateMiniTable(ctx, nested); + } +} + void _upb_MessageDef_LinkMiniTable(upb_DefBuilder* ctx, const upb_MessageDef* m) { + for (int i = 0; i < upb_MessageDef_NestedExtensionCount(m); i++) { + const upb_FieldDef* ext = upb_MessageDef_NestedExtension(m, i); + _upb_FieldDef_BuildMiniTableExtension(ctx, ext); + } + + for (int i = 0; i < m->nested_msg_count; i++) { + _upb_MessageDef_LinkMiniTable(ctx, upb_MessageDef_NestedMessage(m, i)); + } + + if (ctx->layout) return; + for (int i = 0; i < m->field_count; i++) { const upb_FieldDef* f = upb_MessageDef_Field(m, i); const upb_MessageDef* sub_m = upb_FieldDef_MessageSubDef(f); @@ -10268,9 +10354,17 @@ void _upb_MessageDef_LinkMiniTable(upb_DefBuilder* ctx, } } - for (int i = 0; i < m->nested_msg_count; i++) { - _upb_MessageDef_LinkMiniTable(ctx, upb_MessageDef_NestedMessage(m, i)); +#ifndef NDEBUG + for (int i = 0; i < m->field_count; i++) { + const upb_FieldDef* f = upb_MessageDef_Field(m, i); + const int layout_index = _upb_FieldDef_LayoutIndex(f); + UPB_ASSERT(layout_index < m->layout->field_count); + const upb_MiniTableField* mt_f = &m->layout->fields[layout_index]; + UPB_ASSERT(upb_FieldDef_Type(f) == upb_MiniTableField_Type(mt_f)); + UPB_ASSERT(upb_FieldDef_HasPresence(f) == + upb_MiniTableField_HasPresence(mt_f)); } +#endif } static uint64_t _upb_MessageDef_Modifiers(const upb_MessageDef* m) { @@ -10403,7 +10497,6 @@ static void create_msgdef(upb_DefBuilder* ctx, const char* prefix, m->is_sorted = true; name = UPB_DESC(DescriptorProto_name)(msg_proto); - _upb_DefBuilder_CheckIdentNotFull(ctx, name); m->full_name = _upb_DefBuilder_MakeFullName(ctx, prefix, name); _upb_DefBuilder_Add(ctx, m->full_name, _upb_DefType_Pack(m, UPB_DEFTYPE_MSG)); @@ -10422,17 +10515,6 @@ static void create_msgdef(upb_DefBuilder* ctx, const char* prefix, ok = upb_strtable_init(&m->ntof, n_oneof + n_field, ctx->arena); if (!ok) _upb_DefBuilder_OomErr(ctx); - if (ctx->layout) { - /* create_fielddef() below depends on this being set. */ - UPB_ASSERT(ctx->msg_count < ctx->layout->msg_count); - m->layout = ctx->layout->msgs[ctx->msg_count++]; - UPB_ASSERT(n_field == m->layout->field_count); - } else { - /* Allocate now (to allow cross-linking), populate later. */ - m->layout = _upb_DefBuilder_Alloc( - ctx, sizeof(*m->layout) + sizeof(_upb_FastTable_Entry)); - } - UPB_DEF_SET_OPTIONS(m->opts, DescriptorProto, MessageOptions, msg_proto); m->oneof_count = n_oneof; @@ -10473,7 +10555,7 @@ static void create_msgdef(upb_DefBuilder* ctx, const char* prefix, const UPB_DESC(FieldDescriptorProto)* const* exts = UPB_DESC(DescriptorProto_extension)(msg_proto, &n_ext); m->nested_ext_count = n_ext; - m->nested_exts = _upb_FieldDefs_New(ctx, n_ext, exts, m->full_name, m, NULL); + m->nested_exts = _upb_Extensions_New(ctx, n_ext, exts, m->full_name, m); const UPB_DESC(DescriptorProto)* const* msgs = UPB_DESC(DescriptorProto_nested_type)(msg_proto, &n_msg); @@ -10880,7 +10962,6 @@ static void create_service(upb_DefBuilder* ctx, s->file = _upb_DefBuilder_File(ctx); name = UPB_DESC(ServiceDescriptorProto_name)(svc_proto); - _upb_DefBuilder_CheckIdentNotFull(ctx, name); const char* package = _upb_FileDef_RawPackage(s->file); s->full_name = _upb_DefBuilder_MakeFullName(ctx, package, name); _upb_DefBuilder_Add(ctx, s->full_name, @@ -13648,6 +13729,15 @@ static void encode_msgset_item(upb_encstate* e, encode_tag(e, kUpb_MsgSet_Item, kUpb_WireType_StartGroup); } +static void encode_ext(upb_encstate* e, const upb_Message_Extension* ext, + bool is_message_set) { + if (UPB_UNLIKELY(is_message_set)) { + encode_msgset_item(e, ext); + } else { + encode_field(e, &ext->data, &ext->ext->sub, &ext->ext->field); + } +} + static void encode_message(upb_encstate* e, const upb_Message* msg, const upb_MiniTable* m, size_t* size) { size_t pre_len = e->limit - e->ptr; @@ -13677,12 +13767,17 @@ static void encode_message(upb_encstate* e, const upb_Message* msg, size_t ext_count; const upb_Message_Extension* ext = _upb_Message_Getexts(msg, &ext_count); if (ext_count) { - const upb_Message_Extension* end = ext + ext_count; - for (; ext != end; ext++) { - if (UPB_UNLIKELY(m->ext == kUpb_ExtMode_IsMessageSet)) { - encode_msgset_item(e, ext); - } else { - encode_field(e, &ext->data, &ext->ext->sub, &ext->ext->field); + if (e->options & kUpb_EncodeOption_Deterministic) { + _upb_sortedmap sorted; + _upb_mapsorter_pushexts(&e->sorter, ext, ext_count, &sorted); + while (_upb_sortedmap_nextext(&e->sorter, &sorted, &ext)) { + encode_ext(e, ext, m->ext == kUpb_ExtMode_IsMessageSet); + } + _upb_mapsorter_popmap(&e->sorter, &sorted); + } else { + const upb_Message_Extension* end = ext + ext_count; + for (; ext != end; ext++) { + encode_ext(e, ext, m->ext == kUpb_ExtMode_IsMessageSet); } } } diff --git a/ruby/ext/google/protobuf_c/ruby-upb.h b/ruby/ext/google/protobuf_c/ruby-upb.h index cd703c288fae..0af88d642c20 100755 --- a/ruby/ext/google/protobuf_c/ruby-upb.h +++ b/ruby/ext/google/protobuf_c/ruby-upb.h @@ -1424,179 +1424,6 @@ UPB_INLINE int upb_Log2CeilingSize(int x) { return 1 << upb_Log2Ceiling(x); } #include -#ifndef UPB_MINI_TABLE_MESSAGE_INTERNAL_H_ -#define UPB_MINI_TABLE_MESSAGE_INTERNAL_H_ - - -// Must be last. - -struct upb_Decoder; -typedef const char* _upb_FieldParser(struct upb_Decoder* d, const char* ptr, - upb_Message* msg, intptr_t table, - uint64_t hasbits, uint64_t data); -typedef struct { - uint64_t field_data; - _upb_FieldParser* field_parser; -} _upb_FastTable_Entry; - -typedef enum { - kUpb_ExtMode_NonExtendable = 0, // Non-extendable message. - kUpb_ExtMode_Extendable = 1, // Normal extendable message. - kUpb_ExtMode_IsMessageSet = 2, // MessageSet message. - kUpb_ExtMode_IsMessageSet_ITEM = - 3, // MessageSet item (temporary only, see decode.c) - - // During table building we steal a bit to indicate that the message is a map - // entry. *Only* used during table building! - kUpb_ExtMode_IsMapEntry = 4, -} upb_ExtMode; - -// upb_MiniTable represents the memory layout of a given upb_MessageDef. -// The members are public so generated code can initialize them, -// but users MUST NOT directly read or write any of its members. -struct upb_MiniTable { - const upb_MiniTableSub* subs; - const upb_MiniTableField* fields; - - // Must be aligned to sizeof(void*). Doesn't include internal members like - // unknown fields, extension dict, pointer to msglayout, etc. - uint16_t size; - - uint16_t field_count; - uint8_t ext; // upb_ExtMode, declared as uint8_t so sizeof(ext) == 1 - uint8_t dense_below; - uint8_t table_mask; - uint8_t required_count; // Required fields have the lowest hasbits. - - // To statically initialize the tables of variable length, we need a flexible - // array member, and we need to compile in gnu99 mode (constant initialization - // of flexible array members is a GNU extension, not in C99 unfortunately. - _upb_FastTable_Entry fasttable[]; -}; - -// Map entries aren't actually stored for map fields, they are only used during -// parsing. For parsing, it helps a lot if all map entry messages have the same -// layout. The layout code in mini_table/decode.c will ensure that all map -// entries have this layout. -// -// Note that users can and do create map entries directly, which will also use -// this layout. -// -// NOTE: sync with mini_table/decode.c. -typedef struct { - // We only need 2 hasbits max, but due to alignment we'll use 8 bytes here, - // and the uint64_t helps make this clear. - uint64_t hasbits; - union { - upb_StringView str; // For str/bytes. - upb_value val; // For all other types. - } k; - union { - upb_StringView str; // For str/bytes. - upb_value val; // For all other types. - } v; -} upb_MapEntryData; - -typedef struct { - void* internal_data; - upb_MapEntryData data; -} upb_MapEntry; - -#ifdef __cplusplus -extern "C" { -#endif - -// Computes a bitmask in which the |l->required_count| lowest bits are set, -// except that we skip the lowest bit (because upb never uses hasbit 0). -// -// Sample output: -// requiredmask(1) => 0b10 (0x2) -// requiredmask(5) => 0b111110 (0x3e) -UPB_INLINE uint64_t upb_MiniTable_requiredmask(const upb_MiniTable* l) { - int n = l->required_count; - assert(0 < n && n <= 63); - return ((1ULL << n) - 1) << 1; -} - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -#endif /* UPB_MINI_TABLE_MESSAGE_INTERNAL_H_ */ - -// Must be last. - -#ifdef __cplusplus -extern "C" { -#endif - -// _upb_mapsorter sorts maps and provides ordered iteration over the entries. -// Since maps can be recursive (map values can be messages which contain other -// maps), _upb_mapsorter can contain a stack of maps. - -typedef struct { - upb_tabent const** entries; - int size; - int cap; -} _upb_mapsorter; - -typedef struct { - int start; - int pos; - int end; -} _upb_sortedmap; - -UPB_INLINE void _upb_mapsorter_init(_upb_mapsorter* s) { - s->entries = NULL; - s->size = 0; - s->cap = 0; -} - -UPB_INLINE void _upb_mapsorter_destroy(_upb_mapsorter* s) { - if (s->entries) free(s->entries); -} - -UPB_INLINE bool _upb_sortedmap_next(_upb_mapsorter* s, const upb_Map* map, - _upb_sortedmap* sorted, upb_MapEntry* ent) { - if (sorted->pos == sorted->end) return false; - const upb_tabent* tabent = s->entries[sorted->pos++]; - upb_StringView key = upb_tabstrview(tabent->key); - _upb_map_fromkey(key, &ent->data.k, map->key_size); - upb_value val = {tabent->val.val}; - _upb_map_fromvalue(val, &ent->data.v, map->val_size); - return true; -} - -UPB_INLINE void _upb_mapsorter_popmap(_upb_mapsorter* s, - _upb_sortedmap* sorted) { - s->size = sorted->start; -} - -bool _upb_mapsorter_pushmap(_upb_mapsorter* s, upb_FieldType key_type, - const upb_Map* map, _upb_sortedmap* sorted); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - - -#endif /* UPB_COLLECTIONS_MAP_SORTER_INTERNAL_H_ */ - -/* -** Our memory representation for parsing tables and messages themselves. -** Functions in this file are used by generated code and possibly reflection. -** -** The definitions in this file are internal to upb. -**/ - -#ifndef UPB_MESSAGE_INTERNAL_H_ -#define UPB_MESSAGE_INTERNAL_H_ - -#include -#include - - #ifndef UPB_MESSAGE_EXTENSION_INTERNAL_H_ #define UPB_MESSAGE_EXTENSION_INTERNAL_H_ @@ -1869,6 +1696,191 @@ const upb_Message_Extension* _upb_Message_Getext( #endif /* UPB_MESSAGE_EXTENSION_INTERNAL_H_ */ +#ifndef UPB_MINI_TABLE_MESSAGE_INTERNAL_H_ +#define UPB_MINI_TABLE_MESSAGE_INTERNAL_H_ + + +// Must be last. + +struct upb_Decoder; +typedef const char* _upb_FieldParser(struct upb_Decoder* d, const char* ptr, + upb_Message* msg, intptr_t table, + uint64_t hasbits, uint64_t data); +typedef struct { + uint64_t field_data; + _upb_FieldParser* field_parser; +} _upb_FastTable_Entry; + +typedef enum { + kUpb_ExtMode_NonExtendable = 0, // Non-extendable message. + kUpb_ExtMode_Extendable = 1, // Normal extendable message. + kUpb_ExtMode_IsMessageSet = 2, // MessageSet message. + kUpb_ExtMode_IsMessageSet_ITEM = + 3, // MessageSet item (temporary only, see decode.c) + + // During table building we steal a bit to indicate that the message is a map + // entry. *Only* used during table building! + kUpb_ExtMode_IsMapEntry = 4, +} upb_ExtMode; + +// upb_MiniTable represents the memory layout of a given upb_MessageDef. +// The members are public so generated code can initialize them, +// but users MUST NOT directly read or write any of its members. +struct upb_MiniTable { + const upb_MiniTableSub* subs; + const upb_MiniTableField* fields; + + // Must be aligned to sizeof(void*). Doesn't include internal members like + // unknown fields, extension dict, pointer to msglayout, etc. + uint16_t size; + + uint16_t field_count; + uint8_t ext; // upb_ExtMode, declared as uint8_t so sizeof(ext) == 1 + uint8_t dense_below; + uint8_t table_mask; + uint8_t required_count; // Required fields have the lowest hasbits. + + // To statically initialize the tables of variable length, we need a flexible + // array member, and we need to compile in gnu99 mode (constant initialization + // of flexible array members is a GNU extension, not in C99 unfortunately. + _upb_FastTable_Entry fasttable[]; +}; + +// Map entries aren't actually stored for map fields, they are only used during +// parsing. For parsing, it helps a lot if all map entry messages have the same +// layout. The layout code in mini_table/decode.c will ensure that all map +// entries have this layout. +// +// Note that users can and do create map entries directly, which will also use +// this layout. +// +// NOTE: sync with mini_table/decode.c. +typedef struct { + // We only need 2 hasbits max, but due to alignment we'll use 8 bytes here, + // and the uint64_t helps make this clear. + uint64_t hasbits; + union { + upb_StringView str; // For str/bytes. + upb_value val; // For all other types. + } k; + union { + upb_StringView str; // For str/bytes. + upb_value val; // For all other types. + } v; +} upb_MapEntryData; + +typedef struct { + void* internal_data; + upb_MapEntryData data; +} upb_MapEntry; + +#ifdef __cplusplus +extern "C" { +#endif + +// Computes a bitmask in which the |l->required_count| lowest bits are set, +// except that we skip the lowest bit (because upb never uses hasbit 0). +// +// Sample output: +// requiredmask(1) => 0b10 (0x2) +// requiredmask(5) => 0b111110 (0x3e) +UPB_INLINE uint64_t upb_MiniTable_requiredmask(const upb_MiniTable* l) { + int n = l->required_count; + assert(0 < n && n <= 63); + return ((1ULL << n) - 1) << 1; +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* UPB_MINI_TABLE_MESSAGE_INTERNAL_H_ */ + +// Must be last. + +#ifdef __cplusplus +extern "C" { +#endif + +// _upb_mapsorter sorts maps and provides ordered iteration over the entries. +// Since maps can be recursive (map values can be messages which contain other +// maps), _upb_mapsorter can contain a stack of maps. + +typedef struct { + void const** entries; + int size; + int cap; +} _upb_mapsorter; + +typedef struct { + int start; + int pos; + int end; +} _upb_sortedmap; + +UPB_INLINE void _upb_mapsorter_init(_upb_mapsorter* s) { + s->entries = NULL; + s->size = 0; + s->cap = 0; +} + +UPB_INLINE void _upb_mapsorter_destroy(_upb_mapsorter* s) { + if (s->entries) free(s->entries); +} + +UPB_INLINE bool _upb_sortedmap_next(_upb_mapsorter* s, const upb_Map* map, + _upb_sortedmap* sorted, upb_MapEntry* ent) { + if (sorted->pos == sorted->end) return false; + const upb_tabent* tabent = (const upb_tabent*)s->entries[sorted->pos++]; + upb_StringView key = upb_tabstrview(tabent->key); + _upb_map_fromkey(key, &ent->data.k, map->key_size); + upb_value val = {tabent->val.val}; + _upb_map_fromvalue(val, &ent->data.v, map->val_size); + return true; +} + +UPB_INLINE bool _upb_sortedmap_nextext(_upb_mapsorter* s, + _upb_sortedmap* sorted, + const upb_Message_Extension** ext) { + if (sorted->pos == sorted->end) return false; + *ext = (const upb_Message_Extension*)s->entries[sorted->pos++]; + return true; +} + +UPB_INLINE void _upb_mapsorter_popmap(_upb_mapsorter* s, + _upb_sortedmap* sorted) { + s->size = sorted->start; +} + +bool _upb_mapsorter_pushmap(_upb_mapsorter* s, upb_FieldType key_type, + const upb_Map* map, _upb_sortedmap* sorted); + +bool _upb_mapsorter_pushexts(_upb_mapsorter* s, + const upb_Message_Extension* exts, size_t count, + _upb_sortedmap* sorted); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + + +#endif /* UPB_COLLECTIONS_MAP_SORTER_INTERNAL_H_ */ + +/* +** Our memory representation for parsing tables and messages themselves. +** Functions in this file are used by generated code and possibly reflection. +** +** The definitions in this file are internal to upb. +**/ + +#ifndef UPB_MESSAGE_INTERNAL_H_ +#define UPB_MESSAGE_INTERNAL_H_ + +#include +#include + + #ifndef UPB_MINI_TABLE_EXTENSION_REGISTRY_H_ #define UPB_MINI_TABLE_EXTENSION_REGISTRY_H_ @@ -8801,6 +8813,7 @@ bool upb_FileDef_HasOptions(const upb_FileDef* f); const char* upb_FileDef_Name(const upb_FileDef* f); const UPB_DESC(FileOptions) * upb_FileDef_Options(const upb_FileDef* f); const char* upb_FileDef_Package(const upb_FileDef* f); +const char* upb_FileDef_Edition(const upb_FileDef* f); const upb_DefPool* upb_FileDef_Pool(const upb_FileDef* f); const upb_FileDef* upb_FileDef_PublicDependency(const upb_FileDef* f, int i); @@ -9913,7 +9926,7 @@ size_t _upb_DefPool_BytesLoaded(const upb_DefPool* s); upb_ExtensionRegistry* _upb_DefPool_ExtReg(const upb_DefPool* s); bool _upb_DefPool_InsertExt(upb_DefPool* s, const upb_MiniTableExtension* ext, - upb_FieldDef* f); + const upb_FieldDef* f); bool _upb_DefPool_InsertSym(upb_DefPool* s, upb_StringView sym, upb_value v, upb_Status* status); bool _upb_DefPool_LookupSym(const upb_DefPool* s, const char* sym, size_t size, @@ -10039,23 +10052,6 @@ UPB_INLINE upb_FileDef* _upb_DefBuilder_File(const upb_DefBuilder* ctx) { void _upb_DefBuilder_CheckIdentSlow(upb_DefBuilder* ctx, upb_StringView name, bool full); -// Verify a relative identifier string. The loop is branchless for speed. -UPB_INLINE void _upb_DefBuilder_CheckIdentNotFull(upb_DefBuilder* ctx, - upb_StringView name) { - bool good = name.size > 0; - - for (size_t i = 0; i < name.size; i++) { - const char c = name.data[i]; - const char d = c | 0x20; // force lowercase - const bool is_alpha = (('a' <= d) & (d <= 'z')) | (c == '_'); - const bool is_numer = ('0' <= c) & (c <= '9') & (i != 0); - - good &= is_alpha | is_numer; - } - - if (!good) _upb_DefBuilder_CheckIdentSlow(ctx, name, false); -} - // Verify a full identifier string. This is slightly more complicated than // verifying a relative identifier string because we must track '.' chars. UPB_INLINE void _upb_DefBuilder_CheckIdentFull(upb_DefBuilder* ctx, @@ -10159,6 +10155,14 @@ int _upb_FieldDef_LayoutIndex(const upb_FieldDef* f); uint64_t _upb_FieldDef_Modifiers(const upb_FieldDef* f); void _upb_FieldDef_Resolve(upb_DefBuilder* ctx, const char* prefix, upb_FieldDef* f); +void _upb_FieldDef_BuildMiniTableExtension(upb_DefBuilder* ctx, + const upb_FieldDef* f); + +// Allocate and initialize an array of |n| extensions (field defs). +upb_FieldDef* _upb_Extensions_New( + upb_DefBuilder* ctx, int n, + const UPB_DESC(FieldDescriptorProto) * const* protos, const char* prefix, + upb_MessageDef* m); // Allocate and initialize an array of |n| field defs. upb_FieldDef* _upb_FieldDefs_New( @@ -10223,6 +10227,7 @@ bool _upb_MessageDef_Insert(upb_MessageDef* m, const char* name, size_t size, void _upb_MessageDef_InsertField(upb_DefBuilder* ctx, upb_MessageDef* m, const upb_FieldDef* f); bool _upb_MessageDef_IsValidExtensionNumber(const upb_MessageDef* m, int n); +void _upb_MessageDef_CreateMiniTable(upb_DefBuilder* ctx, upb_MessageDef* m); void _upb_MessageDef_LinkMiniTable(upb_DefBuilder* ctx, const upb_MessageDef* m); void _upb_MessageDef_Resolve(upb_DefBuilder* ctx, upb_MessageDef* m);