From d946ccabb19b35f1ba990c16b9f80c76aba04b99 Mon Sep 17 00:00:00 2001 From: Tomas Abrahamsson Date: Mon, 13 Oct 2014 22:44:08 +0200 Subject: [PATCH 1/8] Parse oneof The oneof type of fields is described here: https://developers.google.com/protocol-buffers/docs/proto#oneof In gpb, each message had a list of #?gpb_field{}s. Now each message has a list of elements that can be either #?gpb_field{} or elements at that record position. Example: Given the following protobuf messages message m1 { required uint32 f1 = 1; oneof x { uint32 a1 = 2; uint32 a2 = 3; } } This would be parsed into the following structure: [{{msg,m1}, [#?gpb_field{name=f1, ...}, #gpb_oneof{name=x, fields=[#?gpb_field{name=a1}, #?gpb_field{name=a2}]}]}]. I've noticed: * No name or field number of any oneof field alternatives may appear elsewhere in the message, not even inside other oneofs. * The interpretation is to be that all oneof alternatives, a1 and a2 in the example above are to be treated as if had they been optional fields directly in the m1, except at most one of a1 and a2 can be defined. --- include/gpb.hrl | 6 ++++ src/gpb_parse.yrl | 61 +++++++++++++++++++++++++++++++++------- src/gpb_scan.xrl | 1 + test/gpb_parse_tests.erl | 47 +++++++++++++++++++++++++++++++ 4 files changed, 105 insertions(+), 10 deletions(-) diff --git a/include/gpb.hrl b/include/gpb.hrl index c90eba96..54e75c1e 100644 --- a/include/gpb.hrl +++ b/include/gpb.hrl @@ -57,4 +57,10 @@ output }). +-record(gpb_oneof, + {name :: atom(), + rnum :: pos_integer(), %% field number in the record + fields :: [#?gpb_field{}] %% all fields have the same rnum + }). + -endif. diff --git a/src/gpb_parse.yrl b/src/gpb_parse.yrl index 660733d4..ffe6fb8f 100644 --- a/src/gpb_parse.yrl +++ b/src/gpb_parse.yrl @@ -25,11 +25,12 @@ Nonterminals enum_def enum_fields enum_field opt_enum_opts enum_opts enum_opt message_def msg_elems msg_elem - opt_field_opts field_opts field_opt cardinality type + opt_field_opts field_opts field_opt occurrence type package_def import_def identifiers extend_def extensions_def exts ext + oneof_def oneof_elems oneof_elem option_def service_def rpc_defs rpc_def m_opts name @@ -51,6 +52,7 @@ Terminals import option extensions extend max to + oneof service rpc returns packed deprecated '.' ';' '(' ')' '{' '}' '[' ']' '=' ',' @@ -123,13 +125,13 @@ msg_elems -> msg_elem msg_elems: ['$1' | '$2']. msg_elems -> ';' msg_elems: '$2'. msg_elems -> '$empty': []. -msg_elem -> cardinality type fidentifier '=' dec_lit ';': +msg_elem -> occurrence type fidentifier '=' dec_lit ';': #?gpb_field{occurrence='$1', type='$2', name=identifier_name('$3'), fnum=literal_value('$5'), opts=[]}. -msg_elem -> cardinality type fidentifier '=' dec_lit '[' opt_field_opts ']' ';': +msg_elem -> occurrence type fidentifier '=' dec_lit '[' opt_field_opts ']' ';': #?gpb_field{occurrence='$1', type='$2', name=identifier_name('$3'), @@ -138,6 +140,7 @@ msg_elem -> cardinality type fidentifier '=' dec_lit '[' opt_field_opts ']' ';': msg_elem -> message_def: '$1'. msg_elem -> enum_def: '$1'. msg_elem -> extensions_def: {extensions,lists:sort('$1')}. +msg_elem -> oneof_def: '$1'. fidentifier -> identifier: '$1'. fidentifier -> package: kw_to_identifier('$1'). @@ -190,9 +193,9 @@ field_opt -> deprecated '=' bool_lit: {deprecated, literal_value('$3')}. field_opt -> name: {identifier_name('$1'), true}. field_opt -> name '=' constant: {identifier_name('$1'), '$3'}. -cardinality -> required: required. -cardinality -> optional: optional. -cardinality -> repeated: repeated. +occurrence -> required: required. +occurrence -> optional: optional. +occurrence -> repeated: repeated. type -> double: double. type -> float: float. @@ -234,6 +237,26 @@ ext -> integer: {'$1','$1'}. ext -> integer to integer: {'$1','$3'}. ext -> integer to max: {'$1',max}. +oneof_def -> 'oneof' identifier '{' oneof_elems '}': + #gpb_oneof{name=identifier_name('$2'), + fields='$4'}. + +oneof_elems -> oneof_elem oneof_elems: ['$1' | '$2']. +oneof_elems -> oneof_elem: ['$1']. + +oneof_elem -> type fidentifier '=' dec_lit ';': + #?gpb_field{occurrence=optional, + type='$1', + name=identifier_name('$2'), + fnum=literal_value('$4'), + opts=[]}. +oneof_elem -> type fidentifier '=' dec_lit '[' opt_field_opts ']' ';': + #?gpb_field{occurrence=optional, + type='$1', + name=identifier_name('$2'), + fnum=literal_value('$4'), + opts='$6'}. + extend_def -> extend identifier '{' msg_elems '}': {{extend,identifier_name('$2')},'$4'}. @@ -381,6 +404,8 @@ flatten_fields(FieldsOrDefs, FullName) -> {RFields2, Defs2} = lists:foldl(fun(#?gpb_field{}=F, {Fs,Ds}) -> {[F | Fs], Ds}; + (#gpb_oneof{}=O, {Fs,Ds}) -> + {[O | Fs], Ds}; (Def, {Fs,Ds}) -> QDefs = flatten_qualify_defnames([Def], FullName), {Fs, QDefs++Ds} @@ -424,7 +449,11 @@ resolve_field_refs(Fields, Defs, Root, FullName, Reasons) -> {Field, [Reason | Acc]} end; (#?gpb_field{}=Field, Acc) -> - {Field, Acc} + {Field, Acc}; + (#gpb_oneof{fields=OFields1}=Oneof, Acc) -> + {OFields2, Acc2} = + resolve_field_refs(OFields1, Defs, Root, FullName, Acc), + {Oneof#gpb_oneof{fields=OFields2}, Acc2} end, Reasons, Fields). @@ -568,6 +597,9 @@ verify_field_defaults({{msg,M}, Fields}, AllDefs) -> false -> ok end, + add_acc(Acc, Res); + (#gpb_oneof{fields=OFields}, Acc) -> + Res = verify_field_defaults({{msg,M},OFields}, AllDefs), add_acc(Acc, Res) end, ok, @@ -696,7 +728,9 @@ reformat_fields(Fields) -> fun(#?gpb_field{type={T,Nm}}=F) -> F#?gpb_field{type={T,reformat_name(Nm)}}; (#?gpb_field{}=F) -> - F + F; + (#gpb_oneof{fields=Fs}=O) -> + O#gpb_oneof{fields=reformat_fields(Fs)} end, Fields). @@ -741,7 +775,12 @@ enumerate_msg_fields(Defs) -> Defs). enumerate_fields(Fields) -> - lists:map(fun({I, #?gpb_field{}=F}) -> F#?gpb_field{rnum=I} end, + lists:map(fun({I, #?gpb_field{}=F}) -> + F#?gpb_field{rnum=I}; + ({I, #gpb_oneof{fields=Fs}=O}) -> + NewFields = [F#?gpb_field{rnum=I} || F <- Fs], + O#gpb_oneof{rnum=I, fields=NewFields} + end, index_seq(2, Fields)). index_seq(_Start, []) -> []; @@ -759,7 +798,9 @@ normalize_msg_field_options(Defs) -> normalize_field_options(Fields) -> lists:map(fun(#?gpb_field{opts=Opts}=F) -> Opts1 = normalize_field_options_2(Opts), - F#?gpb_field{opts = Opts1} + F#?gpb_field{opts = Opts1}; + (#gpb_oneof{fields=Fs}=O) -> + O#gpb_oneof{fields=normalize_field_options(Fs)} end, Fields). diff --git a/src/gpb_scan.xrl b/src/gpb_scan.xrl index 8ae4a3b3..f1213502 100644 --- a/src/gpb_scan.xrl +++ b/src/gpb_scan.xrl @@ -30,6 +30,7 @@ Rules. required : {token, {required,TokenLine}}. optional : {token, {optional,TokenLine}}. repeated : {token, {repeated,TokenLine}}. +oneof : {token, {oneof,TokenLine}}. double : {token, {double,TokenLine}}. float : {token, {float,TokenLine}}. diff --git a/test/gpb_parse_tests.erl b/test/gpb_parse_tests.erl index 60cf8a2d..93479352 100644 --- a/test/gpb_parse_tests.erl +++ b/test/gpb_parse_tests.erl @@ -30,6 +30,29 @@ parses_simple_msg_test() -> " optional uint32 x = 1;", "}"]). +parses_simple_oneof_test() -> + {ok, [{{msg,'Msg'}, + [#gpb_oneof{ + name=x, + fields=[#field{name=a1, fnum=1, type=uint32, occurrence=optional}, + #field{name=a2, fnum=2, type=string, occurrence=optional} + ]}]}]=Defs} = + parse_lines( + ["message Msg {", + " oneof x {", + " uint32 a1 = 1;", + " string a2 = 2;", + " };", + "}"]), + %% Verify oneof fields are enumerated to get the same rnum + %% (since they occupy the same record position) + [{{msg,'Msg'}, [#gpb_oneof{ + name=x, + rnum=2, + fields=[#field{name=a1, rnum=2}, + #field{name=a2, rnum=2}]}]}] = + do_process_sort_defs(Defs). + parses_default_value_test() -> {ok, [{{msg,'Msg'}, [#?gpb_field{name=x, type=uint32, fnum=1, occurrence=optional, @@ -88,6 +111,30 @@ parses_relative_nested_messages_test() -> {{msg,'m1.m5'}, [#?gpb_field{name=fx}]}] = do_process_sort_defs(Elems). +parses_relative_nested_messages_with_oneof_test() -> + {ok, Elems} = parse_lines(["message t1 {", + " message t2 {", + " required uint32 bb = 18;", + " }", + "}", + "message m1 {", + " message m2 {", + " required uint32 aa = 17;", + " }", + " oneof x {", + " .t1.t2 xa1 = 2;", + " m2 xa2 = 3;", + " }", + "}"]), + [{{msg,m1}, [#gpb_oneof{ + name=x, + fields=[#?gpb_field{name=xa1, type={msg,'t1.t2'}}, + #?gpb_field{name=xa2, type={msg,'m1.m2'}}]}]}, + {{msg,'m1.m2'}, [#?gpb_field{}]}, + {{msg,'t1'}, []}, + {{msg,'t1.t2'}, [#?gpb_field{}]}] = + do_process_sort_defs(Elems). + parses_circular_messages_test() -> {ok, Elems} = parse_lines(["message m1 {", " optional m1 f = 1;", From e8fa7b661a861b522530633c8df300c4b9d768c0 Mon Sep 17 00:00:00 2001 From: Tomas Abrahamsson Date: Wed, 15 Oct 2014 01:28:52 +0200 Subject: [PATCH 2/8] Document internal representation of oneof For oneof fields, input to encoding, as well as output from decoding, is as follows, the oneof field is either undefined or a two-tuple, {Name, Value}. Example: Given the following protobuf messages message m1 { oneof x { uint32 alt1 = 1; m2 alt2 = 2; }; } message m2 { repeated uint32 f1 = 1; } Then the following are valid "erlang-native" types for gpb for this message: #m1{x = undefined} #m1{x = {alt1, 4711}} #m1{x = {alt2, #m2{f1 = [17, 18, 19]}}} --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 6b8ffd1e..d29e8926 100644 --- a/README.md +++ b/README.md @@ -188,6 +188,8 @@ Mapping of protocol buffer datatypes to erlang string unicode string, thus list of integers ---------------------------------------------------------------- bytes binary + ---------------------------------------------------------------- + oneof {ChosenFieldName, Value} Interaction with rebar From 266b9dace6b945cc7001c4ca6767d56e6e02a39e Mon Sep 17 00:00:00 2001 From: Tomas Abrahamsson Date: Wed, 15 Oct 2014 01:29:23 +0200 Subject: [PATCH 3/8] Handle oneof in gpb --- src/gpb.erl | 120 ++++++++++++++++++++++++++++++++++++++++++--- test/gpb_tests.erl | 93 +++++++++++++++++++++++++++++++++++ 2 files changed, 205 insertions(+), 8 deletions(-) diff --git a/src/gpb.erl b/src/gpb.erl index c0b8abea..25274423 100644 --- a/src/gpb.erl +++ b/src/gpb.erl @@ -146,6 +146,8 @@ new_initial_msg({msg,MsgName}=MsgKey, MsgDefs) -> SubMsg = new_initial_msg(FMsgKey, MsgDefs), setelement(RNum, Record, SubMsg); (#?gpb_field{}, Record) -> + Record; + (#gpb_oneof{}, Record) -> Record end, erlang:make_tuple(length(MsgDef)+1, undefined, [{1,MsgName}]), @@ -155,11 +157,11 @@ decode_field(Bin, MsgDef, MsgDefs, Msg) when byte_size(Bin) > 0 -> {Key, Rest} = decode_varint(Bin, 32), FieldNum = Key bsr 3, WireType = Key band 7, - case lists:keysearch(FieldNum, #?gpb_field.fnum, MsgDef) of + case find_field(FieldNum, MsgDef) of false -> Rest2 = skip_field(Rest, WireType), decode_field(Rest2, MsgDef, MsgDefs, Msg); - {value, #?gpb_field{type = FieldType, rnum=RNum} = FieldDef} -> + {#?gpb_field{name=FName, type=FieldType, rnum=RNum}=FieldDef, IsOneof} -> case fielddef_matches_wiretype_get_packed(WireType, FieldDef) of {yes,true} -> AccSeq = element(RNum, Msg), @@ -169,7 +171,11 @@ decode_field(Bin, MsgDef, MsgDefs, Msg) when byte_size(Bin) > 0 -> decode_field(Rest2, MsgDef, MsgDefs, NewMsg); {yes,false} -> {NewValue, Rest2} = decode_type(FieldType, Rest, MsgDefs), - NewMsg = add_field(NewValue, FieldDef, MsgDefs, Msg), + NewMsg = if IsOneof -> + setelement(RNum, Msg, {FName, NewValue}); + not IsOneof -> + add_field(NewValue, FieldDef, MsgDefs, Msg) + end, decode_field(Rest2, MsgDef, MsgDefs, NewMsg); no -> Rest2 = skip_field(Rest, WireType), @@ -187,6 +193,18 @@ decode_field(<<>>, MsgDef, _MsgDefs, Record0) -> Record0, RepeatedRNums). +find_field(N, [#?gpb_field{fnum=N}=F | _]) -> + {F, false}; +find_field(N, [#?gpb_field{} | Rest]) -> + find_field(N, Rest); +find_field(N, [#gpb_oneof{fields=Fs} | Rest]) -> + case lists:keyfind(N, #?gpb_field.fnum, Fs) of + #?gpb_field{}=F -> {F, true}; + false -> find_field(N, Rest) + end; +find_field(_, []) -> + false. + fielddef_matches_wiretype_get_packed(WireType, #?gpb_field{type=Type}=FieldDef)-> IsPacked = is_packed(FieldDef), ExpectedWireType = if IsPacked -> encode_wiretype(bytes); @@ -346,6 +364,35 @@ merge_msgs(PrevMsg, NewMsg, MsgDefs) case element(RNum, NewMsg) of undefined -> AccRecord; NewValue -> setelement(RNum, AccRecord, NewValue) + end; + (#gpb_oneof{rnum=RNum, fields=OFields}, AccRecord) -> + %% The language guide for oneof says that + %% + %% "If the parser encounters multiple members of the + %% same oneof on the wire, only the last member seen + %% is used in the parsed message." + %% + %% In practice, this seems to mean they are merged, + %% at least according to experiments with generated c++ code. + %% + case {element(RNum, AccRecord), element(RNum, NewMsg)} of + {undefined, undefined} -> + AccRecord; + {undefined, NewElem} -> + setelement(RNum, AccRecord, NewElem); + {_PrevElem, undefined} -> + AccRecord; + {{OFName, PrevValue}, {OFName, NewValue}=NewElem} -> + case lists:keyfind(OFName, #?gpb_field.name, OFields) of + #?gpb_field{type={msg,_}} -> + NewSub = merge_msgs(PrevValue, NewValue, MsgDefs), + setelement(RNum, AccRecord, {OFName,NewSub}); + #?gpb_field{} -> + setelement(RNum, AccRecord, NewElem) + end; + {_PrevElem, NewElem} -> + %% oneof fields + setelement(RNum, AccRecord, NewElem) end end, PrevMsg, @@ -357,15 +404,25 @@ encode_msg(Msg, MsgDefs) -> MsgDef = keyfetch({msg, MsgName}, MsgDefs), encode_2(MsgDef, Msg, MsgDefs, <<>>). -encode_2([Field | Rest], Msg, MsgDefs, Acc) -> +encode_2([#?gpb_field{occurrence=Occurrence}=Field | Rest], Msg, MsgDefs, Acc) -> EncodedField = - case {Field#?gpb_field.occurrence, is_packed(Field)} of + case {Occurrence, is_packed(Field)} of {repeated, true} -> encode_packed(Field, Msg, MsgDefs); _ -> encode_field(Field, Msg, MsgDefs) end, encode_2(Rest, Msg, MsgDefs, <>); +encode_2([#gpb_oneof{fields=Fields, rnum=RNum} | Rest], Msg, MsgDefs, Acc) -> + case element(RNum, Msg) of + {Name, Value} -> + Field = lists:keyfind(Name, #?gpb_field.name, Fields), + NewAcc = encode_2([Field], setelement(RNum, Msg, Value), MsgDefs, + Acc), + encode_2(Rest, Msg, MsgDefs, NewAcc); + undefined -> + encode_2(Rest, Msg, MsgDefs, Acc) + end; encode_2([], _Msg, _MsgDefs, Acc) -> Acc. @@ -541,7 +598,22 @@ verify_fields(Msg, Fields, Path, MsgDefs) when tuple_size(Msg) lists:foreach( fun(#?gpb_field{name=Name, type=Type, rnum=RNum, occurrence=Occurrence}) -> Value = element(RNum, Msg), - verify_value(Value, Type, Occurrence, Path++[Name], MsgDefs) + verify_value(Value, Type, Occurrence, Path++[Name], MsgDefs); + (#gpb_oneof{name=Name, rnum=RNum, fields=OFields}) -> + case element(RNum, Msg) of + {FName, Value} -> + case lists:keyfind(FName, #?gpb_field.name, OFields) of + #?gpb_field{type=Type} -> + verify_value(Value, Type, optional, Path++[Name], + MsgDefs); + false -> + mk_type_error(bad_oneof_indicator, FName, Path) + end; + undefined -> + ok; + Other -> + mk_type_error(bad_oneof_value, Other, Path) + end end, Fields); verify_fields(Msg, _Fields, Path, _MsgDefs) -> @@ -670,20 +742,52 @@ proplists_to_defs_records(Defs) -> || Def <- Defs]. field_records_to_proplists(Fields) when is_list(Fields) -> - [field_record_to_proplist(F) || F <- Fields]. + [case F of + #?gpb_field{} -> field_record_to_proplist(F); + #gpb_oneof{} -> oneof_record_to_proplist(F) + end + || F <- Fields]. field_record_to_proplist(#?gpb_field{}=F) -> Names = record_info(fields, ?gpb_field), lists:zip(Names, tl(tuple_to_list(F))). +oneof_record_to_proplist(#gpb_oneof{}=F) -> + Names = record_info(fields, gpb_oneof), + [if FName == fields -> {FName, field_records_to_proplists(FValue)}; + FName /= fields -> {FName, FValue} + end + || {FName, FValue} <- lists:zip(Names, tl(tuple_to_list(F)))]. + proplists_to_field_records(PLs) -> - [proplist_to_field_record(PL) || PL <- PLs]. + [case {is_field_pl(PL), is_oneof_pl(PL)} of + {true, false} -> proplist_to_field_record(PL); + {false, true} -> proplist_to_oneof_record(PL) + end + || PL <- PLs]. + +is_field_pl(PL) -> are_all_fields_present(record_info(fields, ?gpb_field), PL). + +is_oneof_pl(PL) -> are_all_fields_present(record_info(fields, gpb_oneof), PL). + +are_all_fields_present(FNames, PL) -> + lists:all(fun(FName) -> lists:keymember(FName, 1, PL) end, + FNames). proplist_to_field_record(PL) when is_list(PL) -> Names = record_info(fields, ?gpb_field), RFields = [proplists:get_value(Name, PL) || Name <- Names], list_to_tuple([?gpb_field | RFields]). +proplist_to_oneof_record(PL) when is_list(PL) -> + Names = record_info(fields, gpb_oneof), + RFields = [proplists:get_value(Name, PL) || Name <- Names], + list_to_tuple( + [gpb_oneof | [if N == fields -> proplists_to_field_records(V); + N /= fields -> V + end + || {N, V} <- lists:zip(Names, RFields)]]). + rpc_records_to_proplists(Rpcs) when is_list(Rpcs) -> [rpc_record_to_proplist(R) || R <- Rpcs]. diff --git a/test/gpb_tests.erl b/test/gpb_tests.erl index a03242c0..01343003 100644 --- a/test/gpb_tests.erl +++ b/test/gpb_tests.erl @@ -28,6 +28,7 @@ %% but actually seems to work. Better than duplicating tests) -ifndef(gpb_compile_common_tests). %% non-shared code below vvvvvvvvv -module(gpb_tests). + %-compile(export_all). -import(gpb, [decode_msg/3, encode_msg/2, merge_msgs/3, verify_msg/2]). @@ -797,3 +798,95 @@ version_test() -> {ok, [{application, App, PL}]} = file:consult(AppFile), ?assertEqual(S, proplists:get_value(vsn, PL)), ok. + +-ifndef(gpb_compile_common_tests). %% non-shared code below vvvvvvvvv + +skipping_with_oneof_test() -> + Defs = [{{msg,m1}, [#gpb_oneof{ + name=a, rnum=#m1.a, + fields=[#?gpb_field{name=a1, fnum=1, rnum=#m1.a, + type=int32, occurrence=optional, + opts=[]}, + #?gpb_field{name=a2, fnum=2, rnum=#m1.a, + type=int32, occurrence=optional, + opts=[]}]}]}], + #m1{a = undefined} = decode_msg(<<32,150,1>>, m1, Defs). + +encode_oneof_test() -> + Defs = [{{msg,m1}, [#gpb_oneof{ + name=a, rnum=#m1.a, + fields=[#?gpb_field{name=a1, fnum=1, rnum=#m1.a, + type=int32, occurrence=optional, + opts=[]}, + #?gpb_field{name=a2, fnum=2, rnum=#m1.a, + type=int32, occurrence=optional, + opts=[]}]}]}], + <<>> = encode_msg(#m1{a=undefined}, Defs), + <<8,150,1>> = encode_msg(#m1{a={a1,150}}, Defs), + <<16,150,1>> = encode_msg(#m1{a={a2,150}}, Defs). + +decoding_oneof_test() -> + Defs = [{{msg,m1}, [#gpb_oneof{ + name=a, rnum=#m1.a, + fields=[#?gpb_field{name=a1, fnum=1, rnum=#m1.a, + type=int32, occurrence=optional, + opts=[]}, + #?gpb_field{name=a2, fnum=2, rnum=#m1.a, + type=int32, occurrence=optional, + opts=[]}]}]}], + #m1{a = undefined} = decode_msg(<<>>, m1, Defs), + #m1{a = {a1, 150}} = decode_msg(<<8, 150, 1>>, m1, Defs), + #m1{a = {a2, 150}} = decode_msg(<<16, 150, 1>>, m1, Defs). + +merge_oneof_test() -> + Defs = [{{msg,m1}, [#gpb_oneof{ + name=a, rnum=#m1.a, + fields=[#?gpb_field{name=a1, fnum=1, rnum=#m1.a, + type=int32, occurrence=optional, + opts=[]}, + #?gpb_field{name=a2, fnum=2, rnum=#m1.a, + type=int32, occurrence=optional, + opts=[]}]}]}], + #m1{a = undefined} = merge_msgs(#m1{a=undefined}, #m1{a=undefined}, Defs), + #m1{a = {a1, 150}} = merge_msgs(#m1{a={a1, 150}}, #m1{a=undefined}, Defs), + #m1{a = {a1, 150}} = merge_msgs(#m1{a=undefined}, #m1{a={a1, 150}}, Defs), + #m1{a = {a1, 151}} = merge_msgs(#m1{a={a1, 150}}, #m1{a={a1, 151}}, Defs), + #m1{a = {a2, 150}} = merge_msgs(#m1{a={a1, 150}}, #m1{a={a2, 150}}, Defs). + +merge_oneof_msg_test() -> + Defs = [{{msg,m1}, [#gpb_oneof{ + name=a, rnum=#m1.a, + fields=[#?gpb_field{name=x, fnum=1, rnum=#m1.a, + type={msg,m2},occurrence=optional, + opts=[]}, + #?gpb_field{name=y, fnum=2, rnum=#m1.a, + type=int32, occurrence=optional, + opts=[]}]}]}, + {{msg,m2}, [#?gpb_field{name=b, fnum=1, rnum=#m2.b, + type=fixed32, occurrence=repeated}]}], + %% Will get b=[1,2] since messages are merged + #m1{a = {x, #m2{b=[1,2]}}} = merge_msgs(#m1{a={x,#m2{b=[1]}}}, + #m1{a={x,#m2{b=[2]}}}, + Defs), + %% Different oneof fields ==> no merge + #m1{a = {y,150}} = merge_msgs(#m1{a={x,#m2{b=[1]}}}, + #m1{a={y,150}}, + Defs). + +verify_invalid_oneof_test() -> + Defs = [{{msg,m1}, [#gpb_oneof{ + name=a, rnum=#m1.a, + fields=[#?gpb_field{name=a1, fnum=1, rnum=#m1.a, + type=int32, occurrence=optional, + opts=[]}, + #?gpb_field{name=a2, fnum=2, rnum=#m1.a, + type=int32, occurrence=optional, + opts=[]}]}]}], + ok = verify_msg(#m1{a=undefined}, Defs), + ok = verify_msg(#m1{a={a1,150}}, Defs), + ?verify_gpb_err(verify_msg(#m1{a=150}, Defs)), + ?verify_gpb_err(verify_msg(#m1{a={a1,150,3}}, Defs)), + ?verify_gpb_err(verify_msg(#m1{a={a3,150}}, Defs)), + ?verify_gpb_err(verify_msg(#m1{a={a1,false}}, Defs)). + +-endif. From d189eca079718538240eca8364c3c1c66918ad77 Mon Sep 17 00:00:00 2001 From: Tomas Abrahamsson Date: Sat, 18 Oct 2014 21:00:43 +0200 Subject: [PATCH 4/8] Handle oneof in gpb_compile --- src/gpb_compile.erl | 577 +++++++++++++++++++++++++++---------- test/gpb_compile_tests.erl | 13 +- test/gpb_tests.erl | 178 ++++++------ 3 files changed, 515 insertions(+), 253 deletions(-) diff --git a/src/gpb_compile.erl b/src/gpb_compile.erl index 294db58e..7b6fed80 100644 --- a/src/gpb_compile.erl +++ b/src/gpb_compile.erl @@ -941,8 +941,13 @@ locate_import_aux([], Import, _Opts) -> try_topsort_defs(Defs) -> G = digraph:new(), [digraph:add_vertex(G, M) || {{msg,M}, _Fields} <- Defs], - [[digraph:add_edge(G, From, To) || #?gpb_field{type={msg,To}} <- Fields] - || {{msg,From},Fields} <- Defs], + fold_msg_fields(fun(From, #?gpb_field{type={msg,To}}, _) -> + digraph:add_edge(G, From, To); + (_MsgName, _Feild, _Acc) -> + ok + end, + ok, + Defs), case digraph_utils:topsort(G) of false -> digraph:delete(G), @@ -1034,18 +1039,46 @@ find_num_packed_fields(Defs) -> 0, Defs). +%% Loop over all message fields, including oneof-fields +%% Call Fun for all #?gpb_fields{}, skip over non-msg defs fold_msg_fields(Fun, InitAcc, Defs) -> - lists:foldl(fun({{msg, MsgName}, Fields}, Acc) -> - lists:foldl(fun(Field, FAcc) -> - Fun(MsgName, Field, FAcc) - end, - Acc, - Fields); - (_Def, Acc) -> - Acc - end, - InitAcc, - Defs). + lists:foldl( + fun({{msg, MsgName}, Fields}, Acc) -> + FFun = fun(Field, FAcc) -> Fun(MsgName, Field, FAcc) end, + fold_msgdef_fields(FFun, Acc, Fields); + (_Def, Acc) -> + Acc + end, + InitAcc, + Defs). + +fold_msgdef_fields(Fun, InitAcc, Fields) -> + lists:foldl( + fun(#?gpb_field{}=Field, Acc) -> + Fun(Field, Acc); + (#gpb_oneof{fields=OFields}, Acc) -> + lists:foldl(fun(OField, OAcc) -> Fun(OField, OAcc) end, + Acc, + OFields) + end, + InitAcc, + Fields). + +%% The fun takes two args: Fun(#?gpb_field{}, IsOneofField) -> term() +map_msgdef_fields_o(Fun, Fields) -> + lists:reverse( + lists:foldl( + fun(#?gpb_field{}=Field, Acc) -> + [Fun(Field, false) | Acc]; + (#gpb_oneof{name=CFName, fields=OFields}, Acc) -> + IsOneOf = {true, CFName}, + lists:foldl(fun(OField, OAcc) -> [Fun(OField, IsOneOf) | OAcc] + end, + Acc, + OFields) + end, + [], + Fields)). find_num_fields(Defs) -> lists:foldl(fun({MsgName, MsgDef}, Acc) -> @@ -1072,6 +1105,8 @@ find_msgsize(MsgName, Defs, T) -> Result end. +find_msgsize_2([#gpb_oneof{} | _], _AccSize, _Defs, _T) -> + undefined; find_msgsize_2([#?gpb_field{occurrence=repeated} | _], _AccSize, _Defs, _T) -> undefined; find_msgsize_2([#?gpb_field{occurrence=optional} | _], _AccSize, _Defs, _T) -> @@ -1172,7 +1207,7 @@ d_field_pass_method(MsgDef) -> NF == 0 -> pass_as_params; true -> - NumSubMsgFields = length([x || #?gpb_field{type={msg,_}} <- MsgDef]), + NumSubMsgFields = count_submsg_fields(MsgDef), IsMsgDominatedBySubMsgs = NumSubMsgFields / NF > 0.5, if IsMsgDominatedBySubMsgs, NF >= 100 -> pass_as_record; @@ -1181,6 +1216,12 @@ d_field_pass_method(MsgDef) -> end end. +count_submsg_fields(MsgDef) -> + fold_msgdef_fields(fun(#?gpb_field{type={msg,_}}, N) -> N+1; + (#?gpb_field{}, N) -> N + end, + 0, + MsgDef). %% -- generating code ---------------------------------------------- @@ -1414,7 +1455,7 @@ format_msg_encoder(MsgName, [], _Opts) -> <<>> end); format_msg_encoder(MsgName, MsgDef, Opts) -> - FNames = [FName || #?gpb_field{name=FName} <- MsgDef], + FNames = get_field_names(MsgDef), FVars = [var_f_n(I) || I <- lists:seq(1, length(FNames))], BVars = [var_b_n(I) || I <- lists:seq(1, length(FNames)-1)] ++ [last], {EncodeExprs, _} = @@ -1439,7 +1480,7 @@ format_msg_encoder(MsgName, MsgDef, Opts) -> end), "\n", gpb_codegen:format_fn( - mk_fn(e_msg_, MsgName), + FnName, fun('', Bin) -> '' end, @@ -1447,7 +1488,14 @@ format_msg_encoder(MsgName, MsgDef, Opts) -> mapping_match(MsgName, lists:zip(FNames, FVars), Opts)), splice_trees('', EncodeExprs)])]. -field_encode_expr(MsgName, Field, FVar, PrevBVar) -> +get_field_names(MsgDef) -> + [case Field of + #?gpb_field{name=FName} -> FName; + #gpb_oneof{name=FName} -> FName + end + || Field <- MsgDef]. + +field_encode_expr(MsgName, #?gpb_field{}=Field, FVar, PrevBVar) -> FEncoder = mk_field_encode_fn_name(MsgName, Field), #?gpb_field{occurrence=Occurrence, type=Type, fnum=FNum}=Field, Transforms = [replace_tree('', FVar), @@ -1471,7 +1519,32 @@ field_encode_expr(MsgName, Field, FVar, PrevBVar) -> ?expr( ''('', <<''/binary, ''>>), Transforms) - end. + end; +field_encode_expr(MsgName, #gpb_oneof{fields=OFields}, FVar, PrevBVar) -> + OFVar = prefix_var("O", FVar), + OneofClauseTransform = + repeat_clauses( + '', + [begin + MatchPattern = ?expr({'', ''}, + [replace_term('', Name), + replace_tree('', OFVar)]), + %% undefined is already handled, we have a match, + %% the field occurs, as if it had been required + OField2 = OField#?gpb_field{occurrence=required}, + EncExpr = field_encode_expr(MsgName, OField2, OFVar, PrevBVar), + [replace_tree('', MatchPattern), + replace_tree('', EncExpr)] + end + || #?gpb_field{name=Name}=OField <- OFields]), + ?expr(case '' of + undefined -> ''; + '' -> '' + end, + [replace_tree('', FVar), + replace_tree('', PrevBVar), + OneofClauseTransform]). +%%). mk_field_encode_fn_name(MsgName, #?gpb_field{occurrence=repeated, name=FName}) -> mk_fn(e_field_, MsgName, FName); @@ -1491,19 +1564,22 @@ mk_field_encode_fn_name(_MsgName, #?gpb_field{type=Type}) -> mk_fn(e_type_, Type). format_special_field_encoders(Defs, AnRes) -> - [[format_field_encoder(MsgName, FieldDef, AnRes) - || #?gpb_field{occurrence=Occ, type=Type}=FieldDef <- MsgDef, - Occ == repeated orelse is_msg_type(Type)] - || {{msg,MsgName}, MsgDef} <- Defs]. - -is_msg_type({msg,_}) -> true; -is_msg_type(_) -> false. + lists:reverse( %% so generated auxiliary functions come in logical order + fold_msg_fields( + fun(MsgName, #?gpb_field{occurrence=repeated}=FieldDef, Acc) -> + [format_field_encoder(MsgName, FieldDef, AnRes) | Acc]; + (MsgName, #?gpb_field{type={msg,_}}=FieldDef, Acc)-> + [format_field_encoder(MsgName, FieldDef, AnRes) | Acc]; + (_MsgName, #?gpb_field{}, Acc) -> + Acc + end, + [], + Defs)). format_field_encoder(MsgName, FieldDef, AnRes) -> #?gpb_field{occurrence=Occurrence} = FieldDef, - [possibly_format_mfield_encoder(MsgName, - FieldDef#?gpb_field{occurrence=required}, - AnRes), + RFieldDef = FieldDef#?gpb_field{occurrence=required}, + [possibly_format_mfield_encoder(MsgName, RFieldDef, AnRes), case {Occurrence, is_packed(FieldDef)} of {repeated, false} -> format_repeated_field_encoder2(MsgName, FieldDef); {repeated, true} -> format_packed_field_encoder2(MsgName, FieldDef); @@ -1909,12 +1985,17 @@ format_msg_generic_decoder(Bindings, MsgName, MsgDef, AnRes, Opts) -> MsgName, MsgDef, AnRes, Opts))]). msg_decoder_initial_params(MsgName, MsgDef, AnRes, Opts) -> - FNVExprs = [case Occurrence of - repeated -> {FName, [], ?expr([])}; - required -> {FName, undefined, ?expr(undefined)}; - optional -> {FName, undefined, ?expr(undefined)} + FNVExprs = [case Field of + #?gpb_field{name=FName, occurrence=Occurrence} -> + case Occurrence of + repeated -> {FName, [], ?expr([])}; + required -> {FName, undefined, ?expr(undefined)}; + optional -> {FName, undefined, ?expr(undefined)} + end; + #gpb_oneof{name=FName} -> + {FName, undefined, ?expr(undefined)} end - || #?gpb_field{name=FName, occurrence=Occurrence} <- MsgDef], + || Field <- MsgDef], case get_field_pass(MsgName, AnRes) of pass_as_params -> [Expr || {_FName, _Value, Expr} <- FNVExprs]; @@ -2037,16 +2118,17 @@ decoder_skip_calls(Bindings, MsgName) -> replace_term(skip_32, mk_fn(skip_32_, MsgName))]). decoder_field_selectors(MsgName, MsgDef) -> - [begin - Wiretype = case is_packed(FieldDef) of - true -> gpb:encode_wiretype(bytes); - false -> gpb:encode_wiretype(Type) - end, - Selector = (FNum bsl 3) bor Wiretype, - DecodeFn = mk_fn(d_field_, MsgName, FName), - {Selector, DecodeFn} - end - || #?gpb_field{fnum=FNum, type=Type, name=FName}=FieldDef <- MsgDef]. + map_msgdef_fields_o( + fun(#?gpb_field{name=FName, fnum=FNum, type=Type}=FieldDef, _IsOneof) -> + Wiretype = case is_packed(FieldDef) of + true -> gpb:encode_wiretype(bytes); + false -> gpb:encode_wiretype(Type) + end, + Selector = (FNum bsl 3) bor Wiretype, + DecodeFn = mk_fn(d_field_, MsgName, FName), + {Selector, DecodeFn} + end, + MsgDef). decoder_finalize_result(Params, FFields, MsgName, MsgDef, AnRes, Opts) -> case get_field_pass(MsgName, AnRes) of @@ -2054,7 +2136,8 @@ decoder_finalize_result(Params, FFields, MsgName, MsgDef, AnRes, Opts) -> mapping_create( MsgName, [begin - #?gpb_field{name=FName, occurrence=Occurrence}=Field, + FName = get_field_name(Field), + Occurrence = get_field_occurrence(Field), FValueExpr = case Occurrence of required -> Param; @@ -2081,35 +2164,42 @@ decoder_finalize_result(Params, FFields, MsgName, MsgDef, AnRes, Opts) -> end. format_field_decoders(MsgName, MsgDef, AnRes, Opts) -> - [[format_field_decoder(MsgName, Field, AnRes, Opts), "\n"] - || Field <- MsgDef]. + map_msgdef_fields_o( + fun(Field, IsOneof) -> + [format_field_decoder(MsgName, Field, IsOneof, AnRes, Opts), "\n"] + end, + MsgDef). -format_field_decoder(MsgName, Field, AnRes, Opts) -> +format_field_decoder(MsgName, Field, IsOneof, AnRes, Opts) -> case is_packed(Field) of - false -> format_non_packed_field_decoder(MsgName, Field, AnRes, Opts); - true -> format_packed_field_decoder(MsgName, Field, AnRes, Opts) + false -> + XField = {Field, IsOneof}, + format_non_packed_field_decoder(MsgName, XField, AnRes, Opts); + true -> + %% a packed field can never be one of a `oneof' fields + format_packed_field_decoder(MsgName, Field, AnRes, Opts) end. -format_non_packed_field_decoder(MsgName, Field, AnRes, Opts) -> - #?gpb_field{type=Type} = Field, +format_non_packed_field_decoder(MsgName, XField, AnRes, Opts) -> + {#?gpb_field{type=Type}, _IsOneof} = XField, case Type of - sint32 -> format_vi_based_field_decoder(MsgName, Field, AnRes, Opts); - sint64 -> format_vi_based_field_decoder(MsgName, Field, AnRes, Opts); - int32 -> format_vi_based_field_decoder(MsgName, Field, AnRes, Opts); - int64 -> format_vi_based_field_decoder(MsgName, Field, AnRes, Opts); - uint32 -> format_vi_based_field_decoder(MsgName, Field, AnRes, Opts); - uint64 -> format_vi_based_field_decoder(MsgName, Field, AnRes, Opts); - bool -> format_vi_based_field_decoder(MsgName, Field, AnRes, Opts); - {enum,_} -> format_vi_based_field_decoder(MsgName, Field, AnRes, Opts); - fixed32 -> format_fixlen_field_decoder(MsgName, Field, AnRes, Opts); - sfixed32 -> format_fixlen_field_decoder(MsgName, Field, AnRes, Opts); - float -> format_fixlen_field_decoder(MsgName, Field, AnRes, Opts); - fixed64 -> format_fixlen_field_decoder(MsgName, Field, AnRes, Opts); - sfixed64 -> format_fixlen_field_decoder(MsgName, Field, AnRes, Opts); - double -> format_fixlen_field_decoder(MsgName, Field, AnRes, Opts); - string -> format_vi_based_field_decoder(MsgName, Field, AnRes, Opts); - bytes -> format_vi_based_field_decoder(MsgName, Field, AnRes, Opts); - {msg,_} -> format_vi_based_field_decoder(MsgName, Field, AnRes, Opts) + sint32 -> format_vi_based_field_decoder(MsgName, XField, AnRes, Opts); + sint64 -> format_vi_based_field_decoder(MsgName, XField, AnRes, Opts); + int32 -> format_vi_based_field_decoder(MsgName, XField, AnRes, Opts); + int64 -> format_vi_based_field_decoder(MsgName, XField, AnRes, Opts); + uint32 -> format_vi_based_field_decoder(MsgName, XField, AnRes, Opts); + uint64 -> format_vi_based_field_decoder(MsgName, XField, AnRes, Opts); + bool -> format_vi_based_field_decoder(MsgName, XField, AnRes, Opts); + {enum,_} -> format_vi_based_field_decoder(MsgName, XField, AnRes, Opts); + fixed32 -> format_fixlen_field_decoder(MsgName, XField, AnRes, Opts); + sfixed32 -> format_fixlen_field_decoder(MsgName, XField, AnRes, Opts); + float -> format_fixlen_field_decoder(MsgName, XField, AnRes, Opts); + fixed64 -> format_fixlen_field_decoder(MsgName, XField, AnRes, Opts); + sfixed64 -> format_fixlen_field_decoder(MsgName, XField, AnRes, Opts); + double -> format_fixlen_field_decoder(MsgName, XField, AnRes, Opts); + string -> format_vi_based_field_decoder(MsgName, XField, AnRes, Opts); + bytes -> format_vi_based_field_decoder(MsgName, XField, AnRes, Opts); + {msg,_} -> format_vi_based_field_decoder(MsgName, XField, AnRes, Opts) end. format_packed_field_decoder(MsgName, FieldDef, AnRes, Opts) -> @@ -2205,20 +2295,20 @@ format_dpacked_vi(MsgName, #?gpb_field{name=FName}=FieldDef, Opts) -> end, [splice_trees('', Body)]). -format_vi_based_field_decoder(MsgName, FieldDef, AnRes, Opts) -> - #?gpb_field{name=FName}=FieldDef, +format_vi_based_field_decoder(MsgName, XFieldDef, AnRes, Opts) -> + {#?gpb_field{name=FName}=FieldDef, _IsOneof}=XFieldDef, ExtValue = ?expr(X bsl N + Acc), FVar = ?expr(NewFValue), %% result is to be put in this variable Rest = ?expr(Rest), Bindings = new_bindings([{'', ExtValue}, {'', Rest}]), Params = decoder_params(MsgName, AnRes), - {InParams, PrevValue} = decoder_in_params(Params, MsgName, FieldDef, AnRes, + {InParams, PrevValue} = decoder_in_params(Params, MsgName, XFieldDef, AnRes, Opts), BodyTailFn = fun(DecodeExprs, Rest2Var) -> ReadFieldDefFn = mk_fn(dfp_read_field_def_, MsgName), - Params2 = updated_merged_params(MsgName, FieldDef, AnRes, + Params2 = updated_merged_params(MsgName, XFieldDef, AnRes, FVar, PrevValue, Params, Opts), C = ?exprs(''('', 0, 0, ''), [replace_term('', ReadFieldDefFn), @@ -2335,20 +2425,23 @@ unpack_bytes(ResVar, Value, Rest, Rest2, Opts) -> Transforms) end. -updated_merged_params(MsgName, FieldDef, AnRes, NewValue, PrevValue, +updated_merged_params(MsgName, XFieldDef, AnRes, NewValue, PrevValue, Params, Opts) -> - #?gpb_field{name=FName, rnum=RNum} = FieldDef, - case get_field_pass(MsgName, AnRes) of - pass_as_params -> - MergedValue = merge_field_expr(FieldDef, PrevValue, NewValue), + case {get_field_pass(MsgName, AnRes), XFieldDef} of + {pass_as_params, {#?gpb_field{rnum=RNum}, _IsOneof}} -> + MergedValue = merge_field_expr(XFieldDef, PrevValue, NewValue), lists_setelement(RNum - 1, Params, MergedValue); - pass_as_record -> + {pass_as_record, {#?gpb_field{name=FName}, false}} -> MsgVar = hd(Params), - MergedValue = merge_field_expr(FieldDef, PrevValue, NewValue), - [mapping_update(MsgVar, MsgName, [{FName, MergedValue}], Opts)] + MergedValue = merge_field_expr(XFieldDef, PrevValue, NewValue), + [mapping_update(MsgVar, MsgName, [{FName, MergedValue}], Opts)]; + {pass_as_record, {_OField, {true, CFName}}} -> + MsgVar = hd(Params), + MergedValue = merge_field_expr(XFieldDef, PrevValue, NewValue), + [mapping_update(MsgVar, MsgName, [{CFName, MergedValue}], Opts)] end. -merge_field_expr(FieldDef, PrevValue, NewValue) -> +merge_field_expr({FieldDef, false}, PrevValue, NewValue) -> case classify_field_merge_action(FieldDef) of overwrite -> NewValue; @@ -2364,10 +2457,35 @@ merge_field_expr(FieldDef, PrevValue, NewValue) -> [replace_term('', mk_fn(merge_msg_, FMsgName)), replace_tree('', PrevValue), replace_tree('', NewValue)]) + end; +merge_field_expr({FieldDef, {true, _CFName}}, PrevValue, NewValue) -> + #?gpb_field{name=FName, type=Type} = FieldDef, + case Type of + {msg, FMsgName} -> + MVPrev = prefix_var("MV", PrevValue), + ?expr(case '' of + undefined -> + {'fieldname', ''}; + {'fieldname', ''} -> + {'fieldname', + ''('', '')}; + _ -> + {'fieldname', ''} + end, + [replace_term('', mk_fn(merge_msg_, FMsgName)), + replace_tree('', PrevValue), + replace_tree('', MVPrev), + replace_tree('', NewValue), + replace_term('fieldname', FName)]); + _ -> + %% Replace + ?expr({'fieldname', ''}, + [replace_term('fieldname', FName), + replace_tree('', NewValue)]) end. -decoder_in_params(Params, MsgName, FieldDef, AnRes, Opts) -> - #?gpb_field{name=FName} = FieldDef, +decoder_in_params(Params, MsgName, {FieldDef, false}, AnRes, Opts) -> + #?gpb_field{name=FName}=FieldDef, Any = ?expr(_), case get_field_pass(MsgName, AnRes) of pass_as_params -> @@ -2389,10 +2507,37 @@ decoder_in_params(Params, MsgName, FieldDef, AnRes, Opts) -> seqadd -> {InParams, Prev}; msgmerge -> {InParams, Prev} end + end; +decoder_in_params(Params, MsgName, {FieldDef, {true, CFName}}, AnRes, Opts) -> + #?gpb_field{type=Type, rnum=RNum} = FieldDef, + case Type of + {msg, _} -> + %% oneof fields that of message type may need merging + case get_field_pass(MsgName, AnRes) of + pass_as_params -> + Prev = lists:nth(RNum-1, Params), + {Params, Prev}; + pass_as_record -> + Prev = ?expr(Prev), + MMatch = mapping_match(MsgName, [{CFName, Prev}], Opts), + InParams = [?expr(mmatch = '', + [replace_tree(mmatch, MMatch), + replace_tree('', hd(Params))])], + {InParams, Prev} + end; + _ -> + %% Non-messages, treat as an optional field + Any = ?expr(_), + case get_field_pass(MsgName, AnRes) of + pass_as_params -> + {lists_setelement(RNum-1, Params, Any), Any}; + pass_as_record -> + {Params, Any} + end end. -format_fixlen_field_decoder(MsgName, FieldDef, AnRes, Opts) -> - #?gpb_field{name=FName, type=Type}=FieldDef, +format_fixlen_field_decoder(MsgName, XFieldDef, AnRes, Opts) -> + {#?gpb_field{name=FName, type=Type}, _IsOneof} = XFieldDef, {BitLen, BitTypes} = case Type of fixed32 -> {32, [little]}; sfixed32 -> {32, [little,signed]}; @@ -2402,10 +2547,10 @@ format_fixlen_field_decoder(MsgName, FieldDef, AnRes, Opts) -> double -> {64, [little,float]} end, Params = decoder_params(MsgName, AnRes), - {InParams, PrevValue} = decoder_in_params(Params, MsgName, FieldDef, AnRes, + {InParams, PrevValue} = decoder_in_params(Params, MsgName, XFieldDef, AnRes, Opts), Value = ?expr(Value), - Params2 = updated_merged_params(MsgName, FieldDef, AnRes, + Params2 = updated_merged_params(MsgName, XFieldDef, AnRes, Value, PrevValue, Params, Opts), ReadFieldDefFnName = mk_fn(dfp_read_field_def_, MsgName), @@ -2535,36 +2680,67 @@ format_msg_merger(MsgName, MsgDef, AnRes, Opts) -> MsgUndefFnClauses)]). compute_msg_field_merge_exprs(MsgDef) -> - PFields = [{FName, erl_syntax:variable(?ff("PF~s", [FName]))} - || #?gpb_field{name=FName} <- MsgDef], - NFields = [{FName, erl_syntax:variable(?ff("NF~s", [FName]))} - || #?gpb_field{name=FName} <- MsgDef], + FNames = [get_field_name(Field) || Field <- MsgDef], + PFVars = [erl_syntax:variable(?ff("PF~s", [FName])) || FName <- FNames], + NFVars = [erl_syntax:variable(?ff("NF~s", [FName])) || FName <- FNames], + PFields = lists:zip(FNames, PFVars), + NFields = lists:zip(FNames, NFVars), Mergings = - [begin - {value, {FName, PF}} = lists:keysearch(FName, 1, PFields), - {value, {FName, NF}} = lists:keysearch(FName, 1, NFields), - Transforms = [replace_tree('', PF), - replace_tree('', NF)], - Expr = case classify_field_merge_action(Field) of - overwrite -> - ?expr(if '' =:= undefined -> ''; - true -> '' - end, - Transforms); - seqadd -> - ?expr('' ++ '', Transforms); - msgmerge -> - #?gpb_field{type={msg,SubMsgName}}=Field, - MergeFn = mk_fn(merge_msg_, SubMsgName), - NewTranform = replace_term('', MergeFn), - MTransforms = [NewTranform | Transforms], - ?expr(''('', ''), MTransforms) - end, - {FName, Expr} - end - || #?gpb_field{name=FName}=Field <- MsgDef], + [begin + FName = get_field_name(Field), + {FName, format_field_merge_expr(Field, PF, NF)} + end + || {Field, PF, NF} <- lists:zip3(MsgDef, PFVars, NFVars)], {PFields, NFields, Mergings}. +format_field_merge_expr(#?gpb_field{}=Field, PF, NF) -> + Transforms = [replace_tree('', PF), + replace_tree('', NF)], + case classify_field_merge_action(Field) of + overwrite -> + ?expr(if '' =:= undefined -> ''; + true -> '' + end, + Transforms); + seqadd -> + ?expr('' ++ '', Transforms); + msgmerge -> + #?gpb_field{type={msg,SubMsgName}}=Field, + MergeFn = mk_fn(merge_msg_, SubMsgName), + NewTranform = replace_term('', MergeFn), + MTransforms = [NewTranform | Transforms], + ?expr(''('', ''), MTransforms) + end; +format_field_merge_expr(#gpb_oneof{fields=OFields}, PF, NF) -> + Transforms = [replace_tree('', PF), + replace_tree('', NF)], + case [OField || #?gpb_field{type={msg,_}}=OField <- OFields] of + [] -> + ?expr(if '' =:= undefined -> ''; + true -> '' + end, + Transforms); + MOFields -> + Transforms2 = Transforms ++ + [replace_tree('', prefix_var("O", PF)), + replace_tree('', prefix_var("O", NF))], + ?expr( + case {'', ''} of + '' -> {'T', merge_submsg('','')}; + {_, undefined} -> ''; + _ -> '' + end, + [repeat_clauses( + '', + [[replace_tree('', + ?expr({{'T', ''}, {'T', ''}})), + replace_term('T', OFName), + replace_term(merge_submsg, mk_fn(merge_msg_, M2Name)) + | Transforms2] + || #?gpb_field{name=OFName, type={msg,M2Name}} <- MOFields]) + | Transforms2]) + end. + occurs_as_optional_submsg(MsgName, #anres{msg_occurrences=Occurrences}=AnRes) -> %% Note: order of evaluation below is important (the exprs of `andalso'): %% Messages are present in Occurrences only if they are sub-messages @@ -2672,7 +2848,7 @@ format_msg_verifiers(Defs, AnRes, Opts) -> format_msg_verifier(MsgName, MsgDef, AnRes, Opts) -> FVars = [{var_f_n(I), Field} || {I, Field} <- index_seq(MsgDef)], - RFields = [{FName, Var} || {Var, #?gpb_field{name=FName}} <- FVars], + RFields = [{get_field_name(Field), Var} || {Var, Field} <- FVars], NeedsMatchOther = case get_records_or_maps_by_opts(Opts) of records -> can_occur_as_sub_msg(MsgName, AnRes); maps -> true @@ -2697,41 +2873,70 @@ format_msg_verifier(MsgName, MsgDef, AnRes, Opts) -> replace_term('', MsgName)]). field_verifiers(FVars) -> - [begin - #?gpb_field{name=FName, type=Type, occurrence=Occurrence} = Field, - FVerifierFn = case Type of - {msg,FMsgName} -> mk_fn(v_msg_, FMsgName); - {enum,EnumName} -> mk_fn(v_enum_, EnumName); - Type -> mk_fn(v_type_, Type) - end, - Replacements = [replace_term('', FVerifierFn), - replace_tree('', FVar), - replace_term('', FName), - replace_term('', Type)], - case Occurrence of - required -> - %% FIXME: check especially for `undefined' - %% and if found, error out with required_field_not_set - %% specifying expected type - ?expr(''('', ['' | Path]), - Replacements); - repeated -> - ?expr(if is_list('') -> - [''(Elem, ['' | Path]) - || Elem <- '']; - true -> - mk_type_error( - {invalid_list_of, ''}, '', Path) - end, - Replacements); - optional -> - ?expr(if '' == undefined -> ok; - true -> ''('', ['' | Path]) - end, - Replacements) - end - end - || {FVar, Field} <- FVars]. + [field_verifier(FVar, Field) || {FVar, Field} <- FVars]. + +field_verifier(FVar, + #?gpb_field{name=FName, type=Type, occurrence=Occurrence}) -> + FVerifierFn = case Type of + {msg,FMsgName} -> mk_fn(v_msg_, FMsgName); + {enum,EnumName} -> mk_fn(v_enum_, EnumName); + Type -> mk_fn(v_type_, Type) + end, + Replacements = [replace_term('', FVerifierFn), + replace_tree('', FVar), + replace_term('', FName), + replace_term('', Type)], + case Occurrence of + required -> + %% FIXME: check especially for `undefined' + %% and if found, error out with required_field_not_set + %% specifying expected type + ?expr(''('', ['' | Path]), + Replacements); + repeated -> + ?expr(if is_list('') -> + [''(Elem, ['' | Path]) + || Elem <- '']; + true -> + mk_type_error( + {invalid_list_of, ''}, '', Path) + end, + Replacements); + optional -> + ?expr(if '' == undefined -> ok; + true -> ''('', ['' | Path]) + end, + Replacements) + end; +field_verifier(FVar, + #gpb_oneof{name=FName, fields=OFields}) -> + ?expr( + case '' of + undefined -> + ok; + '' -> + ''('', ['', '' | Path]); + _ -> + mk_type_error(invalid_oneof, '', ['' | Path]) + end, + [replace_tree('', FVar), + replace_term('', FName), + repeat_clauses( + '', + [begin + FVerifierFn = case Type of + {msg,FMsgName} -> mk_fn(v_msg_, FMsgName); + {enum,EnumName} -> mk_fn(v_enum_, EnumName); + Type -> mk_fn(v_type_, Type) + end, + OFVar = prefix_var("O", FVar), + [replace_tree('', ?expr({'',''})), + replace_term('', FVerifierFn), + replace_tree('', OFVar), + replace_term('', OFName)] + end + || #?gpb_field{name=OFName, type=Type} <- OFields])]). + can_occur_as_sub_msg(MsgName, #anres{used_types=UsedTypes}) -> sets:is_element({msg,MsgName}, UsedTypes). @@ -2933,6 +3138,16 @@ field_tree(#?gpb_field{}=F, Opts) -> ?gpb_field, lists:zip(FNames, [erl_parse:abstract(FValue) || FValue <- FValues]), + Opts); +field_tree(#gpb_oneof{fields=OFields}=F, Opts) -> + [gpb_oneof | FValues] = tuple_to_list(F), + FNames = record_info(fields, gpb_oneof), + mapping_create( + gpb_oneof, + [if FName == fields -> {FName, fields_tree(OFields, Opts)}; + FName /= fields -> {FName, erl_parse:abstract(FValue)} + end + || {FName, FValue} <- lists:zip(FNames, FValues)], Opts). format_fetch_msg_defs([]) -> @@ -3227,7 +3442,8 @@ format_hfields(Indent, Fields, CompileOpts, Defs) -> TypeSpecs = get_type_specs_by_opts(CompileOpts), string:join( lists:map( - fun({I, #?gpb_field{name=Name, fnum=FNum, opts=FOpts, occurrence=Occur}=Field}) -> + fun({I, #?gpb_field{name=Name, fnum=FNum, opts=FOpts, + occurrence=Occur}=Field}) -> DefaultStr = case proplists:get_value(default, FOpts, '$no') of '$no' -> case Occur of repeated -> ?f(" = []"); @@ -3254,7 +3470,27 @@ format_hfields(Indent, Fields, CompileOpts, Defs) -> TypeComment = type_to_comment(Field, TypeSpecs), ?f("~s~s% = ~w~s~s", [FieldTxt2, LineUpStr2, FNum, - [", " || TypeComment /= ""], TypeComment]) + [", " || TypeComment /= ""], TypeComment]); + ({I, #gpb_oneof{name=Name}=Field}) -> + TypeStr = ?f("~s", [type_to_typestr(Field, Defs)]), + CommaSep = if I < length(Fields) -> ","; + true -> "" %% last entry + end, + FieldTxt1 = indent(Indent, ?f("~w", [Name])), + FieldTxt2 = if TypeSpecs -> + LineUp = lineup(iolist_size(FieldTxt1), 32), + ?f("~s~s:: ~s~s", [FieldTxt1, LineUp, + TypeStr, CommaSep]); + not TypeSpecs -> + ?f("~s~s", [FieldTxt1, CommaSep]) + end, + LineUpCol2 = if TypeSpecs -> 52; + not TypeSpecs -> 40 + end, + LineUpStr2 = lineup(iolist_size(FieldTxt2), LineUpCol2), + TypeComment = type_to_comment(Field, TypeSpecs), + ?f("~s~s% ~s", + [FieldTxt2, LineUpStr2, TypeComment]) end, index_seq(Fields)), "\n"). @@ -3268,7 +3504,13 @@ type_to_typestr(#?gpb_field{type=Type, occurrence=Occurrence}, Defs) -> required -> type_to_typestr_2(Type, Defs); repeated -> "[" ++ type_to_typestr_2(Type, Defs) ++ "]"; optional -> type_to_typestr_2(Type, Defs) ++ " | 'undefined'" - end. + end; +type_to_typestr(#gpb_oneof{fields=OFields}, Defs) -> + string:join( + ["undefined" + | [?f("{~s, ~s}", [Name, type_to_typestr_2(Type, Defs)]) + || #?gpb_field{name=Name, type=Type} <- OFields]], + " | "). type_to_typestr_2(sint32, _Defs) -> "integer()"; type_to_typestr_2(sint64, _Defs) -> "integer()"; @@ -3308,12 +3550,15 @@ type_to_comment(#?gpb_field{type=Type}, true=_TypeSpec) -> {enum,E} -> "enum "++atom_to_list(E); _ -> "" end; -type_to_comment(#?gpb_field{type=Type, occurrence=Occurrence}, false=_TypeSpec) -> +type_to_comment(#?gpb_field{type=Type, occurrence=Occurrence}, false) -> case Occurrence of required -> ?f("~w", [Type]); repeated -> "[" ++ ?f("~w", [Type]) ++ "]"; optional -> ?f("~w (optional)", [Type]) - end. + end; +type_to_comment(#gpb_oneof{}, _) -> + "oneof". + lineup(CurrentCol, TargetCol) when CurrentCol < TargetCol -> lists:duplicate(TargetCol - CurrentCol, $\s); @@ -4294,9 +4539,11 @@ compile_to_binary(Mod, MsgDefs, ErlCode, PossibleNifCode, Opts) -> || Ts <- FormToks], {AttrForms, CodeForms} = split_forms_at_first_code(Forms), FieldDef = field_record_to_attr_form(), - RpcDef = rpc_record_to_attr_form(), + OneofDef = oneof_record_to_attr_form(), + RpcDef = rpc_record_to_attr_form(), + RecordBaseDefs = [FieldDef, OneofDef, RpcDef], MsgRecordForms = msgdefs_to_record_attrs(MsgDefs), - AllForms = AttrForms ++ [FieldDef] ++ [RpcDef] ++ MsgRecordForms ++ CodeForms, + AllForms = AttrForms ++ RecordBaseDefs ++ MsgRecordForms ++ CodeForms, combine_erl_and_possible_nif(compile:forms(AllForms, Opts), PossibleNifCode). @@ -4329,6 +4576,9 @@ split_forms_at_first_code_2([{function, _, _Name, _, _Clauses}|_]=Code, Acc) -> field_record_to_attr_form() -> record_to_attr(?gpb_field, record_info(fields, ?gpb_field)). +oneof_record_to_attr_form() -> + record_to_attr(gpb_oneof, record_info(fields, gpb_oneof)). + rpc_record_to_attr_form() -> record_to_attr(?gpb_rpc, record_info(fields, ?gpb_rpc)). @@ -4353,7 +4603,9 @@ gpb_field_to_record_field(#?gpb_field{name=FName, opts=Opts}) -> case proplists:get_value(default, Opts) of undefined -> {FName}; Default -> {FName, Default} - end. + end; +gpb_field_to_record_field(#gpb_oneof{name=FName}) -> + {FName}. combine_erl_and_possible_nif(ErlCompilationResult, '$not_generated'=_Nif) -> ErlCompilationResult; @@ -4475,6 +4727,9 @@ var_b_n(N) -> var_n("B", N). var_n(S, N) -> erl_syntax:variable(?ff("~s~w", [S, N])). +prefix_var(Prefix, Var) -> + erl_syntax:variable(Prefix ++ erl_syntax:variable_literal(Var)). + enum_to_binary_fields(Value) -> <> = <>, varint_to_binary_fields(N). @@ -4487,6 +4742,12 @@ varint_to_binary_fields(IntValue) -> [erl_syntax:binary_field(?expr('', [replace_term('', N)]), []) || N <- binary_to_list(gpb:encode_varint(IntValue))]. +get_field_name(#?gpb_field{name=FName}) -> FName; +get_field_name(#gpb_oneof{name=FName}) -> FName. + +get_field_occurrence(#?gpb_field{occurrence=Occurrence}) -> Occurrence; +get_field_occurrence(#gpb_oneof{}) -> optional. + is_packed(#?gpb_field{opts=Opts}) -> lists:member(packed, Opts). diff --git a/test/gpb_compile_tests.erl b/test/gpb_compile_tests.erl index c2aedcda..2a8d1d04 100644 --- a/test/gpb_compile_tests.erl +++ b/test/gpb_compile_tests.erl @@ -119,12 +119,19 @@ field_pass_as_params_test() -> " repeated fixed32 f4 = 4 [packed];", " repeated uint32 f5 = 5;", " repeated uint32 f6 = 5 [packed];", - " optional string f7 = 7;" - " optional m2 f8 = 8; }"], + " optional string f7 = 7;", + " optional m2 f8 = 8;", + " oneof o1 { m2 x1 = 15;", + " uint32 y1 = 16; };", + " oneof o2 { m2 x2 = 25;", + " uint32 y2 = 26; }", + " oneof o3 { m2 x3 = 35;", + " uint32 y3 = 36; }", + "}"], Msg = {m1, 4711, undefined, %% f1,f2 [4713,4714], [4715,4716], %% f3,f4 [4717,4718], [4719,4720], %% f5,f6 - "abc", {m2,33}}, + "abc", {m2,33}, {x1,{m2,45}}, {y2,226}, undefined}, lists:foreach( fun(Opts) -> ?assertMatch({Msg,_}, diff --git a/test/gpb_tests.erl b/test/gpb_tests.erl index 01343003..a7b48071 100644 --- a/test/gpb_tests.erl +++ b/test/gpb_tests.erl @@ -113,6 +113,17 @@ skipping_unknown_32bit_field_test() -> type=int32, occurrence=optional, opts=[]}]}]). +skipping_with_oneof_test() -> + Defs = [{{msg,m1}, [#gpb_oneof{ + name=a, rnum=#m1.a, + fields=[#?gpb_field{name=a1, fnum=1, rnum=#m1.a, + type=int32, occurrence=optional, + opts=[]}, + #?gpb_field{name=a2, fnum=2, rnum=#m1.a, + type=int32, occurrence=optional, + opts=[]}]}]}], + #m1{a = undefined} = decode_msg(<<32,150,1>>, m1, Defs). + decode_msg_simple_occurrence_test() -> #m1{a = undefined} = decode_msg(<<>>, @@ -346,6 +357,18 @@ decode_of_field_fails_for_invalid_varints_test() -> opts=[packed]}]}])), ok. +decoding_oneof_test() -> + Defs = [{{msg,m1}, [#gpb_oneof{ + name=a, rnum=#m1.a, + fields=[#?gpb_field{name=a1, fnum=1, rnum=#m1.a, + type=int32, occurrence=optional, + opts=[]}, + #?gpb_field{name=a2, fnum=2, rnum=#m1.a, + type=int32, occurrence=optional, + opts=[]}]}]}], + #m1{a = undefined} = decode_msg(<<>>, m1, Defs), + #m1{a = {a1, 150}} = decode_msg(<<8, 150, 1>>, m1, Defs), + #m1{a = {a2, 150}} = decode_msg(<<16, 150, 1>>, m1, Defs). %% ------------------------------------------------------------- @@ -463,6 +486,19 @@ encode_double_test() -> type=double, occurrence=required, opts=[]}]}]). +encode_oneof_test() -> + Defs = [{{msg,m1}, [#gpb_oneof{ + name=a, rnum=#m1.a, + fields=[#?gpb_field{name=a1, fnum=1, rnum=#m1.a, + type=int32, occurrence=optional, + opts=[]}, + #?gpb_field{name=a2, fnum=2, rnum=#m1.a, + type=int32, occurrence=optional, + opts=[]}]}]}], + <<>> = encode_msg(#m1{a=undefined}, Defs), + <<8,150,1>> = encode_msg(#m1{a={a1,150}}, Defs), + <<16,150,1>> = encode_msg(#m1{a={a2,150}}, Defs). + %% ------------------------------------------------------------- merging_second_required_integer_overrides_first_test() -> @@ -532,6 +568,40 @@ merging_optional_messages_recursively2_test() -> type=uint32, occurrence=optional, opts=[]}]}]). +merge_oneof_test() -> + Defs = [{{msg,m1}, [#gpb_oneof{ + name=a, rnum=#m1.a, + fields=[#?gpb_field{name=a1, fnum=1, rnum=#m1.a, + type=int32, occurrence=optional, + opts=[]}, + #?gpb_field{name=a2, fnum=2, rnum=#m1.a, + type=int32, occurrence=optional, + opts=[]}]}]}], + #m1{a = undefined} = merge_msgs(#m1{a=undefined}, #m1{a=undefined}, Defs), + #m1{a = {a1, 150}} = merge_msgs(#m1{a={a1, 150}}, #m1{a=undefined}, Defs), + #m1{a = {a1, 150}} = merge_msgs(#m1{a=undefined}, #m1{a={a1, 150}}, Defs), + #m1{a = {a1, 151}} = merge_msgs(#m1{a={a1, 150}}, #m1{a={a1, 151}}, Defs), + #m1{a = {a2, 150}} = merge_msgs(#m1{a={a1, 150}}, #m1{a={a2, 150}}, Defs). + +merge_oneof_msg_test() -> + Defs = [{{msg,m1}, [#gpb_oneof{ + name=a, rnum=#m1.a, + fields=[#?gpb_field{name=x, fnum=1, rnum=#m1.a, + type={msg,m2},occurrence=optional, + opts=[]}, + #?gpb_field{name=y, fnum=2, rnum=#m1.a, + type=int32, occurrence=optional, + opts=[]}]}]}, + {{msg,m2}, [#?gpb_field{name=b, fnum=1, rnum=#m2.b, + type=fixed32, occurrence=repeated}]}], + %% Will get b=[1,2] since messages are merged + #m1{a = {x, #m2{b=[1,2]}}} = merge_msgs(#m1{a={x,#m2{b=[1]}}}, + #m1{a={x,#m2{b=[2]}}}, + Defs), + %% Different oneof fields ==> no merge + #m1{a = {y,150}} = merge_msgs(#m1{a={x,#m2{b=[1]}}}, + #m1{a={y,150}}, + Defs). %% ------------------------------------------------------------- @@ -764,6 +834,22 @@ verify_invalid_optional_submsg_fails_test() -> ?verify_gpb_err(verify_msg(#m4{x = 1, y = #m1{a=1}}, MsgDefs)), ok = verify_msg(#m4{x = #m1{a=1}, y = undefined}, MsgDefs). +verify_invalid_oneof_test() -> + Defs = [{{msg,m1}, [#gpb_oneof{ + name=a, rnum=#m1.a, + fields=[#?gpb_field{name=a1, fnum=1, rnum=#m1.a, + type=int32, occurrence=optional, + opts=[]}, + #?gpb_field{name=a2, fnum=2, rnum=#m1.a, + type=int32, occurrence=optional, + opts=[]}]}]}], + ok = verify_msg(#m1{a=undefined}, Defs), + ok = verify_msg(#m1{a={a1,150}}, Defs), + ?verify_gpb_err(verify_msg(#m1{a=150}, Defs)), + ?verify_gpb_err(verify_msg(#m1{a={a1,150,3}}, Defs)), + ?verify_gpb_err(verify_msg(#m1{a={a3,150}}, Defs)), + ?verify_gpb_err(verify_msg(#m1{a={a1,false}}, Defs)). + verify_path_when_failure_test() -> MsgDefs = [{{msg,m1}, [#?gpb_field{name=a,fnum=1,rnum=#m1.a, type={msg,m2}, @@ -798,95 +884,3 @@ version_test() -> {ok, [{application, App, PL}]} = file:consult(AppFile), ?assertEqual(S, proplists:get_value(vsn, PL)), ok. - --ifndef(gpb_compile_common_tests). %% non-shared code below vvvvvvvvv - -skipping_with_oneof_test() -> - Defs = [{{msg,m1}, [#gpb_oneof{ - name=a, rnum=#m1.a, - fields=[#?gpb_field{name=a1, fnum=1, rnum=#m1.a, - type=int32, occurrence=optional, - opts=[]}, - #?gpb_field{name=a2, fnum=2, rnum=#m1.a, - type=int32, occurrence=optional, - opts=[]}]}]}], - #m1{a = undefined} = decode_msg(<<32,150,1>>, m1, Defs). - -encode_oneof_test() -> - Defs = [{{msg,m1}, [#gpb_oneof{ - name=a, rnum=#m1.a, - fields=[#?gpb_field{name=a1, fnum=1, rnum=#m1.a, - type=int32, occurrence=optional, - opts=[]}, - #?gpb_field{name=a2, fnum=2, rnum=#m1.a, - type=int32, occurrence=optional, - opts=[]}]}]}], - <<>> = encode_msg(#m1{a=undefined}, Defs), - <<8,150,1>> = encode_msg(#m1{a={a1,150}}, Defs), - <<16,150,1>> = encode_msg(#m1{a={a2,150}}, Defs). - -decoding_oneof_test() -> - Defs = [{{msg,m1}, [#gpb_oneof{ - name=a, rnum=#m1.a, - fields=[#?gpb_field{name=a1, fnum=1, rnum=#m1.a, - type=int32, occurrence=optional, - opts=[]}, - #?gpb_field{name=a2, fnum=2, rnum=#m1.a, - type=int32, occurrence=optional, - opts=[]}]}]}], - #m1{a = undefined} = decode_msg(<<>>, m1, Defs), - #m1{a = {a1, 150}} = decode_msg(<<8, 150, 1>>, m1, Defs), - #m1{a = {a2, 150}} = decode_msg(<<16, 150, 1>>, m1, Defs). - -merge_oneof_test() -> - Defs = [{{msg,m1}, [#gpb_oneof{ - name=a, rnum=#m1.a, - fields=[#?gpb_field{name=a1, fnum=1, rnum=#m1.a, - type=int32, occurrence=optional, - opts=[]}, - #?gpb_field{name=a2, fnum=2, rnum=#m1.a, - type=int32, occurrence=optional, - opts=[]}]}]}], - #m1{a = undefined} = merge_msgs(#m1{a=undefined}, #m1{a=undefined}, Defs), - #m1{a = {a1, 150}} = merge_msgs(#m1{a={a1, 150}}, #m1{a=undefined}, Defs), - #m1{a = {a1, 150}} = merge_msgs(#m1{a=undefined}, #m1{a={a1, 150}}, Defs), - #m1{a = {a1, 151}} = merge_msgs(#m1{a={a1, 150}}, #m1{a={a1, 151}}, Defs), - #m1{a = {a2, 150}} = merge_msgs(#m1{a={a1, 150}}, #m1{a={a2, 150}}, Defs). - -merge_oneof_msg_test() -> - Defs = [{{msg,m1}, [#gpb_oneof{ - name=a, rnum=#m1.a, - fields=[#?gpb_field{name=x, fnum=1, rnum=#m1.a, - type={msg,m2},occurrence=optional, - opts=[]}, - #?gpb_field{name=y, fnum=2, rnum=#m1.a, - type=int32, occurrence=optional, - opts=[]}]}]}, - {{msg,m2}, [#?gpb_field{name=b, fnum=1, rnum=#m2.b, - type=fixed32, occurrence=repeated}]}], - %% Will get b=[1,2] since messages are merged - #m1{a = {x, #m2{b=[1,2]}}} = merge_msgs(#m1{a={x,#m2{b=[1]}}}, - #m1{a={x,#m2{b=[2]}}}, - Defs), - %% Different oneof fields ==> no merge - #m1{a = {y,150}} = merge_msgs(#m1{a={x,#m2{b=[1]}}}, - #m1{a={y,150}}, - Defs). - -verify_invalid_oneof_test() -> - Defs = [{{msg,m1}, [#gpb_oneof{ - name=a, rnum=#m1.a, - fields=[#?gpb_field{name=a1, fnum=1, rnum=#m1.a, - type=int32, occurrence=optional, - opts=[]}, - #?gpb_field{name=a2, fnum=2, rnum=#m1.a, - type=int32, occurrence=optional, - opts=[]}]}]}], - ok = verify_msg(#m1{a=undefined}, Defs), - ok = verify_msg(#m1{a={a1,150}}, Defs), - ?verify_gpb_err(verify_msg(#m1{a=150}, Defs)), - ?verify_gpb_err(verify_msg(#m1{a={a1,150,3}}, Defs)), - ?verify_gpb_err(verify_msg(#m1{a={a3,150}}, Defs)), - ?verify_gpb_err(verify_msg(#m1{a={a1,false}}, Defs)). - --endif. From da8bcea38bc68430e132653d92a935b27dcecb8e Mon Sep 17 00:00:00 2001 From: Tomas Abrahamsson Date: Tue, 21 Oct 2014 01:06:50 +0200 Subject: [PATCH 5/8] Handle oneof in nifs --- src/gpb_compile.erl | 286 +++++++++++++++++++++++++++---------- test/gpb_compile_tests.erl | 126 ++++++++++++---- 2 files changed, 307 insertions(+), 105 deletions(-) diff --git a/src/gpb_compile.erl b/src/gpb_compile.erl index 7b6fed80..6dc06d42 100644 --- a/src/gpb_compile.erl +++ b/src/gpb_compile.erl @@ -3618,6 +3618,7 @@ possibly_format_nif_cc(Mod, Defs, AnRes, Opts) -> format_nif_cc(Mod, Defs, AnRes, Opts) -> iolist_to_binary( [format_nif_cc_includes(Mod, Defs, Opts), + format_nif_cc_oneof_version_check_if_present(Defs), format_nif_cc_local_function_decls(Mod, Defs, Opts), format_nif_cc_mk_atoms(Mod, Defs, AnRes, Opts), format_nif_cc_utf8_conversion(Mod, Defs, AnRes, Opts), @@ -3649,6 +3650,29 @@ format_nif_cc_includes(Mod, Defs, _Opts) -> ["#include \n" || IsLiteRT], "\n"]. +format_nif_cc_oneof_version_check_if_present(Defs) -> + case contains_oneof(Defs) of + true -> + ["#if GOOGLE_PROTOBUF_VERSION < 2006000\n" + "#error The proto definitions contain 'oneof' fields.\n" + "#error This feature appeared in protobuf 2.6.0, but\n" + "#error it appears your protobuf is older. Please\n" + "#error update protobuf.\n" + "#endif\n"]; + false -> + "" + end. + +contains_oneof([{{msg,_}, Fields} | Rest]) -> + case lists:any(fun(F) -> is_record(F, gpb_oneof) end, Fields) of + false -> contains_oneof(Rest); + true -> true + end; +contains_oneof([_ | Rest]) -> + contains_oneof(Rest); +contains_oneof([]) -> + false. + format_nif_cc_local_function_decls(_Mod, Defs, _Opts) -> CPkg = get_cc_pkg(Defs), [[begin @@ -3668,12 +3692,13 @@ format_nif_cc_mk_atoms(_Mod, Defs, AnRes, _Opts) -> EnumAtoms = lists:flatten([[Sym || {Sym, _V} <- EnumDef] || {{enum, _}, EnumDef} <- Defs]), RecordAtoms = [MsgName || {{msg, MsgName}, _Fields} <- Defs], + OneofNames = collect_oneof_fields(Defs), MiscAtoms0 = [undefined], MiscAtoms1 = case is_any_field_of_type_bool(AnRes) of true -> MiscAtoms0 ++ [true, false]; false -> MiscAtoms0 end, - Atoms = lists:usort(EnumAtoms ++ RecordAtoms ++ MiscAtoms1), + Atoms = lists:usort(EnumAtoms ++ RecordAtoms ++ OneofNames ++ MiscAtoms1), AtomVars = [{mk_c_var(gpb_aa_, A), A} || A <- Atoms], [[?f("static ERL_NIF_TERM ~s;\n", [Var]) || {Var,_Atom} <- AtomVars], @@ -3685,6 +3710,14 @@ format_nif_cc_mk_atoms(_Mod, Defs, AnRes, _Opts) -> "}\n", "\n"]]. +collect_oneof_fields(Defs) -> + lists:usort( + lists:flatten( + [[[FOFName + || #?gpb_field{name=FOFName} <- OFields] + || #gpb_oneof{fields=OFields} <- Fields] + || {{msg,_}, Fields} <- Defs])). + format_nif_cc_utf8_conversion(_Mod, _Defs, AnRes, Opts) -> case is_any_field_of_type_string(AnRes) of true -> format_nif_cc_utf8_conversion_code(Opts); @@ -4063,7 +4096,7 @@ format_nif_cc_packer(CPkg, MsgName, Fields, Defs, Opts) -> "}\n", "\n"]. -format_nif_cc_field_packer(SrcVar, MsgVar, Field, Defs, Opts) -> +format_nif_cc_field_packer(SrcVar, MsgVar, #?gpb_field{}=Field, Defs, Opts) -> #?gpb_field{occurrence=Occurrence}=Field, case Occurrence of required -> @@ -4075,7 +4108,42 @@ format_nif_cc_field_packer(SrcVar, MsgVar, Field, Defs, Opts) -> repeated -> format_nif_cc_field_packer_repeated(SrcVar, MsgVar, Field, Defs, Opts) - end. + end; +format_nif_cc_field_packer(SrcVar, MsgVar, #gpb_oneof{}=Field, Defs, Opts) -> + #gpb_oneof{fields=OFields} = Field, + [split_indent_iolist( + 4, + ?f("if (!enif_is_identical(~s, gpb_aa_undefined))~n" + "{~n" + " int oarity;~n" + " const ERL_NIF_TERM *oelem;~n" + " if (!enif_get_tuple(env, ~s, &oarity, &oelem) || oarity != 2)~n" + " return 0;~n" + "~n" + " ~s~n" + "}~n", + [SrcVar, SrcVar, + format_nif_cc_oneof_packer("oelem[0]", "oelem[1]", + MsgVar, OFields, Defs, Opts)])), + "\n"]. + +format_nif_cc_oneof_packer(NameVar, SrcVar, MsgVar, OFields, Defs, Opts) -> + split_indent_iolist( + 4, + [[begin + Else = if I == 1 -> ""; + I > 1 -> "else " + end, + AtomVar = mk_c_var(gpb_aa_, Name), + [?f("~sif (enif_is_identical(~s, ~s))~n", [Else, NameVar, AtomVar]), + split_indent_iolist( + 4, + format_nif_cc_field_packer_single(SrcVar, MsgVar, OField, + Defs, Opts, set))] + end + || {I, #?gpb_field{name=Name}=OField} <- index_seq(OFields)], + "else\n" + " return 0;\n"]). format_nif_cc_field_packer_optional(SrcVar, MsgVar, Field, Defs, Opts) -> [?f(" if (!enif_is_identical(~s, gpb_aa_undefined))\n", [SrcVar]), @@ -4302,7 +4370,7 @@ format_nif_cc_unpacker(CPkg, MsgName, Fields, Defs) -> "\n", [begin DestVar = ?f("elem~w",[I]), - format_nif_cc_field_unpacker(DestVar, "m", Field, Defs) + format_nif_cc_field_unpacker(DestVar, "m", MsgName, Field, Defs) end || {I, Field} <- index_seq(Fields)], "\n", @@ -4312,7 +4380,8 @@ format_nif_cc_unpacker(CPkg, MsgName, Fields, Defs) -> "}\n", "\n"]. -format_nif_cc_field_unpacker(DestVar, MsgVar, Field, Defs) -> +format_nif_cc_field_unpacker(DestVar, MsgVar, _MsgName, #?gpb_field{}=Field, + Defs) -> #?gpb_field{occurrence=Occurrence}=Field, case Occurrence of required -> @@ -4321,86 +4390,126 @@ format_nif_cc_field_unpacker(DestVar, MsgVar, Field, Defs) -> format_nif_cc_field_unpacker_single(DestVar, MsgVar, Field, Defs); repeated -> format_nif_cc_field_unpacker_repeated(DestVar, MsgVar, Field, Defs) - end. - + end; +format_nif_cc_field_unpacker(DestVar, MsgVar, MsgName, #gpb_oneof{}=Field, + Defs) -> + #gpb_oneof{name=OFName, fields=OFields} = Field, + CPkg = get_cc_pkg(Defs), + CMsgType = CPkg ++ "::" ++ dot_replace_s(MsgName, "::"), + LCOFName = to_lower(OFName), + UCOFName = to_upper(OFName), + [split_indent_iolist( + 4, + [?f("switch (~s->~s_case())\n", [MsgVar, LCOFName]), + ?f("{\n"), + [begin + CamelCaseFOFName = camel_case(FOFName), + AtomVar = mk_c_var(gpb_aa_, FOFName), + split_indent_iolist( + 4, + [?f("case ~s::k~s:\n", [CMsgType, CamelCaseFOFName]), + ?f(" {\n"), + ?f(" ERL_NIF_TERM ores;\n"), + split_indent_iolist( + 8, + format_nif_cc_field_unpacker_by_type("ores", MsgVar, + OField, Defs)), + ?f(" ~s = enif_make_tuple2(env, ~s, ores);\n", + [DestVar, AtomVar]), + ?f(" }\n"), + ?f(" break;\n\n")]) + end + || #?gpb_field{name=FOFName}=OField <- OFields], + split_indent_iolist( + 4, + [?f("case ~s::~s_NOT_SET: /* FALL THROUGH */~n", [CMsgType, UCOFName]), + ?f("default:~n"), + ?f(" ~s = gpb_aa_undefined;\n", [DestVar])]), + ?f("}\n")]), + "\n"]. format_nif_cc_field_unpacker_single(DestVar, MsgVar, Field, Defs) -> - #?gpb_field{name=FName, type=FType} = Field, + #?gpb_field{name=FName} = Field, LCFName = to_lower(FName), [?f(" if (!~s->has_~s())\n", [MsgVar, LCFName]), ?f(" ~s = gpb_aa_undefined;\n", [DestVar]), ?f(" else\n"), indent_lines( - 8, - case FType of - float -> - [?f("~s = enif_make_double(env, (double)~s->~s());\n", - [DestVar, MsgVar, LCFName])]; - double -> - [?f("~s = enif_make_double(env, ~s->~s());\n", - [DestVar, MsgVar, LCFName])]; - _S32 when FType == sint32; - FType == int32; - FType == sfixed32 -> - [?f("~s = enif_make_int(env, ~s->~s());\n", - [DestVar, MsgVar, LCFName])]; - _S64 when FType == sint64; - FType == int64; - FType == sfixed64 -> - [?f("~s = enif_make_int64(env, (ErlNifSInt64)~s->~s());\n", - [DestVar, MsgVar, LCFName])]; - _U32 when FType == uint32; - FType == fixed32 -> - [?f("~s = enif_make_uint(env, ~s->~s());\n", - [DestVar, MsgVar, LCFName])]; - _U64 when FType == uint64; - FType == fixed64 -> - [?f("~s = enif_make_uint64(env, (ErlNifUInt64)~s->~s());\n", - [DestVar, MsgVar, LCFName])]; - bool -> - [?f("if (~s->~s())\n", [MsgVar, LCFName]), - ?f(" ~s = gpb_aa_true;\n", [DestVar]), - ?f("else\n"), - ?f(" ~s = gpb_aa_false;\n", [DestVar])]; - {enum, EnumName} -> - {value, {{enum,EnumName}, Enumerations}} = - lists:keysearch({enum,EnumName}, 1, Defs), - [] ++ - [?f("switch (~s->~s()) {\n", [MsgVar, LCFName])] ++ - [?f(" case ~s: ~s = ~s; break;\n", - [Sym, DestVar, mk_c_var(gpb_aa_, Sym)]) - || {Sym, _Value} <- Enumerations] ++ - [?f(" default: ~s = gpb_aa_undefined;\n", [DestVar])] ++ - [?f("}\n")]; - string -> - [?f("{\n"), - ?f(" const char *sData = ~s->~s().data();\n", - [ MsgVar, LCFName]), - ?f(" unsigned int sSize = ~s->~s().size();\n", - [ MsgVar, LCFName]), - ?f(" ~s = utf8_to_erl_string(env, sData, sSize);\n", - [ DestVar]), - ?f("}\n")]; - bytes -> - [?f("{\n"), - ?f(" unsigned char *data;\n"), - ?f(" unsigned int bSize = ~s->~s().size();\n", - [ MsgVar, LCFName]), - ?f(" const char *bData = ~s->~s().data();\n", - [ MsgVar, LCFName]), - ?f(" data = enif_make_new_binary(\n"), %% can data be NULL?? - ?f(" env,\n"), - ?f(" bSize,\n"), - ?f(" &~s);\n", [DestVar]), - ?f(" memmove(data, bData, bSize);\n"), - ?f("}\n")]; - {msg, Msg2Name} -> - UnpackFnName = mk_c_fn(u_msg_, Msg2Name), - [?f("~s = ~s(env, &~s->~s());\n", - [DestVar, UnpackFnName, MsgVar, LCFName])] - end), + 8, format_nif_cc_field_unpacker_by_type(DestVar, MsgVar, Field, Defs)), "\n"]. +format_nif_cc_field_unpacker_by_type(DestVar, MsgVar, Field, Defs) -> + #?gpb_field{name=FName, type=FType} = Field, + LCFName = to_lower(FName), + case FType of + float -> + [?f("~s = enif_make_double(env, (double)~s->~s());\n", + [DestVar, MsgVar, LCFName])]; + double -> + [?f("~s = enif_make_double(env, ~s->~s());\n", + [DestVar, MsgVar, LCFName])]; + _S32 when FType == sint32; + FType == int32; + FType == sfixed32 -> + [?f("~s = enif_make_int(env, ~s->~s());\n", + [DestVar, MsgVar, LCFName])]; + _S64 when FType == sint64; + FType == int64; + FType == sfixed64 -> + [?f("~s = enif_make_int64(env, (ErlNifSInt64)~s->~s());\n", + [DestVar, MsgVar, LCFName])]; + _U32 when FType == uint32; + FType == fixed32 -> + [?f("~s = enif_make_uint(env, ~s->~s());\n", + [DestVar, MsgVar, LCFName])]; + _U64 when FType == uint64; + FType == fixed64 -> + [?f("~s = enif_make_uint64(env, (ErlNifUInt64)~s->~s());\n", + [DestVar, MsgVar, LCFName])]; + bool -> + [?f("if (~s->~s())\n", [MsgVar, LCFName]), + ?f(" ~s = gpb_aa_true;\n", [DestVar]), + ?f("else\n"), + ?f(" ~s = gpb_aa_false;\n", [DestVar])]; + {enum, EnumName} -> + {value, {{enum,EnumName}, Enumerations}} = + lists:keysearch({enum,EnumName}, 1, Defs), + [] ++ + [?f("switch (~s->~s()) {\n", [MsgVar, LCFName])] ++ + [?f(" case ~s: ~s = ~s; break;\n", + [Sym, DestVar, mk_c_var(gpb_aa_, Sym)]) + || {Sym, _Value} <- Enumerations] ++ + [?f(" default: ~s = gpb_aa_undefined;\n", [DestVar])] ++ + [?f("}\n")]; + string -> + [?f("{\n"), + ?f(" const char *sData = ~s->~s().data();\n", + [ MsgVar, LCFName]), + ?f(" unsigned int sSize = ~s->~s().size();\n", + [ MsgVar, LCFName]), + ?f(" ~s = utf8_to_erl_string(env, sData, sSize);\n", + [ DestVar]), + ?f("}\n")]; + bytes -> + [?f("{\n"), + ?f(" unsigned char *data;\n"), + ?f(" unsigned int bSize = ~s->~s().size();\n", + [ MsgVar, LCFName]), + ?f(" const char *bData = ~s->~s().data();\n", + [ MsgVar, LCFName]), + ?f(" data = enif_make_new_binary(\n"), %% can data be NULL?? + ?f(" env,\n"), + ?f(" bSize,\n"), + ?f(" &~s);\n", [DestVar]), + ?f(" memmove(data, bData, bSize);\n"), + ?f("}\n")]; + {msg, Msg2Name} -> + UnpackFnName = mk_c_fn(u_msg_, Msg2Name), + [?f("~s = ~s(env, &~s->~s());\n", + [DestVar, UnpackFnName, MsgVar, LCFName])] + end. + + format_nif_cc_field_unpacker_repeated(DestVar, MsgVar, Field, Defs) -> #?gpb_field{name=FName, type=FType} = Field, LCFName = to_lower(FName), @@ -4521,6 +4630,31 @@ replace_tilde_s(<<>>, _ModBin, _VsnBin) -> to_lower(A) when is_atom(A) -> list_to_atom(string:to_lower(atom_to_list(A))). +to_upper(A) when is_atom(A) -> + list_to_atom(string:to_upper(atom_to_list(A))). + +camel_case(A) when is_atom(A) -> + list_to_atom(camel_case(atom_to_list(A), true)). + +-define(is_lower_case(C), $a =< C, C =< $z). +-define(is_upper_case(C), $A =< C, C =< $Z). +-define(is_digit(C), $0 =< C, C =< $9). +camel_case([LC | Tl], CapNextLetter) when ?is_lower_case(LC) -> + if CapNextLetter -> [capitalize_letter(LC) | camel_case(Tl, false)]; + not CapNextLetter -> [LC | camel_case(Tl, false)] + end; +camel_case([UC | Tl], _) when ?is_upper_case(UC) -> + [UC | camel_case(Tl, false)]; +camel_case([D | Tl], _) when ?is_digit(D) -> + [D | camel_case(Tl, true)]; +camel_case([_ | Tl], _) -> %% underscore and possibly more + camel_case(Tl, true); +camel_case([], _) -> + []. + +capitalize_letter(C) -> + C + ($A - $a). + %% -- compile to memory ----------------------------------------------------- compile_to_binary(Mod, MsgDefs, ErlCode, PossibleNifCode, Opts) -> diff --git a/test/gpb_compile_tests.erl b/test/gpb_compile_tests.erl index 2a8d1d04..690325f2 100644 --- a/test/gpb_compile_tests.erl +++ b/test/gpb_compile_tests.erl @@ -1065,6 +1065,7 @@ nif_compiles() -> end). nif_encode_decode() -> + ProtocCanOneof = check_protoc_can_do_oneof(), with_tmpdir( fun(TmpDir) -> NEDM = gpb_nif_test_ed1, @@ -1075,6 +1076,7 @@ nif_encode_decode() -> fun() -> nif_encode_decode_test_it(NEDM, Defs), nif_encode_decode_strings(NEDM, Defs), + [nif_encode_decode_oneof(NEDM, Defs) || ProtocCanOneof], ok end) end). @@ -1128,6 +1130,27 @@ nif_encode_decode_strings(NEDM, Defs) -> end, CodePoints). +nif_encode_decode_oneof(NEDM, Defs) -> + [#gpb_oneof{fields=OFields}] = [O || {{msg, oneof1}, [O]} <- Defs], + Alts = [{Name, mk_field_value(OF, Defs, small)} + || #?gpb_field{name=Name}=OF <- OFields] ++ [undefined], + lists:foreach(fun(Alt) -> + OrigMsg = {oneof1, Alt}, + %% to avoid errors in nif encode/decode + %% cancelling out each other and nif bugs go + %% undetected, cross-check with gpb:encode/decode_msg + MEncoded = NEDM:encode_msg(OrigMsg), + GEncoded = gpb:encode_msg(OrigMsg, Defs), + MMDecoded = NEDM:decode_msg(MEncoded, oneof1), + GMDecoded = gpb:decode_msg(MEncoded, oneof1, Defs), + MGDecoded = NEDM:decode_msg(GEncoded, oneof1), + ?assertEqual(OrigMsg, MMDecoded), + ?assertEqual(OrigMsg, GMDecoded), + ?assertEqual(OrigMsg, MGDecoded) + end, + Alts). + + compile_msg_defs(M, MsgDefs, TmpDir) -> [NifCcPath, PbCcPath, NifOPath, PbOPath, NifSoPath, ProtoPath] = [filename:join(TmpDir, lists:concat([M, Ext])) @@ -1239,6 +1262,36 @@ find_protoc() -> Protoc -> Protoc end. +check_protoc_can_do_oneof() -> + case get('$cached_check_protoc_can_do_oneof') of + undefined -> + CanIt = + case find_protoc_version() of + {ok, Vsn} -> + Vsn >= [2,6]; %% oneof appeared in 2.6.0 + {error, X} -> + ?debugFmt("Trouble finding protoc version in ~s~n", [X]), + false + end, + put('$cached_check_protoc_can_do_oneof', CanIt), + CanIt; + CanIt -> + CanIt + end. + +find_protoc_version() -> + Output = os:cmd(find_protoc() ++ " --version"), + find_protoc_version_aux(string:tokens(Output, " \t\r\n"), Output). + +find_protoc_version_aux(["libprotoc", VersionStr | _], All) -> + try {ok, [list_to_integer(X) || X <- string:tokens(VersionStr, ".")]} + catch error:badarg -> {error, {failed_to_interpret, VersionStr, All}} + end; +find_protoc_version_aux([_ | Rest], All) -> + find_protoc_version_aux(Rest, All); +find_protoc_version_aux([], All) -> + {error, {no_version_string_found, All}}. + get_cflags() -> Root = code:root_dir(), %% e.g. /usr/lib/erlang CIncDir = filename:join([Root, "usr", "include"]), @@ -1271,13 +1324,18 @@ format_enumerator({N,V}) -> format_field(#?gpb_field{name=FName, fnum=FNum, type=Type, occurrence=Occurrence}) -> - TypeStr = case Type of - {msg,Name2} -> Name2; - {enum,Name2} -> Name2; - Type -> Type - end, - f(" ~s ~s ~s = ~w;~n", [Occurrence, TypeStr, FName, FNum]). - + f(" ~s ~s ~s = ~w;~n", [Occurrence, format_type(Type), FName, FNum]); +format_field(#gpb_oneof{name=FName, fields=Fields}) -> + f(" oneof ~s {~n" + "~s" + " };~n", + [FName, + [f(" ~s ~s = ~w;~n", [format_type(Type), OFName, FNum]) + || #?gpb_field{name=OFName, fnum=FNum, type=Type} <- Fields]]). + +format_type({msg,Name}) -> Name; +format_type({enum,Name}) -> Name; +format_type(Type) -> Type. ccompile(F, A) -> Cmd = f(F, A), @@ -1295,38 +1353,44 @@ ccompile(F, A) -> end. mk_one_msg_field_of_each_type() -> + EachType = [sint32, sint64, int32, int64, uint32, + uint64, bool, fixed64, sfixed64, + double, string, bytes, fixed32, sfixed32, + float, {enum, ee}, {msg, submsg1}], EnumDef = {{enum, ee}, [{en1, 1}, {en2, 2}]}, SubMsgDef = {{msg, submsg1}, mk_fields_of_type([uint32], required)}, - TopMsgDef1 = {{msg, topmsg1}, mk_fields_of_type( - [sint32, sint64, int32, int64, uint32, - uint64, bool, fixed64, sfixed64, - double, string, bytes, fixed32, sfixed32, - float, {enum, ee}, {msg, submsg1}], - required)}, - TopMsgDef2 = {{msg, topmsg2}, mk_fields_of_type( - [sint32, sint64, int32, int64, uint32, - uint64, bool, fixed64, sfixed64, - double, string, bytes, fixed32, sfixed32, - float, {enum, ee}, {msg, submsg1}], - repeated)}, - TopMsgDef3 = {{msg, topmsg3}, mk_fields_of_type( - [sint32, sint64, int32, int64, uint32, - uint64, bool, fixed64, sfixed64, - double, string, bytes, fixed32, sfixed32, - float, {enum, ee}, {msg, submsg1}], - optional)}, + TopMsgDef1 = {{msg, topmsg1}, mk_fields_of_type(EachType, required)}, + TopMsgDef2 = {{msg, topmsg2}, mk_fields_of_type(EachType, repeated)}, + TopMsgDef3 = {{msg, topmsg3}, mk_fields_of_type(EachType, optional)}, + OneofMsg1 = {{msg, oneof1}, mk_oneof_fields_of_type(EachType, 1)}, StringMsg = {{msg,strmsg}, mk_fields_of_type([string], required)}, - [EnumDef, SubMsgDef, TopMsgDef1, TopMsgDef2, TopMsgDef3, StringMsg]. + [EnumDef, SubMsgDef, TopMsgDef1, TopMsgDef2, TopMsgDef3, StringMsg] ++ + [OneofMsg1 || check_protoc_can_do_oneof()]. mk_fields_of_type(Types, Occurrence) -> Types1 = [Type || Type <- Types, can_do_nif_type(Type)], - [#?gpb_field{name=list_to_atom(lists:concat([f,integer_to_list(I)])), + [#?gpb_field{name=list_to_atom(lists:concat([f,I])), rnum=I + 1, fnum=I, type=Type, occurrence=Occurrence, opts=[]} - || {I, Type} <- lists:zip(lists:seq(1,length(Types1)), Types1)]. + || {I, Type} <- index_seq(Types1)]. + +mk_oneof_fields_of_type(Types, Pos) -> + Types1 = [Type || Type <- Types, can_do_nif_type(Type)], + [#gpb_oneof{ + name = o, + rnum = Pos+1, + fields = [#?gpb_field{name=list_to_atom(lists:concat([f,I])), + rnum=Pos+1, + fnum=I, + type=Type, + occurrence=optional, + opts=[]} + || {I, Type} <- index_seq(Types1)]}]. + +index_seq(L) -> lists:zip(lists:seq(1, length(L)), L). can_do_nif_type(Type) -> if Type == int64; @@ -1364,7 +1428,11 @@ mk_msg(MsgName, Defs, Variant) -> R0 = erlang:make_tuple(length(Fields) + 1, undefined, [{1, MsgName}]), lists:foldl(fun(#?gpb_field{rnum=RNum}=Field, R) -> Value = mk_field_value(Field, Defs, Variant), - setelement(RNum, R, Value) + setelement(RNum, R, Value); + (#gpb_oneof{rnum=RNum, fields=[OField1 | _]}, R) -> + #?gpb_field{name=Name} = OField1, + Value = mk_field_value(OField1, Defs, Variant), + setelement(RNum, R, {Name, Value}) end, R0, Fields). From 1c033cd0642657209fd53457ff9b309d4d0ab5b7 Mon Sep 17 00:00:00 2001 From: Tomas Abrahamsson Date: Tue, 21 Oct 2014 01:43:58 +0200 Subject: [PATCH 6/8] Import a new gpb_descriptor.proto from protobuf 2.6.1 --- descr_src/gpb_descriptor.proto | 190 +++++++++++++++++++++++++++++---- 1 file changed, 172 insertions(+), 18 deletions(-) diff --git a/descr_src/gpb_descriptor.proto b/descr_src/gpb_descriptor.proto index 8be89e64..95ab65f4 100644 --- a/descr_src/gpb_descriptor.proto +++ b/descr_src/gpb_descriptor.proto @@ -1,10 +1,10 @@ -// This file is imported from protobuf r393 (after 2.4.1, before 2.4.2) +// This file is imported from protobuf 2.6.1 (20e0a61) // and renamed from descriptor.proto to gpb_descriptor.proto // to avoid collisions. // Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. -// http://code.google.com/p/protobuf/ +// https://developers.google.com/protocol-buffers/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are @@ -63,6 +63,11 @@ message FileDescriptorProto { // Names of files imported by this file. repeated string dependency = 3; + // Indexes of the public imported files in the dependency list above. + repeated int32 public_dependency = 10; + // Indexes of the weak imported files in the dependency list. + // For Google-internal migration only. Do not use. + repeated int32 weak_dependency = 11; // All top-level definitions in this file. repeated DescriptorProto message_type = 4; @@ -95,6 +100,8 @@ message DescriptorProto { } repeated ExtensionRange extension_range = 5; + repeated OneofDescriptorProto oneof_decl = 8; + optional MessageOptions options = 7; } @@ -105,13 +112,13 @@ message FieldDescriptorProto { // Order is weird for historical reasons. TYPE_DOUBLE = 1; TYPE_FLOAT = 2; - TYPE_INT64 = 3; // Not ZigZag encoded. Negative numbers - // take 10 bytes. Use TYPE_SINT64 if negative - // values are likely. + // Not ZigZag encoded. Negative numbers take 10 bytes. Use TYPE_SINT64 if + // negative values are likely. + TYPE_INT64 = 3; TYPE_UINT64 = 4; - TYPE_INT32 = 5; // Not ZigZag encoded. Negative numbers - // take 10 bytes. Use TYPE_SINT32 if negative - // values are likely. + // Not ZigZag encoded. Negative numbers take 10 bytes. Use TYPE_SINT32 if + // negative values are likely. + TYPE_INT32 = 5; TYPE_FIXED64 = 6; TYPE_FIXED32 = 7; TYPE_BOOL = 8; @@ -142,7 +149,7 @@ message FieldDescriptorProto { optional Label label = 4; // If type_name is set, this need not be set. If both this and type_name - // are set, this must be either TYPE_ENUM or TYPE_MESSAGE. + // are set, this must be one of TYPE_ENUM, TYPE_MESSAGE or TYPE_GROUP. optional Type type = 5; // For message and enum types, this is the name of the type. If the name @@ -163,9 +170,20 @@ message FieldDescriptorProto { // TODO(kenton): Base-64 encode? optional string default_value = 7; + // If set, gives the index of a oneof in the containing type's oneof_decl + // list. This field is a member of that oneof. Extensions of a oneof should + // not set this since the oneof to which they belong will be inferred based + // on the extension range containing the extension's field number. + optional int32 oneof_index = 9; + optional FieldOptions options = 8; } +// Describes a oneof. +message OneofDescriptorProto { + optional string name = 1; +} + // Describes an enum type. message EnumDescriptorProto { optional string name = 1; @@ -203,6 +221,7 @@ message MethodDescriptorProto { optional MethodOptions options = 4; } + // =================================================================== // Options @@ -224,10 +243,15 @@ message MethodDescriptorProto { // through 99999. It is up to you to ensure that you do not use the // same number for multiple options. // * For options which will be published and used publicly by multiple -// independent entities, e-mail kenton@google.com to reserve extension -// numbers. Simply tell me how many you need and I'll send you back a -// set of numbers to use -- there's no need to explain how you intend to -// use them. If this turns out to be popular, a web service will be set up +// independent entities, e-mail protobuf-global-extension-registry@google.com +// to reserve extension numbers. Simply provide your project name (e.g. +// Object-C plugin) and your porject website (if available) -- there's no need +// to explain how you intend to use them. Usually you only need one extension +// number. You can declare multiple options with only one extension number by +// putting them in a sub-message. See the Custom Options section of the docs +// for examples: +// https://developers.google.com/protocol-buffers/docs/proto#options +// If this turns out to be popular, a web service will be set up // to automatically assign option numbers. @@ -256,11 +280,26 @@ message FileOptions { optional bool java_multiple_files = 10 [default=false]; // If set true, then the Java code generator will generate equals() and - // hashCode() methods for all messages defined in the .proto file. This is - // purely a speed optimization, as the AbstractMessage base class includes - // reflection-based implementations of these methods. + // hashCode() methods for all messages defined in the .proto file. + // - In the full runtime, this is purely a speed optimization, as the + // AbstractMessage base class includes reflection-based implementations of + // these methods. + //- In the lite runtime, setting this option changes the semantics of + // equals() and hashCode() to more closely match those of the full runtime; + // the generated methods compute their results based on field values rather + // than object identity. (Implementations should not assume that hashcodes + // will be consistent across runtimes or versions of the protocol compiler.) optional bool java_generate_equals_and_hash = 20 [default=false]; + // If set true, then the Java2 code generator will generate code that + // throws an exception whenever an attempt is made to assign a non-UTF-8 + // byte sequence to a string field. + // Message reflection will do the same. + // However, an extension field still accepts non-UTF-8 byte sequences. + // This option has no effect on when used with the lite runtime. + optional bool java_string_check_utf8 = 27 [default=false]; + + // Generated classes can be optimized for speed or code size. enum OptimizeMode { SPEED = 1; // Generate complete code for parsing, serialization, @@ -270,6 +309,9 @@ message FileOptions { } optional OptimizeMode optimize_for = 9 [default=SPEED]; + // Sets the Go package where structs generated from this .proto will be + // placed. There is no default. + optional string go_package = 11; @@ -287,6 +329,13 @@ message FileOptions { optional bool java_generic_services = 17 [default=false]; optional bool py_generic_services = 18 [default=false]; + // Is this file deprecated? + // Depending on the target platform, this can emit Deprecated annotations + // for everything in the file, or it will be completely ignored; in the very + // least, this is a formalization for deprecating files. + optional bool deprecated = 23 [default=false]; + + // The parser stores options it doesn't recognize here. See above. repeated UninterpretedOption uninterpreted_option = 999; @@ -320,6 +369,12 @@ message MessageOptions { // from proto1 easier; new code should avoid fields named "descriptor". optional bool no_standard_descriptor_accessor = 2 [default=false]; + // Is this message deprecated? + // Depending on the target platform, this can emit Deprecated annotations + // for the message, or it will be completely ignored; in the very least, + // this is a formalization for deprecating messages. + optional bool deprecated = 3 [default=false]; + // The parser stores options it doesn't recognize here. See above. repeated UninterpretedOption uninterpreted_option = 999; @@ -348,6 +403,37 @@ message FieldOptions { optional bool packed = 2; + + // Should this field be parsed lazily? Lazy applies only to message-type + // fields. It means that when the outer message is initially parsed, the + // inner message's contents will not be parsed but instead stored in encoded + // form. The inner message will actually be parsed when it is first accessed. + // + // This is only a hint. Implementations are free to choose whether to use + // eager or lazy parsing regardless of the value of this option. However, + // setting this option true suggests that the protocol author believes that + // using lazy parsing on this field is worth the additional bookkeeping + // overhead typically needed to implement it. + // + // This option does not affect the public interface of any generated code; + // all method signatures remain the same. Furthermore, thread-safety of the + // interface is not affected by this option; const methods remain safe to + // call from multiple threads concurrently, while non-const methods continue + // to require exclusive access. + // + // + // Note that implementations may choose not to check required fields within + // a lazy sub-message. That is, calling IsInitialized() on the outher message + // may return true even if the inner message has missing required fields. + // This is necessary because otherwise the inner message would have to be + // parsed in order to perform the check, defeating the purpose of lazy + // parsing. An implementation which chooses not to check required fields + // must be consistent about it. That is, for any particular sub-message, the + // implementation must either *always* check its required fields, or *never* + // check its required fields, regardless of whether or not the message has + // been parsed. + optional bool lazy = 5 [default=false]; + // Is this field deprecated? // Depending on the target platform, this can emit Deprecated annotations // for accessors, or it will be completely ignored; in the very least, this @@ -368,6 +454,11 @@ message FieldOptions { // TODO: Fully-implement this, then remove the "experimental_" prefix. optional string experimental_map_key = 9; + // For Google-internal migration only. Do not use. + optional bool weak = 10 [default=false]; + + + // The parser stores options it doesn't recognize here. See above. repeated UninterpretedOption uninterpreted_option = 999; @@ -377,6 +468,16 @@ message FieldOptions { message EnumOptions { + // Set this option to true to allow mapping different tag names to the same + // value. + optional bool allow_alias = 2; + + // Is this enum deprecated? + // Depending on the target platform, this can emit Deprecated annotations + // for the enum, or it will be completely ignored; in the very least, this + // is a formalization for deprecating enums. + optional bool deprecated = 3 [default=false]; + // The parser stores options it doesn't recognize here. See above. repeated UninterpretedOption uninterpreted_option = 999; @@ -385,6 +486,12 @@ message EnumOptions { } message EnumValueOptions { + // Is this enum value deprecated? + // Depending on the target platform, this can emit Deprecated annotations + // for the enum value, or it will be completely ignored; in the very least, + // this is a formalization for deprecating enum values. + optional bool deprecated = 1 [default=false]; + // The parser stores options it doesn't recognize here. See above. repeated UninterpretedOption uninterpreted_option = 999; @@ -399,6 +506,12 @@ message ServiceOptions { // we were already using them long before we decided to release Protocol // Buffers. + // Is this service deprecated? + // Depending on the target platform, this can emit Deprecated annotations + // for the service, or it will be completely ignored; in the very least, + // this is a formalization for deprecating services. + optional bool deprecated = 33 [default=false]; + // The parser stores options it doesn't recognize here. See above. repeated UninterpretedOption uninterpreted_option = 999; @@ -413,6 +526,12 @@ message MethodOptions { // we were already using them long before we decided to release Protocol // Buffers. + // Is this method deprecated? + // Depending on the target platform, this can emit Deprecated annotations + // for the method, or it will be completely ignored; in the very least, + // this is a formalization for deprecating methods. + optional bool deprecated = 33 [default=false]; + // The parser stores options it doesn't recognize here. See above. repeated UninterpretedOption uninterpreted_option = 999; @@ -420,6 +539,7 @@ message MethodOptions { extensions 1000 to max; } + // A message representing a option the parser does not recognize. This only // appears in options protos created by the compiler::Parser class. // DescriptorPool resolves these when building Descriptor objects. Therefore, @@ -531,7 +651,41 @@ message SourceCodeInfo { // 1 to each before displaying to a user. repeated int32 span = 2 [packed=true]; - // TODO(kenton): Record comments appearing before and after the - // declaration. + // If this SourceCodeInfo represents a complete declaration, these are any + // comments appearing before and after the declaration which appear to be + // attached to the declaration. + // + // A series of line comments appearing on consecutive lines, with no other + // tokens appearing on those lines, will be treated as a single comment. + // + // Only the comment content is provided; comment markers (e.g. //) are + // stripped out. For block comments, leading whitespace and an asterisk + // will be stripped from the beginning of each line other than the first. + // Newlines are included in the output. + // + // Examples: + // + // optional int32 foo = 1; // Comment attached to foo. + // // Comment attached to bar. + // optional int32 bar = 2; + // + // optional string baz = 3; + // // Comment attached to baz. + // // Another line attached to baz. + // + // // Comment attached to qux. + // // + // // Another line attached to qux. + // optional double qux = 4; + // + // optional string corge = 5; + // /* Block comment attached + // * to corge. Leading asterisks + // * will be removed. */ + // /* Block comment attached to + // * grault. */ + // optional int32 grault = 6; + optional string leading_comments = 3; + optional string trailing_comments = 4; } } From d9d41fb294b8e7f7e701dc274e6217f8dfb08633 Mon Sep 17 00:00:00 2001 From: Tomas Abrahamsson Date: Tue, 21 Oct 2014 01:44:37 +0200 Subject: [PATCH 7/8] Handle oneof in the descriptor Handle oneof in the self-describing descriptor format. --- descr_src/gpb_compile_descr.erl | 46 ++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/descr_src/gpb_compile_descr.erl b/descr_src/gpb_compile_descr.erl index f38f8483..e09f55a0 100644 --- a/descr_src/gpb_compile_descr.erl +++ b/descr_src/gpb_compile_descr.erl @@ -52,6 +52,10 @@ defs_to_descr_2(Name, Defs) -> source_code_info = undefined %% #'SourceCodeInfo'{} | undefined }. +get_all_oneofs(Defs) -> + lists:flatten([[Name || #gpb_oneof{name=Name} <- Fields] + || {{msg,_}, Fields} <- Defs]). + defs_to_package(Defs) -> %% There can be at most 1 package definition %% The parser will reject any multiple package definitions @@ -61,6 +65,7 @@ defs_to_package(Defs) -> end. defs_to_msgtype(Defs) -> + AllOneofs = get_all_oneofs(Defs), %% There is a slight bit of mismatch here: the DescriptorProto %% contains fields for `nested_type' and `enum_type', and defines %% a name resolution scheme, but the gpb parser already un-nests, @@ -71,16 +76,26 @@ defs_to_msgtype(Defs) -> %% save also the unprocessed parse results. [#'DescriptorProto'{ name = atom_to_ustring(MsgName), - field = field_defs_to_mgstype_fields(Fields), + field = field_defs_to_mgstype_fields(Fields, AllOneofs), extension = [], nested_type = [], enum_type = [], extension_range = [], - options = undefined + options = undefined, + oneof_decl = oneof_decl(AllOneofs) } || {{msg,MsgName}, Fields} <- Defs]. -field_defs_to_mgstype_fields(Fields) -> +field_defs_to_mgstype_fields(Fields, AllOneofs) -> + lists:append([field_def_to_msgtype_field(Field, AllOneofs) + || Field <- Fields]). + +field_def_to_msgtype_field(#?gpb_field{name=FName, + fnum=FNum, + type=Type, + occurrence=Occurrence, + opts=Opts}=Field, + _AllOneofs) -> [#'FieldDescriptorProto'{ name = atom_to_ustring(FName), number = FNum, @@ -88,12 +103,22 @@ field_defs_to_mgstype_fields(Fields) -> type = type_to_descr_type(Type), type_name = type_to_descr_type_name(Type), default_value = field_default_value(Field), - options = field_options(Opts)} - || #?gpb_field{name=FName, - fnum=FNum, - type=Type, - occurrence=Occurrence, - opts=Opts}=Field <- Fields]. + options = field_options(Opts)}]; +field_def_to_msgtype_field(#gpb_oneof{name=FName, + fields=OFields}, + AllOneofs) -> + OneofIndex = find_oneof_index(FName, AllOneofs), + [begin + [F] = field_def_to_msgtype_field(OField, AllOneofs), + F#'FieldDescriptorProto'{oneof_index = OneofIndex} + end + || OField <- OFields]. + +find_oneof_index(Name, Names) -> + find_pos(Name, Names, 0). + +find_pos(Name, [Name | _], Pos) -> Pos; +find_pos(Name, [_ | Rest], Pos) -> find_pos(Name, Rest, Pos+1). occurrence_def_to_descr_label(optional) -> 'LABEL_OPTIONAL'; occurrence_def_to_descr_label(required) -> 'LABEL_REQUIRED'; @@ -166,6 +191,9 @@ defs_to_enumtype(Defs) -> || {EName, EValue} <- Enumerators]} || {{enum,EnumName}, Enumerators} <- Defs]. +oneof_decl(AllOneofs) -> + [#'OneofDescriptorProto'{name=atom_to_ustring(Name)} || Name <- AllOneofs]. + atom_to_ustring(A) -> Utf8Str = atom_to_list(A), unicode:characters_to_list(list_to_binary(Utf8Str), utf8). From 48f866ce7a77c9e752439cf06bbde8408a98c28a Mon Sep 17 00:00:00 2001 From: Tomas Abrahamsson Date: Wed, 22 Oct 2014 01:18:37 +0200 Subject: [PATCH 8/8] Specify in README.md that oneof is supported --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index d29e8926..b7b1fc01 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,7 @@ Features of gpb - the 'packed' and 'default' options - generating metadata information - package namespacing (optional) + - oneof gpb reads but ignores or throws away: - options other than 'packed' or 'default'