From 02c17291e0711c5c32993d79aa9e5b2e201f3234 Mon Sep 17 00:00:00 2001 From: Judah Jacobson Date: Mon, 14 Aug 2017 15:35:02 -0700 Subject: [PATCH] Fix #28: preserve unknown proto3 enum values. TODO: consider making the default behavior just default, and add a `*'lax` combinator to access the underlying value. --- README.md | 4 +- .../src/Data/ProtoLens/Compiler/Generate.hs | 30 ++--- proto-lens-tests/tests/proto3.proto | 9 ++ proto-lens-tests/tests/proto3_test.hs | 13 ++- proto-lens/src/Data/ProtoLens/Message.hs | 107 ++++++++++++++++++ 5 files changed, 146 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 358999f3..2ec55023 100644 --- a/README.md +++ b/README.md @@ -92,12 +92,10 @@ will generate the haskell files `Proto/Project/{Foo,Bar}.hs`. libraries. - Unknown fields of proto2 messages are discarded during decoding. (This is the correct behavior for proto3.) -- Unknown enum values cause a decoding error, instead of being preserved +- Unknown proto2 enum values cause a decoding error, instead of being preserved round-trip. - Files with `import public` statements compile correctly, but don't explicitly reexport the definitions from those imports. -- Enum aliases (`option allow_alias = true`) are not supported; each enum value - must map to a distinct integer constant. # Troubleshooting diff --git a/proto-lens-protoc/src/Data/ProtoLens/Compiler/Generate.hs b/proto-lens-protoc/src/Data/ProtoLens/Compiler/Generate.hs index c34f9830..266c6a2e 100644 --- a/proto-lens-protoc/src/Data/ProtoLens/Compiler/Generate.hs +++ b/proto-lens-protoc/src/Data/ProtoLens/Compiler/Generate.hs @@ -126,7 +126,7 @@ generateModule modName imports syntaxType modifyImport definitions importedEnv allMessageFields :: SyntaxType -> Env QName -> MessageInfo Name -> [RecordField] allMessageFields syntaxType env info = map (plainRecordField syntaxType env) (messageFields info) - ++ map (oneofRecordField env) (messageOneofFields info) + ++ map (oneofRecordField syntaxType env) (messageOneofFields info) importSimple :: ModuleName -> ImportDecl () importSimple m = ImportDecl @@ -155,7 +155,7 @@ generateMessageDecls syntaxType env protoName info = -- foo :: Baz -- } [ dataDecl dataName - [recDecl dataName $ + [recDecl dataName [ (recordFieldName f, recordFieldType f) | f <- allFields ] @@ -173,7 +173,7 @@ generateMessageDecls syntaxType env protoName info = -- haskell: data Foo'Bar = Foo'Bar'c !Prelude.Float -- | Foo'Bar's !Sub [ dataDecl (oneofTypeName oneofInfo) - [ conDecl consName [hsFieldType env $ fieldDescriptor f] + [ conDecl consName [hsFieldType syntaxType env $ fieldDescriptor f] | c <- oneofCases oneofInfo , let f = caseField c , let consName = caseConstructorName c @@ -455,8 +455,8 @@ plainRecordField syntaxType env f = case fd ^. label of -- data Foo = Foo { _Foo_bar :: Map Bar Baz } -- type instance Field "foo" Foo = Map Bar Baz | Just (k,v) <- getMapFields env fd -> let - mapType = "Data.Map.Map" @@ hsFieldType env (fieldDescriptor k) - @@ hsFieldType env (fieldDescriptor v) + mapType = "Data.Map.Map" @@ hsType (fieldDescriptor k) + @@ hsType (fieldDescriptor v) in recordField mapType [LensInstance { lensSymbol = baseName @@ -473,9 +473,10 @@ plainRecordField syntaxType env f = case fd ^. label of }] where recordField = RecordField (haskellRecordFieldName $ plainFieldName f) + hsType = hsFieldType syntaxType env + baseType = hsType fd baseName = overloadedName $ plainFieldName f fd = fieldDescriptor f - baseType = hsFieldType env fd maybeType = "Prelude.Maybe" @@ baseType listType = tyList baseType rawAccessor = "Prelude.id" @@ -483,8 +484,8 @@ plainRecordField syntaxType env f = case fd ^. label of @@ hsFieldValueDefault env fd -oneofRecordField :: Env QName -> OneofInfo -> RecordField -oneofRecordField env oneofInfo +oneofRecordField :: SyntaxType -> Env QName -> OneofInfo -> RecordField +oneofRecordField syntax env oneofInfo = RecordField { recordFieldName = haskellRecordFieldName $ oneofFieldName oneofInfo , recordFieldType = @@ -528,7 +529,7 @@ oneofRecordField env oneofInfo | c <- oneofCases oneofInfo , let f = caseField c , let baseName = overloadedName $ plainFieldName f - , let baseType = hsFieldType env $ fieldDescriptor f + , let baseType = hsFieldType syntax env $ fieldDescriptor f , let maybeName = "maybe'" <> baseName ] @@ -542,8 +543,8 @@ getMapFields env f , [f1, f2] <- messageFields m = Just (f1, f2) | otherwise = Nothing -hsFieldType :: Env QName -> FieldDescriptorProto -> Type -hsFieldType env fd = case fd ^. type' of +hsFieldType :: SyntaxType -> Env QName -> FieldDescriptorProto -> Type +hsFieldType syntaxType env fd = case fd ^. type' of FieldDescriptorProto'TYPE_DOUBLE -> "Prelude.Double" FieldDescriptorProto'TYPE_FLOAT -> "Prelude.Float" FieldDescriptorProto'TYPE_INT64 -> "Data.Int.Int64" @@ -564,7 +565,10 @@ hsFieldType env fd = case fd ^. type' of FieldDescriptorProto'TYPE_BYTES -> "Data.ByteString.ByteString" FieldDescriptorProto'TYPE_UINT32 -> "Data.Word.Word32" FieldDescriptorProto'TYPE_ENUM - | Enum e <- definedFieldType fd env -> tyCon $ enumName e + | Enum e <- definedFieldType fd env -> + if syntaxType == Proto2 + then tyCon $ enumName e + else "Data.ProtoLens.Lax" @@ tyCon (enumName e) | otherwise -> error $ "expected TYPE_ENUM for type name" ++ unpack (fd ^. typeName) FieldDescriptorProto'TYPE_SFIXED32 -> "Data.Int.Int32" @@ -741,7 +745,7 @@ fieldDescriptorExpr syntaxType env n f = @@ (fieldTypeDescriptorExpr (fd ^. type') @::@ ("Data.ProtoLens.FieldTypeDescriptor" - @@ hsFieldType env fd)) + @@ hsFieldType syntaxType env fd)) @@ fieldAccessorExpr syntaxType env f) -- TODO: why is this type sig needed? @::@ diff --git a/proto-lens-tests/tests/proto3.proto b/proto-lens-tests/tests/proto3.proto index f8a764b6..9d4acaf7 100644 --- a/proto-lens-tests/tests/proto3.proto +++ b/proto-lens-tests/tests/proto3.proto @@ -29,3 +29,12 @@ message Strings { bytes bytes = 1; string string = 2; } + +message FooUnknown { + enum FooUnknownEnum { + Enum1 = 0; + Enum2 = 1; + Enum3 = 2; // A case not in Foo.FooEnum + } + FooUnknownEnum enum = 6; // Corresponds to Foo.enum +} diff --git a/proto-lens-tests/tests/proto3_test.hs b/proto-lens-tests/tests/proto3_test.hs index a9bacadc..f2bf6085 100644 --- a/proto-lens-tests/tests/proto3_test.hs +++ b/proto-lens-tests/tests/proto3_test.hs @@ -5,6 +5,7 @@ -- https://developers.google.com/open-source/licenses/bsd {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} module Main where import Data.ProtoLens @@ -15,6 +16,8 @@ import Proto.Proto3 ( Foo , Foo'FooEnum(..) , Foo'Sub(..) + , FooUnknown + , FooUnknown'FooUnknownEnum(..) , Strings ) import Proto.Proto3'Fields @@ -96,7 +99,15 @@ main = testMain [ testCase "int" $ (def :: Foo) @=? (def & a .~ 0) , testCase "bytes" $ (def :: Strings) @=? (def & bytes .~ "") , testCase "string" $ (def :: Strings) @=? (def & string .~ "") - , testCase "enum" $ (def :: Foo) @=? (def & enum .~ Foo'Enum1) + , testCase "enum" $ (def :: Foo) @=? (def & enum .~ Recognized Foo'Enum1) + ] + -- Proto3 enums should preserve unknown fields. + , testGroup "unknown enum" + [ testCase "roundTrip" $ do + let x = def & enum .~ Recognized FooUnknown'Enum3 :: FooUnknown + let (y :: Foo) = decodeMessageOrDie (encodeMessage x) + let z = decodeMessageOrDie (encodeMessage y) + x @=? z ] -- Unset proto3 messages are different than the default value. , testGroup "submessage" diff --git a/proto-lens/src/Data/ProtoLens/Message.hs b/proto-lens/src/Data/ProtoLens/Message.hs index f7256726..67aee53f 100644 --- a/proto-lens/src/Data/ProtoLens/Message.hs +++ b/proto-lens/src/Data/ProtoLens/Message.hs @@ -6,7 +6,9 @@ {-# LANGUAGE GADTs #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE PatternGuards #-} {-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE StandaloneDeriving #-} -- | Datatypes for reflection of protocol buffer messages. module Data.ProtoLens.Message ( @@ -22,7 +24,14 @@ module Data.ProtoLens.Message ( Packing(..), FieldTypeDescriptor(..), FieldDefault(..), + -- * Enums + -- $enums MessageEnum(..), + Lax(..), + lax, + laxDef, + UnrecognizedValue, + fromUnrecognizedValue, -- * Building protocol buffers Default(..), build, @@ -38,6 +47,7 @@ import Data.Int import qualified Data.Map as Map import Data.Map (Map) import Data.Maybe (fromMaybe) +import Data.Ord (comparing) import qualified Data.Text as T import Data.Word import Lens.Family2 (Lens', over) @@ -151,6 +161,12 @@ instance FieldDefault B.ByteString where instance FieldDefault T.Text where fieldDefault = T.empty +instance FieldDefault a => FieldDefault (Lax a) where + fieldDefault = Recognized fieldDefault + +instance Default a => Default (Lax a) where + def = Recognized def + -- | How a given repeated field is transmitted on the wire format. data Packing = Packed | Unpacked @@ -189,6 +205,97 @@ class (Enum a, Bounded a) => MessageEnum a where -- no corresponding value was defined in the .proto file. readEnum :: String -> Maybe a +-- Otherwise: data MyEnum s = ... where s is either Lax or Strict + +-- | A wrapped proto3 enum. This type is "Open" to include both +-- known cases and unknown integer values. +-- +-- In most cases, you will want to use `laxDef` to retrieve an `a`, + +data Lax a + = Recognized !a -- ^ A case defined in the .proto file. + | Unrecognized !(UnrecognizedValue a) -- ^ An integer value not defined in + -- the .proto file. + deriving (Show) + +-- Extract a known value from a @Lax a@. If the underlying value is +-- unrecognized, return the 'Default' (which is always the zero value for +-- proto3 enums). +-- +-- For more details, see the +-- . +-- +-- Note that, like `maybeLens`, this is does not satisfy the lens laws; +-- however, it only matters when explicitly checking whether a value is unknown. +laxDef :: Default a => Lens' (Lax a) a +laxDef = lens mk $ const Recognized + where + mk (Recognized a) = a + mk _ = def + +-- TODO: should laxDef be the default of everything instead? +-- - LABEL_OPTIONAL: if proto3 and enum, then foo and lax'foo... +-- there's no maybe'foo since it's proto3. +-- - WHAT ABOUT MAPS? ONEOF? +-- - enums can't be map keys, for this reason +-- - ONEOF is annoying... +-- - LABEL_REPEATED: foo and lax'foo + +instance MessageEnum a => Eq (Lax a) where + x == y = fromEnum x == fromEnum y + +instance MessageEnum a => Ord (Lax a) where + compare = comparing fromEnum + +-- guarantee: lax (unrecognizedValue u) == Unrecognized u +-- | An integral value which does not correspond to +-- +-- This type is guaranteed to be disjoint from @a@; that is, if @x :: a@ and +-- @y :: Unrecognized a@ then @fromEnum x /= fromUnrecognized y@. (We +-- can enfore this since the only way to get an 'Unrecognized' is by +-- calling 'lax'.) +newtype UnrecognizedValue a = UnrecognizedValue Int32 + deriving (Eq, Ord) + +instance Show (UnrecognizedValue a) where + show (UnrecognizedValue n) = show n + +-- | Extract an unrecognized integer value. +fromUnrecognizedValue :: UnrecognizedValue a -> Int32 +fromUnrecognizedValue (UnrecognizedValue n) = n + +-- | Convert a raw integer to an enum, depending on whether it was +-- defined in the .proto file. +-- +-- Its behavior is defined by: +-- +-- @ +-- lax (fromEnum (x :: a)) :: Lax a === Recognized x +-- lax (fromUnrecognized (x :: Unrecognized a)) :: Lax a === Unrecognized x +-- @ +lax :: MessageEnum a => Int32 -> Lax a +lax = toEnum . fromEnum + +instance MessageEnum a => Enum (Lax a) where + fromEnum (Recognized x) = fromEnum x + fromEnum (Unrecognized x) = fromEnum $ fromUnrecognizedValue x + toEnum n + | Just x <- maybeToEnum n = Recognized x + | otherwise = Unrecognized (UnrecognizedValue $ toEnum n) + +instance MessageEnum a => Bounded (Lax a) where + -- TODO: prevent overlap in case where it's known? + minBound = lax minBound + maxBound = lax maxBound + +instance MessageEnum a => MessageEnum (Lax a) where + maybeToEnum n = Just $ toEnum n + showEnum (Unrecognized n) = show n + showEnum (Recognized s) = showEnum s + readEnum s + | [(n::Int32,"")] <- reads s = Just $ lax n + | otherwise = Recognized <$> readEnum s + -- | Utility function for building a message from a default value. -- For example: --