Skip to content

Commit

Permalink
Merge pull request #153 from chpatrick/infinite-width
Browse files Browse the repository at this point in the history
Configurable encoding
  • Loading branch information
snoyberg committed Sep 17, 2018
2 parents 50a95e0 + 9819ecb commit c16c516
Show file tree
Hide file tree
Showing 5 changed files with 174 additions and 47 deletions.
4 changes: 4 additions & 0 deletions ChangeLog.md
@@ -1,5 +1,9 @@
# ChangeLog for yaml

## 0.10.2.0

* Add `EncodeOptions` and `FormatOptions` to control the style of the encoded YAML. [#153](https://github.com/snoyberg/yaml/pull/153)

## 0.10.1.1

* Correctly declare libyaml dependency on system-libyaml flag [#151](https://github.com/snoyberg/yaml/pull/151)
Expand Down
2 changes: 1 addition & 1 deletion package.yaml
@@ -1,5 +1,5 @@
name: yaml
version: 0.10.1.1
version: 0.10.2.0
synopsis: Support for parsing and rendering YAML documents.
description: README and API documentation are available at <https://www.stackage.org/package/yaml>
category: Data
Expand Down
139 changes: 101 additions & 38 deletions src/Data/Yaml.hs
Expand Up @@ -28,7 +28,9 @@ module Data.Yaml
#endif
( -- * Encoding
encode
, encodeWith
, encodeFile
, encodeFileWith
-- * Decoding
, decodeEither'
, decodeFileEither
Expand Down Expand Up @@ -66,6 +68,15 @@ module Data.Yaml
-- * Classes
, ToJSON (..)
, FromJSON (..)
-- * Custom encoding
, isSpecialString
, EncodeOptions
, defaultEncodeOptions
, setStringStyle
, setFormat
, FormatOptions
, defaultFormatOptions
, setWidth
-- * Deprecated
, decode
, decodeFile
Expand Down Expand Up @@ -100,65 +111,117 @@ import qualified Data.Text.Lazy as TL
import Data.Text.Lazy.Builder (toLazyText)
import qualified Data.Vector as V
import System.IO.Unsafe (unsafePerformIO)
import Data.Text (Text)

import Data.Yaml.Internal
import Text.Libyaml hiding (encode, decode, encodeFile, decodeFile)
import Text.Libyaml hiding (encode, decode, encodeFile, decodeFile, encodeWith, encodeFileWith)
import qualified Text.Libyaml as Y

-- |
-- @since 0.10.2.0
data EncodeOptions = EncodeOptions
{ encodeOptionsStringStyle :: Text -> ( Tag, Style )
, encodeOptionsFormat :: FormatOptions
}

-- | Set the string style in the encoded YAML. This is a function that decides
-- for each string the type of YAML string to output.
--
-- __WARNING__: You must ensure that special strings (like @"yes"@\/@"no"@\/@"null"@\/@"1234"@) are not encoded with the 'Plain' style, because
-- then they will be decoded as boolean, null or numeric values. You can use 'isSpecialString' to detect them.
--
-- By default, strings are encoded with the `Plain` style, except special strings, which are encoded with `SingleQuoted`.
--
-- @since 0.10.2.0
setStringStyle :: (Text -> ( Tag, Style )) -> EncodeOptions -> EncodeOptions
setStringStyle s opts = opts { encodeOptionsStringStyle = s }

-- | Set the encoding formatting for the encoded YAML. By default, this is `defaultFormatOptions`.
--
-- @since 0.10.2.0
setFormat :: FormatOptions -> EncodeOptions -> EncodeOptions
setFormat f opts = opts { encodeOptionsFormat = f }

-- | Determine whether a string must be quoted in YAML and can't appear as plain text.
-- Useful if you want to use 'setStringStyle'.
--
-- @since 0.10.2.0
isSpecialString :: Text -> Bool
isSpecialString s = s `HashSet.member` specialStrings || isNumeric s

-- |
-- @since 0.10.2.0
defaultEncodeOptions :: EncodeOptions
defaultEncodeOptions = EncodeOptions
{ encodeOptionsStringStyle = \s ->
-- Empty strings need special handling to ensure they get quoted. This avoids:
-- https://github.com/snoyberg/yaml/issues/24
if isSpecialString s
then ( NoTag, SingleQuoted )
else ( StrTag, PlainNoTag )
, encodeOptionsFormat = defaultFormatOptions
}

-- | Encode a value into its YAML representation.
encode :: ToJSON a => a -> ByteString
encode obj = unsafePerformIO $ runConduitRes
$ CL.sourceList (objToEvents $ toJSON obj)
.| Y.encode
encode = encodeWith defaultEncodeOptions

-- | Encode a value into its YAML representation with custom styling.
--
-- @since 0.10.2.0
encodeWith :: ToJSON a => EncodeOptions -> a -> ByteString
encodeWith opts obj = unsafePerformIO $ runConduitRes
$ CL.sourceList (objToEvents opts $ toJSON obj)
.| Y.encodeWith (encodeOptionsFormat opts)

-- | Encode a value into its YAML representation and save to the given file.
encodeFile :: ToJSON a => FilePath -> a -> IO ()
encodeFile fp obj = runConduitRes
$ CL.sourceList (objToEvents $ toJSON obj)
.| Y.encodeFile fp
encodeFile = encodeFileWith defaultEncodeOptions

objToEvents :: Value -> [Y.Event]
objToEvents o = (:) EventStreamStart
-- | Encode a value into its YAML representation with custom styling and save to the given file.
--
-- @since 0.10.2.0
encodeFileWith :: ToJSON a => EncodeOptions -> FilePath -> a -> IO ()
encodeFileWith opts fp obj = runConduitRes
$ CL.sourceList (objToEvents opts $ toJSON obj)
.| Y.encodeFileWith (encodeOptionsFormat opts) fp

objToEvents :: EncodeOptions -> Value -> [Y.Event]
objToEvents opts o = (:) EventStreamStart
. (:) EventDocumentStart
$ objToEvents' o
[ EventDocumentEnd
, EventStreamEnd
]
where
objToEvents' :: Value -> [Y.Event] -> [Y.Event]
--objToEvents' (Scalar s) rest = scalarToEvent s : rest
objToEvents' (Array list) rest =
EventSequenceStart NoTag AnySequence Nothing
: foldr objToEvents' (EventSequenceEnd : rest) (V.toList list)
objToEvents' (Object pairs) rest =
EventMappingStart NoTag AnyMapping Nothing
: foldr pairToEvents (EventMappingEnd : rest) (M.toList pairs)

objToEvents' (String "") rest = EventScalar "" NoTag SingleQuoted Nothing : rest

objToEvents' (String s) rest = EventScalar (encodeUtf8 s) tag style Nothing : rest
where
( tag, style ) = encodeOptionsStringStyle opts s
objToEvents' Null rest = EventScalar "null" NullTag PlainNoTag Nothing : rest
objToEvents' (Bool True) rest = EventScalar "true" BoolTag PlainNoTag Nothing : rest
objToEvents' (Bool False) rest = EventScalar "false" BoolTag PlainNoTag Nothing : rest
-- Use aeson's implementation which gets rid of annoying decimal points
objToEvents' n@Number{} rest = EventScalar (TE.encodeUtf8 $ TL.toStrict $ toLazyText $ encodeToTextBuilder n) IntTag PlainNoTag Nothing : rest

pairToEvents :: Pair -> [Y.Event] -> [Y.Event]
pairToEvents (k, v) = objToEvents' (String k) . objToEvents' v

{- FIXME
scalarToEvent :: YamlScalar -> Event
scalarToEvent (YamlScalar v t s) = EventScalar v t s Nothing
-}

objToEvents' :: Value -> [Y.Event] -> [Y.Event]
--objToEvents' (Scalar s) rest = scalarToEvent s : rest
objToEvents' (Array list) rest =
EventSequenceStart NoTag AnySequence Nothing
: foldr objToEvents' (EventSequenceEnd : rest) (V.toList list)
objToEvents' (Object pairs) rest =
EventMappingStart NoTag AnyMapping Nothing
: foldr pairToEvents (EventMappingEnd : rest) (M.toList pairs)

-- Empty strings need special handling to ensure they get quoted. This avoids:
-- https://github.com/snoyberg/yaml/issues/24
objToEvents' (String "") rest = EventScalar "" NoTag SingleQuoted Nothing : rest

objToEvents' (String s) rest =
event : rest
where
event
-- Make sure that special strings are encoded as strings properly.
-- See: https://github.com/snoyberg/yaml/issues/31
| s `HashSet.member` specialStrings || isNumeric s = EventScalar (encodeUtf8 s) NoTag SingleQuoted Nothing
| otherwise = EventScalar (encodeUtf8 s) StrTag PlainNoTag Nothing
objToEvents' Null rest = EventScalar "null" NullTag PlainNoTag Nothing : rest
objToEvents' (Bool True) rest = EventScalar "true" BoolTag PlainNoTag Nothing : rest
objToEvents' (Bool False) rest = EventScalar "false" BoolTag PlainNoTag Nothing : rest
-- Use aeson's implementation which gets rid of annoying decimal points
objToEvents' n@Number{} rest = EventScalar (TE.encodeUtf8 $ TL.toStrict $ toLazyText $ encodeToTextBuilder n) IntTag PlainNoTag Nothing : rest

pairToEvents :: Pair -> [Y.Event] -> [Y.Event]
pairToEvents (k, v) = objToEvents' (String k) . objToEvents' v

decode :: FromJSON a
=> ByteString
Expand Down
18 changes: 16 additions & 2 deletions src/Data/Yaml/Builder.hs
Expand Up @@ -14,8 +14,12 @@ module Data.Yaml.Builder
, scientific
, number
, toByteString
, toByteStringWith
, writeYamlFile
, writeYamlFileWith
, (.=)
, FormatOptions
, setWidth
) where

import Prelude hiding (null)
Expand Down Expand Up @@ -110,7 +114,17 @@ toSource :: (Monad m, ToYaml a) => a -> ConduitM i Event m ()
toSource = mapM_ yield . toEvents . toYaml

toByteString :: ToYaml a => a -> ByteString
toByteString yb = unsafePerformIO $ runConduitRes $ toSource yb .| encode
toByteString = toByteStringWith defaultFormatOptions

-- |
-- @since 0.10.2.0
toByteStringWith :: ToYaml a => FormatOptions -> a -> ByteString
toByteStringWith opts yb = unsafePerformIO $ runConduitRes $ toSource yb .| encodeWith opts

writeYamlFile :: ToYaml a => FilePath -> a -> IO ()
writeYamlFile fp yb = runConduitRes $ toSource yb .| encodeFile fp
writeYamlFile = writeYamlFileWith defaultFormatOptions

-- |
-- @since 0.10.2.0
writeYamlFileWith :: ToYaml a => FormatOptions -> FilePath -> a -> IO ()
writeYamlFileWith opts fp yb = runConduitRes $ toSource yb .| encodeFileWith opts fp
58 changes: 52 additions & 6 deletions src/Text/Libyaml.hs
Expand Up @@ -21,9 +21,14 @@ module Text.Libyaml
, Anchor
-- * Encoding and decoding
, encode
, encodeWith
, decode
, encodeFile
, decodeFile
, encodeFileWith
, FormatOptions
, defaultFormatOptions
, setWidth
-- * Error handling
, YamlException (..)
, YamlMark (..)
Expand Down Expand Up @@ -348,6 +353,9 @@ foreign import ccall unsafe "yaml_emitter_set_unicode"
foreign import ccall unsafe "yaml_emitter_set_output_file"
c_yaml_emitter_set_output_file :: Emitter -> File -> IO ()

foreign import ccall unsafe "yaml_emitter_set_width"
c_yaml_emitter_set_width :: Emitter -> CInt -> IO ()

foreign import ccall unsafe "yaml_emitter_emit"
c_yaml_emitter_emit :: Emitter -> EventRaw -> IO CInt

Expand Down Expand Up @@ -605,9 +613,34 @@ parserParseOne' parser = allocaBytes eventSize $ \er -> do
return $ Left $ YamlParseException problem context problemMark
else Right <$> getEvent er

-- | Contains options relating to the formatting (indendation, width) of the YAML output.
--
-- @since 0.10.2.0
data FormatOptions = FormatOptions
{ formatOptionsWidth :: Maybe Int
}

-- |
-- @since 0.10.2.0
defaultFormatOptions :: FormatOptions
defaultFormatOptions = FormatOptions
{ formatOptionsWidth = Just 80 -- by default the width is set to 0 in the C code, which gets turned into 80 in yaml_emitter_emit_stream_start
}

-- | Set the maximum number of columns in the YAML output, or 'Nothing' for infinite. By default, the limit is 80 characters.
--
-- @since 0.10.2.0
setWidth :: Maybe Int -> FormatOptions -> FormatOptions
setWidth w opts = opts { formatOptionsWidth = w }

encode :: MonadResource m => ConduitM Event o m ByteString
encode =
runEmitter alloc close
encode = encodeWith defaultFormatOptions

-- |
-- @since 0.10.2.0
encodeWith :: MonadResource m => FormatOptions -> ConduitM Event o m ByteString
encodeWith opts =
runEmitter opts alloc close
where
alloc emitter = do
fbuf <- mallocForeignPtrBytes bufferSize
Expand All @@ -620,11 +653,20 @@ encode =
fptr <- newForeignPtr_ $ castPtr ptr'
return $ B.fromForeignPtr fptr 0 $ fromIntegral len


encodeFile :: MonadResource m
=> FilePath
-> ConduitM Event o m ()
encodeFile filePath =
bracketP getFile c_fclose $ \file -> runEmitter (alloc file) (\u _ -> return u)
encodeFile = encodeFileWith defaultFormatOptions

-- |
-- @since 0.10.2.0
encodeFileWith :: MonadResource m
=> FormatOptions
-> FilePath
-> ConduitM Event o m ()
encodeFileWith opts filePath =
bracketP getFile c_fclose $ \file -> runEmitter opts (alloc file) (\u _ -> return u)
where
getFile = do
file <- openFile filePath write_flags "w"
Expand All @@ -635,10 +677,11 @@ encodeFile filePath =
alloc file emitter = c_yaml_emitter_set_output_file emitter file

runEmitter :: MonadResource m
=> (Emitter -> IO a) -- ^ alloc
=> FormatOptions
-> (Emitter -> IO a) -- ^ alloc
-> (() -> a -> IO b) -- ^ close
-> ConduitM Event o m b
runEmitter allocI closeI =
runEmitter opts allocI closeI =
bracketP alloc cleanup go
where
alloc = mask_ $ do
Expand All @@ -648,6 +691,9 @@ runEmitter allocI closeI =
#ifndef __NO_UNICODE__
c_yaml_emitter_set_unicode emitter 1
#endif
c_yaml_emitter_set_width emitter $ case formatOptionsWidth opts of
Nothing -> -1 --infinite
Just width -> fromIntegral width
a <- allocI emitter
return (emitter, a)
cleanup (emitter, _) = do
Expand Down

0 comments on commit c16c516

Please sign in to comment.