diff --git a/src/Text/Blaze/Renderer/XmlHtml.hs b/src/Text/Blaze/Renderer/XmlHtml.hs index 6d4c906..0e18832 100644 --- a/src/Text/Blaze/Renderer/XmlHtml.hs +++ b/src/Text/Blaze/Renderer/XmlHtml.hs @@ -3,15 +3,14 @@ -- -- Warning: because this renderer doesn't directly create the output, but -- rather an XML tree representation, it is impossible to render pre-escaped --- text. This means that @preEscapedString@ will produce the same output as --- @string@. This also applies to the functions @preEscapedText@, --- @preEscapedTextValue@... +-- text. -- module Text.Blaze.Renderer.XmlHtml (renderHtml, renderHtmlNodes) where import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.Encoding as T +import Text.Blaze.Html import Text.Blaze.Internal import Text.XmlHtml @@ -52,14 +51,18 @@ fromChoiceString EmptyChoiceString = id renderNodes :: Html -> [Node] -> [Node] renderNodes = go [] where - go :: [(Text, Text)] -> HtmlM b -> [Node] -> [Node] + go :: [(Text, Text)] -> MarkupM a -> [Node] -> [Node] go attrs (Parent tag _ _ content) = (Element (getText tag) attrs (go [] content []) :) + go attrs (CustomParent tag content) = + (Element (fromChoiceStringText tag) attrs (go [] content []) :) go attrs (Leaf tag _ _) = (Element (getText tag) attrs [] :) + go attrs (CustomLeaf tag _) = + (Element (fromChoiceStringText tag) attrs [] :) go attrs (AddAttribute key _ value content) = go ((getText key, fromChoiceStringText value) : attrs) content - go attrs (AddCustomAttribute key _ value content) = + go attrs (AddCustomAttribute key value content) = go ((fromChoiceStringText key, fromChoiceStringText value) : attrs) content go _ (Content content) = fromChoiceString content diff --git a/src/Text/XmlHtml.hs b/src/Text/XmlHtml.hs index 9be2881..325bede 100644 --- a/src/Text/XmlHtml.hs +++ b/src/Text/XmlHtml.hs @@ -56,7 +56,9 @@ module Text.XmlHtml ( parseHTML, -- * Rendering - render + render, + XML.renderXmlFragment, + HTML.renderHtmlFragment ) where ------------------------------------------------------------------------------ diff --git a/src/Text/XmlHtml/HTML/Parse.hs b/src/Text/XmlHtml/HTML/Parse.hs index 571e170..93601f5 100644 --- a/src/Text/XmlHtml/HTML/Parse.hs +++ b/src/Text/XmlHtml/HTML/Parse.hs @@ -239,9 +239,10 @@ attrValue = quotedAttrValue <|> unquotedAttrValue attribute :: Parser (Text, Text) attribute = do n <- attrName - _ <- optional XML.whiteSpace v <- optional $ do - _ <- P.char '=' + _ <- P.try $ do + _ <- optional XML.whiteSpace + P.char '=' _ <- optional XML.whiteSpace attrValue return $ maybe (n,"") (n,) v diff --git a/src/Text/XmlHtml/HTML/Render.hs b/src/Text/XmlHtml/HTML/Render.hs index 0b49502..3e57ded 100644 --- a/src/Text/XmlHtml/HTML/Render.hs +++ b/src/Text/XmlHtml/HTML/Render.hs @@ -230,6 +230,17 @@ ambiguousAmpersand _ = False ------------------------------------------------------------------------------ +-- | Function for rendering HTML nodes without the overhead of creating a +-- Document structure. +renderHtmlFragment :: Encoding -> [Node] -> Builder +renderHtmlFragment _ [] = mempty +renderHtmlFragment e (n:ns) = + firstNode e n `mappend` (mconcat $ map (node e) ns) + + +------------------------------------------------------------------------------ +-- | HTML allows & so long as it is not "ambiguous" (i.e., looks like an +-- entity). So we have a special case for that. escaped :: [Char] -> Encoding -> Text -> Builder escaped _ _ "" = mempty escaped bad e t = diff --git a/src/Text/XmlHtml/TextParser.hs b/src/Text/XmlHtml/TextParser.hs index 676fb21..7dc0141 100644 --- a/src/Text/XmlHtml/TextParser.hs +++ b/src/Text/XmlHtml/TextParser.hs @@ -2,7 +2,19 @@ {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} -module Text.XmlHtml.TextParser where +module Text.XmlHtml.TextParser +( guessEncoding +, parse +, isValidChar +, parseText +, takeWhile0 +, takeWhile1 +, text +, scanText +, ScanState(..) + +, module Text.Parsec.Text +) where import Control.Applicative import Data.Char @@ -12,13 +24,12 @@ import Text.XmlHtml.Common import Data.Text (Text) import qualified Data.Text as T -import Text.Parsec (Parsec) import qualified Text.Parsec as P +import Text.Parsec.Text import Data.ByteString (ByteString) import qualified Data.ByteString as B - ------------------------------------------------------------------------------ -- | Get an initial guess at document encoding from the byte order mark. If -- the mark doesn't exist, guess UTF-8. Otherwise, guess according to the @@ -30,18 +41,6 @@ guessEncoding b | B.take 2 b == B.pack [ 0xFF, 0xFE ] = (UTF16LE, B.drop 2 b) | otherwise = (UTF8, b) - ------------------------------------------------------------------------------- --- | Specialized type for the parsers we use here. -type Parser = Parsec Text () - - ------------------------------------------------------------------------------- --- An (orphaned) instance for parsing Text with Parsec. -instance (Monad m) => P.Stream T.Text m Char where - uncons = return . T.uncons - - ------------------------------------------------------------------------------ parse :: (Encoding -> Parser a) -> String -> ByteString -> Either String a parse p src b = let (e, b') = guessEncoding b diff --git a/src/Text/XmlHtml/XML/Render.hs b/src/Text/XmlHtml/XML/Render.hs index 848fe85..35395d1 100644 --- a/src/Text/XmlHtml/XML/Render.hs +++ b/src/Text/XmlHtml/XML/Render.hs @@ -25,6 +25,15 @@ render e dt ns = byteOrder `mappend` (mconcat $ map (node e) (tail ns)) +------------------------------------------------------------------------------ +-- | Function for rendering XML nodes without the overhead of creating a +-- Document structure. +renderXmlFragment :: Encoding -> [Node] -> Builder +renderXmlFragment _ [] = mempty +renderXmlFragment e (n:ns) = + firstNode e n `mappend` (mconcat $ map (node e) ns) + + ------------------------------------------------------------------------------ xmlDecl :: Encoding -> Builder xmlDecl e = fromText e "" == Right (HtmlDocument UTF8 Nothing [Element "test" [("attr", "")] []]) +emptyAttr2 :: Bool +emptyAttr2 = parseHTML "" "
" + == Right (HtmlDocument UTF8 Nothing [Element "div" [("itemscope", ""), ("itemtype", "type")] []]) + unquotedAttr :: Bool unquotedAttr = parseHTML "" "" == Right (HtmlDocument UTF8 Nothing [Element "test" [("attr", "you&me")] []]) diff --git a/test/xmlhtml-testsuite.cabal b/test/xmlhtml-testsuite.cabal index bf8c6a0..b65e05d 100644 --- a/test/xmlhtml-testsuite.cabal +++ b/test/xmlhtml-testsuite.cabal @@ -10,19 +10,20 @@ Executable testsuite build-depends: HUnit == 1.2.*, QuickCheck >= 2.3.0.2, - attoparsec >= 0.10 && < 0.11, + attoparsec >= 0.10 && < 0.11, base == 4.*, - blaze-builder >= 0.2 && < 0.4, - blaze-html >= 0.3.2 && < 0.5, + blaze-builder >= 0.2 && <0.4, + blaze-html >= 0.5 && <0.6, + blaze-markup >= 0.5 && <0.6, bytestring == 0.9.*, - containers >= 0.3 && < 0.5, - directory >= 1.0 && < 1.3, - parsec >= 3.0 && < 3.2, - test-framework >= 0.3.1 && < 0.4, - test-framework-hunit >= 0.2.5 && < 0.3, - test-framework-quickcheck2 >= 0.2.6 && < 0.3, - text >= 0.11 && < 0.12, - unordered-containers >= 0.1.4 && < 0.2 + directory >= 1.0 && < 1.3, + containers >= 0.3 && <0.5, + parsec >= 3.1.2 && <3.2, + test-framework >= 0.6 && <0.7, + test-framework-hunit >= 0.2.7 && <0.3, + test-framework-quickcheck2 >= 0.2.12.1 && <0.3, + text >= 0.11 && <0.12, + unordered-containers >= 0.1.4 && <0.3 ghc-options: -O2 -Wall -fhpc -fwarn-tabs -funbox-strict-fields -threaded -fno-warn-unused-do-bind diff --git a/xmlhtml.cabal b/xmlhtml.cabal index 54af1c5..c2afec1 100644 --- a/xmlhtml.cabal +++ b/xmlhtml.cabal @@ -1,8 +1,8 @@ Name: xmlhtml -Version: 0.2.0.0 +Version: 0.2.0.1 Synopsis: XML parser and renderer with HTML 5 quirks mode Description: Contains renderers and parsers for both XML and HTML 5 - document fragments, which share data structures wo that + document fragments, which share data structures so that it's easy to work with both. Document fragments are bits of documents, which are not constrained by some of the high-level structure rules (in particular, they may @@ -727,6 +727,7 @@ Extra-source-files: test/resources/oasis/p46pass1.xml, test/resources/oasis/p02fail3.xml, test/resources/oasis/p44fail3.xml, + test/resources/oasis/p44fail3.xml.html.correct, test/resources/oasis/p14pass1.xml, test/resources/oasis/p03fail24.xml, test/resources/oasis/p10pass1.xml, @@ -821,12 +822,13 @@ Library Build-depends: base >= 4 && < 5, attoparsec >= 0.10 && < 0.11, blaze-builder >= 0.2 && < 0.4, - blaze-html >= 0.3.2 && < 0.5, + blaze-html >= 0.5 && < 0.6, + blaze-markup >= 0.5 && < 0.6, bytestring >= 0.9 && < 0.10, containers >= 0.3 && < 0.5, - parsec >= 3.0 && < 3.2, + parsec >= 3.1.2 && < 3.2, text >= 0.11 && < 0.12, - unordered-containers >= 0.1.4 && < 0.2 + unordered-containers >= 0.1.4 && < 0.3 extensions: OverloadedStrings,