Skip to content

Commit

Permalink
Merge branch 'master' into benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
gregorycollins committed May 26, 2012
2 parents 2b5316f + e7f0774 commit 17bbd9c
Show file tree
Hide file tree
Showing 10 changed files with 75 additions and 39 deletions.
13 changes: 8 additions & 5 deletions src/Text/Blaze/Renderer/XmlHtml.hs
Expand Up @@ -3,15 +3,14 @@
--
-- Warning: because this renderer doesn't directly create the output, but
-- rather an XML tree representation, it is impossible to render pre-escaped
-- text. This means that @preEscapedString@ will produce the same output as
-- @string@. This also applies to the functions @preEscapedText@,
-- @preEscapedTextValue@...
-- text.
--
module Text.Blaze.Renderer.XmlHtml (renderHtml, renderHtmlNodes) where

import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import Text.Blaze.Html
import Text.Blaze.Internal
import Text.XmlHtml

Expand Down Expand Up @@ -52,14 +51,18 @@ fromChoiceString EmptyChoiceString = id
renderNodes :: Html -> [Node] -> [Node]
renderNodes = go []
where
go :: [(Text, Text)] -> HtmlM b -> [Node] -> [Node]
go :: [(Text, Text)] -> MarkupM a -> [Node] -> [Node]
go attrs (Parent tag _ _ content) =
(Element (getText tag) attrs (go [] content []) :)
go attrs (CustomParent tag content) =
(Element (fromChoiceStringText tag) attrs (go [] content []) :)
go attrs (Leaf tag _ _) =
(Element (getText tag) attrs [] :)
go attrs (CustomLeaf tag _) =
(Element (fromChoiceStringText tag) attrs [] :)
go attrs (AddAttribute key _ value content) =
go ((getText key, fromChoiceStringText value) : attrs) content
go attrs (AddCustomAttribute key _ value content) =
go attrs (AddCustomAttribute key value content) =
go ((fromChoiceStringText key, fromChoiceStringText value) : attrs)
content
go _ (Content content) = fromChoiceString content
Expand Down
4 changes: 3 additions & 1 deletion src/Text/XmlHtml.hs
Expand Up @@ -56,7 +56,9 @@ module Text.XmlHtml (
parseHTML,

-- * Rendering
render
render,
XML.renderXmlFragment,
HTML.renderHtmlFragment
) where

------------------------------------------------------------------------------
Expand Down
5 changes: 3 additions & 2 deletions src/Text/XmlHtml/HTML/Parse.hs
Expand Up @@ -239,9 +239,10 @@ attrValue = quotedAttrValue <|> unquotedAttrValue
attribute :: Parser (Text, Text)
attribute = do
n <- attrName
_ <- optional XML.whiteSpace
v <- optional $ do
_ <- P.char '='
_ <- P.try $ do
_ <- optional XML.whiteSpace
P.char '='
_ <- optional XML.whiteSpace
attrValue
return $ maybe (n,"") (n,) v
Expand Down
11 changes: 11 additions & 0 deletions src/Text/XmlHtml/HTML/Render.hs
Expand Up @@ -230,6 +230,17 @@ ambiguousAmpersand _ = False


------------------------------------------------------------------------------
-- | Function for rendering HTML nodes without the overhead of creating a
-- Document structure.
renderHtmlFragment :: Encoding -> [Node] -> Builder
renderHtmlFragment _ [] = mempty
renderHtmlFragment e (n:ns) =
firstNode e n `mappend` (mconcat $ map (node e) ns)


------------------------------------------------------------------------------
-- | HTML allows & so long as it is not "ambiguous" (i.e., looks like an
-- entity). So we have a special case for that.
escaped :: [Char] -> Encoding -> Text -> Builder
escaped _ _ "" = mempty
escaped bad e t =
Expand Down
29 changes: 14 additions & 15 deletions src/Text/XmlHtml/TextParser.hs
Expand Up @@ -2,7 +2,19 @@
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}

module Text.XmlHtml.TextParser where
module Text.XmlHtml.TextParser
( guessEncoding
, parse
, isValidChar
, parseText
, takeWhile0
, takeWhile1
, text
, scanText
, ScanState(..)

, module Text.Parsec.Text
) where

import Control.Applicative
import Data.Char
Expand All @@ -12,13 +24,12 @@ import Text.XmlHtml.Common
import Data.Text (Text)
import qualified Data.Text as T

import Text.Parsec (Parsec)
import qualified Text.Parsec as P
import Text.Parsec.Text

import Data.ByteString (ByteString)
import qualified Data.ByteString as B


------------------------------------------------------------------------------
-- | Get an initial guess at document encoding from the byte order mark. If
-- the mark doesn't exist, guess UTF-8. Otherwise, guess according to the
Expand All @@ -30,18 +41,6 @@ guessEncoding b
| B.take 2 b == B.pack [ 0xFF, 0xFE ] = (UTF16LE, B.drop 2 b)
| otherwise = (UTF8, b)


------------------------------------------------------------------------------
-- | Specialized type for the parsers we use here.
type Parser = Parsec Text ()


------------------------------------------------------------------------------
-- An (orphaned) instance for parsing Text with Parsec.
instance (Monad m) => P.Stream T.Text m Char where
uncons = return . T.uncons


------------------------------------------------------------------------------
parse :: (Encoding -> Parser a) -> String -> ByteString -> Either String a
parse p src b = let (e, b') = guessEncoding b
Expand Down
9 changes: 9 additions & 0 deletions src/Text/XmlHtml/XML/Render.hs
Expand Up @@ -25,6 +25,15 @@ render e dt ns = byteOrder
`mappend` (mconcat $ map (node e) (tail ns))


------------------------------------------------------------------------------
-- | Function for rendering XML nodes without the overhead of creating a
-- Document structure.
renderXmlFragment :: Encoding -> [Node] -> Builder
renderXmlFragment _ [] = mempty
renderXmlFragment e (n:ns) =
firstNode e n `mappend` (mconcat $ map (node e) ns)


------------------------------------------------------------------------------
xmlDecl :: Encoding -> Builder
xmlDecl e = fromText e "<?xml version=\"1.0\" encoding=\""
Expand Down
1 change: 1 addition & 0 deletions test/resources/oasis/p44fail3.xml.html.correct
@@ -0,0 +1 @@
Dashes are allowed in HTML attribute names.
7 changes: 7 additions & 0 deletions test/suite/Text/XmlHtml/Tests.hs
Expand Up @@ -14,8 +14,10 @@ import Test.Framework
import Test.Framework.Providers.HUnit
import Test.HUnit hiding (Test, Node)
import Text.Blaze
import Text.Blaze.Html
import qualified Text.Blaze.Html5 as H
import qualified Text.Blaze.Html5.Attributes as A
import qualified Text.Blaze.Internal as H
import Text.Blaze.Renderer.XmlHtml
import Text.XmlHtml
import Text.XmlHtml.CursorTests
Expand Down Expand Up @@ -363,6 +365,7 @@ htmlParsingQuirkTests = [
testIt "laxAttrName " laxAttrName,
testCase "badAttrName " badAttrName,
testIt "emptyAttr " emptyAttr,
testIt "emptyAttr2 " emptyAttr2,
testIt "unquotedAttr " unquotedAttr,
testIt "laxAttrVal " laxAttrVal,
testIt "ampersandInText " ampersandInText,
Expand Down Expand Up @@ -439,6 +442,10 @@ emptyAttr :: Bool
emptyAttr = parseHTML "" "<test attr></test>"
== Right (HtmlDocument UTF8 Nothing [Element "test" [("attr", "")] []])

emptyAttr2 :: Bool
emptyAttr2 = parseHTML "" "<div itemscope itemtype=\"type\"></div>"
== Right (HtmlDocument UTF8 Nothing [Element "div" [("itemscope", ""), ("itemtype", "type")] []])

unquotedAttr :: Bool
unquotedAttr = parseHTML "" "<test attr=you&amp;me></test>"
== Right (HtmlDocument UTF8 Nothing [Element "test" [("attr", "you&me")] []])
Expand Down
23 changes: 12 additions & 11 deletions test/xmlhtml-testsuite.cabal
Expand Up @@ -10,19 +10,20 @@ Executable testsuite
build-depends:
HUnit == 1.2.*,
QuickCheck >= 2.3.0.2,
attoparsec >= 0.10 && < 0.11,
attoparsec >= 0.10 && < 0.11,
base == 4.*,
blaze-builder >= 0.2 && < 0.4,
blaze-html >= 0.3.2 && < 0.5,
blaze-builder >= 0.2 && <0.4,
blaze-html >= 0.5 && <0.6,
blaze-markup >= 0.5 && <0.6,
bytestring == 0.9.*,
containers >= 0.3 && < 0.5,
directory >= 1.0 && < 1.3,
parsec >= 3.0 && < 3.2,
test-framework >= 0.3.1 && < 0.4,
test-framework-hunit >= 0.2.5 && < 0.3,
test-framework-quickcheck2 >= 0.2.6 && < 0.3,
text >= 0.11 && < 0.12,
unordered-containers >= 0.1.4 && < 0.2
directory >= 1.0 && < 1.3,
containers >= 0.3 && <0.5,
parsec >= 3.1.2 && <3.2,
test-framework >= 0.6 && <0.7,
test-framework-hunit >= 0.2.7 && <0.3,
test-framework-quickcheck2 >= 0.2.12.1 && <0.3,
text >= 0.11 && <0.12,
unordered-containers >= 0.1.4 && <0.3

ghc-options: -O2 -Wall -fhpc -fwarn-tabs -funbox-strict-fields -threaded
-fno-warn-unused-do-bind
Expand Down
12 changes: 7 additions & 5 deletions xmlhtml.cabal
@@ -1,8 +1,8 @@
Name: xmlhtml
Version: 0.2.0.0
Version: 0.2.0.1
Synopsis: XML parser and renderer with HTML 5 quirks mode
Description: Contains renderers and parsers for both XML and HTML 5
document fragments, which share data structures wo that
document fragments, which share data structures so that
it's easy to work with both. Document fragments are bits
of documents, which are not constrained by some of the
high-level structure rules (in particular, they may
Expand Down Expand Up @@ -727,6 +727,7 @@ Extra-source-files:
test/resources/oasis/p46pass1.xml,
test/resources/oasis/p02fail3.xml,
test/resources/oasis/p44fail3.xml,
test/resources/oasis/p44fail3.xml.html.correct,
test/resources/oasis/p14pass1.xml,
test/resources/oasis/p03fail24.xml,
test/resources/oasis/p10pass1.xml,
Expand Down Expand Up @@ -821,12 +822,13 @@ Library
Build-depends: base >= 4 && < 5,
attoparsec >= 0.10 && < 0.11,
blaze-builder >= 0.2 && < 0.4,
blaze-html >= 0.3.2 && < 0.5,
blaze-html >= 0.5 && < 0.6,
blaze-markup >= 0.5 && < 0.6,
bytestring >= 0.9 && < 0.10,
containers >= 0.3 && < 0.5,
parsec >= 3.0 && < 3.2,
parsec >= 3.1.2 && < 3.2,
text >= 0.11 && < 0.12,
unordered-containers >= 0.1.4 && < 0.2
unordered-containers >= 0.1.4 && < 0.3

extensions:
OverloadedStrings,
Expand Down

0 comments on commit 17bbd9c

Please sign in to comment.