Skip to content
Browse files

Optimized the bytes parser.

  • Loading branch information...
1 parent d3b1450 commit e9d29a7814ce5f7e9a4b595f8e76acb31e34c88f @tibbe committed Jul 19, 2008
Showing with 59 additions and 21 deletions.
  1. +7 −7 Hyena/Http.hs
  2. +50 −13 Hyena/Parser.hs
  3. +2 −1 hyena.cabal
View
14 Hyena/Http.hs
@@ -36,6 +36,7 @@ module Hyena.Http
import Control.Monad (forM_)
import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as C (map, pack, unpack)
+import Data.ByteString.Parser
import Data.Char (chr, digitToInt, isAlpha, isDigit, isSpace, ord, toLower)
import Data.Either (either)
import qualified Data.Map as M
@@ -46,7 +47,6 @@ import Network.Socket.ByteString (send)
import Network.Wai (Enumerator, Headers, Method(..))
import Hyena.BufferedSocket
-import Hyena.Parser
-- ---------------------------------------------------------------------
-- Request and response data types
@@ -173,9 +173,9 @@ c2w = fromIntegral . ord
-- | Parsers for different tokens in an HTTP request.
sp, digit, letter, nonSpace, notEOL :: Parser Word8
sp = byte $ c2w ' '
-digit = satisfy (isDigit . chr . fromIntegral)
-letter = satisfy (isAlpha . chr . fromIntegral)
-nonSpace = satisfy (not . isSpace . chr . fromIntegral)
+digit = satisfies (isDigit . chr . fromIntegral)
+letter = satisfies (isAlpha . chr . fromIntegral)
+nonSpace = satisfies (not . isSpace . chr . fromIntegral)
notEOL = noneOf $ map c2w "\r\n"
-- | Parser for request \"\r\n\" sequence.
@@ -185,8 +185,8 @@ crlf = bytes $ C.pack "\r\n"
-- | Parser that recognize if the current byte is an element of the
-- given sequence of bytes.
oneOf, noneOf :: [Word8] -> Parser Word8
-oneOf bs = satisfy (`elem` bs)
-noneOf bs = satisfy (`notElem` bs)
+oneOf bs = satisfies (`elem` bs)
+noneOf bs = satisfies (`notElem` bs)
-- | Parser for zero or more spaces.
spaces :: Parser [Word8]
@@ -259,7 +259,7 @@ fieldChars = fmap S.pack $ many fieldChar
-- | Parser for one header field byte.
fieldChar :: Parser Word8
-fieldChar = satisfy isFieldChar
+fieldChar = satisfies isFieldChar
where
isFieldChar b = (isDigit $ chr $ fromIntegral b) ||
(isAlpha $ chr $ fromIntegral b) ||
View
63 Hyena/Parser.hs
@@ -10,7 +10,7 @@
-- Stability : experimental
-- Portability : portable
--
--- An incremental LL(1) parser combinator library.
+-- A resumable LL(1) parser combinator library for 'ByteString's.
--
------------------------------------------------------------------------
@@ -22,7 +22,7 @@ module Hyena.Parser
runParser,
-- * Primitive parsers
- satisfy,
+ satisfies,
byte,
bytes,
@@ -31,9 +31,13 @@ module Hyena.Parser
import Control.Applicative
import qualified Data.ByteString as S
+import qualified Data.ByteString.Internal as S
import Data.Int (Int64)
import Data.Word (Word8)
-import Prelude hiding (fail, succ)
+import Foreign.ForeignPtr (withForeignPtr)
+import Foreign.Ptr (Ptr, plusPtr)
+import Foreign.Storable (peekByteOff)
+import Prelude hiding (fail, rem, succ)
-- ---------------------------------------------------------------------
-- The Parser type
@@ -128,11 +132,11 @@ runParser p bs = toResult $ unParser p (initState bs) finished failed
-- ---------------------------------------------------------------------
-- Primitive parsers
--- | The parser @satisfy p@ succeeds for any byte for which the
+-- | The parser @satisfies p@ succeeds for any byte for which the
-- supplied function @p@ returns 'True'. Returns the byte that is
-- actually parsed.
-satisfy :: (Word8 -> Bool) -> Parser Word8
-satisfy p =
+satisfies :: (Word8 -> Bool) -> Parser Word8
+satisfies p =
Parser $ \s@(S bs pos eof) succ fail ->
case S.uncons bs of
Just (b, bs') -> if p b
@@ -144,19 +148,52 @@ satisfy p =
case x of
Just bs' -> retry (S bs' pos eof)
Nothing -> fail (S bs pos True)
- where retry s' = unParser (satisfy p) s' succ fail
+ where retry s' = unParser (satisfies p) s' succ fail
-- | @byte b@ parses a single byte @b@. Returns the parsed byte
-- (i.e. @b@).
byte :: Word8 -> Parser Word8
-byte b = satisfy (== b)
+byte b = satisfies (== b)
-- | @bytes bs@ parses a sequence of bytes @bs@. Returns the parsed
-- bytes (i.e. @bs@).
bytes :: S.ByteString -> Parser S.ByteString
-bytes = fmap S.pack . go
- where
- go bs = case S.uncons bs of
- Just (b, bs') -> liftA2 (:) (byte b) (go bs')
- Nothing -> pure []
+bytes bs =
+ Parser $ \(S bs' pos eof) succ fail ->
+ let go rem inp
+ | len == remLen =
+ succ bs (S (S.drop len inp) newPos eof)
+ | len < remLen && inpLen >= remLen =
+ fail (S (S.drop len inp) newPos eof)
+ | otherwise =
+ IPartial $ \x ->
+ case x of
+ Just bs'' -> go (S.drop len rem) bs''
+ Nothing -> fail (S (S.empty) newPos True)
+ where
+ len = commonPrefixLen rem inp
+ remLen = S.length rem
+ newPos = pos + fromIntegral len
+ inpLen = S.length inp
+ in go bs bs'
+-- ---------------------------------------------------------------------
+-- Internal utilities
+
+-- | /O(n)/ @commonPrefixLen xs ys@ returns the length of the longest
+-- common prefix of @xs@ and @ys@.
+commonPrefixLen :: S.ByteString -> S.ByteString -> Int
+commonPrefixLen (S.PS fp1 off1 len1) (S.PS fp2 off2 len2) =
+ S.inlinePerformIO $
+ withForeignPtr fp1 $ \p1 ->
+ withForeignPtr fp2 $ \p2 ->
+ lcp (p1 `plusPtr` off1) (p2 `plusPtr` off2) 0 len1 len2
+
+lcp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Int
+lcp p1 p2 n len1 len2
+ | n == len1 = return len1
+ | n == len2 = return len2
+ | otherwise = do
+ a <- peekByteOff p1 n :: IO Word8
+ b <- peekByteOff p2 n
+ if a == b then lcp p1 p2 (n + 1) len1 len2 else return n
View
3 hyena.cabal
@@ -25,7 +25,8 @@ library
filepath
else
build-depends: base >= 2.1 && < 3
- build-depends: network >= 2.1 && < 2.3,
+ build-depends: bsparser >= 0.1 && < 0.2,
+ network >= 2.1 && < 2.3,
mtl >= 1 && < 1.2,
network-bytestring >= 0.1.1.2 && < 0.2,
unix

0 comments on commit e9d29a7

Please sign in to comment.
Something went wrong with that request. Please try again.