Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
tree: ee349f001a
Fetching contributors…

Octocat-spinner-32-eaf2f5

Cannot retrieve contributors at this time

file 198 lines (168 sloc) 6.701 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
{-# LANGUAGE Rank2Types #-}

------------------------------------------------------------------------
-- |
-- Module : Hyena.Parser
-- Copyright : (c) Johan Tibell 2008
-- License : BSD3-style (see LICENSE)
--
-- Maintainer : johan.tibell@gmail.com
-- Stability : experimental
-- Portability : portable
--
-- A resumable LL(1) parser combinator library for 'ByteString's.
--
------------------------------------------------------------------------

module Hyena.Parser
    (
      -- * The Parser type
      Parser,
      Result(..),
      runParser,

      -- * Primitive parsers
      satisfies,
      byte,
      bytes,

      module Control.Applicative
    ) where

import Control.Applicative
import qualified Data.ByteString as S
import qualified Data.ByteString.Internal as S
import Data.Int (Int64)
import Data.Word (Word8)
import Foreign.ForeignPtr (withForeignPtr)
import Foreign.Ptr (Ptr, plusPtr)
import Foreign.Storable (peekByteOff)
import Prelude hiding (fail, rem, succ)
import Text.Show.Functions ()

-- ---------------------------------------------------------------------
-- The Parser type

-- | The parse state.
data S r = S
    {-# UNPACK #-} !S.ByteString
    {-# UNPACK #-} !Int64
    {-# UNPACK #-} !Bool
    {-# UNPACK #-} !(S r -> Result r)
         deriving Show

-- | Set the failure continuation.
setFail :: S r -> (S r -> Result r) -> S r
setFail (S bs pos eof _) = S bs pos eof
{-# INLINE setFail #-}

-- | A parse either succeeds, fails or returns a suspension with which
-- the parsing can be resumed.
data Result a = Finished a S.ByteString
              -- ^ Parsing succeeded and produced a value of type
              -- @a@. The returned 'S.ByteString' is the remaining
              -- unconsumed input.
              | Failed Int64
              -- ^ Parsing failed at the given position. Either
              -- because the parser didn't match the input or because
              -- an unexpected end of input was reached during
              -- parsing.
              | Partial (Maybe S.ByteString -> Result a)
              -- ^ The parsing needs more input to continue. Pass in
              -- @Just input@ to continue parsing and @Nothing@ to
              -- signal end of input. If @Nothing@ is passed the
              -- 'Result' is either 'Finished' or 'Failed'.
                deriving Show

-- | A parser takes a parse state, a success continuation and returns
-- a 'Result'.
newtype Parser a = Parser
    { unParser :: forall r. S r -> (a -> S r -> Result r) -> Result r }

-- ---------------------------------------------------------------------
-- Instances

instance Functor Parser where
    fmap f p = Parser $ \s succ -> unParser p s (succ . f)
    {-# INLINE fmap #-}

instance Applicative Parser where
    pure a = Parser $ \s succ -> succ a s
    {-# INLINE pure #-}

    p <*> p' = Parser $ \s succ ->
               let succ' f s' = unParser p' s' (succ . f)
               in unParser p s succ'
    {-# INLINE (<*>) #-}

instance Alternative Parser where
    empty = Parser $ \s@(S _ _ _ fail) _ -> fail s
    {-# INLINE empty #-}

    p <|> p' = Parser $ \s@(S _ _ _ fail) succ ->
               let fail' s' = unParser p' (setFail s' fail) succ
               in unParser p (setFail s fail') succ
    {-# INLINE (<|>) #-}

-- ---------------------------------------------------------------------
-- Running a parser

initState :: S.ByteString -> S r
initState bs = S bs 0 False failed
{-# INLINE initState #-}

-- | This is the final continuation that turns a successful parse into
-- a 'Result'.
finished :: a -> S r -> Result a
finished v (S bs _ _ _) = Finished v bs

-- | This is the final continuation that turns an unsuccessful parse
-- into a 'Result'.
failed :: S r -> Result a
failed (S _ pos _ _) = Failed pos

-- | TODO: Write documentation.
runParser :: Parser a -> S.ByteString -> Result a
runParser p bs = unParser p (initState bs) finished

-- ---------------------------------------------------------------------
-- Primitive parsers

-- | The parser @satisfies p@ succeeds for any byte for which the
-- supplied function @p@ returns 'True'. Returns the byte that is
-- actually parsed.
satisfies :: (Word8 -> Bool) -> Parser Word8
satisfies p =
    Parser $ \s@(S bs pos eof fail) succ ->
        case S.uncons bs of
          Just (b, bs') -> if p b
                           then succ b (S bs' (pos + 1) eof failed)
                           else fail s
          Nothing -> if eof
                           then fail s
                           else Partial $ \x ->
                               case x of
                                 Just bs' -> retry (S bs' pos eof fail)
                                 Nothing -> fail (S bs pos True fail)
            where retry s' = unParser (satisfies p) s' succ

-- | @byte b@ parses a single byte @b@. Returns the parsed byte
-- (i.e. @b@).
byte :: Word8 -> Parser Word8
byte b = satisfies (== b)

-- TODO: Check when we can let go of the failure continuation.

-- | @bytes bs@ parses a sequence of bytes @bs@. Returns the parsed
-- bytes (i.e. @bs@).
bytes :: S.ByteString -> Parser S.ByteString
bytes bs =
    Parser $ \(S bs' pos eof fail) succ ->
        let go rem inp
                | len == remLen =
                    succ bs (S (S.drop len inp) newPos eof failed)
                | len < remLen && inpLen >= remLen =
                    fail (S (S.drop len inp) newPos eof fail)
                | otherwise =
                    Partial $ \x ->
                        case x of
                          Just bs'' -> go (S.drop len rem) bs''
                          Nothing -> fail (S S.empty newPos True fail)
                where
                  len = commonPrefixLen rem inp
                  remLen = S.length rem
                  newPos = pos + fromIntegral len
                  inpLen = S.length inp
        in go bs bs'

-- ---------------------------------------------------------------------
-- Internal utilities

-- | /O(n)/ @commonPrefixLen xs ys@ returns the length of the longest
-- common prefix of @xs@ and @ys@.
commonPrefixLen :: S.ByteString -> S.ByteString -> Int
commonPrefixLen (S.PS fp1 off1 len1) (S.PS fp2 off2 len2) =
    S.inlinePerformIO $
     withForeignPtr fp1 $ \p1 ->
         withForeignPtr fp2 $ \p2 ->
             lcp (p1 `plusPtr` off1) (p2 `plusPtr` off2) 0 len1 len2

lcp :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> Int-> IO Int
lcp p1 p2 n len1 len2
    | n == len1 = return len1
    | n == len2 = return len2
    | otherwise = do
        a <- peekByteOff p1 n :: IO Word8
        b <- peekByteOff p2 n
        if a == b then lcp p1 p2 (n + 1) len1 len2 else return n
Something went wrong with that request. Please try again.