Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Copypasta FastSet from the attoparsec source (bos just made it private)

  • Loading branch information...
commit d1fddca663f4308de63bc19a776f544793d886e2 1 parent a91ed9b
@gregorycollins gregorycollins authored
View
1  .gitignore
@@ -1,4 +1,5 @@
*~
+cabal-dev
dist/
*.tix
.hpc
View
1  CONTRIBUTORS
@@ -8,3 +8,4 @@ Jacob Stanley <jystic@jystic.com>
Jonas Kramer <jkramer@nex.scrapping.cc>
Jurriën Stutterheim <j.stutterheim@me.com>
Jasper Van der Jeugt <m@jaspervdj.be>
+Bryan O'Sullivan <bos@serpentine.com>
View
5 snap-core.cabal
@@ -128,6 +128,7 @@ Library
other-modules:
Snap.Internal.Instances,
Snap.Internal.Iteratee.BoyerMooreHorspool,
+ Snap.Internal.Parsing.FastSet,
Snap.Internal.Routing,
Snap.Internal.Types,
Snap.Internal.Test.RequestBuilder,
@@ -135,7 +136,7 @@ Library
build-depends:
- attoparsec >= 0.8.0.2 && < 0.10,
+ attoparsec >= 0.8.0.2 && < 0.11,
attoparsec-enumerator >= 0.2.0.3,
base >= 4 && < 5,
base16-bytestring <= 0.2,
@@ -143,7 +144,7 @@ Library
blaze-builder-enumerator >= 0.2 && <0.3,
bytestring,
bytestring-nums,
- case-insensitive >= 0.3 && < 0.4,
+ case-insensitive >= 0.3 && < 0.5,
containers,
deepseq >= 1.1 && <1.3,
directory,
View
4 src/Snap/Internal/Parsing.hs
@@ -9,8 +9,6 @@ import Control.Arrow (first, second)
import Control.Monad
import Data.Attoparsec.Char8 hiding (Done, many)
import qualified Data.Attoparsec.Char8 as Atto
-import Data.Attoparsec.FastSet (FastSet)
-import qualified Data.Attoparsec.FastSet as FS
import Data.Bits
import Data.ByteString.Char8 (ByteString)
import qualified Data.ByteString.Char8 as S
@@ -34,6 +32,8 @@ import Prelude hiding (head, take, takeWhile)
------------------------------------------------------------------------------
import Snap.Internal.Http.Types
+import Snap.Internal.Parsing.FastSet (FastSet)
+import qualified Snap.Internal.Parsing.FastSet as FS
------------------------------------------------------------------------------
View
118 src/Snap/Internal/Parsing/FastSet.hs
@@ -0,0 +1,118 @@
+{-# LANGUAGE BangPatterns, MagicHash #-}
+
+-----------------------------------------------------------------------------
+-- |
+-- Module : Snap.Internal.Parsing.FastSet
+-- Copyright : Bryan O'Sullivan 2008
+-- License : BSD3
+--
+-- Maintainer : bos@serpentine.com
+-- Stability : experimental
+-- Portability : unknown
+--
+-- Fast set membership tests for 'Word8' and 8-bit 'Char' values. The
+-- set representation is unboxed for efficiency. For small sets, we
+-- test for membership using a binary search. For larger sets, we use
+-- a lookup table.
+--
+-- Note: this module copied here from the attoparsec source because it was made
+-- private in version 0.10.
+--
+-----------------------------------------------------------------------------
+module Snap.Internal.Parsing.FastSet
+ (
+ -- * Data type
+ FastSet
+ -- * Construction
+ , fromList
+ , set
+ -- * Lookup
+ , memberChar
+ , memberWord8
+ -- * Debugging
+ , fromSet
+ -- * Handy interface
+ , charClass
+ ) where
+
+import Data.Bits ((.&.), (.|.))
+import Foreign.Storable (peekByteOff, pokeByteOff)
+import GHC.Base (Int(I#), iShiftRA#, narrow8Word#, shiftL#)
+import GHC.Word (Word8(W8#))
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Char8 as B8
+import qualified Data.ByteString.Internal as I
+import qualified Data.ByteString.Unsafe as U
+
+data FastSet = Sorted { fromSet :: !B.ByteString }
+ | Table { fromSet :: !B.ByteString }
+ deriving (Eq, Ord)
+
+instance Show FastSet where
+ show (Sorted s) = "FastSet Sorted " ++ show (B8.unpack s)
+ show (Table _) = "FastSet Table"
+
+-- | The lower bound on the size of a lookup table. We choose this to
+-- balance table density against performance.
+tableCutoff :: Int
+tableCutoff = 8
+
+-- | Create a set.
+set :: B.ByteString -> FastSet
+set s | B.length s < tableCutoff = Sorted . B.sort $ s
+ | otherwise = Table . mkTable $ s
+
+fromList :: [Word8] -> FastSet
+fromList = set . B.pack
+
+data I = I {-# UNPACK #-} !Int {-# UNPACK #-} !Word8
+
+shiftR :: Int -> Int -> Int
+shiftR (I# x#) (I# i#) = I# (x# `iShiftRA#` i#)
+
+shiftL :: Word8 -> Int -> Word8
+shiftL (W8# x#) (I# i#) = W8# (narrow8Word# (x# `shiftL#` i#))
+
+index :: Int -> I
+index i = I (i `shiftR` 3) (1 `shiftL` (i .&. 7))
+{-# INLINE index #-}
+
+-- | Check the set for membership.
+memberWord8 :: Word8 -> FastSet -> Bool
+memberWord8 w (Table t) =
+ let I byte bit = index (fromIntegral w)
+ in U.unsafeIndex t byte .&. bit /= 0
+memberWord8 w (Sorted s) = search 0 (B.length s - 1)
+ where search lo hi
+ | hi < lo = False
+ | otherwise =
+ let mid = (lo + hi) `div` 2
+ in case compare w (U.unsafeIndex s mid) of
+ GT -> search (mid + 1) hi
+ LT -> search lo (mid - 1)
+ _ -> True
+
+-- | Check the set for membership. Only works with 8-bit characters:
+-- characters above code point 255 will give wrong answers.
+memberChar :: Char -> FastSet -> Bool
+memberChar c = memberWord8 (I.c2w c)
+{-# INLINE memberChar #-}
+
+mkTable :: B.ByteString -> B.ByteString
+mkTable s = I.unsafeCreate 32 $ \t -> do
+ _ <- I.memset t 0 32
+ U.unsafeUseAsCStringLen s $ \(p, l) ->
+ let loop n | n == l = return ()
+ | otherwise = do
+ c <- peekByteOff p n :: IO Word8
+ let I byte bit = index (fromIntegral c)
+ prev <- peekByteOff t byte :: IO Word8
+ pokeByteOff t byte (prev .|. bit)
+ loop (n + 1)
+ in loop 0
+
+charClass :: String -> FastSet
+charClass = set . B8.pack . go
+ where go (a:'-':b:xs) = [a..b] ++ go xs
+ go (x:xs) = x : go xs
+ go _ = ""
View
4 test/snap-core-testsuite.cabal
@@ -23,7 +23,7 @@ Executable testsuite
build-depends:
QuickCheck >= 2.3.0.2,
- attoparsec >= 0.8.1 && < 0.10,
+ attoparsec >= 0.8.1 && < 0.11,
attoparsec-enumerator >= 0.2.0.3,
base >= 4 && < 5,
base16-bytestring == 0.1.*,
@@ -31,7 +31,7 @@ Executable testsuite
blaze-builder-enumerator >= 0.2 && <0.3,
bytestring,
bytestring-nums,
- case-insensitive >= 0.3 && < 0.4,
+ case-insensitive >= 0.3 && < 0.5,
cereal == 0.3.*,
containers,
deepseq >= 1.1 && <1.3,
Please sign in to comment.
Something went wrong with that request. Please try again.