Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

initial commit

  • Loading branch information...
commit 5c4dc33d0fae43b882b2a15cb5647310b8c04494 0 parents
Simon Meier authored

Showing 78 changed files with 101,284 additions and 0 deletions. Show diff stats Hide diff stats

  1. +5 0 .darcs-boring
  2. +3 0  .gitignore
  3. +2,025 0 Data/ByteString.hs
  4. +1,019 0 Data/ByteString/Char8.hs
  5. +593 0 Data/ByteString/Internal.hs
  6. +1,345 0 Data/ByteString/Lazy.hs
  7. +451 0 Data/ByteString/Lazy/Builder.hs
  8. +361 0 Data/ByteString/Lazy/Builder/ASCII.hs
  9. +804 0 Data/ByteString/Lazy/Builder/BasicEncoding.hs
  10. +287 0 Data/ByteString/Lazy/Builder/BasicEncoding/ASCII.hs
  11. +336 0 Data/ByteString/Lazy/Builder/BasicEncoding/Binary.hs
  12. +909 0 Data/ByteString/Lazy/Builder/BasicEncoding/Extras.hs
  13. +353 0 Data/ByteString/Lazy/Builder/BasicEncoding/Internal.hs
  14. +116 0 Data/ByteString/Lazy/Builder/BasicEncoding/Internal/Base16.hs
  15. +55 0 Data/ByteString/Lazy/Builder/BasicEncoding/Internal/Floating.hs
  16. +106 0 Data/ByteString/Lazy/Builder/BasicEncoding/Internal/UncheckedShifts.hs
  17. +125 0 Data/ByteString/Lazy/Builder/Extras.hs
  18. +1,005 0 Data/ByteString/Lazy/Builder/Internal.hs
  19. +854 0 Data/ByteString/Lazy/Builder/Internal.hs-darcs-backup0
  20. +878 0 Data/ByteString/Lazy/Char8.hs
  21. +253 0 Data/ByteString/Lazy/Internal.hs
  22. +308 0 Data/ByteString/Unsafe.hs
  23. +30 0 LICENSE
  24. +205 0 README
  25. +2 0  Setup.hs
  26. +71 0 TODO
  27. +263 0 bench/BenchAll.hs
  28. +118 0 bench/BoundsCheckFusion.hs
  29. +614 0 bench/CSV.hs
  30. +30 0 bench/LICENSE
  31. +72 0 bench/bench-bytestring-builder.cabal
  32. +264 0 bytestring.cabal
  33. +82 0 cbits/fpstring.c
  34. +215 0 cbits/itoa.c
  35. +6 0 include/fpstring.h
  36. +1 0  prologue.txt
  37. +334 0 tests/Bench.hs
  38. +145 0 tests/BenchUtils.hs
  39. +70 0 tests/FusionBench.hs
  40. +312 0 tests/FusionProperties.hs
  41. +88 0 tests/Hash.hs
  42. +213 0 tests/Makefile
  43. +2,467 0 tests/Properties.hs
  44. +203 0 tests/QuickCheckUtils.hs
  45. +32 0 tests/Rules.hs
  46. +67 0 tests/TestFramework.hs
  47. +27 0 tests/Units.hs
  48. +38,617 0 tests/Usr.Dict.Words
  49. +70 0 tests/Words.hs
  50. +339 0 tests/builder/Data/ByteString/Lazy/Builder/BasicEncoding/TestUtils.hs
  51. +337 0 tests/builder/Data/ByteString/Lazy/Builder/BasicEncoding/Tests.hs
  52. +641 0 tests/builder/Data/ByteString/Lazy/Builder/Tests.hs
  53. +21 0 tests/builder/TestSuite.hs
  54. +3,925 0 tests/data
  55. +23 0 tests/down-fuse.hs
  56. +11 0 tests/edit.hs
  57. +28 0 tests/fuse.hs
  58. +24 0 tests/groupby.hs
  59. +24 0 tests/iavor.hs
  60. +21 0 tests/inline.hs
  61. +65 0 tests/lazy-hclose.hs
  62. +24 0 tests/lazybuild.hs
  63. +24 0 tests/lazybuildcons.hs
  64. +9 0 tests/lazyio.hs
  65. +9 0 tests/lazylines.hs
  66. +17 0 tests/lazyread.hs
  67. +20 0 tests/letter_frequency.hs
  68. +16 0 tests/linesort.hs
  69. +15 0 tests/macros.m4
  70. +3 0  tests/pack.hs
  71. +33 0 tests/revcomp.hs
  72. +38,618 0 tests/spellcheck-input.txt
  73. +69 0 tests/spellcheck.hs
  74. +30 0 tests/sum.hs
  75. +72 0 tests/test-compare.hs
  76. +36 0 tests/unpack.hs
  77. +14 0 tests/wc.hs
  78. +7 0 tests/zipwith.hs
5 .darcs-boring
... ... @@ -0,0 +1,5 @@
  1 +^dist(/|$)
  2 +^setup(/|$)
  3 +^GNUmakefile$
  4 +^Makefile.local$
  5 +^.depend(.bak)?$
3  .gitignore
... ... @@ -0,0 +1,3 @@
  1 +dist
  2 +cabal-dev
  3 +_darcs/
2,025 Data/ByteString.hs
... ... @@ -0,0 +1,2025 @@
  1 +{-# LANGUAGE CPP #-}
  2 +#if __GLASGOW_HASKELL__
  3 +{-# LANGUAGE MagicHash, UnboxedTuples,
  4 + NamedFieldPuns, BangPatterns, RecordWildCards #-}
  5 +#endif
  6 +{-# OPTIONS_HADDOCK prune #-}
  7 +#if __GLASGOW_HASKELL__ >= 701
  8 +{-# LANGUAGE Trustworthy #-}
  9 +#endif
  10 +
  11 +-- |
  12 +-- Module : Data.ByteString
  13 +-- Copyright : (c) The University of Glasgow 2001,
  14 +-- (c) David Roundy 2003-2005,
  15 +-- (c) Simon Marlow 2005,
  16 +-- (c) Bjorn Bringert 2006,
  17 +-- (c) Don Stewart 2005-2008,
  18 +-- (c) Duncan Coutts 2006-2011
  19 +-- License : BSD-style
  20 +--
  21 +-- Maintainer : dons00@gmail.com, duncan@community.haskell.org
  22 +-- Stability : stable
  23 +-- Portability : portable
  24 +--
  25 +-- A time and space-efficient implementation of byte vectors using
  26 +-- packed Word8 arrays, suitable for high performance use, both in terms
  27 +-- of large data quantities, or high speed requirements. Byte vectors
  28 +-- are encoded as strict 'Word8' arrays of bytes, held in a 'ForeignPtr',
  29 +-- and can be passed between C and Haskell with little effort.
  30 +--
  31 +-- This module is intended to be imported @qualified@, to avoid name
  32 +-- clashes with "Prelude" functions. eg.
  33 +--
  34 +-- > import qualified Data.ByteString as B
  35 +--
  36 +-- Original GHC implementation by Bryan O\'Sullivan.
  37 +-- Rewritten to use 'Data.Array.Unboxed.UArray' by Simon Marlow.
  38 +-- Rewritten to support slices and use 'ForeignPtr' by David Roundy.
  39 +-- Rewritten again and extended by Don Stewart and Duncan Coutts.
  40 +--
  41 +
  42 +module Data.ByteString (
  43 +
  44 + -- * The @ByteString@ type
  45 + ByteString, -- abstract, instances: Eq, Ord, Show, Read, Data, Typeable, Monoid
  46 +
  47 + -- * Introducing and eliminating 'ByteString's
  48 + empty, -- :: ByteString
  49 + singleton, -- :: Word8 -> ByteString
  50 + pack, -- :: [Word8] -> ByteString
  51 + unpack, -- :: ByteString -> [Word8]
  52 +
  53 + -- * Basic interface
  54 + cons, -- :: Word8 -> ByteString -> ByteString
  55 + snoc, -- :: ByteString -> Word8 -> ByteString
  56 + append, -- :: ByteString -> ByteString -> ByteString
  57 + head, -- :: ByteString -> Word8
  58 + uncons, -- :: ByteString -> Maybe (Word8, ByteString)
  59 + last, -- :: ByteString -> Word8
  60 + tail, -- :: ByteString -> ByteString
  61 + init, -- :: ByteString -> ByteString
  62 + null, -- :: ByteString -> Bool
  63 + length, -- :: ByteString -> Int
  64 +
  65 + -- * Transforming ByteStrings
  66 + map, -- :: (Word8 -> Word8) -> ByteString -> ByteString
  67 + reverse, -- :: ByteString -> ByteString
  68 + intersperse, -- :: Word8 -> ByteString -> ByteString
  69 + intercalate, -- :: ByteString -> [ByteString] -> ByteString
  70 + transpose, -- :: [ByteString] -> [ByteString]
  71 +
  72 + -- * Reducing 'ByteString's (folds)
  73 + foldl, -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  74 + foldl', -- :: (a -> Word8 -> a) -> a -> ByteString -> a
  75 + foldl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  76 + foldl1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  77 +
  78 + foldr, -- :: (Word8 -> a -> a) -> a -> ByteString -> a
  79 + foldr', -- :: (Word8 -> a -> a) -> a -> ByteString -> a
  80 + foldr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  81 + foldr1', -- :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  82 +
  83 + -- ** Special folds
  84 + concat, -- :: [ByteString] -> ByteString
  85 + concatMap, -- :: (Word8 -> ByteString) -> ByteString -> ByteString
  86 + any, -- :: (Word8 -> Bool) -> ByteString -> Bool
  87 + all, -- :: (Word8 -> Bool) -> ByteString -> Bool
  88 + maximum, -- :: ByteString -> Word8
  89 + minimum, -- :: ByteString -> Word8
  90 +
  91 + -- * Building ByteStrings
  92 + -- ** Scans
  93 + scanl, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  94 + scanl1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  95 + scanr, -- :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  96 + scanr1, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  97 +
  98 + -- ** Accumulating maps
  99 + mapAccumL, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  100 + mapAccumR, -- :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  101 +
  102 + -- ** Generating and unfolding ByteStrings
  103 + replicate, -- :: Int -> Word8 -> ByteString
  104 + unfoldr, -- :: (a -> Maybe (Word8, a)) -> a -> ByteString
  105 + unfoldrN, -- :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
  106 +
  107 + -- * Substrings
  108 +
  109 + -- ** Breaking strings
  110 + take, -- :: Int -> ByteString -> ByteString
  111 + drop, -- :: Int -> ByteString -> ByteString
  112 + splitAt, -- :: Int -> ByteString -> (ByteString, ByteString)
  113 + takeWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
  114 + dropWhile, -- :: (Word8 -> Bool) -> ByteString -> ByteString
  115 + span, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  116 + spanEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  117 + break, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  118 + breakEnd, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  119 + group, -- :: ByteString -> [ByteString]
  120 + groupBy, -- :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
  121 + inits, -- :: ByteString -> [ByteString]
  122 + tails, -- :: ByteString -> [ByteString]
  123 +
  124 + -- ** Breaking into many substrings
  125 + split, -- :: Word8 -> ByteString -> [ByteString]
  126 + splitWith, -- :: (Word8 -> Bool) -> ByteString -> [ByteString]
  127 +
  128 + -- * Predicates
  129 + isPrefixOf, -- :: ByteString -> ByteString -> Bool
  130 + isSuffixOf, -- :: ByteString -> ByteString -> Bool
  131 + isInfixOf, -- :: ByteString -> ByteString -> Bool
  132 +
  133 + -- ** Search for arbitrary substrings
  134 + breakSubstring, -- :: ByteString -> ByteString -> (ByteString,ByteString)
  135 + findSubstring, -- :: ByteString -> ByteString -> Maybe Int
  136 + findSubstrings, -- :: ByteString -> ByteString -> [Int]
  137 +
  138 + -- * Searching ByteStrings
  139 +
  140 + -- ** Searching by equality
  141 + elem, -- :: Word8 -> ByteString -> Bool
  142 + notElem, -- :: Word8 -> ByteString -> Bool
  143 +
  144 + -- ** Searching with a predicate
  145 + find, -- :: (Word8 -> Bool) -> ByteString -> Maybe Word8
  146 + filter, -- :: (Word8 -> Bool) -> ByteString -> ByteString
  147 + partition, -- :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  148 +
  149 + -- * Indexing ByteStrings
  150 + index, -- :: ByteString -> Int -> Word8
  151 + elemIndex, -- :: Word8 -> ByteString -> Maybe Int
  152 + elemIndices, -- :: Word8 -> ByteString -> [Int]
  153 + elemIndexEnd, -- :: Word8 -> ByteString -> Maybe Int
  154 + findIndex, -- :: (Word8 -> Bool) -> ByteString -> Maybe Int
  155 + findIndices, -- :: (Word8 -> Bool) -> ByteString -> [Int]
  156 + count, -- :: Word8 -> ByteString -> Int
  157 +
  158 + -- * Zipping and unzipping ByteStrings
  159 + zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
  160 + zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
  161 + unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)
  162 +
  163 + -- * Ordered ByteStrings
  164 + sort, -- :: ByteString -> ByteString
  165 +
  166 + -- * Low level conversions
  167 + -- ** Copying ByteStrings
  168 + copy, -- :: ByteString -> ByteString
  169 +
  170 + -- ** Packing 'CString's and pointers
  171 + packCString, -- :: CString -> IO ByteString
  172 + packCStringLen, -- :: CStringLen -> IO ByteString
  173 +
  174 + -- ** Using ByteStrings as 'CString's
  175 + useAsCString, -- :: ByteString -> (CString -> IO a) -> IO a
  176 + useAsCStringLen, -- :: ByteString -> (CStringLen -> IO a) -> IO a
  177 +
  178 + -- * I\/O with 'ByteString's
  179 +
  180 + -- ** Standard input and output
  181 + getLine, -- :: IO ByteString
  182 + getContents, -- :: IO ByteString
  183 + putStr, -- :: ByteString -> IO ()
  184 + putStrLn, -- :: ByteString -> IO ()
  185 + interact, -- :: (ByteString -> ByteString) -> IO ()
  186 +
  187 + -- ** Files
  188 + readFile, -- :: FilePath -> IO ByteString
  189 + writeFile, -- :: FilePath -> ByteString -> IO ()
  190 + appendFile, -- :: FilePath -> ByteString -> IO ()
  191 +
  192 + -- ** I\/O with Handles
  193 + hGetLine, -- :: Handle -> IO ByteString
  194 + hGetContents, -- :: Handle -> IO ByteString
  195 + hGet, -- :: Handle -> Int -> IO ByteString
  196 + hGetSome, -- :: Handle -> Int -> IO ByteString
  197 + hGetNonBlocking, -- :: Handle -> Int -> IO ByteString
  198 + hPut, -- :: Handle -> ByteString -> IO ()
  199 + hPutNonBlocking, -- :: Handle -> ByteString -> IO ByteString
  200 + hPutStr, -- :: Handle -> ByteString -> IO ()
  201 + hPutStrLn, -- :: Handle -> ByteString -> IO ()
  202 +
  203 + breakByte
  204 +
  205 + ) where
  206 +
  207 +import qualified Prelude as P
  208 +import Prelude hiding (reverse,head,tail,last,init,null
  209 + ,length,map,lines,foldl,foldr,unlines
  210 + ,concat,any,take,drop,splitAt,takeWhile
  211 + ,dropWhile,span,break,elem,filter,maximum
  212 + ,minimum,all,concatMap,foldl1,foldr1
  213 + ,scanl,scanl1,scanr,scanr1
  214 + ,readFile,writeFile,appendFile,replicate
  215 + ,getContents,getLine,putStr,putStrLn,interact
  216 + ,zip,zipWith,unzip,notElem)
  217 +
  218 +import Data.ByteString.Internal
  219 +import Data.ByteString.Unsafe
  220 +
  221 +import qualified Data.List as List
  222 +
  223 +import Data.Word (Word8)
  224 +import Data.Maybe (isJust, listToMaybe)
  225 +
  226 +-- Control.Exception.assert not available in yhc or nhc
  227 +#ifndef __NHC__
  228 +import Control.Exception (finally, bracket, assert, throwIO)
  229 +#else
  230 +import Control.Exception (bracket, finally)
  231 +#endif
  232 +import Control.Monad (when)
  233 +
  234 +import Foreign.C.String (CString, CStringLen)
  235 +import Foreign.C.Types (CSize)
  236 +import Foreign.ForeignPtr
  237 +import Foreign.Marshal.Alloc (allocaBytes, mallocBytes, reallocBytes, finalizerFree)
  238 +import Foreign.Marshal.Array (allocaArray)
  239 +import Foreign.Ptr
  240 +import Foreign.Storable (Storable(..))
  241 +
  242 +-- hGetBuf and hPutBuf not available in yhc or nhc
  243 +import System.IO (stdin,stdout,hClose,hFileSize
  244 + ,hGetBuf,hPutBuf,openBinaryFile
  245 + ,IOMode(..))
  246 +import System.IO.Error (mkIOError, illegalOperationErrorType)
  247 +
  248 +import Data.Monoid (Monoid(..))
  249 +
  250 +#if !defined(__GLASGOW_HASKELL__)
  251 +import System.IO.Unsafe
  252 +import qualified System.Environment
  253 +import qualified System.IO (hGetLine)
  254 +import System.IO (hIsEOF)
  255 +#endif
  256 +
  257 +#if defined(__GLASGOW_HASKELL__)
  258 +
  259 +import System.IO (hGetBufNonBlocking, hPutBufNonBlocking)
  260 +
  261 +#if MIN_VERSION_base(4,3,0)
  262 +import System.IO (hGetBufSome)
  263 +#else
  264 +import System.IO (hWaitForInput, hIsEOF)
  265 +#endif
  266 +
  267 +#if __GLASGOW_HASKELL__ >= 611
  268 +import Data.IORef
  269 +import GHC.IO.Handle.Internals
  270 +import GHC.IO.Handle.Types
  271 +import GHC.IO.Buffer
  272 +import GHC.IO.BufferedIO as Buffered
  273 +import GHC.IO (unsafePerformIO)
  274 +import Data.Char (ord)
  275 +import Foreign.Marshal.Utils (copyBytes)
  276 +#else
  277 +import System.IO.Error (isEOFError)
  278 +import GHC.IOBase
  279 +import GHC.Handle
  280 +#endif
  281 +
  282 +import GHC.Prim (Word#)
  283 +import GHC.Base (build)
  284 +import GHC.Word hiding (Word8)
  285 +
  286 +#endif
  287 +
  288 +-- An alternative to Control.Exception (assert) for nhc98
  289 +#ifdef __NHC__
  290 +
  291 +import System.IO (Handle)
  292 +
  293 +#define assert assertS "__FILE__ : __LINE__"
  294 +assertS :: String -> Bool -> a -> a
  295 +assertS _ True = id
  296 +assertS s False = error ("assertion failed at "++s)
  297 +
  298 +-- An alternative to hWaitForInput
  299 +hWaitForInput :: Handle -> Int -> IO ()
  300 +hWaitForInput _ _ = return ()
  301 +#endif
  302 +
  303 +-- -----------------------------------------------------------------------------
  304 +--
  305 +-- Useful macros, until we have bang patterns
  306 +--
  307 +
  308 +#define STRICT1(f) f a | a `seq` False = undefined
  309 +#define STRICT2(f) f a b | a `seq` b `seq` False = undefined
  310 +#define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
  311 +#define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
  312 +#define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
  313 +
  314 +-- -----------------------------------------------------------------------------
  315 +-- Introducing and eliminating 'ByteString's
  316 +
  317 +-- | /O(1)/ The empty 'ByteString'
  318 +empty :: ByteString
  319 +empty = PS nullForeignPtr 0 0
  320 +
  321 +-- | /O(1)/ Convert a 'Word8' into a 'ByteString'
  322 +singleton :: Word8 -> ByteString
  323 +singleton c = unsafeCreate 1 $ \p -> poke p c
  324 +{-# INLINE [1] singleton #-}
  325 +
  326 +-- Inline [1] for intercalate rule
  327 +
  328 +--
  329 +-- XXX The use of unsafePerformIO in allocating functions (unsafeCreate) is critical!
  330 +--
  331 +-- Otherwise:
  332 +--
  333 +-- singleton 255 `compare` singleton 127
  334 +--
  335 +-- is compiled to:
  336 +--
  337 +-- case mallocByteString 2 of
  338 +-- ForeignPtr f internals ->
  339 +-- case writeWord8OffAddr# f 0 255 of _ ->
  340 +-- case writeWord8OffAddr# f 0 127 of _ ->
  341 +-- case eqAddr# f f of
  342 +-- False -> case compare (GHC.Prim.plusAddr# f 0)
  343 +-- (GHC.Prim.plusAddr# f 0)
  344 +--
  345 +--
  346 +
  347 +-- | /O(n)/ Convert a '[Word8]' into a 'ByteString'.
  348 +--
  349 +-- For applications with large numbers of string literals, pack can be a
  350 +-- bottleneck. In such cases, consider using packAddress (GHC only).
  351 +pack :: [Word8] -> ByteString
  352 +pack = packBytes
  353 +
  354 +-- | /O(n)/ Converts a 'ByteString' to a '[Word8]'.
  355 +unpack :: ByteString -> [Word8]
  356 +#if !defined(__GLASGOW_HASKELL__)
  357 +unpack = unpackBytes
  358 +#else
  359 +
  360 +unpack ps = build (unpackFoldr ps)
  361 +{-# INLINE unpack #-}
  362 +
  363 +--
  364 +-- Have unpack fuse with good list consumers
  365 +--
  366 +-- critical this isn't strict in the acc
  367 +-- as it will break in the presence of list fusion. this is a known
  368 +-- issue with seq and build/foldr rewrite rules, which rely on lazy
  369 +-- demanding to avoid bottoms in the list.
  370 +--
  371 +unpackFoldr :: ByteString -> (Word8 -> a -> a) -> a -> a
  372 +unpackFoldr (PS fp off len) f ch = withPtr fp $ \p -> do
  373 + let loop q n _ | q `seq` n `seq` False = undefined -- n.b.
  374 + loop _ (-1) acc = return acc
  375 + loop q n acc = do
  376 + a <- peekByteOff q n
  377 + loop q (n-1) (a `f` acc)
  378 + loop (p `plusPtr` off) (len-1) ch
  379 +{-# INLINE [0] unpackFoldr #-}
  380 +
  381 +{-# RULES
  382 +"ByteString unpack-list" [1] forall p .
  383 + unpackFoldr p (:) [] = unpackBytes p
  384 + #-}
  385 +
  386 +#endif
  387 +
  388 +-- ---------------------------------------------------------------------
  389 +-- Basic interface
  390 +
  391 +-- | /O(1)/ Test whether a ByteString is empty.
  392 +null :: ByteString -> Bool
  393 +null (PS _ _ l) = assert (l >= 0) $ l <= 0
  394 +{-# INLINE null #-}
  395 +
  396 +-- ---------------------------------------------------------------------
  397 +-- | /O(1)/ 'length' returns the length of a ByteString as an 'Int'.
  398 +length :: ByteString -> Int
  399 +length (PS _ _ l) = assert (l >= 0) $ l
  400 +{-# INLINE length #-}
  401 +
  402 +------------------------------------------------------------------------
  403 +
  404 +infixr 5 `cons` --same as list (:)
  405 +infixl 5 `snoc`
  406 +
  407 +-- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
  408 +-- complexity, as it requires a memcpy.
  409 +cons :: Word8 -> ByteString -> ByteString
  410 +cons c (PS x s l) = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
  411 + poke p c
  412 + memcpy (p `plusPtr` 1) (f `plusPtr` s) (fromIntegral l)
  413 +{-# INLINE cons #-}
  414 +
  415 +-- | /O(n)/ Append a byte to the end of a 'ByteString'
  416 +snoc :: ByteString -> Word8 -> ByteString
  417 +snoc (PS x s l) c = unsafeCreate (l+1) $ \p -> withForeignPtr x $ \f -> do
  418 + memcpy p (f `plusPtr` s) (fromIntegral l)
  419 + poke (p `plusPtr` l) c
  420 +{-# INLINE snoc #-}
  421 +
  422 +-- todo fuse
  423 +
  424 +-- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
  425 +-- An exception will be thrown in the case of an empty ByteString.
  426 +head :: ByteString -> Word8
  427 +head (PS x s l)
  428 + | l <= 0 = errorEmptyList "head"
  429 + | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p s
  430 +{-# INLINE head #-}
  431 +
  432 +-- | /O(1)/ Extract the elements after the head of a ByteString, which must be non-empty.
  433 +-- An exception will be thrown in the case of an empty ByteString.
  434 +tail :: ByteString -> ByteString
  435 +tail (PS p s l)
  436 + | l <= 0 = errorEmptyList "tail"
  437 + | otherwise = PS p (s+1) (l-1)
  438 +{-# INLINE tail #-}
  439 +
  440 +-- | /O(1)/ Extract the head and tail of a ByteString, returning Nothing
  441 +-- if it is empty.
  442 +uncons :: ByteString -> Maybe (Word8, ByteString)
  443 +uncons (PS x s l)
  444 + | l <= 0 = Nothing
  445 + | otherwise = Just (inlinePerformIO $ withForeignPtr x
  446 + $ \p -> peekByteOff p s,
  447 + PS x (s+1) (l-1))
  448 +{-# INLINE uncons #-}
  449 +
  450 +-- | /O(1)/ Extract the last element of a ByteString, which must be finite and non-empty.
  451 +-- An exception will be thrown in the case of an empty ByteString.
  452 +last :: ByteString -> Word8
  453 +last ps@(PS x s l)
  454 + | null ps = errorEmptyList "last"
  455 + | otherwise = inlinePerformIO $ withForeignPtr x $ \p -> peekByteOff p (s+l-1)
  456 +{-# INLINE last #-}
  457 +
  458 +-- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
  459 +-- An exception will be thrown in the case of an empty ByteString.
  460 +init :: ByteString -> ByteString
  461 +init ps@(PS p s l)
  462 + | null ps = errorEmptyList "init"
  463 + | otherwise = PS p s (l-1)
  464 +{-# INLINE init #-}
  465 +
  466 +-- | /O(n)/ Append two ByteStrings
  467 +append :: ByteString -> ByteString -> ByteString
  468 +append = mappend
  469 +{-# INLINE append #-}
  470 +
  471 +-- ---------------------------------------------------------------------
  472 +-- Transformations
  473 +
  474 +-- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
  475 +-- element of @xs@. This function is subject to array fusion.
  476 +map :: (Word8 -> Word8) -> ByteString -> ByteString
  477 +map f (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a ->
  478 + create len $ map_ 0 (a `plusPtr` s)
  479 + where
  480 + map_ :: Int -> Ptr Word8 -> Ptr Word8 -> IO ()
  481 + STRICT3(map_)
  482 + map_ n p1 p2
  483 + | n >= len = return ()
  484 + | otherwise = do
  485 + x <- peekByteOff p1 n
  486 + pokeByteOff p2 n (f x)
  487 + map_ (n+1) p1 p2
  488 +{-# INLINE map #-}
  489 +
  490 +-- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
  491 +reverse :: ByteString -> ByteString
  492 +reverse (PS x s l) = unsafeCreate l $ \p -> withForeignPtr x $ \f ->
  493 + c_reverse p (f `plusPtr` s) (fromIntegral l)
  494 +
  495 +-- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
  496 +-- 'ByteString' and \`intersperses\' that byte between the elements of
  497 +-- the 'ByteString'. It is analogous to the intersperse function on
  498 +-- Lists.
  499 +intersperse :: Word8 -> ByteString -> ByteString
  500 +intersperse c ps@(PS x s l)
  501 + | length ps < 2 = ps
  502 + | otherwise = unsafeCreate (2*l-1) $ \p -> withForeignPtr x $ \f ->
  503 + c_intersperse p (f `plusPtr` s) (fromIntegral l) c
  504 +
  505 +-- | The 'transpose' function transposes the rows and columns of its
  506 +-- 'ByteString' argument.
  507 +transpose :: [ByteString] -> [ByteString]
  508 +transpose ps = P.map pack (List.transpose (P.map unpack ps))
  509 +
  510 +-- ---------------------------------------------------------------------
  511 +-- Reducing 'ByteString's
  512 +
  513 +-- | 'foldl', applied to a binary operator, a starting value (typically
  514 +-- the left-identity of the operator), and a ByteString, reduces the
  515 +-- ByteString using the binary operator, from left to right.
  516 +--
  517 +-- This function is subject to array fusion.
  518 +--
  519 +foldl :: (a -> Word8 -> a) -> a -> ByteString -> a
  520 +foldl f v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
  521 + lgo v (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
  522 + where
  523 + STRICT3(lgo)
  524 + lgo z p q | p == q = return z
  525 + | otherwise = do c <- peek p
  526 + lgo (f z c) (p `plusPtr` 1) q
  527 +{-# INLINE foldl #-}
  528 +
  529 +-- | 'foldl\'' is like 'foldl', but strict in the accumulator.
  530 +-- However, for ByteStrings, all left folds are strict in the accumulator.
  531 +--
  532 +foldl' :: (a -> Word8 -> a) -> a -> ByteString -> a
  533 +foldl' = foldl
  534 +{-# INLINE foldl' #-}
  535 +
  536 +-- | 'foldr', applied to a binary operator, a starting value
  537 +-- (typically the right-identity of the operator), and a ByteString,
  538 +-- reduces the ByteString using the binary operator, from right to left.
  539 +foldr :: (Word8 -> a -> a) -> a -> ByteString -> a
  540 +foldr k v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
  541 + go v (ptr `plusPtr` (s+l-1)) (ptr `plusPtr` (s-1))
  542 + where
  543 + STRICT3(go)
  544 + go z p q | p == q = return z
  545 + | otherwise = do c <- peek p
  546 + go (c `k` z) (p `plusPtr` (-1)) q -- tail recursive
  547 +{-# INLINE foldr #-}
  548 +
  549 +-- | 'foldr\'' is like 'foldr', but strict in the accumulator.
  550 +foldr' :: (Word8 -> a -> a) -> a -> ByteString -> a
  551 +foldr' k v (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
  552 + go v (ptr `plusPtr` (s+l-1)) (ptr `plusPtr` (s-1))
  553 + where
  554 + STRICT3(go)
  555 + go z p q | p == q = return z
  556 + | otherwise = do c <- peek p
  557 + go (c `k` z) (p `plusPtr` (-1)) q -- tail recursive
  558 +{-# INLINE foldr' #-}
  559 +
  560 +-- | 'foldl1' is a variant of 'foldl' that has no starting value
  561 +-- argument, and thus must be applied to non-empty 'ByteStrings'.
  562 +-- This function is subject to array fusion.
  563 +-- An exception will be thrown in the case of an empty ByteString.
  564 +foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  565 +foldl1 f ps
  566 + | null ps = errorEmptyList "foldl1"
  567 + | otherwise = foldl f (unsafeHead ps) (unsafeTail ps)
  568 +{-# INLINE foldl1 #-}
  569 +
  570 +-- | 'foldl1\'' is like 'foldl1', but strict in the accumulator.
  571 +-- An exception will be thrown in the case of an empty ByteString.
  572 +foldl1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  573 +foldl1' f ps
  574 + | null ps = errorEmptyList "foldl1'"
  575 + | otherwise = foldl' f (unsafeHead ps) (unsafeTail ps)
  576 +{-# INLINE foldl1' #-}
  577 +
  578 +-- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
  579 +-- and thus must be applied to non-empty 'ByteString's
  580 +-- An exception will be thrown in the case of an empty ByteString.
  581 +foldr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  582 +foldr1 f ps
  583 + | null ps = errorEmptyList "foldr1"
  584 + | otherwise = foldr f (last ps) (init ps)
  585 +{-# INLINE foldr1 #-}
  586 +
  587 +-- | 'foldr1\'' is a variant of 'foldr1', but is strict in the
  588 +-- accumulator.
  589 +foldr1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8
  590 +foldr1' f ps
  591 + | null ps = errorEmptyList "foldr1"
  592 + | otherwise = foldr' f (last ps) (init ps)
  593 +{-# INLINE foldr1' #-}
  594 +
  595 +-- ---------------------------------------------------------------------
  596 +-- Special folds
  597 +
  598 +-- | /O(n)/ Concatenate a list of ByteStrings.
  599 +concat :: [ByteString] -> ByteString
  600 +concat = mconcat
  601 +
  602 +-- | Map a function over a 'ByteString' and concatenate the results
  603 +concatMap :: (Word8 -> ByteString) -> ByteString -> ByteString
  604 +concatMap f = concat . foldr ((:) . f) []
  605 +
  606 +-- foldr (append . f) empty
  607 +
  608 +-- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
  609 +-- any element of the 'ByteString' satisfies the predicate.
  610 +any :: (Word8 -> Bool) -> ByteString -> Bool
  611 +any _ (PS _ _ 0) = False
  612 +any f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
  613 + go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
  614 + where
  615 + STRICT2(go)
  616 + go p q | p == q = return False
  617 + | otherwise = do c <- peek p
  618 + if f c then return True
  619 + else go (p `plusPtr` 1) q
  620 +{-# INLINE any #-}
  621 +
  622 +-- todo fuse
  623 +
  624 +-- | /O(n)/ Applied to a predicate and a 'ByteString', 'all' determines
  625 +-- if all elements of the 'ByteString' satisfy the predicate.
  626 +all :: (Word8 -> Bool) -> ByteString -> Bool
  627 +all _ (PS _ _ 0) = True
  628 +all f (PS x s l) = inlinePerformIO $ withForeignPtr x $ \ptr ->
  629 + go (ptr `plusPtr` s) (ptr `plusPtr` (s+l))
  630 + where
  631 + STRICT2(go)
  632 + go p q | p == q = return True -- end of list
  633 + | otherwise = do c <- peek p
  634 + if f c
  635 + then go (p `plusPtr` 1) q
  636 + else return False
  637 +{-# INLINE all #-}
  638 +
  639 +------------------------------------------------------------------------
  640 +
  641 +-- | /O(n)/ 'maximum' returns the maximum value from a 'ByteString'
  642 +-- This function will fuse.
  643 +-- An exception will be thrown in the case of an empty ByteString.
  644 +maximum :: ByteString -> Word8
  645 +maximum xs@(PS x s l)
  646 + | null xs = errorEmptyList "maximum"
  647 + | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
  648 + c_maximum (p `plusPtr` s) (fromIntegral l)
  649 +{-# INLINE maximum #-}
  650 +
  651 +-- | /O(n)/ 'minimum' returns the minimum value from a 'ByteString'
  652 +-- This function will fuse.
  653 +-- An exception will be thrown in the case of an empty ByteString.
  654 +minimum :: ByteString -> Word8
  655 +minimum xs@(PS x s l)
  656 + | null xs = errorEmptyList "minimum"
  657 + | otherwise = inlinePerformIO $ withForeignPtr x $ \p ->
  658 + c_minimum (p `plusPtr` s) (fromIntegral l)
  659 +{-# INLINE minimum #-}
  660 +
  661 +------------------------------------------------------------------------
  662 +
  663 +-- | The 'mapAccumL' function behaves like a combination of 'map' and
  664 +-- 'foldl'; it applies a function to each element of a ByteString,
  665 +-- passing an accumulating parameter from left to right, and returning a
  666 +-- final value of this accumulator together with the new list.
  667 +mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  668 +mapAccumL f acc (PS fp o len) = inlinePerformIO $ withForeignPtr fp $ \a -> do
  669 + gp <- mallocByteString len
  670 + acc' <- withForeignPtr gp $ \p -> mapAccumL_ acc 0 (a `plusPtr` o) p
  671 + return $! (acc', PS gp 0 len)
  672 + where
  673 + STRICT4(mapAccumL_)
  674 + mapAccumL_ s n p1 p2
  675 + | n >= len = return s
  676 + | otherwise = do
  677 + x <- peekByteOff p1 n
  678 + let (s', y) = f s x
  679 + pokeByteOff p2 n y
  680 + mapAccumL_ s' (n+1) p1 p2
  681 +{-# INLINE mapAccumL #-}
  682 +
  683 +-- | The 'mapAccumR' function behaves like a combination of 'map' and
  684 +-- 'foldr'; it applies a function to each element of a ByteString,
  685 +-- passing an accumulating parameter from right to left, and returning a
  686 +-- final value of this accumulator together with the new ByteString.
  687 +mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
  688 +mapAccumR f acc (PS fp o len) = inlinePerformIO $ withForeignPtr fp $ \a -> do
  689 + gp <- mallocByteString len
  690 + acc' <- withForeignPtr gp $ \p -> mapAccumR_ acc (len-1) (a `plusPtr` o) p
  691 + return $! (acc', PS gp 0 len)
  692 + where
  693 + STRICT4(mapAccumR_)
  694 + mapAccumR_ s n p q
  695 + | n < 0 = return s
  696 + | otherwise = do
  697 + x <- peekByteOff p n
  698 + let (s', y) = f s x
  699 + pokeByteOff q n y
  700 + mapAccumR_ s' (n-1) p q
  701 +{-# INLINE mapAccumR #-}
  702 +
  703 +-- ---------------------------------------------------------------------
  704 +-- Building ByteStrings
  705 +
  706 +-- | 'scanl' is similar to 'foldl', but returns a list of successive
  707 +-- reduced values from the left. This function will fuse.
  708 +--
  709 +-- > scanl f z [x1, x2, ...] == [z, z `f` x1, (z `f` x1) `f` x2, ...]
  710 +--
  711 +-- Note that
  712 +--
  713 +-- > last (scanl f z xs) == foldl f z xs.
  714 +--
  715 +scanl :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  716 +
  717 +scanl f v (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a ->
  718 + create (len+1) $ \q -> do
  719 + poke q v
  720 + scanl_ v 0 (a `plusPtr` s) (q `plusPtr` 1)
  721 + where
  722 + STRICT4(scanl_)
  723 + scanl_ z n p q
  724 + | n >= len = return ()
  725 + | otherwise = do
  726 + x <- peekByteOff p n
  727 + let z' = f z x
  728 + pokeByteOff q n z'
  729 + scanl_ z' (n+1) p q
  730 +{-# INLINE scanl #-}
  731 +
  732 + -- n.b. haskell's List scan returns a list one bigger than the
  733 + -- input, so we need to snoc here to get some extra space, however,
  734 + -- it breaks map/up fusion (i.e. scanl . map no longer fuses)
  735 +
  736 +-- | 'scanl1' is a variant of 'scanl' that has no starting value argument.
  737 +-- This function will fuse.
  738 +--
  739 +-- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...]
  740 +scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  741 +scanl1 f ps
  742 + | null ps = empty
  743 + | otherwise = scanl f (unsafeHead ps) (unsafeTail ps)
  744 +{-# INLINE scanl1 #-}
  745 +
  746 +-- | scanr is the right-to-left dual of scanl.
  747 +scanr :: (Word8 -> Word8 -> Word8) -> Word8 -> ByteString -> ByteString
  748 +scanr f v (PS fp s len) = inlinePerformIO $ withForeignPtr fp $ \a ->
  749 + create (len+1) $ \q -> do
  750 + poke (q `plusPtr` len) v
  751 + scanr_ v (len-1) (a `plusPtr` s) q
  752 + where
  753 + STRICT4(scanr_)
  754 + scanr_ z n p q
  755 + | n < 0 = return ()
  756 + | otherwise = do
  757 + x <- peekByteOff p n
  758 + let z' = f x z
  759 + pokeByteOff q n z'
  760 + scanr_ z' (n-1) p q
  761 +{-# INLINE scanr #-}
  762 +
  763 +-- | 'scanr1' is a variant of 'scanr' that has no starting value argument.
  764 +scanr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString
  765 +scanr1 f ps
  766 + | null ps = empty
  767 + | otherwise = scanr f (last ps) (init ps) -- todo, unsafe versions
  768 +{-# INLINE scanr1 #-}
  769 +
  770 +-- ---------------------------------------------------------------------
  771 +-- Unfolds and replicates
  772 +
  773 +-- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
  774 +-- the value of every element. The following holds:
  775 +--
  776 +-- > replicate w c = unfoldr w (\u -> Just (u,u)) c
  777 +--
  778 +-- This implemenation uses @memset(3)@
  779 +replicate :: Int -> Word8 -> ByteString
  780 +replicate w c
  781 + | w <= 0 = empty
  782 + | otherwise = unsafeCreate w $ \ptr ->
  783 + memset ptr c (fromIntegral w) >> return ()
  784 +
  785 +-- | /O(n)/, where /n/ is the length of the result. The 'unfoldr'
  786 +-- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a
  787 +-- ByteString from a seed value. The function takes the element and
  788 +-- returns 'Nothing' if it is done producing the ByteString or returns
  789 +-- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string,
  790 +-- and @b@ is the seed value for further production.
  791 +--
  792 +-- Examples:
  793 +--
  794 +-- > unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0
  795 +-- > == pack [0, 1, 2, 3, 4, 5]
  796 +--
  797 +unfoldr :: (a -> Maybe (Word8, a)) -> a -> ByteString
  798 +unfoldr f = concat . unfoldChunk 32 64
  799 + where unfoldChunk n n' x =
  800 + case unfoldrN n f x of
  801 + (s, Nothing) -> s : []
  802 + (s, Just x') -> s : unfoldChunk n' (n+n') x'
  803 +{-# INLINE unfoldr #-}
  804 +
  805 +-- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ByteString from a seed
  806 +-- value. However, the length of the result is limited by the first
  807 +-- argument to 'unfoldrN'. This function is more efficient than 'unfoldr'
  808 +-- when the maximum length of the result is known.
  809 +--
  810 +-- The following equation relates 'unfoldrN' and 'unfoldr':
  811 +--
  812 +-- > snd (unfoldrN n f s) == take n (unfoldr f s)
  813 +--
  814 +unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
  815 +unfoldrN i f x0
  816 + | i < 0 = (empty, Just x0)
  817 + | otherwise = unsafePerformIO $ createAndTrim' i $ \p -> go p x0 0
  818 + where STRICT3(go)
  819 + go p x n =
  820 + case f x of
  821 + Nothing -> return (0, n, Nothing)
  822 + Just (w,x')
  823 + | n == i -> return (0, n, Just x)
  824 + | otherwise -> do poke p w
  825 + go (p `plusPtr` 1) x' (n+1)
  826 +{-# INLINE unfoldrN #-}
  827 +
  828 +-- ---------------------------------------------------------------------
  829 +-- Substrings
  830 +
  831 +-- | /O(1)/ 'take' @n@, applied to a ByteString @xs@, returns the prefix
  832 +-- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
  833 +take :: Int -> ByteString -> ByteString
  834 +take n ps@(PS x s l)
  835 + | n <= 0 = empty
  836 + | n >= l = ps
  837 + | otherwise = PS x s n
  838 +{-# INLINE take #-}
  839 +
  840 +-- | /O(1)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
  841 +-- elements, or @[]@ if @n > 'length' xs@.
  842 +drop :: Int -> ByteString -> ByteString
  843 +drop n ps@(PS x s l)
  844 + | n <= 0 = ps
  845 + | n >= l = empty
  846 + | otherwise = PS x (s+n) (l-n)
  847 +{-# INLINE drop #-}
  848 +
  849 +-- | /O(1)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
  850 +splitAt :: Int -> ByteString -> (ByteString, ByteString)
  851 +splitAt n ps@(PS x s l)
  852 + | n <= 0 = (empty, ps)
  853 + | n >= l = (ps, empty)
  854 + | otherwise = (PS x s n, PS x (s+n) (l-n))
  855 +{-# INLINE splitAt #-}
  856 +
  857 +-- | 'takeWhile', applied to a predicate @p@ and a ByteString @xs@,
  858 +-- returns the longest prefix (possibly empty) of @xs@ of elements that
  859 +-- satisfy @p@.
  860 +takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
  861 +takeWhile f ps = unsafeTake (findIndexOrEnd (not . f) ps) ps
  862 +{-# INLINE takeWhile #-}
  863 +
  864 +-- | 'dropWhile' @p xs@ returns the suffix remaining after 'takeWhile' @p xs@.
  865 +dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
  866 +dropWhile f ps = unsafeDrop (findIndexOrEnd (not . f) ps) ps
  867 +{-# INLINE dropWhile #-}
  868 +
  869 +-- instead of findIndexOrEnd, we could use memchr here.
  870 +
  871 +-- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
  872 +--
  873 +-- Under GHC, a rewrite rule will transform break (==) into a
  874 +-- call to the specialised breakByte:
  875 +--
  876 +-- > break ((==) x) = breakByte x
  877 +-- > break (==x) = breakByte x
  878 +--
  879 +break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  880 +break p ps = case findIndexOrEnd p ps of n -> (unsafeTake n ps, unsafeDrop n ps)
  881 +#if __GLASGOW_HASKELL__
  882 +{-# INLINE [1] break #-}
  883 +#endif
  884 +
  885 +{-# RULES
  886 +"ByteString specialise break (x==)" forall x.
  887 + break ((==) x) = breakByte x
  888 +"ByteString specialise break (==x)" forall x.
  889 + break (==x) = breakByte x
  890 + #-}
  891 +
  892 +-- INTERNAL:
  893 +
  894 +-- | 'breakByte' breaks its ByteString argument at the first occurence
  895 +-- of the specified byte. It is more efficient than 'break' as it is
  896 +-- implemented with @memchr(3)@. I.e.
  897 +--
  898 +-- > break (=='c') "abcd" == breakByte 'c' "abcd"
  899 +--
  900 +breakByte :: Word8 -> ByteString -> (ByteString, ByteString)
  901 +breakByte c p = case elemIndex c p of
  902 + Nothing -> (p,empty)
  903 + Just n -> (unsafeTake n p, unsafeDrop n p)
  904 +{-# INLINE breakByte #-}
  905 +
  906 +-- | 'breakEnd' behaves like 'break' but from the end of the 'ByteString'
  907 +--
  908 +-- breakEnd p == spanEnd (not.p)
  909 +breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  910 +breakEnd p ps = splitAt (findFromEndUntil p ps) ps
  911 +
  912 +-- | 'span' @p xs@ breaks the ByteString into two segments. It is
  913 +-- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@
  914 +span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  915 +span p ps = break (not . p) ps
  916 +#if __GLASGOW_HASKELL__
  917 +{-# INLINE [1] span #-}
  918 +#endif
  919 +
  920 +-- | 'spanByte' breaks its ByteString argument at the first
  921 +-- occurence of a byte other than its argument. It is more efficient
  922 +-- than 'span (==)'
  923 +--
  924 +-- > span (=='c') "abcd" == spanByte 'c' "abcd"
  925 +--
  926 +spanByte :: Word8 -> ByteString -> (ByteString, ByteString)
  927 +spanByte c ps@(PS x s l) = inlinePerformIO $ withForeignPtr x $ \p ->
  928 + go (p `plusPtr` s) 0
  929 + where
  930 + STRICT2(go)
  931 + go p i | i >= l = return (ps, empty)
  932 + | otherwise = do c' <- peekByteOff p i
  933 + if c /= c'
  934 + then return (unsafeTake i ps, unsafeDrop i ps)
  935 + else go p (i+1)
  936 +{-# INLINE spanByte #-}
  937 +
  938 +{-# RULES
  939 +"ByteString specialise span (x==)" forall x.
  940 + span ((==) x) = spanByte x
  941 +"ByteString specialise span (==x)" forall x.
  942 + span (==x) = spanByte x
  943 + #-}
  944 +
  945 +-- | 'spanEnd' behaves like 'span' but from the end of the 'ByteString'.
  946 +-- We have
  947 +--
  948 +-- > spanEnd (not.isSpace) "x y z" == ("x y ","z")
  949 +--
  950 +-- and
  951 +--
  952 +-- > spanEnd (not . isSpace) ps
  953 +-- > ==
  954 +-- > let (x,y) = span (not.isSpace) (reverse ps) in (reverse y, reverse x)
  955 +--
  956 +spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
  957 +spanEnd p ps = splitAt (findFromEndUntil (not.p) ps) ps
  958 +
  959 +-- | /O(n)/ Splits a 'ByteString' into components delimited by
  960 +-- separators, where the predicate returns True for a separator element.
  961 +-- The resulting components do not contain the separators. Two adjacent
  962 +-- separators result in an empty component in the output. eg.
  963 +--
  964 +-- > splitWith (=='a') "aabbaca" == ["","","bb","c",""]
  965 +-- > splitWith (=='a') [] == []
  966 +--
  967 +splitWith :: (Word8 -> Bool) -> ByteString -> [ByteString]
  968 +
  969 +#if defined(__GLASGOW_HASKELL__)
  970 +splitWith _pred (PS _ _ 0) = []
  971 +splitWith pred_ (PS fp off len) = splitWith0 pred# off len fp
  972 + where pred# c# = pred_ (W8# c#)
  973 +
  974 + STRICT4(splitWith0)
  975 + splitWith0 pred' off' len' fp' = withPtr fp $ \p ->
  976 + splitLoop pred' p 0 off' len' fp'
  977 +
  978 + splitLoop :: (Word# -> Bool)
  979 + -> Ptr Word8
  980 + -> Int -> Int -> Int
  981 + -> ForeignPtr Word8
  982 + -> IO [ByteString]
  983 +
  984 + splitLoop pred' p idx' off' len' fp'
  985 + | idx' >= len' = return [PS fp' off' idx']
  986 + | otherwise = do
  987 + w <- peekElemOff p (off'+idx')
  988 + if pred' (case w of W8# w# -> w#)
  989 + then return (PS fp' off' idx' :
  990 + splitWith0 pred' (off'+idx'+1) (len'-idx'-1) fp')
  991 + else splitLoop pred' p (idx'+1) off' len' fp'
  992 +{-# INLINE splitWith #-}
  993 +
  994 +#else
  995 +splitWith _ (PS _ _ 0) = []
  996 +splitWith p ps = loop p ps
  997 + where
  998 + STRICT2(loop)
  999 + loop q qs = if null rest then [chunk]
  1000 + else chunk : loop q (unsafeTail rest)
  1001 + where (chunk,rest) = break q qs
  1002 +#endif
  1003 +
  1004 +-- | /O(n)/ Break a 'ByteString' into pieces separated by the byte
  1005 +-- argument, consuming the delimiter. I.e.
  1006 +--
  1007 +-- > split '\n' "a\nb\nd\ne" == ["a","b","d","e"]
  1008 +-- > split 'a' "aXaXaXa" == ["","X","X","X",""]
  1009 +-- > split 'x' "x" == ["",""]
  1010 +--
  1011 +-- and
  1012 +--
  1013 +-- > intercalate [c] . split c == id
  1014 +-- > split == splitWith . (==)
  1015 +--
  1016 +-- As for all splitting functions in this library, this function does
  1017 +-- not copy the substrings, it just constructs new 'ByteStrings' that
  1018 +-- are slices of the original.
  1019 +--
  1020 +split :: Word8 -> ByteString -> [ByteString]
  1021 +split _ (PS _ _ 0) = []
  1022 +split w (PS x s l) = loop 0
  1023 + where
  1024 + STRICT1(loop)
  1025 + loop n =
  1026 + let q = inlinePerformIO $ withForeignPtr x $ \p ->
  1027 + memchr (p `plusPtr` (s+n))
  1028 + w (fromIntegral (l-n))
  1029 + in if q == nullPtr
  1030 + then [PS x (s+n) (l-n)]
  1031 + else let i = inlinePerformIO $ withForeignPtr x $ \p ->
  1032 + return (q `minusPtr` (p `plusPtr` s))
  1033 + in PS x (s+n) (i-n) : loop (i+1)
  1034 +
  1035 +{-# INLINE split #-}
  1036 +
  1037 +{-
  1038 +-- slower. but stays inside Haskell.
  1039 +split _ (PS _ _ 0) = []
  1040 +split (W8# w#) (PS fp off len) = splitWith' off len fp
  1041 + where
  1042 + splitWith' off' len' fp' = withPtr fp $ \p ->
  1043 + splitLoop p 0 off' len' fp'
  1044 +
  1045 + splitLoop :: Ptr Word8
  1046 + -> Int -> Int -> Int
  1047 + -> ForeignPtr Word8
  1048 + -> IO [ByteString]
  1049 +
  1050 + STRICT5(splitLoop)
  1051 + splitLoop p idx' off' len' fp'
  1052 + | idx' >= len' = return [PS fp' off' idx']
  1053 + | otherwise = do
  1054 + (W8# x#) <- peekElemOff p (off'+idx')
  1055 + if word2Int# w# ==# word2Int# x#
  1056 + then return (PS fp' off' idx' :
  1057 + splitWith' (off'+idx'+1) (len'-idx'-1) fp')
  1058 + else splitLoop p (idx'+1) off' len' fp'
  1059 +-}
  1060 +
  1061 +{-
  1062 +-- | Like 'splitWith', except that sequences of adjacent separators are
  1063 +-- treated as a single separator. eg.
  1064 +--
  1065 +-- > tokens (=='a') "aabbaca" == ["bb","c"]
  1066 +--
  1067 +tokens :: (Word8 -> Bool) -> ByteString -> [ByteString]
  1068 +tokens f = P.filter (not.null) . splitWith f
  1069 +{-# INLINE tokens #-}
  1070 +-}
  1071 +
  1072 +-- | The 'group' function takes a ByteString and returns a list of
  1073 +-- ByteStrings such that the concatenation of the result is equal to the
  1074 +-- argument. Moreover, each sublist in the result contains only equal
  1075 +-- elements. For example,
  1076 +--
  1077 +-- > group "Mississippi" = ["M","i","ss","i","ss","i","pp","i"]
  1078 +--
  1079 +-- It is a special case of 'groupBy', which allows the programmer to
  1080 +-- supply their own equality test. It is about 40% faster than
  1081 +-- /groupBy (==)/
  1082 +group :: ByteString -> [ByteString]
  1083 +group xs
  1084 + | null xs = []
  1085 + | otherwise = ys : group zs
  1086 + where
  1087 + (ys, zs) = spanByte (unsafeHead xs) xs
  1088 +
  1089 +-- | The 'groupBy' function is the non-overloaded version of 'group'.
  1090 +groupBy :: (Word8 -> Word8 -> Bool) -> ByteString -> [ByteString]
  1091 +groupBy k xs
  1092 + | null xs = []
  1093 + | otherwise = unsafeTake n xs : groupBy k (unsafeDrop n xs)
  1094 + where
  1095 + n = 1 + findIndexOrEnd (not . k (unsafeHead xs)) (unsafeTail xs)
  1096 +
  1097 +-- | /O(n)/ The 'intercalate' function takes a 'ByteString' and a list of
  1098 +-- 'ByteString's and concatenates the list after interspersing the first
  1099 +-- argument between each element of the list.
  1100 +intercalate :: ByteString -> [ByteString] -> ByteString
  1101 +intercalate s = concat . (List.intersperse s)
  1102 +{-# INLINE [1] intercalate #-}
  1103 +
  1104 +{-# RULES
  1105 +"ByteString specialise intercalate c -> intercalateByte" forall c s1 s2 .
  1106 + intercalate (singleton c) (s1 : s2 : []) = intercalateWithByte c s1 s2
  1107 + #-}
  1108 +
  1109 +-- | /O(n)/ intercalateWithByte. An efficient way to join to two ByteStrings
  1110 +-- with a char. Around 4 times faster than the generalised join.
  1111 +--
  1112 +intercalateWithByte :: Word8 -> ByteString -> ByteString -> ByteString
  1113 +intercalateWithByte c f@(PS ffp s l) g@(PS fgp t m) = unsafeCreate len $ \ptr ->
  1114 + withForeignPtr ffp $ \fp ->
  1115 + withForeignPtr fgp $ \gp -> do
  1116 + memcpy ptr (fp `plusPtr` s) (fromIntegral l)
  1117 + poke (ptr `plusPtr` l) c
  1118 + memcpy (ptr `plusPtr` (l + 1)) (gp `plusPtr` t) (fromIntegral m)
  1119 + where
  1120 + len = length f + length g + 1
  1121 +{-# INLINE intercalateWithByte #-}
  1122 +
  1123 +-- ---------------------------------------------------------------------
  1124 +-- Indexing ByteStrings
  1125 +
  1126 +-- | /O(1)/ 'ByteString' index (subscript) operator, starting from 0.
  1127 +index :: ByteString -> Int -> Word8
  1128 +index ps n
  1129 + | n < 0 = moduleError "index" ("negative index: " ++ show n)
  1130 + | n >= length ps = moduleError "index" ("index too large: " ++ show n
  1131 + ++ ", length = " ++ show (length ps))
  1132 + | otherwise = ps `unsafeIndex` n
  1133 +{-# INLINE index #-}
  1134 +
  1135 +-- | /O(n)/ The 'elemIndex' function returns the index of the first