From bdcbd882e1944f843c312f812722d1495a55bfc0 Mon Sep 17 00:00:00 2001 From: Ignat Insarov Date: Thu, 19 Aug 2021 23:00:56 +0500 Subject: [PATCH] Straighten folds and scans. (#364) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add strict right folds. * Add property checks. * Add benchmarks. * Inline strictness checks. * Straighten scans. * Fix whitespace. * Use `===` for equality. * Use infix operator for brevity. * Add bench marks for lazy scans. * Use standard recursion schemes. * Dodge import conflicts on older GHC versions. * Final considerations according to the last review. * Final considerations according to one more last review. * Add bench mark for lazy accumulating maps. * Throw away `mapAccum[LR]Chunks`. Turns out we do not really need it. We thought we need it to implement `scan[lr]`, but actually `mapAccum[LR]` is enough. --- Data/ByteString/Lazy.hs | 62 ++++++++++++++++++++++++++++++---- Data/ByteString/Lazy/Char8.hs | 52 +++++++++++++++++++++++++--- bench/BenchAll.hs | 56 +++++++++++++++++++++--------- tests/Properties.hs | 40 +++++++++++++++++++++- tests/Properties/ByteString.hs | 7 ++-- 5 files changed, 185 insertions(+), 32 deletions(-) diff --git a/Data/ByteString/Lazy.hs b/Data/ByteString/Lazy.hs index 369248f8b..40edc11f4 100644 --- a/Data/ByteString/Lazy.hs +++ b/Data/ByteString/Lazy.hs @@ -92,7 +92,9 @@ module Data.ByteString.Lazy ( foldl1, foldl1', foldr, + foldr', foldr1, + foldr1', -- ** Special folds concat, @@ -106,9 +108,9 @@ module Data.ByteString.Lazy ( -- * Building ByteStrings -- ** Scans scanl, --- scanl1, --- scanr, --- scanr1, + scanl1, + scanr, + scanr1, -- ** Accumulating maps mapAccumL, @@ -460,6 +462,14 @@ foldr :: (Word8 -> a -> a) -> a -> ByteString -> a foldr k = foldrChunks (flip (S.foldr k)) {-# INLINE foldr #-} +-- | 'foldr'' is like 'foldr', but strict in the accumulator. +foldr' :: (Word8 -> a -> a) -> a -> ByteString -> a +foldr' f a = go + where + go Empty = a + go (Chunk c cs) = S.foldr' f (foldr' f a cs) c +{-# INLINE foldr' #-} + -- | 'foldl1' is a variant of 'foldl' that has no starting value -- argument, and thus must be applied to non-empty 'ByteString's. foldl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 @@ -479,6 +489,13 @@ foldr1 f (Chunk c0 cs0) = go c0 cs0 where go c Empty = S.foldr1 f c go c (Chunk c' cs) = S.foldr f (go c' cs) c +-- | 'foldr1'' is like 'foldr1', but strict in the accumulator. +foldr1' :: (Word8 -> Word8 -> Word8) -> ByteString -> Word8 +foldr1' _ Empty = errorEmptyList "foldr1'" +foldr1' f (Chunk c0 cs0) = go c0 cs0 + where go c Empty = S.foldr1' f c + go c (Chunk c' cs) = S.foldr' f (go c' cs) c + -- --------------------------------------------------------------------- -- Special folds @@ -617,11 +634,44 @@ scanl -- ^ input of length n -> ByteString -- ^ output of length n+1 -scanl f z = snd . foldl k (z,singleton z) - where - k (c,acc) a = let n = f c a in (n, acc `snoc` n) +scanl function = fmap (uncurry (flip snoc)) . mapAccumL (\x y -> (function x y, x)) {-# INLINE scanl #-} +-- | 'scanl1' is a variant of 'scanl' that has no starting value argument. +-- +-- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...] +scanl1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString +scanl1 function byteStream = case uncons byteStream of + Nothing -> Empty + Just (firstByte, remainingBytes) -> scanl function firstByte remainingBytes + +-- | 'scanr' is similar to 'foldr', but returns a list of successive +-- reduced values from the right. +-- +-- > scanr f z [..., x{n-1}, xn] == [..., x{n-1} `f` (xn `f` z), xn `f` z, z] +-- +-- Note that +-- +-- > head (scanr f z xs) == foldr f z xs +-- > last (scanr f z xs) == z +-- +scanr + :: (Word8 -> Word8 -> Word8) + -- ^ element -> accumulator -> new accumulator + -> Word8 + -- ^ starting value of accumulator + -> ByteString + -- ^ input of length n + -> ByteString + -- ^ output of length n+1 +scanr function = fmap (uncurry cons) . mapAccumR (\x y -> (function y x, x)) + +-- | 'scanr1' is a variant of 'scanr' that has no starting value argument. +scanr1 :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString +scanr1 function byteStream = case unsnoc byteStream of + Nothing -> Empty + Just (initialBytes, lastByte) -> scanr function lastByte initialBytes + -- --------------------------------------------------------------------- -- Unfolds and replicates diff --git a/Data/ByteString/Lazy/Char8.hs b/Data/ByteString/Lazy/Char8.hs index 034fd1eca..30a8888a0 100644 --- a/Data/ByteString/Lazy/Char8.hs +++ b/Data/ByteString/Lazy/Char8.hs @@ -70,7 +70,9 @@ module Data.ByteString.Lazy.Char8 ( foldl1, foldl1', foldr, + foldr', foldr1, + foldr1', -- ** Special folds concat, @@ -84,9 +86,9 @@ module Data.ByteString.Lazy.Char8 ( -- * Building ByteStrings -- ** Scans scanl, --- scanl1, --- scanr, --- scanr1, + scanl1, + scanr, + scanr1, -- ** Accumulating maps mapAccumL, @@ -238,7 +240,7 @@ import Foreign.Storable (peek) import Prelude hiding (reverse,head,tail,last,init,null,length,map,lines,foldl,foldr,unlines ,concat,any,take,drop,splitAt,takeWhile,dropWhile,span,break,elem,filter - ,unwords,words,maximum,minimum,all,concatMap,scanl,scanl1,foldl1,foldr1 + ,unwords,words,maximum,minimum,all,concatMap,scanl,scanl1,scanr,scanr1,foldl1,foldr1 ,readFile,writeFile,appendFile,replicate,getContents,getLine,putStr,putStrLn ,zip,zipWith,unzip,notElem,repeat,iterate,interact,cycle) @@ -347,6 +349,10 @@ foldr :: (Char -> a -> a) -> a -> ByteString -> a foldr f = L.foldr (f . w2c) {-# INLINE foldr #-} +-- | 'foldr'' is like 'foldr', but strict in the accumulator. +foldr' :: (Char -> a -> a) -> a -> ByteString -> a +foldr' f = L.foldr' (f . w2c) + -- | 'foldl1' is a variant of 'foldl' that has no starting value -- argument, and thus must be applied to non-empty 'ByteString's. foldl1 :: (Char -> Char -> Char) -> ByteString -> Char @@ -363,6 +369,10 @@ foldr1 :: (Char -> Char -> Char) -> ByteString -> Char foldr1 f ps = w2c (L.foldr1 (\x y -> c2w (f (w2c x) (w2c y))) ps) {-# INLINE foldr1 #-} +-- | 'foldr1'' is like 'foldr1', but strict in the accumulator. +foldr1' :: (Char -> Char -> Char) -> ByteString -> Char +foldr1' f ps = w2c (L.foldr1' (\x y -> c2w (f (w2c x) (w2c y))) ps) + -- | Map a function over a 'ByteString' and concatenate the results concatMap :: (Char -> ByteString) -> ByteString -> ByteString concatMap f = L.concatMap (f . w2c) @@ -404,6 +414,40 @@ minimum = w2c . L.minimum scanl :: (Char -> Char -> Char) -> Char -> ByteString -> ByteString scanl f z = L.scanl (\a b -> c2w (f (w2c a) (w2c b))) (c2w z) +-- | 'scanl1' is a variant of 'scanl' that has no starting value argument. +-- +-- > scanl1 f [x1, x2, ...] == [x1, x1 `f` x2, ...] +scanl1 :: (Char -> Char -> Char) -> ByteString -> ByteString +scanl1 f = L.scanl1 f' + where f' accumulator value = c2w (f (w2c accumulator) (w2c value)) + +-- | 'scanr' is similar to 'foldr', but returns a list of successive +-- reduced values from the right. +-- +-- > scanr f z [..., x{n-1}, xn] == [..., x{n-1} `f` (xn `f` z), xn `f` z, z] +-- +-- Note that +-- +-- > head (scanr f z xs) == foldr f z xs +-- > last (scanr f z xs) == z +-- +scanr + :: (Char -> Char -> Char) + -- ^ element -> accumulator -> new accumulator + -> Char + -- ^ starting value of accumulator + -> ByteString + -- ^ input of length n + -> ByteString + -- ^ output of length n+1 +scanr f = L.scanr f' . c2w + where f' accumulator value = c2w (f (w2c accumulator) (w2c value)) + +-- | 'scanr1' is a variant of 'scanr' that has no starting value argument. +scanr1 :: (Char -> Char -> Char) -> ByteString -> ByteString +scanr1 f = L.scanr1 f' + where f' accumulator value = c2w (f (w2c accumulator) (w2c value)) + -- | The 'mapAccumL' function behaves like a combination of 'map' and -- 'foldl'; it applies a function to each element of a ByteString, -- passing an accumulating parameter from left to right, and returning a diff --git a/bench/BenchAll.hs b/bench/BenchAll.hs index 4234d0473..c6572f6bb 100644 --- a/bench/BenchAll.hs +++ b/bench/BenchAll.hs @@ -221,6 +221,9 @@ sortInputs = map (`S.take` S.pack [122, 121 .. 32]) [10..25] foldInputs :: [S.ByteString] foldInputs = map (\k -> S.pack $ if k <= 6 then take (2 ^ k) [32..95] else concat (replicate (2 ^ (k - 6)) [32..95])) [0..16] +foldInputsLazy :: [L.ByteString] +foldInputsLazy = map (\k -> L.pack $ if k <= 6 then take (2 ^ k) [32..95] else concat (replicate (2 ^ (k - 6)) [32..95])) [0..16] + zeroes :: L.ByteString zeroes = L.replicate 10000 0 @@ -401,22 +404,43 @@ main = do , bench "one huge word" $ nf S8.words byteStringData ] , bgroup "folds" - [ bgroup "foldl'" $ map (\s -> bench (show $ S.length s) $ - nf (S.foldl' (\acc x -> acc + fromIntegral x) (0 :: Int)) s) foldInputs - , bgroup "foldr'" $ map (\s -> bench (show $ S.length s) $ - nf (S.foldr' (\x acc -> fromIntegral x + acc) (0 :: Int)) s) foldInputs - , bgroup "unfoldrN" $ map (\s -> bench (show $ S.length s) $ - nf (S.unfoldrN (S.length s) (\a -> Just (a, a + 1))) 0) foldInputs - , bgroup "mapAccumL" $ map (\s -> bench (show $ S.length s) $ - nf (S.mapAccumL (\acc x -> (acc + fromIntegral x, succ x)) (0 :: Int)) s) foldInputs - , bgroup "mapAccumR" $ map (\s -> bench (show $ S.length s) $ - nf (S.mapAccumR (\acc x -> (fromIntegral x + acc, succ x)) (0 :: Int)) s) foldInputs - , bgroup "scanl" $ map (\s -> bench (show $ S.length s) $ - nf (S.scanl (+) 0) s) foldInputs - , bgroup "scanr" $ map (\s -> bench (show $ S.length s) $ - nf (S.scanr (+) 0) s) foldInputs - , bgroup "filter" $ map (\s -> bench (show $ S.length s) $ - nf (S.filter odd) s) foldInputs + [ bgroup "strict" + [ bgroup "foldl'" $ map (\s -> bench (show $ S.length s) $ + nf (S.foldl' (\acc x -> acc + fromIntegral x) (0 :: Int)) s) foldInputs + , bgroup "foldr'" $ map (\s -> bench (show $ S.length s) $ + nf (S.foldr' (\x acc -> fromIntegral x + acc) (0 :: Int)) s) foldInputs + , bgroup "foldr1'" $ map (\s -> bench (show $ S.length s) $ + nf (S.foldr1' (\x acc -> fromIntegral x + acc)) s) foldInputs + , bgroup "unfoldrN" $ map (\s -> bench (show $ S.length s) $ + nf (S.unfoldrN (S.length s) (\a -> Just (a, a + 1))) 0) foldInputs + , bgroup "mapAccumL" $ map (\s -> bench (show $ S.length s) $ + nf (S.mapAccumL (\acc x -> (acc + fromIntegral x, succ x)) (0 :: Int)) s) foldInputs + , bgroup "mapAccumR" $ map (\s -> bench (show $ S.length s) $ + nf (S.mapAccumR (\acc x -> (fromIntegral x + acc, succ x)) (0 :: Int)) s) foldInputs + , bgroup "scanl" $ map (\s -> bench (show $ S.length s) $ + nf (S.scanl (+) 0) s) foldInputs + , bgroup "scanr" $ map (\s -> bench (show $ S.length s) $ + nf (S.scanr (+) 0) s) foldInputs + , bgroup "filter" $ map (\s -> bench (show $ S.length s) $ + nf (S.filter odd) s) foldInputs + ] + , bgroup "lazy" + [ bgroup "foldl'" $ map (\s -> bench (show $ L.length s) $ + nf (L.foldl' (\acc x -> acc + fromIntegral x) (0 :: Int)) s) foldInputsLazy + , bgroup "foldr'" $ map (\s -> bench (show $ L.length s) $ + nf (L.foldr' (\x acc -> fromIntegral x + acc) (0 :: Int)) s) foldInputsLazy + , bgroup "foldr1'" $ map (\s -> bench (show $ L.length s) $ + nf (L.foldr1' (\x acc -> fromIntegral x + acc)) s) foldInputsLazy + , bgroup "mapAccumL" $ map (\s -> bench (show $ L.length s) $ + nf (L.mapAccumL (\acc x -> (acc + fromIntegral x, succ x)) (0 :: Int)) s) foldInputsLazy + , bgroup "mapAccumR" $ map (\s -> bench (show $ L.length s) $ + nf (L.mapAccumR (\acc x -> (fromIntegral x + acc, succ x)) (0 :: Int)) s) foldInputsLazy + , bgroup "scanl" $ map (\s -> bench (show $ L.length s) $ + nf (L.scanl (+) 0) s) foldInputsLazy + , bgroup "scanr" $ map (\s -> bench (show $ L.length s) $ + nf (L.scanr (+) 0) s) foldInputsLazy + ] + ] , bgroup "findIndexOrLength" [ bench "takeWhile" $ nf (L.takeWhile even) zeroes diff --git a/tests/Properties.hs b/tests/Properties.hs index 9d1d1c6b4..27004f8e5 100644 --- a/tests/Properties.hs +++ b/tests/Properties.hs @@ -15,11 +15,11 @@ import Control.Concurrent import Control.Exception import System.Posix.Internals (c_unlink) +import qualified Data.List as List import Data.Char import Data.Word import Data.Maybe import Data.Int (Int64) -import Data.Monoid import Data.Semigroup import GHC.Exts (Int(..), newPinnedByteArray#, unsafeFreezeByteArray#) import GHC.ST (ST(..), runST) @@ -463,6 +463,12 @@ short_tests = , testProperty "pinned" prop_short_pinned ] +------------------------------------------------------------------------ +-- Strictness checks. + +explosiveTail :: L.ByteString -> L.ByteString +explosiveTail = (`L.append` error "Tail of this byte string is undefined!") + ------------------------------------------------------------------------ -- The entry point @@ -475,6 +481,7 @@ main = defaultMain $ testGroup "All" , testGroup "Misc" misc_tests , testGroup "IO" io_tests , testGroup "Short" short_tests + , testGroup "Strictness" strictness_checks ] io_tests = @@ -535,5 +542,36 @@ misc_tests = , testProperty "readIntegerUnsafe" prop_readIntegerUnsafe ] +strictness_checks = + [ testGroup "Lazy Word8" + [ testProperty "foldr is lazy" $ \ xs -> + List.genericTake (L.length xs) (L.foldr (:) [ ] (explosiveTail xs)) === L.unpack xs + , testProperty "foldr' is strict" $ expectFailure $ \ xs ys -> + List.genericTake (L.length xs) (L.foldr' (:) [ ] (explosiveTail (xs <> ys))) === L.unpack xs + , testProperty "foldr1 is lazy" $ \ xs -> L.length xs > 0 ==> + L.foldr1 const (explosiveTail (xs <> L.singleton 1)) === L.head xs + , testProperty "foldr1' is strict" $ expectFailure $ \ xs ys -> L.length xs > 0 ==> + L.foldr1' const (explosiveTail (xs <> L.singleton 1 <> ys)) === L.head xs + , testProperty "scanl is lazy" $ \ xs -> + L.take (L.length xs + 1) (L.scanl (+) 0 (explosiveTail (xs <> L.singleton 1))) === (L.pack . fmap (L.foldr (+) 0) . L.inits) xs + , testProperty "scanl1 is lazy" $ \ xs -> L.length xs > 0 ==> + L.take (L.length xs) (L.scanl1 (+) (explosiveTail (xs <> L.singleton 1))) === (L.pack . fmap (L.foldr1 (+)) . tail . L.inits) xs + ] + , testGroup "Lazy Char" + [ testProperty "foldr is lazy" $ \ xs -> + List.genericTake (D.length xs) (D.foldr (:) [ ] (explosiveTail xs)) === D.unpack xs + , testProperty "foldr' is strict" $ expectFailure $ \ xs ys -> + List.genericTake (D.length xs) (D.foldr' (:) [ ] (explosiveTail (xs <> ys))) === D.unpack xs + , testProperty "foldr1 is lazy" $ \ xs -> D.length xs > 0 ==> + D.foldr1 const (explosiveTail (xs <> D.singleton 'x')) === D.head xs + , testProperty "foldr1' is strict" $ expectFailure $ \ xs ys -> D.length xs > 0 ==> + D.foldr1' const (explosiveTail (xs <> D.singleton 'x' <> ys)) === D.head xs + , testProperty "scanl is lazy" $ \ xs -> let char1 +. char2 = toEnum (fromEnum char1 + fromEnum char2) in + D.take (D.length xs + 1) (D.scanl (+.) '\NUL' (explosiveTail (xs <> D.singleton '\SOH'))) === (D.pack . fmap (D.foldr (+.) '\NUL') . D.inits) xs + , testProperty "scanl1 is lazy" $ \ xs -> D.length xs > 0 ==> let char1 +. char2 = toEnum (fromEnum char1 + fromEnum char2) in + D.take (D.length xs) (D.scanl1 (+.) (explosiveTail (xs <> D.singleton '\SOH'))) === (D.pack . fmap (D.foldr1 (+.)) . tail . D.inits) xs + ] + ] + removeFile :: String -> IO () removeFile fn = void $ withCString fn c_unlink diff --git a/tests/Properties/ByteString.hs b/tests/Properties/ByteString.hs index a16558596..c014cb02a 100644 --- a/tests/Properties/ByteString.hs +++ b/tests/Properties/ByteString.hs @@ -410,10 +410,8 @@ tests = \f (toElem -> c) x -> B.foldl' ((toElem .) . f) c x === foldl' ((toElem .) . f) c (B.unpack x) , testProperty "foldr" $ \f (toElem -> c) x -> B.foldr ((toElem .) . f) c x === foldr ((toElem .) . f) c (B.unpack x) -#ifndef BYTESTRING_LAZY , testProperty "foldr'" $ \f (toElem -> c) x -> B.foldr' ((toElem .) . f) c x === foldr' ((toElem .) . f) c (B.unpack x) -#endif , testProperty "foldl cons" $ \x -> B.foldl (flip B.cons) B.empty x === B.reverse x @@ -432,10 +430,8 @@ tests = \f x -> not (B.null x) ==> B.foldl1' ((toElem .) . f) x === List.foldl1' ((toElem .) . f) (B.unpack x) , testProperty "foldr1" $ \f x -> not (B.null x) ==> B.foldr1 ((toElem .) . f) x === foldr1 ((toElem .) . f) (B.unpack x) -#ifndef BYTESTRING_LAZY , testProperty "foldr1'" $ -- there is not Data.List.foldr1' \f x -> not (B.null x) ==> B.foldr1' ((toElem .) . f) x === foldr1 ((toElem .) . f) (B.unpack x) -#endif , testProperty "foldl1 const" $ \x -> not (B.null x) ==> B.foldl1 const x === B.head x @@ -455,7 +451,6 @@ tests = , testProperty "scanl foldl" $ \f (toElem -> c) x -> not (B.null x) ==> B.last (B.scanl ((toElem .) . f) c x) === B.foldl ((toElem .) . f) c x -#ifndef BYTESTRING_LAZY , testProperty "scanr" $ \f (toElem -> c) x -> B.unpack (B.scanr ((toElem .) . f) c x) === scanr ((toElem .) . f) c (B.unpack x) , testProperty "scanl1" $ @@ -466,6 +461,8 @@ tests = \f x -> B.unpack (B.scanr1 ((toElem .) . f) x) === scanr1 ((toElem .) . f) (B.unpack x) , testProperty "scanr1 empty" $ \f -> B.scanr1 f B.empty === B.empty + +#ifndef BYTESTRING_LAZY , testProperty "sort" $ \x -> B.unpack (B.sort x) === List.sort (B.unpack x) #endif