From d7d87af4d6ad67db18f9ba940fd0f9d9b7ba96bb Mon Sep 17 00:00:00 2001 From: Masahiro Sakai Date: Fri, 7 May 2021 12:57:04 +0900 Subject: [PATCH 1/2] add a test case for decoding a long chunked string --- package.yaml | 1 + test/Base.hs | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/package.yaml b/package.yaml index 59f0218..9040996 100644 --- a/package.yaml +++ b/package.yaml @@ -42,3 +42,4 @@ tests: - tasty-hunit - tasty-quickcheck - tasty-th + - deepseq diff --git a/test/Base.hs b/test/Base.hs index 91e5222..bd71b75 100644 --- a/test/Base.hs +++ b/test/Base.hs @@ -4,14 +4,17 @@ {-# LANGUAGE TemplateHaskell #-} module Base (baseTestGroup) where +import Control.DeepSeq import qualified Data.ByteString.Lazy.Encoding as Enc import qualified Data.ByteString.Lazy.Encoding.Internal as Enc import qualified Data.ByteString.Lazy as BL +import qualified Data.ByteString as B import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Encoding as TL import Test.Tasty import Test.Tasty.QuickCheck +import Test.Tasty.HUnit import Test.Tasty.TH checkEncode :: Enc.TextEncoding -> (TL.Text -> BL.ByteString) -> Property @@ -62,6 +65,13 @@ prop_encode_decode_utf32le = checkRoundTrip Enc.utf32le prop_encode_decode_utf32be :: Property prop_encode_decode_utf32be = checkRoundTrip Enc.utf32be +case_decoding_long_chunked_string :: IO () +case_decoding_long_chunked_string = do + let ls = [32752, 32752, 32752, 32752, 32752, 32752, 32752, 32752, 32752, 18193] + bs = BL.fromChunks [B.pack (replicate l (fromIntegral (fromEnum 'a'))) | l <- ls] + let t = Enc.decode Enc.utf8 bs + deepseq t $ return () + -- --------------------------------------------------------------------- baseTestGroup :: TestTree From d0a2413b83ee9c3504f696d92cc4609a1c3db484 Mon Sep 17 00:00:00 2001 From: Masahiro Sakai Date: Sat, 8 May 2021 00:26:15 +0900 Subject: [PATCH 2/2] fix memory corruption bug The computation delayed by unsafeInterleaveIO uses a memory allocated by allocaArray outside of its scope. --- src/Data/ByteString/Lazy/Encoding/Internal.hs | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/Data/ByteString/Lazy/Encoding/Internal.hs b/src/Data/ByteString/Lazy/Encoding/Internal.hs index 482e671..4d89674 100644 --- a/src/Data/ByteString/Lazy/Encoding/Internal.hs +++ b/src/Data/ByteString/Lazy/Encoding/Internal.hs @@ -127,7 +127,7 @@ decodeWith Enc.TextEncoding{ .. } inBufSize outBufSize b = TL.fromChunks $ unsaf moveBytes (q `plusPtr` bufR buf) p (bufferAvailable buf) go (B.drop (bufferAvailable buf) b : bs) buf{ bufR = bufR buf + bufferAvailable buf } - flushOutBuf :: CharBuffer -> Ptr Word16 -> IO ([T.Text], CharBuffer) + flushOutBuf :: CharBuffer -> ForeignPtr Word16 -> IO ([T.Text], CharBuffer) flushOutBuf buf workspace | isEmptyBuffer buf = return ([], buf{ bufL=0, bufR=0 }) | charSize==2 = do @@ -136,7 +136,8 @@ decodeWith Enc.TextEncoding{ .. } inBufSize outBufSize b = TL.fromChunks $ unsaf p' = castPtr p t <- T.fromPtr (p' `plusPtr` bufL buf) (fromIntegral (bufferElems buf)) return ([t], buf{ bufL=0, bufR=0 }) - | otherwise = do + | otherwise = + withForeignPtr workspace $ \workspace' -> withBuffer buf $ \p -> do let p' :: Ptr Char p' = castPtr p @@ -145,18 +146,18 @@ decodeWith Enc.TextEncoding{ .. } inBufSize outBufSize b = TL.fromChunks $ unsaf | otherwise = do c <- liftM fromEnum $ peekElemOff p' i if c < 0x10000 then do - pokeElemOff workspace j (fromIntegral c) + pokeElemOff workspace' j (fromIntegral c) f (i+1) (j+1) else do let c' = c - 0x10000 - pokeElemOff workspace j (fromIntegral (c' `div` 0x400 + 0xd800)) - pokeElemOff workspace (j+1) (fromIntegral (c' `mod` 0x400 + 0xdc00)) + pokeElemOff workspace' j (fromIntegral (c' `div` 0x400 + 0xd800)) + pokeElemOff workspace' (j+1) (fromIntegral (c' `mod` 0x400 + 0xdc00)) f (i+1) (j+2) n <- f (bufL buf) 0 - t <- T.fromPtr workspace (fromIntegral n) + t <- T.fromPtr workspace' (fromIntegral n) return ([t], buf{ bufL=0, bufR=0 }) - loop :: [B.ByteString] -> Buffer Word8 -> CharBuffer -> Ptr Word16 -> IO [T.Text] + loop :: [B.ByteString] -> Buffer Word8 -> CharBuffer -> ForeignPtr Word16 -> IO [T.Text] loop bs inBuf outBuf workspace = do (bs', inBuf1) <- fillInBuf bs inBuf if isEmptyBuffer inBuf1 then do @@ -190,8 +191,5 @@ decodeWith Enc.TextEncoding{ .. } inBufSize outBufSize b = TL.fromChunks $ unsaf inBuf <- newByteBuffer inBufSize ReadBuffer outBuf <- newCharBuffer outBufSize WriteBuffer - if charSize == 2 then do - loop (BL.toChunks b) inBuf outBuf nullPtr - else do - allocaArray (outBufSize * 2) $ \workspace -> - loop (BL.toChunks b) inBuf outBuf workspace + workspace <- if charSize == 2 then newForeignPtr_ nullPtr else mallocForeignPtrArray (outBufSize * 2) + loop (BL.toChunks b) inBuf outBuf workspace