Skip to content

Commit

Permalink
Merge pull request #3 from msakai/fix-decoding-long-chunked-string
Browse files Browse the repository at this point in the history
Fix memory corruption bug
  • Loading branch information
msakai committed May 7, 2021
2 parents b8a126a + d0a2413 commit 5e693b0
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 12 deletions.
1 change: 1 addition & 0 deletions package.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,4 @@ tests:
- tasty-hunit
- tasty-quickcheck
- tasty-th
- deepseq
22 changes: 10 additions & 12 deletions src/Data/ByteString/Lazy/Encoding/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ decodeWith Enc.TextEncoding{ .. } inBufSize outBufSize b = TL.fromChunks $ unsaf
moveBytes (q `plusPtr` bufR buf) p (bufferAvailable buf)
go (B.drop (bufferAvailable buf) b : bs) buf{ bufR = bufR buf + bufferAvailable buf }

flushOutBuf :: CharBuffer -> Ptr Word16 -> IO ([T.Text], CharBuffer)
flushOutBuf :: CharBuffer -> ForeignPtr Word16 -> IO ([T.Text], CharBuffer)
flushOutBuf buf workspace
| isEmptyBuffer buf = return ([], buf{ bufL=0, bufR=0 })
| charSize==2 = do
Expand All @@ -136,7 +136,8 @@ decodeWith Enc.TextEncoding{ .. } inBufSize outBufSize b = TL.fromChunks $ unsaf
p' = castPtr p
t <- T.fromPtr (p' `plusPtr` bufL buf) (fromIntegral (bufferElems buf))
return ([t], buf{ bufL=0, bufR=0 })
| otherwise = do
| otherwise =
withForeignPtr workspace $ \workspace' ->
withBuffer buf $ \p -> do
let p' :: Ptr Char
p' = castPtr p
Expand All @@ -145,18 +146,18 @@ decodeWith Enc.TextEncoding{ .. } inBufSize outBufSize b = TL.fromChunks $ unsaf
| otherwise = do
c <- liftM fromEnum $ peekElemOff p' i
if c < 0x10000 then do
pokeElemOff workspace j (fromIntegral c)
pokeElemOff workspace' j (fromIntegral c)
f (i+1) (j+1)
else do
let c' = c - 0x10000
pokeElemOff workspace j (fromIntegral (c' `div` 0x400 + 0xd800))
pokeElemOff workspace (j+1) (fromIntegral (c' `mod` 0x400 + 0xdc00))
pokeElemOff workspace' j (fromIntegral (c' `div` 0x400 + 0xd800))
pokeElemOff workspace' (j+1) (fromIntegral (c' `mod` 0x400 + 0xdc00))
f (i+1) (j+2)
n <- f (bufL buf) 0
t <- T.fromPtr workspace (fromIntegral n)
t <- T.fromPtr workspace' (fromIntegral n)
return ([t], buf{ bufL=0, bufR=0 })

loop :: [B.ByteString] -> Buffer Word8 -> CharBuffer -> Ptr Word16 -> IO [T.Text]
loop :: [B.ByteString] -> Buffer Word8 -> CharBuffer -> ForeignPtr Word16 -> IO [T.Text]
loop bs inBuf outBuf workspace = do
(bs', inBuf1) <- fillInBuf bs inBuf
if isEmptyBuffer inBuf1 then do
Expand Down Expand Up @@ -190,8 +191,5 @@ decodeWith Enc.TextEncoding{ .. } inBufSize outBufSize b = TL.fromChunks $ unsaf

inBuf <- newByteBuffer inBufSize ReadBuffer
outBuf <- newCharBuffer outBufSize WriteBuffer
if charSize == 2 then do
loop (BL.toChunks b) inBuf outBuf nullPtr
else do
allocaArray (outBufSize * 2) $ \workspace ->
loop (BL.toChunks b) inBuf outBuf workspace
workspace <- if charSize == 2 then newForeignPtr_ nullPtr else mallocForeignPtrArray (outBufSize * 2)
loop (BL.toChunks b) inBuf outBuf workspace
10 changes: 10 additions & 0 deletions test/Base.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,17 @@
{-# LANGUAGE TemplateHaskell #-}
module Base (baseTestGroup) where

import Control.DeepSeq
import qualified Data.ByteString.Lazy.Encoding as Enc
import qualified Data.ByteString.Lazy.Encoding.Internal as Enc
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString as B
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL

import Test.Tasty
import Test.Tasty.QuickCheck
import Test.Tasty.HUnit
import Test.Tasty.TH

checkEncode :: Enc.TextEncoding -> (TL.Text -> BL.ByteString) -> Property
Expand Down Expand Up @@ -62,6 +65,13 @@ prop_encode_decode_utf32le = checkRoundTrip Enc.utf32le
prop_encode_decode_utf32be :: Property
prop_encode_decode_utf32be = checkRoundTrip Enc.utf32be

case_decoding_long_chunked_string :: IO ()
case_decoding_long_chunked_string = do
let ls = [32752, 32752, 32752, 32752, 32752, 32752, 32752, 32752, 32752, 18193]
bs = BL.fromChunks [B.pack (replicate l (fromIntegral (fromEnum 'a'))) | l <- ls]
let t = Enc.decode Enc.utf8 bs
deepseq t $ return ()

-- ---------------------------------------------------------------------

baseTestGroup :: TestTree
Expand Down

0 comments on commit 5e693b0

Please sign in to comment.