Permalink
Browse files

ENH Check for overly long lines

This patch adds a check that rejects overly long input lines, which could
otherwise use unbounded amounts of memory.
  • Loading branch information...
luispedro committed Feb 7, 2017
1 parent dad8b88 commit 11f0a43f95ed930b9b8062abc4c3b2948d67d595
Showing with 38 additions and 10 deletions.
  1. +3 −3 NGLess/Data/FastQ.hs
  2. +3 −3 NGLess/FileOrStream.hs
  3. +1 −1 NGLess/Interpret.hs
  4. +31 −3 NGLess/Utils/Conduit.hs
@@ -143,14 +143,14 @@ fqDecodeC enc = groupC 4 =$= CL.mapM parseShortReads
fqDecode :: FastQEncoding -> BL.ByteString -> NGLess [ShortRead]
fqDecode enc s = C.runConduit $
CL.sourceList (BL.toChunks s)
=$= linesC
=$= linesCBounded
=$= fqDecodeC enc
=$= CL.consume
statsFromFastQ :: FilePath -> FastQEncoding -> NGLessIO FQStatistics
statsFromFastQ fp enc =
conduitPossiblyCompressedFile fp
=$= linesC
=$= linesCBounded
=$= getPairedLines
$$ fqStatsC
where
@@ -171,7 +171,7 @@ encodingFor fp = do
minLc _ = throwDataError ("Malformed FASTQ file: '" ++ fp ++ "': number of lines is not a multiple of 4")
(c,m) <- conduitPossiblyCompressedFile fp
=$= linesC
=$= linesCBounded
=$= groupC 4
=$= CL.isolate 100
=$= CL.mapM minLc
@@ -1,4 +1,4 @@
{- Copyright 2016 NGLess Authors
{- Copyright 2016-2017 NGLess Authors
- License: MIT
-}
@@ -43,8 +43,8 @@ asFile (Stream fp istream) = do
asStream :: FileOrStream -> (FilePath, C.Source NGLessIO ByteLine)
asStream (Stream fp istream) = (fp, istream)
asStream (File fp) = (fp, C.sourceFile fp =$= linesC)
asStream (File fp) = (fp, C.sourceFile fp =$= linesCBounded)
asSamStream (File fname) = (fname, samBamConduit fname =$= linesC)
asSamStream (File fname) = (fname, samBamConduit fname =$= linesCBounded)
asSamStream (Stream fname istream) = (fname, istream)
@@ -363,7 +363,7 @@ executePreprocess (NGOReadSet name rs) args (Block [Variable var] block) = do
let asSource "" _ = C.yieldMany []
asSource fp q =
let input = conduitPossiblyCompressedFile fp
=$= linesC
=$= linesCBounded
=$= fqDecodeC enc
=$= C.conduitVector 4096
in if qcInput
@@ -1,6 +1,6 @@
{- Copyright 2013-2016 NGLess Authors
{- Copyright 2013-2017 NGLess Authors
- License: MIT -}
{-# LANGUAGE ScopedTypeVariables, CPP #-}
{-# LANGUAGE ScopedTypeVariables, FlexibleContexts, CPP #-}
module Utils.Conduit
( ByteLine(..)
@@ -9,6 +9,7 @@ module Utils.Conduit
, asyncMapC
, asyncMapEitherC
, linesC
, linesCBounded
, groupC
, awaitJust
, asyncGzipTo
@@ -38,7 +39,7 @@ import Data.Conduit ((=$=), ($$))
import qualified Data.Sequence as Seq
import Data.Sequence ((|>), ViewL(..))
import Control.Monad (unless, forM_)
import Control.Monad (unless, forM_, when)
import Control.Monad.IO.Class (MonadIO, liftIO)
import Control.Monad.Error.Class (MonadError(..))
import Control.Monad.Trans.Resource (MonadResource)
@@ -47,9 +48,24 @@ import Control.DeepSeq
import System.IO
import Data.List (isSuffixOf)
import NGLess.NGError
-- | This just signals that a "line" is expected: a 'ByteLine' wraps the
-- bytes that make up one line. Use 'unwrapByteLine' to get the wrapped
-- bytes back.
newtype ByteLine = ByteLine { unwrapByteLine :: B.ByteString }
-- | Split a stream of byte chunks into lines, with a bound on how much
-- data is buffered while waiting for the end of a line.
--
-- Lines are separated by byte 10 (ASCII newline); the separator is not
-- included in the output. A final line without a trailing newline is
-- still emitted, provided it is non-empty.
--
-- If more than @maxLineSize@ bytes have been buffered without seeing a
-- newline, the conduit fails through 'throwDataError'. Note that the limit
-- is only checked on the buffered (unterminated) part of a line: a line
-- whose newline arrives in the same chunk as its final bytes is emitted
-- without a further length check.
linesBounded :: (MonadError NGError m) => Int -> C.Conduit B.ByteString m B.ByteString
linesBounded maxLineSize = continue 0 []
    where
        -- @n@ is the number of bytes buffered for the current, still
        -- unterminated line; @toks@ holds those chunks, most recent first.
        continue n toks
            | n > maxLineSize = throwDataError ("Line too long (longer than " ++ show maxLineSize ++ " characters).")
            | otherwise = C.await >>= maybe flush (emit n toks)
            where
                -- End of input: emit whatever is left, unless it is empty.
                flush = when (n > 0) $ C.yield (B.concat $ reverse toks)
        -- Process one chunk: emit every line completed inside it and buffer
        -- the remainder.
        emit n toks tok = case B.elemIndex 10 tok of
            Nothing -> continue (n + B.length tok) (tok:toks)
            Just ix -> do
                C.yield (B.concat $ reverse (B.take ix tok : toks))
                -- Skip the newline byte itself and keep scanning the chunk.
                emit 0 [] (B.drop (ix + 1) tok)
linesC :: (Monad m) => C.Conduit B.ByteString m ByteLine
linesC =
CB.lines
@@ -62,6 +78,18 @@ linesC =
=$= CL.map ByteLine
{-# INLINE linesC #-}
-- | Split incoming byte chunks into 'ByteLine's using 'linesBounded' with a
-- limit of 8192. When compiled with @WINDOWS@ defined, a trailing byte 13
-- (carriage return) is removed from each line before it is wrapped.
linesCBounded :: (MonadError NGError m) => C.Conduit B.ByteString m ByteLine
linesCBounded =
    linesBounded 8192
#ifdef WINDOWS
        =$= CL.map dropTrailingCR
#endif
        =$= CL.map ByteLine
#ifdef WINDOWS
    where
        -- Remove a single trailing byte 13, if the line ends with one.
        dropTrailingCR bytes
            | B.null bytes = bytes
            | B.index bytes lastIx == 13 = B.take lastIx bytes
            | otherwise = bytes
            where
                lastIx = B.length bytes - 1
#endif
-- | Sink that unwraps each incoming 'ByteLine', passes the bytes through
-- 'C.unlinesAscii' and writes the result to the given handle.
byteLineSinkHandle :: (MonadIO m) => Handle -> C.Sink ByteLine m ()
byteLineSinkHandle outHandle =
    CL.map unwrapByteLine
        =$= C.unlinesAscii
        =$= C.sinkHandle outHandle

0 comments on commit 11f0a43

Please sign in to comment.