Skip to content

Commit

Permalink
ENH Close FDs after FQ encoding detection
Browse files Browse the repository at this point in the history
Previously, file descriptors were left open until NGLess exited.
  • Loading branch information
luispedro committed Jan 7, 2019
1 parent b22189e commit 96b2be3
Show file tree
Hide file tree
Showing 15 changed files with 44 additions and 29 deletions.
1 change: 1 addition & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ Version 0.10.0+
* Fix bug with orf_find & prots_out argument
* Call bwa/minimap2 with interleaved fastq files
* Add --verbose flag to check-install mode
* Avoid leaving open file descriptors after FastQ encoding detection

Version 0.10.0 2018-11-12 by luispedro
* Fix to lock1's return value when used with paths (#68 - reopen)
Expand Down
1 change: 1 addition & 0 deletions NGLess/Data/FastQ.hs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import Control.DeepSeq (NFData(..))
import Data.Conduit ((.|))
import Data.Monoid ((<>))
import Control.Monad (forM_)
import Data.Conduit.Algorithms.Async (conduitPossiblyCompressedFile)
import Control.Monad.Except
import Control.Monad.Trans.Resource
import Control.Exception
Expand Down
9 changes: 4 additions & 5 deletions NGLess/ExternalModules.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{- Copyright 2015-2018 NGLess Authors
{- Copyright 2015-2019 NGLess Authors
- License: MIT
-}

Expand All @@ -15,6 +15,7 @@ import qualified Data.List.Utils as LU
import qualified Data.Conduit as C
import qualified Data.Conduit.Combinators as CC
import Data.Conduit ((.|))
import Data.Conduit.Algorithms.Async (withPossiblyCompressedFile)
import Control.Monad.Extra (whenJust)
import GHC.Conc (getNumCapabilities)
import Data.Yaml ((.!=), (.:?), (.:))
Expand All @@ -39,7 +40,6 @@ import NGLess.NGLEnvironment
import FileManagement
import Utils.Samtools
import Configuration
import Utils.Conduit
import FileOrStream
import Utils.Suggestion
import Utils.Utils
Expand Down Expand Up @@ -376,9 +376,8 @@ asBamFile fname

uncompressFile :: FilePath -> NGLessIO FilePath
uncompressFile f = makeNGLTempFile f "uncompress_" (takeBaseName f) $ \hout ->
C.runConduit $
conduitPossiblyCompressedFile f
.| CC.sinkHandle hout
withPossiblyCompressedFile f $ \src ->
C.runConduit (src .| CC.sinkHandle hout)

argsArguments :: Command -> KwArgsValues -> NGLessIO [String]
argsArguments cmd args = concat <$> forM (additional cmd) a1
Expand Down
3 changes: 2 additions & 1 deletion NGLess/Interpret.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{- Copyright 2013-2018 NGLess Authors
{- Copyright 2013-2019 NGLess Authors
- License: MIT
-}
{-# LANGUAGE FlexibleContexts, CPP #-}
Expand Down Expand Up @@ -68,6 +68,7 @@ import qualified Data.Conduit.Binary as CB
import qualified Data.Conduit.List as CL
import qualified Data.Conduit as C
import Data.Conduit ((.|))
import Data.Conduit.Algorithms.Async (conduitPossiblyCompressedFile)
import qualified Control.Concurrent.Async as A
import qualified Control.Concurrent.STM.TBMQueue as TQ
import qualified Data.Conduit.TQueue as CA
Expand Down
3 changes: 2 additions & 1 deletion NGLess/Interpretation/Count.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{- Copyright 2015-2018 NGLess Authors
{- Copyright 2015-2019 NGLess Authors
- License: MIT
-}
{-# LANGUAGE FlexibleContexts, CPP #-}
Expand Down Expand Up @@ -39,6 +39,7 @@ import qualified Data.Conduit as C
import qualified Data.Conduit.Combinators as CC
import qualified Data.Conduit.List as CL
import qualified Data.Conduit.Algorithms.Utils as CAlg
import Data.Conduit.Algorithms.Async (conduitPossiblyCompressedFile)
import Data.Conduit ((.|))
import qualified Data.Strict.Tuple as TU
import Data.Strict.Tuple (Pair(..))
Expand Down
36 changes: 21 additions & 15 deletions NGLess/Interpretation/FastQ.hs
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,11 @@ import qualified Data.Conduit.List as CL
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import qualified Control.Concurrent.Async as A
import Data.Conduit ((.|))
import Data.Conduit.Algorithms.Async (withPossiblyCompressedFile)
import Control.Monad.Trans.Resource (runResourceT)
import Control.Exception (try)
import Control.Monad.Except
import Data.Conduit ((.|))
import Data.Maybe
import Data.Word

Expand Down Expand Up @@ -65,18 +67,21 @@ encodingFor fp = do
_ -> throwDataError ("Malformed file '" ++ fp ++ "': number of lines is not a multiple of 4.")


C.runConduit $
conduitPossiblyCompressedFile fp
.| linesC
.| CL.chunksOf 4
.| encodingC 255 0
withPossiblyCompressedFile fp $ \src ->
C.runConduit $
src
.| linesC
.| CL.chunksOf 4
.| encodingC 255 0

-- | Checks whether the file has no content
--
-- Note that this is more than checking whether the file has zero size: a
-- compressed file with no content still has a non-zero size on disk.
checkNoContent fp = C.runConduitRes $
conduitPossiblyCompressedFile fp
checkNoContent :: FilePath -> NGLessIO Bool
checkNoContent fp = runResourceT $ withPossiblyCompressedFile fp $ \src ->
C.runConduit $
src
.| linesC
.| CL.isolate 1
.| CL.fold (\_ _ -> False) True
Expand All @@ -93,13 +98,14 @@ drop10 = loop (0 :: Int)

performSubsample :: FilePath -> Handle -> IO ()
performSubsample f h = do
C.runConduitRes $
conduitPossiblyCompressedFile f
.| CB.lines
.| drop10
.| C.take 100000
.| C.unlinesAscii
.| asyncGzipTo h
runResourceT $ withPossiblyCompressedFile f $ \src ->
C.runConduit $
src
.| CB.lines
.| drop10
.| C.take 100000
.| C.unlinesAscii
.| asyncGzipTo h
hClose h

optionalSubsample :: FilePath -> NGLessIO FilePath
Expand Down
1 change: 1 addition & 0 deletions NGLess/Interpretation/Map.hs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import qualified Data.Conduit.Binary as CB
import qualified Data.Conduit.Combinators as CC
import qualified Data.Conduit as C
import Data.Conduit ((.|))
import Data.Conduit.Algorithms.Async (conduitPossiblyCompressedFile)
import Control.Monad.Extra (unlessM)
import Data.List (sort)

Expand Down
5 changes: 3 additions & 2 deletions NGLess/Interpretation/Unique.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{- Copyright 2013-2018 NGLess Authors
{- Copyright 2013-2019 NGLess Authors
- License: MIT
-}

Expand All @@ -21,7 +21,8 @@ import qualified Data.ByteString as B
import qualified Data.Conduit.Combinators as CC
import qualified Data.Conduit as C
import qualified Data.Conduit.List as CL
import Data.Conduit ((.|))
import Data.Conduit ((.|))
import Data.Conduit.Algorithms.Async (conduitPossiblyCompressedFile)

import FileManagement (createTempDir, makeNGLTempFile)
import Data.FastQ
Expand Down
1 change: 1 addition & 0 deletions NGLess/Interpretation/Write.hs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import qualified Data.Conduit.List as CL
import qualified Data.Conduit.Binary as CB
import qualified Data.Conduit.Combinators as CC
import qualified Data.Conduit.Combinators as C
import Data.Conduit.Algorithms.Async (conduitPossiblyCompressedFile)
#ifndef WINDOWS
-- bzlib cannot compile on Windows (as of 2016/07/05)
import qualified Data.Conduit.BZlib as CBZ2
Expand Down
1 change: 1 addition & 0 deletions NGLess/StandardModules/Mappers/Soap.hs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import qualified Data.Conduit.List as CL
import qualified Data.Conduit as C
import qualified UnliftIO as U
import Data.Conduit ((.|))
import Data.Conduit.Algorithms.Async (conduitPossiblyCompressedFile)
import Control.Monad.Extra (guard, allM, whenM)
import GHC.Conc (getNumCapabilities, setNumCapabilities)
import Data.List (isSuffixOf)
Expand Down
3 changes: 2 additions & 1 deletion NGLess/StandardModules/Mocat.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{- Copyright 2016-2017 NGLess Authors
{- Copyright 2016-2019 NGLess Authors
- License: MIT
-}

Expand All @@ -14,6 +14,7 @@ import qualified Data.Conduit.Combinators as C
import qualified Data.Conduit.List as CL
import qualified Data.Conduit as C
import Data.Conduit ((.|))
import Data.Conduit.Algorithms.Async (conduitPossiblyCompressedFile)
import Control.Monad.Extra (unlessM)
import System.Directory (doesDirectoryExist)
import System.FilePath
Expand Down
3 changes: 2 additions & 1 deletion NGLess/StandardModules/Parallel.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{- Copyright 2016-2018 NGLess Authors
{- Copyright 2016-2019 NGLess Authors
- License: MIT
-}

Expand Down Expand Up @@ -35,6 +35,7 @@ import qualified Control.Concurrent.STM.TBMQueue as TQ
import qualified Data.Conduit.List as CL
import qualified Data.Conduit.TQueue as CA
import qualified Data.Conduit.Algorithms as CAlg
import Data.Conduit.Algorithms.Async (conduitPossiblyCompressedFile)
import Control.Monad.ST (runST)
import Control.Monad.Except (throwError)
import Control.Monad.Extra (allM, unlessM)
Expand Down
3 changes: 1 addition & 2 deletions NGLess/Utils/Conduit.hs
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
{- Copyright 2013-2018 NGLess Authors
{- Copyright 2013-2019 NGLess Authors
- License: MIT -}
{-# LANGUAGE ScopedTypeVariables, FlexibleContexts, CPP #-}

module Utils.Conduit
( ByteLine(..)
, byteLineSinkHandle
, byteLineVSinkHandle
, conduitPossiblyCompressedFile
, asyncMapC
, asyncMapEitherC
, linesUnBoundedC
Expand Down
2 changes: 1 addition & 1 deletion NGLess/Utils/Samtools.hs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import qualified Data.Conduit.Combinators as C
import qualified Data.Conduit.Process as CP
import qualified Control.Concurrent.Async as A
import Data.Conduit ((.|))
import Data.Conduit.Algorithms.Async (conduitPossiblyCompressedFile)
import Control.Monad.Except
import Control.Concurrent (getNumCapabilities)
import Data.List (isSuffixOf)
Expand All @@ -30,7 +31,6 @@ import FileManagement
import NGLess.NGError

import Utils.Utils (readProcessErrorWithExitCode)
import Utils.Conduit

-- | reads a SAM (possibly compressed) or BAM file (in the latter case by using
-- 'samtools view' under the hood)
Expand Down
1 change: 1 addition & 0 deletions Tests-Src/Tests.hs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import qualified Data.Conduit as C
import Data.Conduit ((.|))
import Control.Monad.State.Strict (execState, modify')
import Data.Convertible (convert)
import Data.Conduit.Algorithms.Async (conduitPossiblyCompressedFile)

import Language
import Interpret
Expand Down

0 comments on commit 96b2be3

Please sign in to comment.