Skip to content

Commit

Permalink
BUG Correctly handle subdirs in paths for collect()
Browse files Browse the repository at this point in the history
The partial files path needs to be sanitized.

fixes #141
  • Loading branch information
luispedro committed Dec 4, 2020
1 parent f1296c0 commit 549c775
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 4 deletions.
1 change: 1 addition & 0 deletions ChangeLog
@@ -1,6 +1,7 @@
Version 1.2.0+
* Add early check that block assignments are always to block variables
* Use ZStd compression for temporary files from preprocess()
* Correctly handle subpaths in samples for collect (fixes #141)

Version 1.2.0 2020-07-12 by luispedro
* Add load_fastq_directory to builtin functions
Expand Down
18 changes: 14 additions & 4 deletions NGLess/StandardModules/Parallel.hs
Expand Up @@ -118,8 +118,9 @@ setupHashDirectory basename hash = do
unsafeCharMap = [('/', '_'),
('\\', '_')]

sanitizeLock :: T.Text -> T.Text
sanitizeLock = T.map (\x -> fromMaybe x (lookup x unsafeCharMap))
-- | Replace '/' and '\' in filenames with '_'
sanitizePath :: T.Text -> T.Text
sanitizePath = T.map (\x -> fromMaybe x (lookup x unsafeCharMap))

executeLock1 (NGOList entries) kwargs = do
entries' <- mapM (stringOrTypeError "lock1") entries
Expand All @@ -128,7 +129,7 @@ executeLock1 (NGOList entries) kwargs = do
-- Keep a map of 'sane -> original' names used for locks to backtrace
-- what file was locked and return the unsanitized name
-- See also https://github.com/ngless-toolkit/ngless/issues/68
let saneentries = sanitizeLock <$> entries'
let saneentries = sanitizePath <$> entries'
lockmap = zip saneentries entries'
(e,rk) <- getLock lockdir saneentries
outputListLno' InfoOutput ["lock1: Obtained lock file: '", lockdir </> T.unpack e ++ ".lock", "'"]
Expand Down Expand Up @@ -158,6 +159,12 @@ lockName = (++ ".lock") . T.unpack
finishedName = (++ ".finished") . T.unpack
failedName = (++ ".failed") . T.unpack

-- | Create a lock file
getLock :: FilePath
-- ^ directory where to create locks
-> [T.Text]
-- ^ keys to attempt to lock
-> NGLessIO (T.Text, ReleaseKey)
getLock basedir fs = do
existing <- liftIO $ getDirectoryContents basedir
let notfinished = flip filter fs $ \fname -> finishedName fname `notElem` existing
Expand Down Expand Up @@ -220,7 +227,8 @@ executeCollect (NGOCounts istream) kwargs = do
.| CL.map unwrapByteLine
.| C.unlinesAscii
.| CAlg.asyncGzipTo gzout
let partialfile entry = hashdir </> "partial." ++ T.unpack entry <.> "tsv.gz"
let partialfile entry = hashdir </> "partial." ++ T.unpack (sanitizePath entry) <.> "tsv.gz"
outputListLno' TraceOutput ["Collect will write partial file to ", partialfile current]
liftIO $ do
hClose gzout
syncFile gzfp
Expand All @@ -246,7 +254,9 @@ executeCollect (NGOCounts istream) kwargs = do

if canCollect
then do
outputListLno' TraceOutput ["Can collect"]
newfp <- pasteCounts comment False allentries (map partialfile allentries)
outputListLno' TraceOutput ["Pasted. Will move result to ", T.unpack ofile]
moveOrCopyCompress True newfp (T.unpack ofile ++ (if isSubsample then ".subsample" else ""))
else do
outputListLno' TraceOutput ["Cannot collect (not all files present yet), wrote partial file to ", partialfile current]
Expand Down
9 changes: 9 additions & 0 deletions docs/sources/whatsnew.rst
Expand Up @@ -3,6 +3,15 @@ What's New (History)
====================


Unreleased development version
------------------------------

Bugfixes
~~~~~~~~
- Fix ``collect()`` when sample names contain ``/`` (`issue 141
<https://github.com/ngless-toolkit/ngless/issues/141>`__)


Version 1.2.0
-------------

Expand Down

0 comments on commit 549c775

Please sign in to comment.