Skip to content

Commit

Permalink
ENH Enable downloading annotation files from URLs
Browse files Browse the repository at this point in the history
In v1.1, only the FASTA files were supported
  • Loading branch information
luispedro committed Apr 1, 2020
1 parent 3cccb78 commit c576081
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 19 deletions.
3 changes: 2 additions & 1 deletion ChangeLog
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
Version 1.1.0+
* Edit-me
* Enable specifying *all* module resources by URL with download on first
use

Version 1.1.0 2020-01-25 by luispedro
* Reintroduce zstd compression (after fixes upstream)
Expand Down
46 changes: 28 additions & 18 deletions NGLess/ReferenceDatabases.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{- Copyright 2013-2019 NGLess Authors
{- Copyright 2013-2020 NGLess Authors
- License: MIT
-}
module ReferenceDatabases
Expand Down Expand Up @@ -81,30 +81,40 @@ findReference allrefs rn = find (\ref -> (refName ref == rn) || maybe False (==
-- | 'True' when 'findReference' finds the given name among 'builtinReferences'.
isBuiltinReference :: T.Text -> Bool
isBuiltinReference = isJust . findReference builtinReferences

-- | Make a module resource available locally, fetching it when it is remote.
--
-- 'Nothing' is passed through untouched. When 'isUrl' holds for the path, the
-- file is stored as @basedir\/cached\/fname@ (downloaded only if that file
-- does not exist yet) and this local path is returned. Any other path is
-- returned exactly as given.
downloadIfUrl :: FilePath -- ^ base directory
              -> FilePath -- ^ local filename
              -> Maybe FilePath
              -> NGLessIO (Maybe FilePath)
downloadIfUrl basedir fname mpath = case mpath of
    Nothing -> return Nothing
    Just path
        | isUrl path -> Just <$> fetchOnce path
        | otherwise -> return (Just path)
  where
    cacheDir = basedir </> "cached"
    target = basedir </> "cached" </> fname
    lockParams = LockParameters
        { lockFname = target ++ ".download.lock"
        , maxAge = 300
        , whenExistsStrategy = IfLockedRetry { nrLockRetries = 37*60, timeBetweenRetries = 60 }
        , mtimeUpdate = True
        }
    fetchOnce url = do
        liftIO $ createDirectoryIfMissing True cacheDir
        -- check before locking: an already cached file never touches the lock
        unlessM (liftIO $ doesFileExist target) $
            withLockFile lockParams $
                -- recheck now that the lock is held
                unlessM (liftIO $ doesFileExist target) $
                    downloadFile url target
        return target

-- | Search the references declared by the loaded modules for one named @rname@.
--
-- For an 'ExternalReference' whose name equals @rname@, each of its three
-- files is passed through 'downloadIfUrl' with the module's path as base
-- directory, so URL entries are replaced by paths of locally cached copies
-- named @rname.fna.gz@, @rname.gff.gz@ and @rname.tsv.gz@. Only the first file
-- is mandatory in the constructor; the other two are forwarded as 'Maybe'
-- values.
--
-- Every non-matching reference yields 'Nothing'.
-- NOTE(review): presumably 'findM' returns the first 'Just' result -- confirm
-- against its definition.
moduleDirectReference :: T.Text -> NGLessIO (Maybe ReferenceFilePaths)
moduleDirectReference rname = do
    mods <- loadedModules
    findM mods $ \m ->
        findM (modReferences m) $ \case
            ExternalReference eref fafile gtffile mapfile
                | eref == rname -> do
                    -- all three resources share the same cache directory and
                    -- base name; only the extension differs
                    let fetch ext = downloadIfUrl (modPath m) (T.unpack rname <.> ext)
                    fafile' <- fetch "fna.gz" (Just fafile)
                    gtffile' <- fetch "gff.gz" gtffile
                    mapfile' <- fetch "tsv.gz" mapfile
                    return . Just $! ReferenceFilePaths fafile' gtffile' mapfile'
            _ -> return Nothing

-- NOTE(review): presumably the location of the reference FASTA relative to a
-- reference's base directory -- confirm against the callers.
referencePath = "Sequence/BWAIndex/reference.fa.gz"
Expand Down
7 changes: 7 additions & 0 deletions docs/sources/whatsnew.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@
What's New (History)
====================


Version 1.2.0 (Unreleased)
--------------------------

- Modules can now specify their annotation as a URL that NGLess downloads on an
"as needed" basis: in version 1.1, only FASTA files were supported.

Version 1.1.0
-------------

Expand Down

0 comments on commit c576081

Please sign in to comment.