Skip to content

Commit

Permalink
ENH Add run_for_all_samples
Browse files Browse the repository at this point in the history
This is needed so that types work: having the return type depend on the
input type is not (yet) supported in NGLess
  • Loading branch information
luispedro committed Jul 21, 2022
1 parent 0890c53 commit c69baf7
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 32 deletions.
76 changes: 46 additions & 30 deletions NGLess/StandardModules/Parallel.hs
Original file line number Diff line number Diff line change
Expand Up @@ -575,18 +575,30 @@ pasteHiddenFunction = Function



-- | Signature description of the @run_for_all@ function.
--
-- The positional argument may be either a list of strings or a list of
-- read sets (expressed as a union type); the declared return type is a
-- string in both cases. The only keyword argument is @tag@, of string type.
runForAllFunction = Function
    { funcName = FuncName "run_for_all"
    , funcArgType = Just acceptedInput
    , funcArgChecks = []
    , funcRetType = NGLString
    , funcKwArgs = [tagKwArg]
    , funcAllowsAutoComprehension = False
    , funcChecks = []
    }
  where
    -- Either flavour of list is accepted as the positional argument.
    acceptedInput = NGLUnion [NGList NGLString, NGList NGLReadSet]
    -- NOTE(review): the 'False' flag presumably marks @tag@ as optional;
    -- confirm against the definition of 'ArgInformation'.
    tagKwArg = ArgInformation "tag" False NGLString []

-- | Signature descriptions of the @run_for_all@ family of functions.
--
-- There is one entry per element type: @run_for_all@ takes a list of strings
-- and is declared to return a string, while @run_for_all_samples@ takes a
-- list of read sets and is declared to return a read set. They are separate
-- functions because a return type that depends on the argument type cannot
-- be expressed by a single signature.
runForAllFunctions =
    [ runForAllVariant "run_for_all" NGLString
    , runForAllVariant "run_for_all_samples" NGLReadSet
    ]
  where
    -- Builds one variant: the argument is a list of @elemType@ and the
    -- declared result is a single @elemType@. Everything else is shared.
    runForAllVariant name elemType = Function
        { funcName = FuncName name
        , funcArgType = Just (NGList elemType)
        , funcArgChecks = []
        , funcRetType = elemType
        -- NOTE(review): the 'False' flag presumably marks @tag@ as
        -- optional; confirm against the definition of 'ArgInformation'.
        , funcKwArgs = [ArgInformation "tag" False NGLString []]
        , funcAllowsAutoComprehension = False
        , funcChecks = []
        }


parallelTransform :: Bool -> [(Int, Expression)] -> NGLessIO [(Int, Expression)]
Expand All @@ -597,7 +609,7 @@ addLockHash script = pureTransform addLockHash' script
where
addLockHash' :: Expression -> Expression
addLockHash' (FunctionCall fn@(FuncName fname) expr kwargs block)
| fname `elem` ["lock1", "run_for_all"] =
| fname `elem` ["lock1", "run_for_all", "run_for_all_samples"] =
FunctionCall fn expr ((Variable "__hash", ConstStr h):kwargs) block
where
h = T.pack . MD5.md5s . MD5.Str . show $ map snd script
Expand All @@ -609,23 +621,25 @@ processRunForAll True = processRunForAll' Nothing

processRunForAll' _ [] = return []
processRunForAll' Nothing ((lno,expr):rest) = case expr of
Assignment v (FunctionCall (FuncName "run_for_all") slist kwargs _) -> do
let save_match = Assignment (Variable "$parallel$iterator") (Lookup (Just NGLString) v)
save_list = Assignment (Variable "$parallel$list") slist
set_tag = do
tag <- lookup (Variable "tag") kwargs
return (lno,
FunctionCall (FuncName "set_parallel_tag") tag [] Nothing)
rest' <- processRunForAll' (Just (lno, slist)) rest
let res = ((lno,expr):(lno,save_match):(lno,save_list):rest')
case set_tag of
Nothing -> return res
Just t -> return (t:res)
Assignment v (FunctionCall (FuncName fname) slist kwargs _)
| fname `elem` ["run_for_all", "run_for_all_samples"] -> do
let save_match = Assignment (Variable "$parallel$iterator") (Lookup (Just NGLString) v)
save_list = Assignment (Variable "$parallel$list") slist
set_tag = do
tag <- lookup (Variable "tag") kwargs
return (lno,
FunctionCall (FuncName "set_parallel_tag") tag [] Nothing)
rest' <- processRunForAll' (Just (lno, slist)) rest
let res = ((lno,expr):(lno,save_match):(lno,save_list):rest')
case set_tag of
Nothing -> return res
Just t -> return (t:res)
_ -> do
((lno,expr):) <$> processRunForAll' Nothing rest
processRunForAll' (Just prev) ((lno,e):rest) = case e of
Assignment _ (FunctionCall (FuncName "run_for_all") _ _ _) ->
throwScriptError ("The function 'run_for_all' can only be called once (seen on lines "++show prev++" and "++show lno++")")
Assignment _ (FunctionCall (FuncName fname) _ _ _)
| fname `elem` ["run_for_all", "run_for_all_samples"] -> do
throwScriptError ("The functions 'run_for_all'/'run_for_all_samples' can only be called once (seen on lines "++show prev++" and "++show lno++")")
FunctionCall fn@(FuncName "collect") expr kwargs block -> do
let kwargs' = (Variable "allneeded", Lookup (Just NGLString) (Variable "$parallel$list"))
:(Variable "current", Lookup (Just NGLString) (Variable "$parallel$iterator"))
Expand All @@ -639,8 +653,9 @@ processRunForAll' (Just prev) ((lno,e):rest) = case e of

checkNoRunForAll = mapM checkNoRunForAll1
where
checkNoRunForAll1 (_,Assignment _ (FunctionCall (FuncName "run_for_all") _ _ _)) =
throwScriptError "Function 'run_for_all' is only available in parallel module version 1.1+. Please upgrade your import"
checkNoRunForAll1 (_,Assignment _ (FunctionCall (FuncName fname) _ _ _))
| fname `elem` ["run_for_all", "run_for_all_samples"] =
throwScriptError ("Function '"++T.unpack fname++"' is only available in parallel module version 1.1+. Please upgrade your import")
checkNoRunForAll1 e = return e

processSetParallelTag :: [(Int, Expression)] -> NGLessIO [(Int, Expression)]
Expand Down Expand Up @@ -673,13 +688,14 @@ loadModule v
, collectFunction includeForAll
, setTagFunction
, pasteHiddenFunction
] ++ (if includeForAll then [runForAllFunction] else [])
] ++ (if includeForAll then runForAllFunctions else [])
, modTransform = parallelTransform includeForAll
, runFunction = \case
"lock1" -> executeLock1OrForAll "lock1"
"collect" -> executeCollect
"set_parallel_tag" -> executeSetTag
"run_for_all" -> executeLock1OrForAll "run_for_all"
"run_for_all_samples" -> executeLock1OrForAll "run_for_all_samples"
"__paste" -> executePaste
_ -> error "Bad function name"
}
Expand Down
10 changes: 8 additions & 2 deletions docs/sources/yaml-list.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,18 +42,24 @@ You can load a sample list with the `load_sample_list` function:
...


It can also be used with the [parallel module](stdlib.html) module's `run_for_all` function
It can also be used with the [parallel](stdlib.html) module's
`run_for_all_samples` function. For example:

ngless "1.5"
import "parallel" version "1.1"
input = run_for_all(load_sample_list('list.yaml'))
input = run_for_all_samples(load_sample_list('list.yaml'))

input = preprocess(input) using |read|:
read = substrim(read, min_quality=25)
if len(read) < 45:
discard

write(input, ofile='outputs' </> input.name() + '.fq.xz')
...

Note how we used the `.name()` method of the readset object to get the name of
the selected sample.

## Loading a single sample from a YAML file

The function `load_sample_from_yaml` (which takes a YAML file and a mandatory
Expand Down

0 comments on commit c69baf7

Please sign in to comment.