Skip to content

Commit

Permalink
Merge branch 'contain-containers'
Browse files Browse the repository at this point in the history
  • Loading branch information
pveber committed Jun 16, 2020
2 parents 99bb824 + 5140e5c commit 5e31409
Show file tree
Hide file tree
Showing 24 changed files with 432 additions and 460 deletions.
5 changes: 4 additions & 1 deletion doc/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,7 @@ help:
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

bistro_doc.tex: Makefile
@$(SPHINXBUILD) -b latex "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
319 changes: 159 additions & 160 deletions lib/bioinfo/bistro_bioinfo.ml

Large diffs are not rendered by default.

17 changes: 8 additions & 9 deletions lib/bioinfo/bistro_bioinfo.mli
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ end
(** {3 NGS utilities} *)

module Bedtools : sig
val img : Shell_dsl.container_image list
val img : container_image list

type 'a input

Expand Down Expand Up @@ -558,7 +558,7 @@ module Samtools : sig
end

module Picardtools : sig
val img : Shell_dsl.container_image list
val img : container_image list

val markduplicates :
?remove_duplicates:bool ->
Expand All @@ -575,7 +575,7 @@ module Picardtools : sig
end

module Sra_toolkit : sig
val img : Shell_dsl.container_image list
val img : container_image list

val fastq_dump :
[`id of string | `idw of string workflow | `file of sra file] ->
Expand Down Expand Up @@ -683,7 +683,7 @@ module Bowtie : sig
?n:int -> ?v:int ->
?maxins:int ->
[`bowtie_index] directory ->
'a file list SE_or_PE.t ->
sanger_fastq file list SE_or_PE.t ->
sam file
end

Expand Down Expand Up @@ -753,7 +753,7 @@ module Tophat : sig
end

module Hisat2 : sig
val img : Shell_dsl.container_image list
val img : container_image list

val hisat2_build :
?large_index:bool ->
Expand Down Expand Up @@ -820,7 +820,7 @@ module Kallisto : sig
method f5 : [`tpm] * float
end

val img : Shell_dsl.container_image list
val img : container_image list
val index : fasta file list -> index file
val quant :
?bias:bool ->
Expand Down Expand Up @@ -899,7 +899,7 @@ module Quast : sig
end

module Busco : sig
val img : Shell_dsl.container_image list
val img : container_image list

type db = [
| `bacteria
Expand Down Expand Up @@ -962,8 +962,7 @@ end
(** {3 Differential analysis} *)

module DESeq2 : sig

val img : Shell_dsl.container_image list
val img : container_image list

class type table = object
inherit tsv
Expand Down
18 changes: 7 additions & 11 deletions lib/bistro.ml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ type 'a workflow = 'a Workflow.t
type 'a file = (#regular_file_t as 'a) path workflow
type 'a directory = < directory_t ; contents : 'a > path workflow

type container_image = Workflow.container_image

module Workflow = struct
include Workflow

Expand Down Expand Up @@ -238,12 +240,9 @@ end
module Shell_dsl = struct
include Template_dsl

type command = Workflow.shell_command
type container_image = Command.container_image

let within_container images cmd = Command.Within_container (images, cmd)
type command = Workflow.token Command.t

let gen_cmd prog_expr ?img ?stdin ?stdout ?stderr args =
let gen_cmd prog_expr ?stdin ?stdout ?stderr args =
let stdout_expr =
match stdout with
| None -> []
Expand All @@ -265,10 +264,7 @@ module Shell_dsl = struct
|> List.intersperse ~sep:(string " ")
|> List.concat
in
let cmd = Command.Simple_command tokens in
match img with
| None -> cmd
| Some image -> within_container image cmd
Command.Simple_command tokens

let cmd p = gen_cmd [ S p ]

Expand Down Expand Up @@ -297,7 +293,7 @@ module Shell_dsl = struct

let ( % ) f g x = g (f x)

let docker_image = Command.docker_image
let docker_image = Workflow.docker_image

let bash ?img script = cmd "bash" ?img [ file_dump script ]
let bash script = cmd "bash" [ file_dump script ]
end
10 changes: 3 additions & 7 deletions lib/bistro.mli
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ type 'a directory = < directory_t ; contents : 'a > path workflow

(** {2 Building shell-based workflow} *)

type container_image

(** Representation of scripts *)
module Template_dsl : sig
type template
Expand Down Expand Up @@ -125,13 +127,11 @@ end
module Shell_dsl : sig
type template = Template_dsl.template
type command
type container_image

include module type of Template_dsl with type template := template

val cmd :
string ->
?img:container_image list ->
?stdin:template -> ?stdout:template -> ?stderr:template ->
template list -> command
(** Command-line constructor, e.g.
Expand All @@ -145,7 +145,6 @@ module Shell_dsl : sig
@param stderr adds a ["2> /some/path"] token at the end of the command *)

val bash :
?img:container_image list ->
template ->
command
(** Run a bash script, best used with [%script {|...|}] *)
Expand Down Expand Up @@ -181,10 +180,6 @@ module Shell_dsl : sig
val rm_rf : template -> command
val mv : template -> template -> command

val within_container : container_image list -> command -> command
(** [docker cmd] transforms [cmd] so that it can be executed in a
Docker container. *)

val docker_image :
?tag:string ->
?registry:string ->
Expand All @@ -208,6 +203,7 @@ module Workflow : sig
?mem:int workflow ->
?np:int ->
?version:int ->
?img:container_image list ->
Shell_dsl.command list -> 'a path workflow
(** Constructor for a workflow that executes a shell script. Its main
argument is a list of {!Shell_dsl.cmd} values. Other arguments
Expand Down
2 changes: 1 addition & 1 deletion lib/engine/db.ml
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ let container_image_identifier img =
(Option.value_map tag ~default:"" ~f:(( ^ ) "_"))
(Bistro_internals.Workflow.digest img)
in
match (img : Bistro_internals.Command.container_image) with
match (img : Bistro_internals.Workflow.container_image) with
| Docker_image i -> f i.account i.name i.tag
| Singularity_image i -> f i.account i.name i.tag

Expand Down
4 changes: 2 additions & 2 deletions lib/engine/db.mli
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ val cache : t -> id -> string
val stdout : t -> id -> string
val stderr : t -> id -> string
val build : t -> id -> string
val singularity_image : t -> Bistro_internals.Command.container_image -> string
val singularity_image : t -> Bistro_internals.Workflow.container_image -> string

val container_image_identifier : Bistro_internals.Command.container_image -> string
val container_image_identifier : Bistro_internals.Workflow.container_image -> string

val fold_cache :
t ->
Expand Down
2 changes: 1 addition & 1 deletion lib/engine/docker.ml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ let mount_options ~host_paths ~container_paths =
)
|> String.concat ~sep:" "

let image_url (image : Command.Docker_image.t) =
let image_url (image : Workflow.Docker_image.t) =
sprintf "%s%s/%s%s"
(Option.value_map ~default:"" ~f:(sprintf "%s/") image.registry)
image.account
Expand Down
2 changes: 1 addition & 1 deletion lib/engine/docker.mli
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ val mount_options :
container_paths:string list ->
string

val image_url : Command.Docker_image.t -> string
val image_url : Workflow.Docker_image.t -> string

val chown_command :
path:string ->
Expand Down
57 changes: 26 additions & 31 deletions lib/engine/execution_env.ml
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,11 @@ let make ~db ~allowed_containers ~np ~mem ~id =
uid = Unix.getuid () ;
}

let docker_cache_dir = "/bistro/data"
let container_cache_dir = "/bistro/data"

(* This type is used to allow a form of compression: when several
mounted files are in the same directory, that directory is mounted
only once. This is particularly useful for /bistro/data *)
type container_mount = {
mount_host_location : string ;
mount_container_location : string ;
Expand All @@ -61,13 +64,13 @@ let container_mount
| Cache_id id ->
{
mount_host_location = Db.cache_dir db ;
mount_container_location = docker_cache_dir ;
file_container_location = Filename.concat docker_cache_dir id
mount_container_location = container_cache_dir ;
file_container_location = Filename.concat container_cache_dir id
}

| FS_path path ->
let id = Misc.digest path in
let container_path = Filename.concat docker_cache_dir id in
let container_path = Filename.concat container_cache_dir id in
{
mount_host_location = path ;
mount_container_location = container_path ;
Expand All @@ -77,10 +80,10 @@ let container_mount
| Cd (Cache_id id, sel) ->
{
mount_host_location = Db.cache_dir db ;
mount_container_location = docker_cache_dir ;
mount_container_location = container_cache_dir ;
file_container_location =
List.reduce_exn ~f:Filename.concat [
docker_cache_dir ;
container_cache_dir ;
id ;
Path.to_string sel
]
Expand All @@ -89,10 +92,10 @@ let container_mount
let id = Misc.digest path in
{
mount_host_location = path ;
mount_container_location = Filename.concat docker_cache_dir id ;
mount_container_location = Filename.concat container_cache_dir id ;
file_container_location =
List.reduce_exn ~f:Filename.concat [
docker_cache_dir ;
container_cache_dir ;
id ;
Path.to_string sel
]
Expand All @@ -102,11 +105,11 @@ let container_mount

let dockerize env = {
db = env.db ;
tmp_dir = "/bistro" ;
tmp_dir = "/bistro/build" ;
allowed_containers = [] ;
dest = "/bistro/dest" ;
tmp = "/bistro/tmp" ;
file_dump = (fun toks -> Filename.concat docker_cache_dir (Misc.digest toks)) ;
dest = "/bistro/build/dest" ;
tmp = "/bistro/build/tmp" ;
file_dump = (fun toks -> Filename.concat container_cache_dir (Misc.digest toks)) ;
dep = (fun u -> (container_mount env.db u).file_container_location) ;
np = env.np ;
mem = env.mem ;
Expand All @@ -119,46 +122,38 @@ let allows_docker env = List.mem ~equal:Poly.equal env.allowed_containers `Docke

(* [singularize env] is a copy of [env] in which [allowed_containers]
is emptied and the build locations ([tmp_dir], [dest], [tmp]) are
replaced by fixed paths under /bistro/build; [file_dump] and [dep] are
redefined to return paths built from [container_cache_dir] and
[container_mount]. All other fields are kept from [env].
NOTE(review): presumably this is the environment as seen from inside a
Singularity container, by analogy with [dockerize] -- confirm against
the callers. *)
let singularize env = {
env with allowed_containers = [] ;
tmp_dir = "/bistro/build" ;
dest = "/bistro/build/dest" ;
tmp = "/bistro/build/tmp" ;
(* a dumped file is named after the digest of its tokens, under [container_cache_dir] *)
file_dump = (fun toks -> Filename.concat container_cache_dir (Misc.digest toks)) ;
(* a dependency is referred to by the [file_container_location] that [container_mount] computes for it *)
dep = (fun u -> (container_mount env.db u).file_container_location) ;
}

let find_docker_image env =
List.find_map env ~f:Command.(function
List.find_map env ~f:Workflow.(function
| Docker_image i -> Some i
| Singularity_image _ -> None
)

let find_singularity_image env =
List.find_map env ~f:Command.(function
List.find_map env ~f:Workflow.(function
| Docker_image _ -> None
| Singularity_image i -> Some i
)

let rec choose_container allowed_containers images =
match allowed_containers with
| [] -> `Plain
| [] -> None
| `Docker :: others -> ( (* docker only accepts docker images *)
match find_docker_image images with
| Some i -> `Docker_container i
| Some i -> Some (`Docker_container i)
| None -> choose_container others images
)
| `Singularity :: others -> (
match find_singularity_image images with
| Some i -> `Singularity_container (Command.Singularity_image i)
| Some i -> Some (`Singularity_container (Workflow.Singularity_image i))
| None ->
match find_docker_image images with
| Some i -> `Singularity_container (Command.Docker_image i)
| Some i -> Some (`Singularity_container (Workflow.Docker_image i))
| None -> choose_container others images
)

let rec images_for_singularity allowed_containers = function
| Command.Simple_command _ -> []
| And_list xs
| Or_list xs
| Pipe_list xs -> images_for_singularity_aux allowed_containers xs
| Within_container (img, _) ->
match choose_container allowed_containers img with
| `Plain
| `Docker_container _ -> []
| `Singularity_container img -> [ img ]
and images_for_singularity_aux allowed_containers xs =
List.concat_map xs ~f:(images_for_singularity allowed_containers)
14 changes: 4 additions & 10 deletions lib/engine/execution_env.mli
Original file line number Diff line number Diff line change
Expand Up @@ -40,18 +40,12 @@ type container_mount = {

val container_mount : Db.t -> Workflow.path -> container_mount
val dockerize : t -> t
val docker_cache_dir : string
val container_cache_dir : string
val allows_docker : t -> bool
val singularize : t -> t

val choose_container :
[`Docker | `Singularity] list ->
Command.container_image list ->
[ `Plain
| `Docker_container of Command.Docker_image.t
| `Singularity_container of Command.container_image ]

val images_for_singularity :
[`Docker | `Singularity] list ->
_ Command.t ->
Command.container_image list
Workflow.container_image list ->
[ `Docker_container of Workflow.Docker_image.t
| `Singularity_container of Workflow.container_image ] option
2 changes: 1 addition & 1 deletion lib/engine/logger.ml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ type event =
| Workflow_skipped : _ Workflow.t * [ `Done_already | `Missing_dep ] -> event
| Workflow_allocation_error : _ Workflow.t * string -> event
| Workflow_collected : _ Workflow.t -> event
| Singularity_image_collected : Command.container_image -> event
| Singularity_image_collected : Workflow.container_image -> event
| Debug : string -> event

class type t = object
Expand Down

0 comments on commit 5e31409

Please sign in to comment.