Skip to content

Commit

Permalink
Merge branch 'include-script'
Browse files Browse the repository at this point in the history
  • Loading branch information
pveber committed Jan 30, 2021
2 parents 90d2271 + 3607cb0 commit 07879b7
Show file tree
Hide file tree
Showing 17 changed files with 353 additions and 412 deletions.
29 changes: 0 additions & 29 deletions .merlin

This file was deleted.

2 changes: 1 addition & 1 deletion examples/dune
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
(executables
(names chipseq rnaseq zhou2011 zhou2018)
(libraries bistro.bioinfo bistro.nlp bistro.utils)
(libraries bistro-bio.examples)
(preprocess (pps ppx_bistro ppx_enumerate ppx_deriving.std)))
100 changes: 1 addition & 99 deletions examples/zhou2011.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2,103 +2,5 @@
Paper: https://www.ncbi.nlm.nih.gov/pubmed/21700227
Datasets: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE29506
*)
open Core
open Bistro_bioinfo
open Bistro_utils

(* Value passed as ~np to Repo.build_main at the end of this file
   (presumably the number of processors the run may use; confirm). *)
let np = 4

(** Treatment samples handled by this example. The [show] and [enumerate]
    derivers provide [show_chIP_sample] and [all_of_chIP_sample], both used
    when the ChIPQC sample list is built. *)
type chIP_sample = [ `ChIP_Pho4_noPi ]
[@@deriving show, enumerate]


(** Factor associated with a treatment sample. The [show] deriver provides
    [show_factor], used to fill the [factor] field of ChIPQC samples. *)
type factor = [ `Pho4 ]
[@@deriving show, enumerate]

(* Maps a treatment sample to the factor it was immunoprecipitated for. *)
let factor sample =
  match sample with
  | `ChIP_Pho4_noPi -> `Pho4

(* Maps a treatment sample to the input sample used as its control. *)
let control_sample sample =
  match sample with
  | `ChIP_Pho4_noPi -> `Input_WT_NoPi

(* Reference sequence for the `sacCer2 assembly, as returned by
   Ucsc_gb.genome_sequence; used to build the bowtie index. *)
let genome = Ucsc_gb.genome_sequence `sacCer2
(* Same assembly obtained through Ucsc_gb.genome_2bit_sequence; handed to
   Ucsc_gb.twoBitToFa when peak sequences are extracted. *)
let genome_2bit = Ucsc_gb.genome_2bit_sequence `sacCer2

(* Identifiers handed to Sra_toolkit.fastq_dump for each sample, treatment
   or control. *)
let srr_id sample =
  match sample with
  | `Input_WT_NoPi -> [ "SRR217324" ]
  | `ChIP_Pho4_noPi -> [ "SRR217304" ; "SRR217305" ]

(* One Sra_toolkit.fastq_dump workflow per identifier listed by srr_id for
   the sample. *)
let fastq x =
  let dump id = Sra_toolkit.fastq_dump (`id id) in
  List.map (srr_id x) ~f:dump

(* Index produced by Bowtie.bowtie_build from the reference genome; shared
   by every alignment in this file. *)
let bowtie_index = Bowtie.bowtie_build genome

(* Aligns the single-end reads of a sample against the bowtie index,
   passing ~v:1 to Bowtie.bowtie. *)
let mapped_reads x =
  let reads = SE_or_PE.Single_end (fastq x) in
  Bowtie.bowtie ~v:1 bowtie_index reads

(* Alignments of a sample converted with Samtools.indexed_bam_of_sam. *)
let mapped_reads_bam x =
  let sam = mapped_reads x in
  Samtools.indexed_bam_of_sam sam

(* Calls peaks with Macs2.callpeak for a treatment sample, using the
   alignments of its control sample as background. *)
let tf_peaks treatment_sample =
  let treatment = mapped_reads treatment_sample in
  let control = mapped_reads (control_sample treatment_sample) in
  Macs2.callpeak
    ~mfold:(1, 100)
    Macs2.sam
    ~control:[ control ]
    [ treatment ]

(* Sequences around the peak summits of a sample: summits are widened by
   [radius] on both sides with Bedtools.slop, then extracted from the 2bit
   genome with Ucsc_gb.twoBitToFa. *)
let peak_sequences ~radius treatment_sample =
  let chrom_sizes = Ucsc_gb.fetchChromSizes `sacCer2 in
  let summits = Macs2.peak_summits (tf_peaks treatment_sample) in
  let regions = Bedtools.(slop ~mode:(`both radius) bed summits chrom_sizes) in
  let regions_bed4 = Bed.keep4 regions in
  Ucsc_gb.twoBitToFa genome_2bit regions_bed4

(* Runs Meme_suite.meme on the sequences within 50 positions of each peak
   summit of the sample. *)
let meme treatment_sample =
  let sequences = peak_sequences ~radius:50 treatment_sample in
  Meme_suite.meme
    ~nmotifs:3
    ~minw:5
    ~maxw:8
    ~revcomp:true
    ~alphabet:`dna
    ~maxsize:1_000_000
    sequences

(* Runs Meme_suite.meme_chip on the same summit-centred sequences that the
   meme function uses. *)
let meme_chip treatment_sample =
  let sequences = peak_sequences ~radius:50 treatment_sample in
  Meme_suite.meme_chip
    ~meme_nmotifs:3
    ~meme_minw:5
    ~meme_maxw:8
    sequences

(* ChIPQC run over every value of chIP_sample. Each sample is described by
   its printed name, its factor, its indexed BAM and its narrow peaks. *)
let chipqc =
  let describe x =
    let peaks = Macs2.narrow_peaks (tf_peaks x) in
    let bam = mapped_reads_bam x in
    {
      ChIPQC.id = show_chIP_sample x ;
      tissue = "yeast" ;
      factor = show_factor (factor x) ;
      replicate = "1" ;
      bam ;
      peaks ;
    }
  in
  ChIPQC.run (List.map all_of_chIP_sample ~f:describe)

(* HTML report: an introductory paragraph followed by a section showing the
   logo of motif 1 found by meme on the Pho4 sample. *)
let report =
  let open Bistro_utils.Html_report in
  let title =
    "Integrated approaches reveal determinants of genome-wide binding and function of the transcription factor Pho4."
  in
  let summary = {|
This is an attempt at reproducing a paper by Zhou and O'Shea on why
transcription factors with similar binding sequences are not bound
to the same genomic sites.
|} in
  let motif_logo = png (Meme_suite.meme_logo (meme `ChIP_Pho4_noPi) 1) in
  render (make ~title [ text summary ; section "Inferred motifs" ; motif_logo ])

(* Items handed to Repo.build_main: each one pairs a path, given as a list
   of components, with the workflow whose result is stored there. *)
let repo = Repo.[
item [ "report.html" ] report ;
item [ "macs2" ; "Pho4" ; "noPi" ] (tf_peaks `ChIP_Pho4_noPi) ;
item [ "meme" ; "Pho4" ; "noPi" ] (meme `ChIP_Pho4_noPi) ;
item [ "meme_chip" ; "Pho4" ; "noPi" ] (meme_chip `ChIP_Pho4_noPi) ;
item [ "chIP-QC" ; "Pho4" ; "noPi" ] chipqc ;
]

(* Entry point: builds every item of repo into the res directory, logging
   to the console. *)
let () =
  let loggers = [ Console_logger.create () ] in
  let mem = `GB 4 in
  Repo.build_main ~np ~mem ~outdir:"res" ~loggers repo
let () = Bistro_bio_examples.Zhou2011.run ()
1 change: 1 addition & 0 deletions lib/bio/examples/dune
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@
(public_name bistro-bio.examples)
(libraries bistro_bio bistro.utils)
(preprocess (pps bistro.ppx ppx_enumerate ppx_deriving.std))
(preprocessor_deps zhou2011.md)
(optional))
11 changes: 11 additions & 0 deletions lib/bio/examples/zhou2011.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
title: Integrated approaches reveal determinants of genome-wide binding and function of the transcription factor Pho4.
----

This is an attempt at reproducing a paper by Zhou and O'Shea on why
transcription factors with similar binding sequences are not bound
to the same genomic sites.

# Inferred motifs
{{Report.Md.png (Meme_suite.meme_logo (meme `ChIP_Pho4_noPi) 1)}}

{{Report.Md.svg (occdist_vs_peak_score `ChIP_Pho4_noPi)}}
16 changes: 3 additions & 13 deletions lib/bio/examples/zhou2011.ml
Original file line number Diff line number Diff line change
Expand Up @@ -199,21 +199,11 @@ dev.off()
]

let report =
let title = "Integrated approaches reveal determinants of genome-wide binding and function of the transcription factor Pho4." in
Report.make ~title [%script{|

This is an attempt at reproducing a paper by Zhou and O'Shea on why
transcription factors with similar binding sequences are not bound
to the same genomic sites.

# Inferred motifs
{{Report.png (Meme_suite.meme_logo (meme `ChIP_Pho4_noPi) 1)}}

{{Report.svg (occdist_vs_peak_score `ChIP_Pho4_noPi)}}
|}]
[%include_script "lib/bio/examples/zhou2011.md"]
|> Report.Md.to_html

let repo = Repo.[
item [ "report.html" ] (Report.to_html report) ;
item [ "report.html" ] report ;
(* item [ "macs2" ; "Pho4" ; "noPi" ] (tf_peaks `ChIP_Pho4_noPi) ;
* item [ "meme" ; "Pho4" ; "noPi" ] (meme `ChIP_Pho4_noPi) ;
* item [ "meme_chip" ; "Pho4" ; "noPi" ] (meme_chip `ChIP_Pho4_noPi) ;
Expand Down
8 changes: 8 additions & 0 deletions lib/engine/scheduler.ml
Original file line number Diff line number Diff line change
Expand Up @@ -918,3 +918,11 @@ let simple_eval_exn
let thread = eval_exn sched w in
start sched ;
Lwt_main.run thread

(* Evaluates the path workflow of [file] with simple_eval_exn, then copies
   the resulting path to [output] with Misc.cp. *)
let build_file_exn ?np ?mem ?allowed_containers ?loggers ?collect ?db_path ~output file =
  let goal = Bistro.Workflow.path file in
  let cached =
    simple_eval_exn
      ?np ?mem ?loggers ?allowed_containers ?db_path ?collect
      goal
  in
  let copy = Misc.cp cached output in
  Lwt_main.run copy
11 changes: 11 additions & 0 deletions lib/engine/scheduler.mli
Original file line number Diff line number Diff line change
Expand Up @@ -95,3 +95,14 @@ val simple_eval_exn :
?db_path:string ->
'a Bistro.workflow ->
'a

(** [build_file_exn ~output file] evaluates the path of [file] through
    {!simple_eval_exn}, forwarding the optional arguments to it, and copies
    the result to [output].

    NOTE(review): the [_exn] suffix suggests that failures surface as
    exceptions; confirm against [simple_eval_exn]. *)
val build_file_exn :
?np:int ->
?mem:[`GB of int] ->
?allowed_containers:[`Docker | `Singularity] list ->
?loggers:Logger.t list ->
?collect:bool ->
?db_path:string ->
output:string ->
'a Bistro.file ->
unit
96 changes: 30 additions & 66 deletions lib/utils/report.ml
Original file line number Diff line number Diff line change
@@ -1,35 +1,23 @@
open Core
open Bistro

type t = {
title : string ;
contents : Template_dsl.template ;
}

let make ~title contents = { title ; contents }

(* Workflow yielding an HTML img element that embeds [file] inline as a
   base64 data URI. [format] selects the image subtype used in the URI:
   svg+xml for `svg and png for `png. *)
let picture_elt format file =
[%workflow
let format = match format with
| `svg -> "svg+xml"
| `png -> "png"
in
(* The whole file is read and base64-encoded so the picture travels inside
   the document. *)
let contents =
In_channel.read_all [%path file]
|> Base64.encode_exn
in
sprintf {|<img src="data:image/%s;base64,%s"/>|} format contents]

(* Template fragments wrapping picture_elt for each supported image format. *)
let svg x = Template_dsl.string_dep (picture_elt `svg x)
let png x = Template_dsl.string_dep (picture_elt `png x)

let header d =
let open Template_dsl in
[%script{|---
title: {{string d.title}}
---|}]

let html_template = Template_dsl.string {|<!DOCTYPE html>
module Md = struct
let picture_elt format file =
[%workflow
let format = match format with
| `svg -> "svg+xml"
| `png -> "png"
in
let contents =
In_channel.read_all [%path file]
|> Base64.encode_exn
in
sprintf {|<img src="data:image/%s;base64,%s"/>|} format contents]

let svg x = Template_dsl.string_dep (picture_elt `svg x)
let png x = Template_dsl.string_dep (picture_elt `png x)

let html_template = Template_dsl.string {|<!DOCTYPE html>
<html $if(lang)$ lang="$lang$" $endif$ dir="ltr">

<head>
Expand Down Expand Up @@ -136,41 +124,17 @@ $endif$
</html>
|}

let document d = Template_dsl.seq ~sep:"\n" [ header d ; d.contents ]

let to_html d =
Workflow.shell ~descr:"bistro_utils.report.to_html" Bistro.Shell_dsl.[
cmd "ln" [ string "-s" ; file_dump html_template ; tmp // "template.html5" ] ;
cmd "pandoc" [
opt' "--from" string "markdown+tex_math_single_backslash+tex_math_dollars" ;
opt' "--to" string "html5" ;
string "--katex" ;
opt' "--template" Fn.id (tmp // "template.html5") ;
opt' "--output" Fn.id dest ;
string "--toc" ;
file_dump (document d) ;
let to_html doc =
Workflow.shell ~descr:"bistro_utils.report.to_html" Bistro.Shell_dsl.[
cmd "ln" [ string "-s" ; file_dump html_template ; tmp // "template.html5" ] ;
cmd "pandoc" [
opt' "--from" string "markdown+tex_math_single_backslash+tex_math_dollars" ;
opt' "--to" string "html5" ;
string "--katex" ;
opt' "--template" Fn.id (tmp // "template.html5") ;
opt' "--output" Fn.id dest ;
string "--toc" ;
file_dump doc ;
]
]
]

(* Renders [report] to HTML through a scheduler and copies the result to
   [output]. The database is initialised in [bistro_dir], which defaults to
   _bistro. The returned Lwt thread fails when the evaluation reports
   errors. *)
let build ?np ?mem ?loggers ?allowed_containers ?(bistro_dir = "_bistro") ?collect ~output report =
let open Bistro_engine in
let open Lwt in
let db = Db.init_exn bistro_dir in
let goal = Workflow.path (to_html report) in
let sched = Scheduler.create ?np ?mem ?loggers ?allowed_containers ?collect db in
(* The evaluation is registered before the scheduler is started. *)
let report_cache_path = Scheduler.eval sched goal in
Scheduler.start sched ;
report_cache_path >>= fun res ->
(* The scheduler is stopped whatever the outcome, before the result is
   inspected. *)
Scheduler.stop sched >>= fun () ->
match res with
| Ok path ->
(* Success: copy the cached file to the requested location with cp. *)
Misc.exec_exn [|"cp" ; path ; output|]
| Error traces -> (
(* Failure: print the error report on stderr, then fail the thread. *)
let errors = Execution_trace.gather_failures traces in
prerr_endline (Scheduler.error_report sched errors) ;
Lwt.fail_with "Some workflow failed!"
)

(* Blocking variant of build: runs the build thread to completion with
   Lwt_main.run. *)
let build_main ?np ?mem ?loggers ?allowed_containers ?bistro_dir ?collect ~output report =
  let job =
    build ?np ?mem ?loggers ?allowed_containers ?bistro_dir ?collect ~output report
  in
  Lwt_main.run job
end
40 changes: 9 additions & 31 deletions lib/utils/report.mli
Original file line number Diff line number Diff line change
@@ -1,34 +1,12 @@
(** Markdown report *)

open Bistro
open Bistro_engine

type t

val make :
title:string ->
Template_dsl.template ->
t

val svg : svg file -> Template_dsl.template
val png : png file -> Template_dsl.template

val to_html : t -> html file

val build :
?np:int ->
?mem:[`GB of int] ->
?loggers:Logger.t list ->
?allowed_containers:[`Docker | `Singularity] list ->
?bistro_dir:string ->
?collect:bool ->
output:string -> t -> unit Lwt.t

val build_main :
?np:int ->
?mem:[`GB of int] ->
?loggers:Logger.t list ->
?allowed_containers:[`Docker | `Singularity] list ->
?bistro_dir:string ->
?collect:bool ->
output:string -> t -> unit
(** Markdown report *)
module Md : sig
val svg : svg file -> Template_dsl.template
val png : png file -> Template_dsl.template

val to_html :
Template_dsl.template ->
html file
end

0 comments on commit 07879b7

Please sign in to comment.