Skip to content

Commit

Permalink
bioinfo: introduced SE_or_PE type
Browse files Browse the repository at this point in the history
  • Loading branch information
pveber committed Sep 30, 2019
1 parent 0f10c39 commit 7f624d7
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 30 deletions.
2 changes: 1 addition & 1 deletion doc/getting-started.rst
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ paste the following program:
let genome = Ucsc_gb.genome_sequence `sacCer2 (* Fetch a reference genome *)
let bowtie2_index = Bowtie2.bowtie2_build genome (* Build a Bowtie2 index from it *)
let sample_sam = (* Map the reads on the reference genome *)
Bowtie2.bowtie2 bowtie2_index (`single_end [ sample_fq ])
Bowtie2.bowtie2 bowtie2_index (SE_or_PE.Single_end [ sample_fq ])
let sample_peaks = (* Call peaks on mapped reads *)
Macs2.(callpeak sam [ sample_sam ])
Expand Down
6 changes: 3 additions & 3 deletions examples/chipseq.ml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
(**
(**
Paper: https://www.ncbi.nlm.nih.gov/pubmed/21700227
Datasets: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE29506
*)
Expand All @@ -13,7 +13,7 @@ let bowtie_index = Bowtie.bowtie_build genome

let mapped_reads srrid =
let fastq = Sra_toolkit.fastq_dump (`id srrid) in
Bowtie.bowtie ~v:1 bowtie_index (`single_end [ fastq ])
Bowtie.bowtie ~v:1 bowtie_index (SE_or_PE.Single_end [ fastq ])

let macs2 =
Macs2.callpeak ~qvalue:1e-10 ~mfold:(1,100) Macs2.sam
Expand Down Expand Up @@ -44,5 +44,5 @@ let () =
~allowed_containers:[`Singularity]
~np:4 ~mem:(`GB 4)
~outdir:"res"
~loggers:[ Console_logger.create () ]
~loggers:[ Console_logger.create () ]
repo
2 changes: 1 addition & 1 deletion examples/rnaseq.ml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ let genome : fasta pworkflow =
let bowtie2_index = Bowtie2.bowtie2_build genome

let mapped_reads x =
Bowtie2.bowtie2 bowtie2_index (`single_end [ fastq x ])
Bowtie2.bowtie2 bowtie2_index (SE_or_PE.Single_end [ fastq x ])

let annotation : gff pworkflow =
Bistro_unix.wget "ftp://ftp.ensemblgenomes.org/pub/bacteria/release-41/gff3/bacteria_21_collection/listeria_monocytogenes_10403s/Listeria_monocytogenes_10403s.ASM16869v2.41.chromosome.Chromosome.gff3.gz"
Expand Down
2 changes: 1 addition & 1 deletion examples/zhou2011.ml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ let fastq x = List.map (srr_id x) ~f:(fun id ->
let bowtie_index = Bowtie.bowtie_build genome

let mapped_reads x =
Bowtie.bowtie ~v:1 bowtie_index (`single_end (fastq x))
Bowtie.bowtie ~v:1 bowtie_index (SE_or_PE.Single_end (fastq x))

let mapped_reads_bam x =
Samtools.indexed_bam_of_sam (mapped_reads x)
Expand Down
34 changes: 22 additions & 12 deletions lib/bioinfo/bistro_bioinfo.ml
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,16 @@ module Bed = struct
let keep6 x = keep ~n:6 x
end

(** Container for an input that comes either alone (single-end) or as a
    pair of mates (paired-end). *)
module SE_or_PE = struct
  (** ['a t] holds one ['a] ([Single_end]) or two ['a] values
      ([Paired_end]). *)
  type 'a t =
    | Single_end of 'a
    | Paired_end of 'a * 'a

  (** [map x ~f] applies [f] to every component of [x] and rebuilds the
      same constructor around the results.

      For [Paired_end (a, b)], [f a] is always evaluated before [f b]. *)
  let map x ~f =
    match x with
    | Single_end a -> Single_end (f a)
    | Paired_end (a, b) ->
      (* OCaml does not specify the evaluation order of tuple components
         (current compilers go right-to-left), so the two calls are
         sequenced explicitly to keep effects of [f] in mate order. *)
      let a' = f a in
      let b' = f b in
      Paired_end (a', b')
end

module Fastq = struct

type _ format =
Expand Down Expand Up @@ -402,9 +412,9 @@ module Bowtie2 = struct
?fastq_format index fqs =

let args = match fqs with
| `single_end fqs ->
| SE_or_PE.Single_end fqs ->
opt "-U" (list dep ~sep:",") fqs
| `paired_end (fqs1, fqs2) ->
| Paired_end (fqs1, fqs2) ->
seq [
opt "-1" (list dep ~sep:",") fqs1 ;
string " " ;
Expand Down Expand Up @@ -474,8 +484,8 @@ module Bowtie = struct

let bowtie ?l ?e ?m ?fastq_format ?n ?v ?maxins index fastq_files =
let args = match fastq_files with
| `single_end fqs -> list dep ~sep:"," fqs
| `paired_end (fqs1, fqs2) ->
| SE_or_PE.Single_end fqs -> list dep ~sep:"," fqs
| Paired_end (fqs1, fqs2) ->
seq [
opt "-1" (list dep ~sep:",") fqs1 ;
string " " ;
Expand Down Expand Up @@ -1963,8 +1973,8 @@ module Tophat = struct

let tophat1 ?color index fqs =
let args = match fqs with
| `single_end fqs -> list dep ~sep:"," fqs
| `paired_end (fqs1, fqs2) ->
| SE_or_PE.Single_end fqs -> list dep ~sep:"," fqs
| Paired_end (fqs1, fqs2) ->
seq [
list dep ~sep:"," fqs1 ;
string " " ;
Expand All @@ -1984,8 +1994,8 @@ module Tophat = struct

let tophat2 index fqs =
let args = match fqs with
| `single_end fqs -> list dep ~sep:"," fqs
| `paired_end (fqs1, fqs2) ->
| SE_or_PE.Single_end fqs -> list dep ~sep:"," fqs
| Paired_end (fqs1, fqs2) ->
seq [
list dep ~sep:"," fqs1 ;
string " " ;
Expand Down Expand Up @@ -2695,9 +2705,9 @@ module Hisat2 = struct
fqs
=
let args = match fqs with
| `single_end fqs ->
| SE_or_PE.Single_end fqs ->
opt "-U" (list dep ~sep:",") fqs
| `paired_end (fqs1, fqs2) ->
| Paired_end (fqs1, fqs2) ->
seq [
opt "-1" (list dep ~sep:",") fqs1 ;
string " " ;
Expand Down Expand Up @@ -2838,9 +2848,9 @@ module Star = struct
]

let fq_args = function
| `paired_end (fq1, fq2) ->
| SE_or_PE.Single_end fq -> [ dep fq ]
| Paired_end (fq1, fq2) ->
[dep fq1 ; dep fq2]
| `single_end fq -> [ dep fq ]

let samStrandField = function
| `None -> string "None"
Expand Down
26 changes: 14 additions & 12 deletions lib/bioinfo/bistro_bioinfo.mli
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,14 @@ class type sra = object
method format : [`sra]
end

(** Values that come either alone (single-end) or as a pair (paired-end).

    In this interface it wraps the reads argument of the mappers, e.g.
    ['a pworkflow list SE_or_PE.t] for {!Bowtie.bowtie}. *)
module SE_or_PE : sig
(** ['a t] holds either one ['a] or two ['a] values of the same type. *)
type 'a t =
| Single_end of 'a (** a single value *)
| Paired_end of 'a * 'a (** a pair of values *)

(** [map x ~f] applies [f] to each component of [x] and keeps the
    constructor: [Single_end a] becomes [Single_end (f a)] and
    [Paired_end (a, b)] becomes [Paired_end (f a, f b)]. *)
val map : 'a t -> f:('a -> 'b) -> 'b t
end

(** {3 File_formats} *)

module Bed : sig
Expand Down Expand Up @@ -792,8 +800,7 @@ module Bowtie : sig
?n:int -> ?v:int ->
?maxins:int ->
index pworkflow ->
[ `single_end of 'a pworkflow list
| `paired_end of 'a pworkflow list * 'a pworkflow list ] ->
'a pworkflow list SE_or_PE.t ->
sam pworkflow
end

Expand Down Expand Up @@ -847,8 +854,7 @@ module Bowtie2 : sig
?seed:int ->
?fastq_format:'a Fastq.format ->
index pworkflow ->
[ `single_end of 'a pworkflow list
| `paired_end of 'a pworkflow list * 'a pworkflow list ] ->
'a pworkflow list SE_or_PE.t ->
sam pworkflow
end

Expand All @@ -862,14 +868,12 @@ module Tophat : sig
val tophat1 :
?color:bool ->
Bowtie.index pworkflow ->
[ `single_end of #fastq pworkflow list
| `paired_end of (#fastq as 'a) pworkflow list * 'a pworkflow list ] ->
#fastq pworkflow list SE_or_PE.t ->
output pworkflow

val tophat2 :
Bowtie2.index pworkflow ->
[ `single_end of #fastq pworkflow list
| `paired_end of (#fastq as 'a) pworkflow list * 'a pworkflow list ] ->
#fastq pworkflow list SE_or_PE.t ->
output pworkflow

val accepted_hits : output pworkflow -> bam pworkflow
Expand Down Expand Up @@ -911,8 +915,7 @@ module Hisat2 : sig
?no_discordant:bool ->
?seed:int ->
[`hisat2_index] dworkflow ->
[ `single_end of sanger_fastq pworkflow list
| `paired_end of sanger_fastq pworkflow list * sanger_fastq pworkflow list ] ->
sanger_fastq pworkflow list SE_or_PE.t ->
sam pworkflow
end

Expand All @@ -926,8 +929,7 @@ module Star : sig
?outSAMstrandField:[`None | `intronMotif] ->
?alignIntronMax:int ->
[`star_index] dworkflow ->
[ `single_end of sanger_fastq pworkflow
| `paired_end of sanger_fastq pworkflow * sanger_fastq pworkflow ] ->
sanger_fastq pworkflow SE_or_PE.t ->
bam pworkflow
end

Expand Down

0 comments on commit 7f624d7

Please sign in to comment.