Skip to content

Commit

Permalink
update examples
Browse files Browse the repository at this point in the history
  • Loading branch information
pveber committed Dec 12, 2018
1 parent 09362ca commit f58dcba
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 27 deletions.
3 changes: 1 addition & 2 deletions examples/chipseq.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
Paper: https://www.ncbi.nlm.nih.gov/pubmed/21700227
Datasets: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE29506
*)
open Bistro
open Bistro_bioinfo
open Bistro_utils

Expand All @@ -13,7 +12,7 @@ let genome = Ucsc_gb.genome_sequence `sacCer2
let bowtie_index = Bowtie.bowtie_build genome

let mapped_reads srrid =
let sra = Sra.fetch_srr (Workflow.string srrid) in
let sra = Sra.fetch_srr srrid in
let fastq = Sra_toolkit.fastq_dump sra in
Bowtie.bowtie ~v:1 bowtie_index (`single_end [ fastq ])

Expand Down
72 changes: 50 additions & 22 deletions examples/zhou2011.ml
Original file line number Diff line number Diff line change
Expand Up @@ -8,56 +8,84 @@ open Bistro_bioinfo
open Bistro_utils

let np = 4

type chIP_sample = [ `ChIP_Pho4_noPi ]
[@@deriving show, enumerate]


type factor = [ `Pho4 ]
[@@deriving show, enumerate]

let factor = function
| `ChIP_Pho4_noPi -> `Pho4

let control_sample = function
| `ChIP_Pho4_noPi -> `Input_WT_NoPi

let genome = Ucsc_gb.genome_sequence `sacCer2
let genome_2bit = Ucsc_gb.genome_2bit_sequence `sacCer2

let chIP_design tf condition = match tf, condition with
| `Pho4, `noPi -> (`ChIP_Pho4_noPi, `Input_Pho4_NoPi)

let srr_id = function
| `ChIP_Pho4_noPi -> [ "SRR217304" ; "SRR217305" ]
| `Input_Pho4_NoPi -> [ "SRR217319" ]
| `Input_WT_NoPi -> [ "SRR217324" ]

let sra x = List.map (srr_id x) ~f:(fun id ->
Sra.fetch_srr (Workflow.string id)
Sra.fetch_srr id
)

let fastq x = List.map ~f:Sra_toolkit.fastq_dump (sra x)

let bowtie_index = Bowtie.bowtie_build genome

let mapped_reads x =
Bowtie.bowtie ~v:2 bowtie_index (`single_end (fastq x))
Bowtie.bowtie ~v:1 bowtie_index (`single_end (fastq x))

let tf_peaks tf condition =
let treatment_sample, control_sample = chIP_design tf condition in
let mapped_reads_bam x =
Samtools.bam_of_sam (mapped_reads x)

let tf_peaks treatment_sample =
let control_sample = control_sample treatment_sample in
let treatment = mapped_reads treatment_sample in
let control = mapped_reads control_sample in
Macs2.callpeak ~mfold:(1,100) Macs2.sam ~control:[ control ] [ treatment ]

let peak_sequences ~radius tf condition =
let summits = Macs2.peak_summits (tf_peaks tf condition) in
let regions = Bedtools.(slop ~mode:(`both radius) bed summits (Ucsc_gb.fetchChromSizes `sacCer2)) in
let peak_sequences ~radius treatment_sample =
let summits = Macs2.peak_summits (tf_peaks treatment_sample) in
let chrom_sizes = Ucsc_gb.fetchChromSizes `sacCer2 in
let regions = Bedtools.(slop ~mode:(`both radius) bed summits chrom_sizes) in
Ucsc_gb.twoBitToFa genome_2bit (Bed.keep4 regions)

let meme tf condition =
peak_sequences ~radius:50 tf condition
|> Meme_suite.meme ~nmotifs:3 ~minw:5 ~maxw:15 ~revcomp:true ~alphabet:`dna ~maxsize:1_000_000
let meme treatment_sample =
peak_sequences ~radius:50 treatment_sample
|> Meme_suite.meme ~nmotifs:3 ~minw:5 ~maxw:8 ~revcomp:true ~alphabet:`dna ~maxsize:1_000_000

let meme_chip tf condition =
peak_sequences ~radius:50 tf condition
let meme_chip treatment_sample =
peak_sequences ~radius:50 treatment_sample
|> Meme_suite.meme_chip
~meme_nmotifs:3 ~meme_minw:5 ~meme_maxw:15
~meme_nmotifs:3 ~meme_minw:5 ~meme_maxw:8

let chipqc =
let samples = List.map all_of_chIP_sample ~f:(fun x -> {
ChIPQC.id = show_chIP_sample x ;
tissue = "yeast" ;
factor = show_factor (factor x) ;
replicate = "1" ;
bam = mapped_reads_bam x ;
peaks = Macs2.narrow_peaks (tf_peaks x) ;
})
in
ChIPQC.run samples

let repo = Repo.[
item [ "peaks" ; "Pho4" ; "noPi" ] (tf_peaks `Pho4 `noPi) ;
item [ "motifs" ; "meme" ; "Pho4" ; "noPi" ] (meme `Pho4 `noPi) ;
item [ "motifs" ; "meme_chip" ; "Pho4" ; "noPi" ] (meme_chip `Pho4 `noPi) ;
item [ "macs2" ; "Pho4" ; "noPi" ] (tf_peaks `ChIP_Pho4_noPi) ;
item [ "meme" ; "Pho4" ; "noPi" ] (meme `ChIP_Pho4_noPi) ;
item [ "meme_chip" ; "Pho4" ; "noPi" ] (meme_chip `ChIP_Pho4_noPi) ;
item [ "chIP-QC" ; "Pho4" ; "noPi" ] chipqc ;
]

let () =
Repo.build_main
~np:4 ~mem:(`GB 4)
~np ~mem:(`GB 4)
~outdir:"res"
~loggers:[ Console_logger.create () ]
repo
repo
7 changes: 4 additions & 3 deletions examples/zhou2018.ml
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@ module Dataset = struct

let alignments d =
Workflow.string "https://ndownloader.figshare.com/files/9473962"
|> Bistro_unix.wget
|> Bistro_unix.wget
|> Bistro_unix.tar_xfj
|> Fn.flip Workflow.select ["single-gene_alignments" ; to_string d ]
|> Workflow.glob ~pattern:"*"

let best_trees d =
Workflow.string "https://ndownloader.figshare.com/files/9473953"
|> Bistro_unix.wget
|> Bistro_unix.wget
|> Bistro_unix.tar_xfj
|> Fn.flip Workflow.select ["single-gene_trees" ; to_string d ; "Best_observed"]
|> Workflow.glob ~pattern:"*"
Expand Down Expand Up @@ -157,6 +157,7 @@ let%pworkflow concat results =

let repo = Repo.[
item ["concatenated_comps_fasttree"] (concat (comparisons `SongD1 `Fasttree)) ;
items ["comps_fasttree"] ~prefix:"tree" (comparisons `SongD1 `Fasttree) ;
]

let () = Repo.build_main ~loggers:[Console_logger.create ()] ~np:8 ~mem:(`GB 4) ~outdir:"res" repo
let () = Repo.build_main ~loggers:[Console_logger.create ()] ~np:4 ~mem:(`GB 4) ~outdir:"res" repo

0 comments on commit f58dcba

Please sign in to comment.