-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
103 changed files
with
6,712 additions
and
4,430 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,7 @@ depends: [ | |
"dune" {>= "1.11"} | ||
"biocaml" | ||
"biotk" | ||
"tyxml" | ||
] | ||
build: [ | ||
["dune" "subst"] {pinned} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
open Core_kernel | ||
open Bistro | ||
open Bistro.Shell_dsl | ||
open Formats | ||
|
||
type 'a sample = { | ||
id : string ; | ||
tissue : string ; | ||
factor : string ; | ||
replicate : string ; | ||
bam : [`indexed_bam] directory ; | ||
peaks : (#bed3 as 'a) file ; | ||
} | ||
|
||
let img = [ docker_image ~account:"pveber" ~name:"bioconductor" ~tag:"3.8" () ] | ||
|
||
let sample_sheet samples = | ||
let header = string "SampleID,Tissue,Factor,Replicate,bamReads,Peaks" in | ||
let line { id ; tissue ; factor ; replicate ; bam ; peaks } = | ||
seq ~sep:"," [ string id ; string tissue ; string factor ; string replicate ; dep (Samtools.indexed_bam_to_bam bam) ; dep peaks ] | ||
in | ||
seq ~sep:"\n" (header :: List.map ~f:line samples) | ||
|
||
let rscript sample_sheet = | ||
seq ~sep:"\n" [ | ||
string "library(ChIPQC)" ; | ||
seq ~sep:"" [ | ||
string {|samples = read.csv("|} ; | ||
file_dump sample_sheet ; | ||
string {|")|} ; | ||
] ; | ||
string "experiment = ChIPQC(samples)" ; | ||
seq ~sep:"" [ | ||
string {|ChIPQCreport(experiment,facet=F,reportFolder="|} ; | ||
dest ; | ||
string {|")|} ; | ||
] | ||
] | ||
|
||
let run samples = | ||
Workflow.shell ~descr:"ChIPQC" ~img [ | ||
cmd "Rscript" [ file_dump (rscript (sample_sheet samples)) ] ; | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
open Bistro | ||
open Formats | ||
|
||
type 'a sample = { | ||
id : string ; | ||
tissue : string ; | ||
factor : string ; | ||
replicate : string ; | ||
bam : [`indexed_bam] directory ; | ||
peaks : (#bed3 as 'a) file ; | ||
} | ||
|
||
val run : 'a sample list -> [`ChIPQC] directory | ||
(** Beware: doesn't work with only one sample (see | ||
https://support.bioconductor.org/p/84754/) *) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,224 @@ | ||
open Core_kernel | ||
open Bistro | ||
open Bistro.Shell_dsl | ||
|
||
class type table = object | ||
inherit tsv | ||
method header : [`yes] | ||
end | ||
|
||
type output = | ||
< | ||
comparison_summary : table file ; | ||
comparisons : ((string * string * string) * table file) list ; | ||
effect_table : table file ; | ||
normalized_counts : table file ; | ||
sample_clustering : svg file ; | ||
sample_pca : svg file ; | ||
directory : [`DESeq2] directory ; | ||
> | ||
|
||
let img = [ docker_image ~account:"pveber" ~name:"bioconductor" ~tag:"3.3" () ] | ||
|
||
let wrapper_script = {| | ||
library(DESeq2) | ||
library(gplots) | ||
library(RColorBrewer) | ||
|
||
### DATA PROCESSING | ||
loadCounts <- function(sample_files) { | ||
loadFile <- function(fn) { | ||
d <- read.table(fn,header=F,sep='\t') | ||
d[!grepl("^__",d[,1]), 2] #remove HTSEQ sum counts | ||
} | ||
sapply(sample_files,loadFile) | ||
} | ||
|
||
loadIds <- function(sample_files) { | ||
d <- read.table(sample_files[1],header=F,sep='\t') | ||
d[!grepl("^__",d[,1]), 1] | ||
} | ||
|
||
differentialAnalysis <- function(counts, description) { | ||
DESeq(DESeqDataSetFromMatrix(countData=counts, | ||
colData=description, | ||
design=as.formula(paste("~", paste(colnames(description),collapse=" + ")))), | ||
fitType='local') | ||
} | ||
|
||
my.summary.results <- function(object) { | ||
alpha <- 0.1 | ||
notallzero <- sum(object$baseMean > 0) | ||
up <- sum(object$padj < alpha & object$log2FoldChange > 0, | ||
na.rm = TRUE) | ||
down <- sum(object$padj < alpha & object$log2FoldChange < | ||
0, na.rm = TRUE) | ||
filt <- sum(!is.na(object$pvalue) & is.na(object$padj)) | ||
outlier <- sum(object$baseMean > 0 & is.na(object$pvalue)) | ||
c(notallzero, up, down, filt, outlier) | ||
} | ||
|
||
### OUTPUT | ||
outputForAllComparisons <- function(description, outdir, factor_names, ids, dds) { | ||
recap <- data.frame(gene = ids) | ||
stats <- data.frame(comparison = character(0), | ||
expressed = integer(0), | ||
up = integer(0), | ||
down = integer(0), | ||
filt = integer(0), | ||
outlier = integer(0), stringsAsFactors=F) | ||
for(f in factor_names) { | ||
l <- unique(description[,f]) | ||
for(i in 1:length(l)) | ||
for(j in if(i+1 > length(l)) c() else (i + 1):length(l)) { | ||
label <- paste0(f,"_",l[i],"_",l[j]) | ||
|
||
res <- results(dds,contrast=c(f,as.character(l[i]),as.character(l[j]))) | ||
|
||
fn <- paste0(outdir,"/results_",label,".tsv") | ||
write.table(cbind(data.frame(id = ids), res),file=fn,row.names=F,sep='\t',quote=F) | ||
|
||
recap[,paste0(label,"_l2fc")] <- res[,"log2FoldChange"] | ||
recap[,paste0(label,"_padj")] <- res[,"padj"] | ||
|
||
stats[dim(stats)[1]+1,] <- c(label,my.summary.results(res)) | ||
|
||
svg(paste0(outdir,"/MA_plot_",label,".svg"), width=7, height=3.5) | ||
plotMA(res,main=label) | ||
dev.off() | ||
} | ||
} | ||
|
||
write.table(recap, file = paste0(outdir,"/recap.tsv"),row.names=F,sep='\t',quote=F) | ||
write.table(stats, file = paste0(outdir,"/summary.tsv"),row.names=F,sep='\t',quote=F) | ||
} | ||
|
||
|
||
generalPlots <- function(description, outdir, factor_names, ids, dds) { | ||
rld <- rlog(dds) | ||
rldMat <- assay(rld) | ||
rldDist <- dist(t(rldMat)) | ||
mat <- as.matrix(rldDist) | ||
rownames(mat) <- colnames(mat) <- apply(description, 1, paste, collapse = ":") | ||
hc <- hclust(rldDist) | ||
hmcol <- colorRampPalette(brewer.pal(9,"GnBu"))(100) | ||
|
||
svg(paste0(outdir,"/sample_clustering.svg")) | ||
heatmap.2(mat, Rowv = as.dendrogram(hc), symm = TRUE, trace = "none", col = rev(hmcol), margin = c(13,13)) | ||
dev.off() | ||
|
||
svg(paste0(outdir,"/sample_pca.svg")) | ||
print(plotPCA(rld, intgroup = factor_names)) | ||
dev.off() | ||
|
||
counts <- cbind(ids,as.data.frame(counts(dds,normalized=T))) | ||
write.table(counts, file = paste0(outdir,"/normalized_counts.tsv"),row.names=F,sep='\t',quote=F,col.names=F) | ||
} | ||
|
||
main <- function(outdir, factor_names, sample_files, conditions) { | ||
description <- as.data.frame(conditions) | ||
colnames(description) <- factor_names | ||
rownames(description) <- NULL | ||
ids <- loadIds(sample_files) | ||
counts <- loadCounts(sample_files) | ||
dds <- differentialAnalysis(counts, description) | ||
system(paste("mkdir -p", outdir)) | ||
outputForAllComparisons(description, outdir, factor_names, ids, dds) | ||
generalPlots(description, outdir, factor_names, ids, dds) | ||
} | ||
|} | ||
|
||
let app fn args = | ||
seq ~sep:"" [ | ||
string fn ; string "(" ; | ||
seq ~sep:"," args ; | ||
string ")" ; | ||
] | ||
|
||
let app' fn args = | ||
let arg (k, v) = seq ~sep:"=" [ string k ; v ] in | ||
seq ~sep:"" [ | ||
string fn ; string "(" ; | ||
list arg ~sep:"," args ; | ||
string ")" ; | ||
] | ||
|
||
let script factors samples = | ||
seq ~sep:"\n" [ | ||
string wrapper_script ; | ||
app "main" [ | ||
quote dest ~using:'"' ; | ||
app "c" (List.map ~f:(string % quote ~using:'"') factors) ; | ||
app "c" (List.map ~f:(snd % dep % quote ~using:'"') samples) ; | ||
app' "matrix" [ | ||
"data", app "c" (List.map ~f:fst samples | ||
|> List.concat | ||
|> List.map ~f:(string % quote ~using:'"')) ; | ||
"ncol", int (List.length factors) ; | ||
"byrow", string "T" ; | ||
] | ||
] | ||
] | ||
|
||
|
||
let wrapper factors samples = | ||
Workflow.shell ~descr:"deseq2.wrapper" ~img [ | ||
cmd "Rscript" [ file_dump (script factors samples) ] ; | ||
] | ||
|
||
(* | ||
remove duplicates *and* keep original order | ||
not tail-recursive and quadratic complexity | ||
*) | ||
let unique xs = | ||
let rec aux seen = function | ||
| [] -> [] | ||
| h :: t -> | ||
if List.mem seen h ~equal:Poly.( = ) then | ||
aux seen t | ||
else | ||
h :: aux (h :: seen) t | ||
in | ||
aux [] xs | ||
|
||
let rec fold_2sets xs ~init ~f = | ||
match xs with | ||
| [] -> init | ||
| h :: t -> | ||
let next = List.fold_left t ~init ~f:(fun accu g -> f accu h g) in | ||
fold_2sets t ~init:next ~f | ||
|
||
let factor_levels factor_names conditions = | ||
List.fold_right | ||
conditions | ||
~init:(List.map factor_names ~f:(fun _ -> [])) | ||
~f:(fun cond accu -> List.map2_exn cond accu ~f:(fun c cs -> c :: cs)) | ||
|> List.map ~f:unique | ||
|
||
let comparisons factor_names conditions = | ||
let factor_levels = factor_levels factor_names conditions in | ||
List.map2_exn factor_names factor_levels ~f:(fun name levels -> | ||
fold_2sets levels ~init:[] ~f:(fun accu l1 l2 -> | ||
(name, l1, l2) :: accu | ||
) | ||
|> List.rev | ||
) | ||
|> List.concat | ||
|
||
|
||
let main_effects factors samples = | ||
let o = wrapper factors samples in | ||
let sel p = Workflow.select o p in | ||
object | ||
method sample_clustering = sel [ "sample_clustering.svg" ] | ||
method sample_pca = sel [ "sample_pca.svg" ] | ||
method normalized_counts = sel [ "normalized_counts.tsv" ] | ||
method comparison_summary = sel [ "summary.tsv" ] | ||
method effect_table = sel [ "recap.tsv" ] | ||
method comparisons = | ||
let conditions = List.map samples ~f:fst in | ||
List.map (comparisons factors conditions) ~f:(fun ((name, l1, l2) as comp) -> | ||
comp, sel [ sprintf "results_%s_%s_%s.tsv" name l1 l2 ] | ||
) | ||
method directory = o | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
open Bistro | ||
|
||
val img : container_image list | ||
|
||
class type table = object | ||
inherit tsv | ||
method header : [`yes] | ||
end | ||
|
||
type output = | ||
< | ||
comparison_summary : table file ; | ||
comparisons : ((string * string * string) * table file) list ; | ||
effect_table : table file ; | ||
normalized_counts : table file ; | ||
sample_clustering : svg file ; | ||
sample_pca : svg file ; | ||
directory : [`DESeq2] directory ; | ||
> | ||
|
||
val main_effects : | ||
string list -> | ||
(string list * #Htseq.count_tsv file) list -> | ||
output |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
open Bistro | ||
open Bistro.Shell_dsl | ||
|
||
type report = [`fastQC] directory | ||
|
||
let img = [ docker_image ~account:"pveber" ~name:"fastqc" ~tag:"0.11.8" () ] | ||
|
||
module Cmd = struct | ||
let fastqc x = [ | ||
mkdir_p dest ; | ||
cmd "fastqc" [ | ||
seq ~sep:"" [ string "--outdir=" ; dest ] ; | ||
( | ||
match x with | ||
| `fq fq -> dep fq | ||
| `fq_gz fq_gz -> Bistro_unix.Cmd.gzdep fq_gz | ||
) | ||
] ; | ||
and_list [ | ||
cd dest ; | ||
cmd "unzip" [ string "*_fastqc.zip" ] ; | ||
cmd "mv" [ string "*_fastqc/*" ; string "." ] | ||
] ; | ||
] | ||
end | ||
|
||
let fastqc fq = Workflow.shell ~descr:"fastQC" ~img (Cmd.fastqc (`fq fq)) | ||
|
||
let fastqc_gz fq_gz = Workflow.shell ~descr:"fastQC" ~img (Cmd.fastqc (`fq_gz fq_gz)) | ||
|
||
let html_report x = Workflow.select x ["fastqc_report.html"] | ||
|
||
let per_base_quality x = | ||
Workflow.select x ["Images" ; "per_base_quality.png"] | ||
|
||
let per_base_sequence_content x = | ||
Workflow.select x ["Images" ; "per_base_sequence_content.png"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
open Bistro | ||
open Formats | ||
|
||
type report = [`fastQC] directory | ||
|
||
val fastqc : #fastq file -> report | ||
val fastqc_gz : #fastq gz file -> report | ||
val html_report : report -> html file | ||
val per_base_quality : report -> png file | ||
val per_base_sequence_content : report -> png file |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
type 'a t = | ||
| Single_end of 'a | ||
| Paired_end of 'a * 'a | ||
|
||
let map x ~f = match x with | ||
| Single_end x -> Single_end (f x) | ||
| Paired_end (x, y) -> Paired_end (f x, f y) | ||
|
||
let fst = function | ||
| Single_end x | ||
| Paired_end (x, _) -> x |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
type 'a t = | ||
| Single_end of 'a | ||
| Paired_end of 'a * 'a | ||
|
||
val map : 'a t -> f:('a -> 'b) -> 'b t | ||
val fst : 'a t -> 'a |
Oops, something went wrong.