Skip to content

Commit

Permalink
Merge pull request #67 from sanjaynagi/move-plots-to-quant-26-11-22
Browse files Browse the repository at this point in the history
mv plots to quant

Former-commit-id: 5cc2b84
  • Loading branch information
sanjaynagi committed Nov 26, 2022
2 parents 9929070 + 6fd4915 commit aea0f8e
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 37 deletions.
2 changes: 1 addition & 1 deletion workflow/rules/alignment.smk
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ rule KallistoQuant:
fastq=lambda wildcards: getFASTQs(wildcards=wildcards, rules="KallistoQuant"),
index="resources/reference/kallisto.idx",
output:
directory("results/quant/{sample}"),
directory("results/counts/{sample}"),
group:
"diffexp"
log:
Expand Down
16 changes: 8 additions & 8 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def GetDesiredOutputs(wildcards):
[
"results/.input.check",
"results/alignments/{sample}_stats/genome_results.txt",
"results/multiQC.html",
"results/qc/multiQC.html",
],
sample=samples,
)
Expand All @@ -111,7 +111,7 @@ def GetDesiredOutputs(wildcards):
wanted_input.extend(
expand(
[
"resources/reads/qc/{sample}_{n}_fastqc.html",
"results/qc/{sample}_{n}_fastqc.html",
],
sample=samples,
n=[1, 2],
Expand All @@ -121,7 +121,7 @@ def GetDesiredOutputs(wildcards):
wanted_input.extend(
expand(
[
"resources/reads/qc/{sample}_{n}_fastqc.html",
"results/qc/{sample}_{n}_fastqc.html",
],
sample=samples,
n=1
Expand All @@ -133,8 +133,8 @@ def GetDesiredOutputs(wildcards):
wanted_input.extend(
expand(
[
"results/alignments/coverage/{sample}.mosdepth.summary.txt",
"results/alignments/bamStats/{sample}.flagstat",
"results/qc/coverage/{sample}.mosdepth.summary.txt",
"results/qc/alignments/{sample}.flagstat",
],
sample=samples,
)
Expand All @@ -150,8 +150,8 @@ def GetDesiredOutputs(wildcards):
"results/genediff/{dataset}_diffexp.xlsx",
"results/isoformdiff/{comp}.csv",
"results/isoformdiff/{dataset}_isoformdiffexp.xlsx",
"results/plots/PCA.pdf",
"results/quant/countStatistics.tsv",
"results/counts/PCA.pdf",
"results/counts/countStatistics.tsv",
],
comp=config["contrasts"],
dataset=config["dataset"],
Expand Down Expand Up @@ -187,7 +187,7 @@ def GetDesiredOutputs(wildcards):
wanted_input.extend(
expand(
[
"results/variantAnalysis/vcfs/stats/{contig}.txt",
"results/qc/vcfs/{contig}.txt",
"results/variantAnalysis/pca/PCA-{contig}-{dataset}.svg",
"results/variantAnalysis/SNPstats/snpsPerGenomicFeature.tsv",
"results/variantAnalysis/SNPstats/nSNPsPerGene.tsv",
Expand Down
12 changes: 6 additions & 6 deletions workflow/rules/diffexp.smk
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,15 @@ rule DifferentialGeneExpression:
input:
metadata=config["metadata"],
genes2transcripts=config["ref"]["genes2transcripts"],
counts=expand("results/quant/{sample}", sample=samples),
counts=expand("results/counts/{sample}", sample=samples),
output:
csvs=expand("results/genediff/{comp}.csv", comp=config["contrasts"]),
xlsx=expand(
"results/genediff/{dataset}_diffexp.xlsx", dataset=config["dataset"]
),
pca="results/plots/PCA.pdf",
countStats="results/quant/countStatistics.tsv",
normCounts="results/quant/normCounts.tsv",
pca="results/counts/PCA.pdf",
countStats="results/counts/countStatistics.tsv",
normCounts="results/counts/normCounts.tsv",
group:
"diffexp"
priority: 20
Expand All @@ -43,7 +43,7 @@ rule DifferentialIsoformExpression:
input:
metadata=config["metadata"],
genes2transcripts=config["ref"]["genes2transcripts"],
counts=expand("results/quant/{sample}", sample=samples),
counts=expand("results/counts/{sample}", sample=samples),
output:
csvs=expand("results/isoformdiff/{comp}.csv", comp=config["contrasts"]),
xlsx=expand(
Expand Down Expand Up @@ -184,7 +184,7 @@ rule Ag1000gSweepsDE:
rule geneFamilies:
input:
genediff=expand("results/genediff/{comp}.csv", comp=config["contrasts"]),
normcounts="results/quant/normCounts.tsv",
normcounts="results/counts/normCounts.tsv",
eggnog=config["miscellaneous"]["GeneFamiliesHeatmap"]["eggnog"],
pfam=config["miscellaneous"]["GeneFamiliesHeatmap"]["pfam"],
output:
Expand Down
24 changes: 12 additions & 12 deletions workflow/rules/qc.smk
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ rule FastQC:
input:
lambda wildcards:getFASTQs(wildcards=wildcards, rules='fastqc'),
output:
html="resources/reads/qc/{sample}_{n}_fastqc.html",
zip="resources/reads/qc/{sample}_{n}_fastqc.zip",
html="results/qc/{sample}_{n}_fastqc.html",
zip="results/qc/{sample}_{n}_fastqc.zip",
log:
"logs/FastQC/{sample}_{n}_QC.log",
params:
Expand All @@ -49,7 +49,7 @@ rule cutAdapt:
output:
fastq1="resources/reads/trimmed/{sample}_1.fastq.gz",
fastq2="resources/reads/trimmed/{sample}_2.fastq.gz" if config['fastq']['paired'] else [],
qc="resources/trimmed/{sample}.qc.txt",
qc="results/qc/cutadapt/{sample}.qc.txt",
params:
# https://cutadapt.readthedocs.io/en/stable/guide.html#adapter-types
adapters=config["cutadapt"]["adaptors"], #"-a AGAGCACACGTCTGAACTCCAGTCAC -g AGATCGGAAGAGCACACGT -A AGAGCACACGTCTGAACTCCAGTCAC -G AGATCGGAAGAGCACACGT",
Expand All @@ -70,7 +70,7 @@ rule BamStats:
bam="results/alignments/{sample}.bam",
idx="results/alignments/{sample}.bam.bai",
output:
stats="results/alignments/bamStats/{sample}.flagstat",
stats="results/qc/alignments/{sample}.flagstat",
log:
"logs/BamStats/{sample}.log",
wrapper:
Expand All @@ -85,7 +85,7 @@ rule Coverage:
bam="results/alignments/{sample}.bam",
idx="results/alignments/{sample}.bam.bai",
output:
"results/alignments/coverage/{sample}.mosdepth.summary.txt",
"results/qc/coverage/{sample}.mosdepth.summary.txt",
log:
"logs/Coverage/{sample}.log",
conda:
Expand All @@ -108,7 +108,7 @@ rule vcfStats:
dataset=config["dataset"],
),
output:
vcfStats="results/variantAnalysis/vcfs/stats/{contig}.txt",
vcfStats="results/qc/vcfs/{contig}.txt",
conda:
"../envs/variants.yaml"
log:
Expand Down Expand Up @@ -142,19 +142,19 @@ rule multiQC:
Integrate QC statistics from other tools into a final .html report
"""
input:
expand("resources/reads/qc/{sample}_{n}_fastqc.zip", sample=samples, n=[1, 2]),
expand("results/qc/{sample}_{n}_fastqc.zip", sample=samples, n=[1, 2]),
expand(
"results/variantAnalysis/vcfs/stats/{contig}.txt", contig=config["contigs"]
"results/qc/vcfs/{contig}.txt", contig=config["contigs"]
)
if config["VariantAnalysis"]["activate"]
else [],
expand(
"results/alignments/coverage/{sample}.mosdepth.summary.txt", sample=samples
"results/qc/coverage/{sample}.mosdepth.summary.txt", sample=samples
),
expand("results/alignments/bamStats/{sample}.flagstat", sample=samples),
expand("results/quant/{sample}", sample=samples),
expand("results/qc/alignments/{sample}.flagstat", sample=samples),
expand("results/counts/{sample}", sample=samples),
output:
"results/multiQC.html",
"results/qc/multiQC.html",
params:
"results/ resources/ logs/", # Optional: extra parameters for multiqc.
log:
Expand Down
18 changes: 9 additions & 9 deletions workflow/scripts/DeseqGeneDE.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ vst_pca = function(counts, samples, colourvar, name="PCA", st="", comparison="")
pc = data.frame(pca2$x) %>% rownames_to_column("sampleID")
pc = left_join(pc, samples)

pdf(glue("results/plots/{name}{st}{comparison}.pdf"))
pdf(glue("results/counts/{name}{st}{comparison}.pdf"))
print(ggplot(data=pc,aes(x=PC1, y=PC2, colour=treatment)) +
geom_point(size=6, alpha=0.8) +
geom_text_repel(aes(label=sampleID), colour="black") +
Expand Down Expand Up @@ -114,7 +114,7 @@ cat("\n", "------------- Kallisto - DESeq2 - RNASeq Differential expression ----
#### Read counts for each sample
df = list()
for (sample in metadata$sampleID){
df[[sample]]= fread(glue("results/quant/{sample}/abundance.tsv"), sep = "\t")
df[[sample]]= fread(glue("results/counts/{sample}/abundance.tsv"), sep = "\t")
}

counts = data.frame('TranscriptID' = df[[1]]$target_id)
Expand All @@ -141,10 +141,10 @@ count_stats$genes_zerocounts = apply(counts, 2, function(x){sum(x==0)}) # genes
count_stats$genes_lessthan10counts = apply(counts, 2, function(x){sum(x<10)}) # genes with less than 10 counts
count_stats = count_stats %>% dplyr::mutate("proportion_zero" = genes_zerocounts/ngenes,
"proportion_low" = genes_lessthan10counts/ngenes)
count_stats %>% fwrite(., "results/quant/countStatistics.tsv",sep="\t")
count_stats %>% fwrite(., "results/counts/countStatistics.tsv",sep="\t")

print("Counting and plotting total reads per sample...")
pdf("results/quant/total_reads_counted.pdf")
pdf("results/counts/total_reads_counted.pdf")
ggplot(count_stats, aes(x=Sample, y=total_counts, fill=metadata$treatment)) +
geom_bar(stat='identity') +
theme_light() +
Expand All @@ -166,17 +166,17 @@ normcounts = res[[3]]
### write out raw and normalised counts
counts %>%
rownames_to_column("GeneID") %>%
fwrite(., "results/quant/rawcounts.tsv", sep="\t", row.names = FALSE)
fwrite(., "results/counts/rawcounts.tsv", sep="\t", row.names = FALSE)

normcounts %>%
as.data.frame() %>%
rownames_to_column("GeneID") %>%
round_df(., 1) %>%
fwrite(., "results/quant/normCounts.tsv", sep="\t", row.names = FALSE)
fwrite(., "results/counts/normCounts.tsv", sep="\t", row.names = FALSE)

# calculate correlations between samples based on the count data, and plot heatmap
correlations = cor(vstcounts)
pdf("results/plots/heatmap_correlations.pdf")
pdf("results/counts/heatmap_correlations.pdf")
pheatmap(correlations)
garbage = dev.off()

Expand Down Expand Up @@ -294,7 +294,7 @@ totalAligned = c()

# open kallisto json info and store stats, save to file
for (sample in metadata$sampleID){
run = fromJSON(glue("results/quant/{sample}/run_info.json"), flatten=TRUE)
run = fromJSON(glue("results/counts/{sample}/run_info.json"), flatten=TRUE)

totalReads = c(totalReads, run$n_processed)
totalAligned = c(totalAligned, run$n_pseudoaligned)
Expand All @@ -305,7 +305,7 @@ df = data.frame("sample" = c(metadata$sampleID, "Total"),
"totalReads" = c(totalReads, sum(totalReads)),
"totalAligned" = c(totalAligned, sum(totalAligned))) %>%
mutate("percentage" = (totalAligned/totalReads)*100) %>%
fwrite("results/quant/KallistoQuantSummary.tsv", sep="\t", col.names = TRUE)
fwrite("results/counts/KallistoQuantSummary.tsv", sep="\t", col.names = TRUE)


sessionInfo()
2 changes: 1 addition & 1 deletion workflow/scripts/SleuthIsoformsDE.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ metadata = fread(snakemake@input[['metadata']], sep="\t") %>%
dplyr::rename('sample' = "sampleID")

#add path column for sleuth object
metadata$path = paste0("results/quant/", metadata$sample)
metadata$path = paste0("results/counts/", metadata$sample)

#read metadata and get contrasts
gene_names = fread(snakemake@input[['genes2transcripts']], sep="\t") %>% select(-GeneID)
Expand Down

0 comments on commit aea0f8e

Please sign in to comment.