Skip to content

Commit

Permalink
skip QC, but make report
Browse files — browse the repository at this point in the history
  • Loading branch information
SilasK committed Mar 18, 2019
1 parent 10443f4 commit 70fc0ba
Show file tree
Hide file tree
Showing 5 changed files with 282 additions and 231 deletions.
6 changes: 3 additions & 3 deletions .test/dryrun.sh
Original file line number · Diff line number · Diff line change
Expand Up @@ -20,7 +20,7 @@ rm -fr $WD
atlas download --db-dir $databaseDir -n


atlas init --db-dir $databaseDir --threads 3 -w $WD example_data
atlas init --db-dir $databaseDir --threads 3 -w $WD --assembler spades example_data


for w in qc assembly genomes genecatalog ; do
Expand All @@ -39,8 +39,8 @@ atlas run -w $WD --dryrun $@
# skip QC

WD=${WD}_noQC

atlas init --db-dir $databaseDir --threads 3 --skip-qc -w $WD example_data
rm -fr $WD
atlas init --db-dir $databaseDir --threads 3 --skip-qc -w $WD --assembler megahit example_data


for w in assembly genomes genecatalog ; do
Expand Down
33 changes: 16 additions & 17 deletions atlas/Snakefile
Original file line number · Diff line number · Diff line change
Expand Up @@ -94,9 +94,16 @@ RAW_INPUT_FRACTIONS = ['R1', 'R2'] if PAIRED_END else ['se']
colum_headers_QC= sampleTable.columns[sampleTable.columns.str.startswith("Reads_QC_")]
if len(colum_headers_QC)>=1:
MULTIFILE_FRACTIONS= list(colum_headers_QC.str.replace('Reads_QC_',''))
SKIP_QC=True

if (len(MULTIFILE_FRACTIONS)==1 ) and config.get('interleaved_fastqs',False):
raise NotImplementedError("To start from QC reads that are interleaved is not implemented."
"Do deinterleave the fastq files with e.g."
"reformat.sh in=file.fastq.gz out1=file_R1.fastq.gz out2=file_R2.fastq.gz"
)
else:
MULTIFILE_FRACTIONS = ['R1', 'R2', 'se'] if PAIRED_END else ['se']

SKIP_QC=False


class FileNotInSampleTableException(Exception):
Expand Down Expand Up @@ -158,21 +165,6 @@ def get_quality_controlled_reads(wildcards):



def get_input_fastq(wildcards):

if not config.get('interleaved_fastqs',False):
Raw_Headers= ['Reads_raw_'+f for f in RAW_INPUT_FRACTIONS]
else:
Raw_Headers= ['Reads_raw_se']

if not (wildcards.sample in sampleTable.index):
return expand("Impossible/file/{sample}_{fraction}.fastq.gz",sample=wildcards.sample,fraction=RAW_INPUT_FRACTIONS)
logger.debug(f"Searched for qc reads for inexisitng sample. wildcards: {wildcards}")
else:
try:
return get_files_from_sampleTable(wildcards.sample,Raw_Headers)
except FileNotInSampleTableException:
raise Exception(f"Raw files are not in sampleTable. Are you trying to run QC on files that are already quality controlled?\nsample: {wildcards.sample}\nHeaders: {Raw_Headers}")



Expand All @@ -184,6 +176,8 @@ include: "rules/binning.snakefile"
include: "rules/genecatalog.snakefile"
include: "rules/cat_taxonomy.smk"



CONDAENV = "envs" # overwrite definition in download.smk

localrules: all, qc, assembly_one_sample, assembly, genomes
Expand Down Expand Up @@ -256,7 +250,9 @@ rule assembly:
rule qc:
input:
expand("{sample}/sequence_quality_control/finished_QC", sample=SAMPLES),
"reports/QC_report.html",
read_counts = "stats/read_counts.tsv",
read_length_stats = ['stats/insert_stats.tsv', 'stats/read_length_stats.tsv'] if PAIRED_END else 'stats/read_length_stats.tsv',
report= "reports/QC_report.html",
output:
temp(touch("finished_QC"))

Expand All @@ -265,6 +261,9 @@ rule qc:






# overwrite commands in rules/download.snakefile
onsuccess:
print("ATLAS finished")
Expand Down
44 changes: 30 additions & 14 deletions atlas/report/qc_report.py
Original file line number · Diff line number · Diff line change
Expand Up @@ -120,13 +120,19 @@ def draw_se_read_quality(df,quality_range,**kwargs):



def main(report_out, read_counts, zipfiles_raw,zipfiles_QC, min_quality):
def main(report_out, read_counts,zipfiles_QC, min_quality,zipfiles_raw=None):
div = {}

# N reads / N bases
df = pd.read_table(read_counts, index_col=[0, 1])
for variable in ['Total_Reads','Total_Bases']:
data = df[variable].unstack()[df.loc[df.index[0][0]].index.drop('clean')]


data = df[variable].unstack()[df.loc[df.index[0][0]].index]

if 'clean' in data.columns:
data.drop('clean',axis=1,inplace=True)

div[variable] = offline.plot(
data.iplot(
asFigure=True,
Expand Down Expand Up @@ -183,20 +189,30 @@ def main(report_out, read_counts, zipfiles_raw,zipfiles_QC, min_quality):
{div[quality_qc_pe]}
"""

if Quality_se.shape[0] > 0:

div['quality_qc_se'] = draw_se_read_quality(Quality_se,[min_quality,max_quality])
Report_read_quality_qc += """
Single end
**********
Paired end reads that lost their mate during filtering.
"""
div['quality_qc_se'] = draw_se_read_quality(Quality_se,[min_quality,max_quality])
Report_read_quality_qc += """
.. raw:: html
{div[quality_qc_se]}
"""
Report_read_quality_raw = """

if zipfiles_raw is None:
Report_read_quality_raw=""
else:


Report_read_quality_raw = """
Reads quality before QC
~~~~~~~~~~~~~~~~~~~~~~~
Expand All @@ -206,13 +222,13 @@ def main(report_out, read_counts, zipfiles_raw,zipfiles_QC, min_quality):
{div[quality_raw]}
"""
Quality_pe, Quality_se = get_stats_from_zips(zipfiles_raw)
if Quality_pe.shape[0] > 0:
div['quality_raw'] = get_pe_read_quality_plot(Quality_pe,[min_quality,max_quality])
elif Quality_se.shape[0] > 0:
div['quality_raw'] = draw_se_read_quality(Quality_se,[min_quality,max_quality])
else:
raise IndexError()
Quality_pe, Quality_se = get_stats_from_zips(zipfiles_raw)
if Quality_pe.shape[0] > 0:
div['quality_raw'] = get_pe_read_quality_plot(Quality_pe,[min_quality,max_quality])
elif Quality_se.shape[0] > 0:
div['quality_raw'] = draw_se_read_quality(Quality_se,[min_quality,max_quality])
else:
raise IndexError()

report_str = """
Expand Down Expand Up @@ -261,7 +277,7 @@ def main(report_out, read_counts, zipfiles_raw,zipfiles_QC, min_quality):
try:
main(report_out=snakemake.output.report,
read_counts=snakemake.input.read_counts,
zipfiles_raw=snakemake.input.zipfiles_raw,
zipfiles_raw= snakemake.input.zipfiles_raw if hasattr(snakemake.input,'zipfiles_raw') else None,
zipfiles_QC=snakemake.input.zipfiles_QC,
min_quality=snakemake.params.min_quality,
)
Expand Down
2 changes: 1 addition & 1 deletion atlas/rules/binning.snakefile
Original file line number · Diff line number · Diff line change
Expand Up @@ -855,7 +855,7 @@ rule align_reads_to_MAGs:
fraction=MULTIFILE_FRACTIONS)
params:
input = lambda wc, input : input_params_for_bbwrap( input.reads),
unmapped = lambda wc, output: "outu1={0},{2} outu2={1},null".format(*output.unmapped) if PAIRED_END else "outu={0}".format(*output.unmapped),
unmapped = lambda wc, output: io_params_for_tadpole(output.unmapped,key='outu' ),
maxsites = config.get("maximum_counted_map_sites", MAXIMUM_COUNTED_MAP_SITES),
max_distance_between_pairs = config.get('contig_max_distance_between_pairs', CONTIG_MAX_DISTANCE_BETWEEN_PAIRS),
paired_only = 't' if config.get("contig_map_paired_only", CONTIG_MAP_PAIRED_ONLY) else 'f',
Expand Down

0 comments on commit 70fc0ba

Please sign in to comment.