diff --git a/qp_pacbio/data/resources.yaml b/qp_pacbio/data/resources.yaml index bf8d034..2dda60d 100644 --- a/qp_pacbio/data/resources.yaml +++ b/qp_pacbio/data/resources.yaml @@ -62,11 +62,11 @@ Remove SynDNA plasmid, insert, & GCF_000184185 reads (minimap2): syndna: node_count: 1 nprocs: 16 - wall_time_limit: 10:00:00 - mem_in_gb: 60 + wall_time_limit: 4:00:00 + mem_in_gb: 20 max_tasks: 16 finish: node_count: 1 - nprocs: 16 - wall_time_limit: 1-00:00:00 - mem_in_gb: 120 + nprocs: 1 + wall_time_limit: 4:00:00 + mem_in_gb: 4 diff --git a/qp_pacbio/data/templates/syndna.sbatch b/qp_pacbio/data/templates/syndna.sbatch index 9bcbc57..cf9f359 100644 --- a/qp_pacbio/data/templates/syndna.sbatch +++ b/qp_pacbio/data/templates/syndna.sbatch @@ -5,15 +5,15 @@ #SBATCH -n {{nprocs}} #SBATCH --time {{wall_time_limit}} #SBATCH --mem {{mem_in_gb}}G -#SBATCH -o {{output}}/minimap2/logs/%x-%A_%a.out -#SBATCH -e {{output}}/minimap2/logs/%x-%A_%a.err +#SBATCH -o {{output}}/syndna/logs/%x-%A_%a.out +#SBATCH -e {{output}}/syndna/logs/%x-%A_%a.err #SBATCH --array {{array_params}} source ~/.bashrc set -e {{conda_environment}} out_folder={{output}}/syndna -mkdir -p +mkdir -p ${out_folder} cd ${out_folder} db_folder=/scratch/qp-pacbio/minimap2/syndna/ @@ -28,23 +28,49 @@ mkdir -p ${out_folder}/filtered/ sn_folder=${out_folder}/bioms/${sample_name} mkdir -p ${sn_folder} +txt=${sn_folder}/${sample_name}.txt +tsv=${txt/.txt/.tsv} coverm contig --single $filename --reference ${db_folder}/All_synDNA_inserts.fasta --mapper minimap2-hifi \ --min-read-percent-identity 0.95 --min-read-aligned-percent 0.0 -m mean count --threads {{nprocs}} \ - --output-file ${sn_folder}/${sample_name}.txt -cat ${sn_folder}/${sample_name}_insert_counts.txt | sed 's/Contig/\#OTU ID/' | \ - sed 's/ Read Count//' > ${sn_folder}/${sample_name}.tsv -biom convert -i ${sn_folder}/${sample_name}.txt -o ${sn_folder}/${sample_name}.biom --to-hdf5 + --output-file ${txt} + +awk 'BEGIN {FS=OFS="\t"}; {print $1,$3}' ${txt} | \ + sed 's/Contig/\#OTU ID/' | sed 's/All_synDNA_inserts.fasta\///' | \ + sed 's/ Read Count//' | sed "s/${fn}/${sample_name}/" > ${tsv} + +# if counts is zero mark it as missing and stop +counts=`tail -n +2 ${tsv} | awk '{sum += $NF} END {print sum}'` +if [[ "$counts" == "0" ]]; then + echo ${sample_name} > {{output}}/failed_${SLURM_ARRAY_TASK_ID}.log + exit 0 +fi + +biom convert -i ${tsv} -o ${sn_folder}/syndna.biom --to-hdf5 # removing AllsynDNA_plasmids_FASTA_ReIndexed_FINAL.fasta not coverm +# ---- original commands ---- +# minimap2 -x map-hifi -t {{nprocs}} -a --MD --eqx -o ${out_folder}/${sample_name}_plasmid.sam ${db_folder}/AllsynDNA_plasmids_FASTA_ReIndexed_FINAL.fasta $filename +# samtools view -F 4 -@ {{nprocs}} ${out_folder}/${sample_name}_plasmid.sam | awk '{print $1}' | sort -u > ${out_folder}/${sample_name}_plasmid_mapped.txt +# seqkit grep -v -f ${out_folder}/${sample_name}_plasmid_mapped.txt $filename > ${out_folder}/${sample_name}_no_plasmid.fastq +# ---- original commands ---- minimap2 -x map-hifi -t {{nprocs}} -a --MD --eqx -o ${out_folder}/${sample_name}_plasmid.sam ${db_folder}/AllsynDNA_plasmids_FASTA_ReIndexed_FINAL.fasta $filename samtools view -F 4 -@ {{nprocs}} ${out_folder}/${sample_name}_plasmid.sam | awk '{print $1}' | sort -u > ${out_folder}/${sample_name}_plasmid_mapped.txt seqkit grep -v -f ${out_folder}/${sample_name}_plasmid_mapped.txt $filename > ${out_folder}/${sample_name}_no_plasmid.fastq # removing GCF_000184185.1_ASM18418v1_genomic_chroso.fna use coverm -minimap2 -x map-hifi -t {{nprocs}} -a --MD --eqx -o ${out_folder}/${sample_name}_GCF_000184185.sam ${db_folder}/GCF_000184185.1_ASM18418v1_genomic_chroso.fna ${out_folder}/${sample_name}_no_plasmid_no_inserts.fastq -samtools view -bS -@ {{ nprocs/2 | int }} ${out_folder}/${sample_name}_no_plasmid_no_inserts.fastq | samtools sort -@ {{ nprocs/2 | int }} -O bam -o ${out_folder}/${sample_name}_GCF_000184185_sorted.sam -coverm filter --bam-files ${out_folder}/${sample_name}_GCF_000184185_sorted.sam --min-read-percent-identity 99.9 --min-read-aligned-percent 95 --threads {{nprocs}} -o ${out_folder}/${sample_name}_GCF_000184185.bam -samtools view -O SAM -o ${out_folder}/${sample_name}_no_GCF_000184185_sorted.sam ${out_folder}/${sample_name}_no_inserts.bam +# ---- original commands ---- +# minimap2 -x map-hifi -t 8 -a --MD --eqx -o reads.sam ecoli_genome.fna reads.fastq +# samtools view -bS -@ 8 reads.fastq | samtools sort -@ 24 -O bam -o reads.sorted.bam +# coverm filter --bam-files reads.sorted.bam --min-read-percent-identity 99.9 --min-read-aligned-percent 95 --threads 8 -o reads_filtered.sorted.bam +# samtools view -O SAM -o reads_filtered.sam ./reads_filtered.sorted.bam +# awk '{print $1}' reads_filtered.sam > reads_filtered.txt +# seqkit grep -v -f reads_filtered.txt reads.fastq > reads_no_ecoli.fastq +# ---- original commands ---- +minimap2 -x map-hifi -t {{nprocs}} -a --MD --eqx -o ${out_folder}/${sample_name}_GCF_000184185.sam ${db_folder}/GCF_000184185.1_ASM18418v1_genomic_chroso.fna ${out_folder}/${sample_name}_no_plasmid.fastq +samtools view -bS -@ {{ nprocs/2 | int }} ${out_folder}/${sample_name}_no_plasmid.fastq | samtools sort -@ {{ nprocs/2 | int }} -O bam -o ${out_folder}/${sample_name}_GCF_000184185_sorted.bam +coverm filter --bam-files ${out_folder}/${sample_name}_GCF_000184185_sorted.bam --min-read-percent-identity 99.9 --min-read-aligned-percent 95 --threads {{nprocs}} -o ${out_folder}/${sample_name}_GCF_000184185.bam +samtools view -O SAM -o ${out_folder}/${sample_name}_no_GCF_000184185_sorted.sam ${out_folder}/${sample_name}_GCF_000184185.bam awk '{print $1}' ${out_folder}/${sample_name}_no_GCF_000184185_sorted.sam > ${out_folder}/${sample_name}_GCF_000184185_reads_filtered.txt -seqkit grep -v -f ${out_folder}/${sample_name}_GCF_000184185_reads_filtered.txt ${out_folder}/${sample_name}_GCF_000184185.fastq | gz > ${out_folder}/filtered/${fn} -awk 'BEGIN {FS=OFS="\t"}; {print $1,$3}' +seqkit grep -v -f ${out_folder}/${sample_name}_GCF_000184185_reads_filtered.txt ${out_folder}/${sample_name}_no_plasmid.fastq | gzip > ${out_folder}/filtered/${fn} + +touch {{output}}/completed_${SLURM_ARRAY_TASK_ID}.log diff --git a/qp_pacbio/data/templates/syndna_finish.sbatch b/qp_pacbio/data/templates/syndna_finish.sbatch index 198e35c..151f976 100644 --- a/qp_pacbio/data/templates/syndna_finish.sbatch +++ b/qp_pacbio/data/templates/syndna_finish.sbatch @@ -5,15 +5,15 @@ #SBATCH -n {{nprocs}} #SBATCH --time {{wall_time_limit}} #SBATCH --mem {{mem_in_gb}}G -#SBATCH -o {{output}}/merge/logs/%x-%A_%a.out -#SBATCH -e {{output}}/merge/logs/%x-%A_%a.err +#SBATCH -o {{output}}/finish/logs/%x-%A_%a.out +#SBATCH -e {{output}}/finish/logs/%x-%A_%a.err source ~/.bashrc set -e {{conda_environment}} cd {{output}}/ -biom_merge_pacbio --base {{output}} --type syndna +biom_merge_pacbio --base {{output}}/syndna --merge-type syndna # find {{output}}/coverages/ -iname "*.cov" > {{output}}/cov_files.txt # micov consolidate --paths {{output}}/cov_files.txt --lengths ${len_map} --output {{output}}/coverages.tgz diff --git a/qp_pacbio/data/templates/woltka_minimap2_merge.sbatch b/qp_pacbio/data/templates/woltka_minimap2_merge.sbatch index 48e8db1..30382ba 100644 --- a/qp_pacbio/data/templates/woltka_minimap2_merge.sbatch +++ b/qp_pacbio/data/templates/woltka_minimap2_merge.sbatch @@ -39,7 +39,7 @@ for f in `ls bioms/*/per-gene.biom`; do done | parallel --halt now,fail=1 -j {{nprocs}} wait -biom_merge_pacbio --base {{output}} --type woltka +biom_merge_pacbio --base {{output}} --merge-type woltka find {{output}}/coverages/ -iname "*.cov" > {{output}}/cov_files.txt micov consolidate --paths {{output}}/cov_files.txt --lengths ${len_map} --output {{output}}/coverages.tgz diff --git a/qp_pacbio/qp_pacbio.py b/qp_pacbio/qp_pacbio.py index ac213ae..0a75328 100644 --- a/qp_pacbio/qp_pacbio.py +++ b/qp_pacbio/qp_pacbio.py @@ -229,6 +229,9 @@ def generate_sample_list(qclient, artifact_id, out_dir): with open(out_fp, "w", encoding="utf-8") as f: f.write("\n".join(lines)) + preparation_information = join(out_dir, "prep_info.tsv") + prep.set_index("sample_name").to_csv(preparation_information, sep="\t") + return len(lines) @@ -454,11 +457,26 @@ def syndna_processing(qclient, job_id, parameters, out_dir): errors = [] ainfo = [] - fp_biom = f"{out_dir}/syndna.biom" + + failures = glob(f"{out_dir}/failed_*.log") + if failures: + errors.append("Samples failed: ") + for f in failures: + with open(f, "r") as fp: + errors.append(fp.read()) + return False, ainfo, "\n".join(errors) + + completed = len(glob(f"{out_dir}/completed_*.log")) + with open(f"{out_dir}/sample_list.txt") as fp: + samples = len(fp.readlines()) + + if completed != samples: + errors.append(f"There are {samples - completed} missing samples.") + + fp_biom = f"{out_dir}/syndna/syndna.biom" # do we need to stor alignments? # fp_alng = f'{out_dir}/sams/final/alignment.tar' - - if exists(fp_biom): # and exists(fp_alng): + if not errors and exists(fp_biom): # and exists(fp_alng): # if we got to this point a preparation file should exist in # the output folder prep = pd.read_csv(f"{out_dir}/prep_info.tsv", index_col=None, sep="\t") @@ -492,7 +510,7 @@ def syndna_processing(qclient, job_id, parameters, out_dir): "contact qiita.help@gmail.com for more information" ) - fp_seqs = f"{out_dir}/filtered" + fp_seqs = f"{out_dir}/syndna/filtered" reads = [] for f in glob(f"{fp_seqs}/*.fastq.gz"): reads.append((f, "raw_forward_seqs")) @@ -558,7 +576,7 @@ def generate_syndna_processing(qclient, job_id, out_dir, parameters, url): "mem_in_gb": step_resources["mem_in_gb"], "array_params": f"1-{njobs}%{step_resources['max_tasks']}", } - minimap2_script = _write_slurm(f"{out_dir}/minimap2", m2t, **params) + minimap2_script = _write_slurm(f"{out_dir}/syndna", m2t, **params) m2mt = JGT("syndna_finish.sbatch") step_resources = resources["finish"] @@ -570,6 +588,6 @@ def generate_syndna_processing(qclient, job_id, out_dir, parameters, url): "mem_in_gb": step_resources["mem_in_gb"], "url": url, } - minimap2_merge_script = _write_slurm(f"{out_dir}/merge", m2mt, **params) + minimap2_finish_script = _write_slurm(f"{out_dir}/finish", m2mt, **params) - return minimap2_script, minimap2_merge_script + return minimap2_script, minimap2_finish_script diff --git a/qp_pacbio/scripts.py b/qp_pacbio/scripts.py index 47ff151..2309289 100644 --- a/qp_pacbio/scripts.py +++ b/qp_pacbio/scripts.py @@ -6,7 +6,6 @@ # # The full license is in the file LICENSE, distributed with this software. # ----------------------------------------------------------------------------- -import enum from glob import glob from os import makedirs from os.path import join @@ -21,6 +20,7 @@ PACBIO_PROCESSING_STEPS, generate_minimap2_processing, generate_sample_list, + generate_syndna_processing, pacbio_generate_templates, ) from qp_pacbio.util import client_connect @@ -57,18 +57,23 @@ def execute(url, job_id, output_dir): command = job_info["command"] artifact_id = parameters["artifact"] - if command == "Woltka v0.1.7, minimap2": - main_fp, merge_fp = generate_minimap2_processing( + regular_commands = { + "Woltka v0.1.7, minimap2": generate_minimap2_processing, + "Remove SynDNA plasmid, insert, & GCF_000184185 reads (minimap2)": generate_syndna_processing, + } + + if command in regular_commands.keys(): + first_fp, second_fp = regular_commands[command]( qclient, job_id, output_dir, parameters, url ) # Submitting jobs and returning id - main_job = run(["sbatch", main_fp], stdout=PIPE) - main_job_id = main_job.stdout.decode("utf8").split()[-1] - cmd = ["sbatch", "-d", f"afterok:{main_job_id}", merge_fp] - merge_job = run(cmd, stdout=PIPE) - merge_job_id = merge_job.stdout.decode("utf8").split()[-1] - print(f"{main_job_id}, {merge_job_id}") + first_job = run(["sbatch", first_fp], stdout=PIPE) + first_job_id = first_job.stdout.decode("utf8").split()[-1] + cmd = ["sbatch", "-d", f"afterok:{first_job_id}", second_fp] + second_job = run(cmd, stdout=PIPE) + second_job_id = second_job.stdout.decode("utf8").split()[-1] + print(f"{first_job_id}, {second_job_id}") qclient.update_job_step(job_id, "Step 2 of 4: Aligning sequences") elif command == "PacBio processing": frp = join(output_dir, "results") @@ -151,22 +156,20 @@ def _biom_merge(tables): return full -class BIOMMergeOptions(enum.Enum): - SYNDNA = enum.auto() - WOLTKA = enum.auto() - - @click.command() @click.option("--base", type=click.Path(exists=True), required=True) -@click.option("--type", type=click.Choice(BIOMMergeOptions, case_sensitive=False)) -def biom_merge(base, type: BIOMMergeOptions): +@click.option( + "--merge-type", type=click.Choice(["syndna", "woltka"], case_sensitive=False) +) +def biom_merge(base, merge_type): """Merges all PacBio biom tables""" - if type == BIOMMergeOptions.SYNDNA: + merge_type = merge_type.lower() + if merge_type == "syndna": ranks = ["syndna"] - elif type == BIOMMergeOptions.WOLTKA: + elif merge_type == "woltka": ranks = ["none", "per-gene", "ko", "ec", "pathway"] else: - raise ValueError(f"Type '{type}' not supported") + raise ValueError(f"Type '{merge_type}' not supported") for rank in ranks: rank = rank + ".biom" diff --git a/qp_pacbio/tests/test_pacbio.py b/qp_pacbio/tests/test_pacbio.py index c5614dc..6a19c24 100644 --- a/qp_pacbio/tests/test_pacbio.py +++ b/qp_pacbio/tests/test_pacbio.py @@ -261,7 +261,7 @@ def test_pacbio_profiling(self): "done | parallel --halt now,fail=1 -j 16\n", "wait\n", "\n", - f"biom_merge_pacbio --base {out_dir} --type woltka\n", + f"biom_merge_pacbio --base {out_dir} --merge-type woltka\n", "\n", ( f'find {out_dir}/coverages/ -iname "*.cov" ' @@ -291,9 +291,11 @@ def test_syndna(self): self.qclient, job_id, out_dir, params, url ) with open(main_fp, "r") as f: - obs_main = f.readlines() + # ignoring comments + obs_main = [line for line in f.readlines() if not line.startswith("# ")] with open(finish_fp, "r") as f: - obs_finish = f.readlines() + # ignoring comments + obs_finish = [line for line in f.readlines() if not line.startswith("# ")] exp_main = [ "#!/bin/bash\n", @@ -301,17 +303,17 @@ def test_syndna(self): "#SBATCH -p qiita\n", "#SBATCH -N 1\n", "#SBATCH -n 16\n", - "#SBATCH --time 36000\n", - "#SBATCH --mem 60G\n", - f"#SBATCH -o {out_dir}/minimap2/logs/%x-%A_%a.out\n", - f"#SBATCH -e {out_dir}/minimap2/logs/%x-%A_%a.err\n", + "#SBATCH --time 14400\n", + "#SBATCH --mem 20G\n", + f"#SBATCH -o {out_dir}/syndna/logs/%x-%A_%a.out\n", + f"#SBATCH -e {out_dir}/syndna/logs/%x-%A_%a.err\n", "#SBATCH --array 1-2%16\n", "\n", "source ~/.bashrc\n", "set -e\n", f"{CONDA_ENVIRONMENT}\n", f"out_folder={out_dir}/syndna\n", - "mkdir -p\n", + "mkdir -p ${out_folder}\n", "cd ${out_folder}\n", "db_folder=/scratch/qp-pacbio/minimap2/syndna/\n", "\n", @@ -326,26 +328,36 @@ def test_syndna(self): "sn_folder=${out_folder}/bioms/${sample_name}\n", "mkdir -p ${sn_folder}\n", "\n", + "txt=${sn_folder}/${sample_name}.txt\n", + "tsv=${txt/.txt/.tsv}\n", "coverm contig --single $filename --reference ${db_folder}/All_synDNA_inserts.fasta --mapper minimap2-hifi \\\n", " --min-read-percent-identity 0.95 --min-read-aligned-percent 0.0 -m mean count --threads 16 \\\n", - " --output-file ${sn_folder}/${sample_name}.txt\n", - "cat ${sn_folder}/${sample_name}_insert_counts.txt | sed 's/Contig/\\#OTU ID/' | \\\n", - " sed 's/ Read Count//' > ${sn_folder}/${sample_name}.tsv\n", - "biom convert -i ${sn_folder}/${sample_name}.txt -o ${sn_folder}/${sample_name}.biom --to-hdf5\n", + " --output-file ${txt}\n", + "\n", + "awk 'BEGIN {FS=OFS=\"\\t\"}; {print $1,$3}' ${txt} | \\\n", + " sed 's/Contig/\#OTU ID/' | sed 's/All_synDNA_inserts.fasta\///' | \\\n", + " sed 's/ Read Count//' | sed \"s/${fn}/${sample_name}/\" > ${tsv}\n", + "\n", + "counts=`tail -n +2 ${tsv} | awk '{sum += $NF} END {print sum}'`\n", + 'if [[ "$counts" == "0" ]]; then\n', + f" echo ${{sample_name}} > {out_dir}/failed_${{SLURM_ARRAY_TASK_ID}}.log\n", + " exit 0\n", + "fi\n", + "\n", + "biom convert -i ${tsv} -o ${sn_folder}/syndna.biom --to-hdf5\n", "\n", - "# removing AllsynDNA_plasmids_FASTA_ReIndexed_FINAL.fasta not coverm\n", "minimap2 -x map-hifi -t 16 -a --MD --eqx -o ${out_folder}/${sample_name}_plasmid.sam ${db_folder}/AllsynDNA_plasmids_FASTA_ReIndexed_FINAL.fasta $filename\n", "samtools view -F 4 -@ 16 ${out_folder}/${sample_name}_plasmid.sam | awk '{print $1}' | sort -u > ${out_folder}/${sample_name}_plasmid_mapped.txt\n", "seqkit grep -v -f ${out_folder}/${sample_name}_plasmid_mapped.txt $filename > ${out_folder}/${sample_name}_no_plasmid.fastq\n", "\n", - "# removing GCF_000184185.1_ASM18418v1_genomic_chroso.fna use coverm\n", - "minimap2 -x map-hifi -t 16 -a --MD --eqx -o ${out_folder}/${sample_name}_GCF_000184185.sam ${db_folder}/GCF_000184185.1_ASM18418v1_genomic_chroso.fna ${out_folder}/${sample_name}_no_plasmid_no_inserts.fastq\n", - "samtools view -bS -@ 8.0 ${out_folder}/${sample_name}_no_plasmid_no_inserts.fastq | samtools sort -@ 8.0 -O bam -o ${out_folder}/${sample_name}_GCF_000184185_sorted.sam\n", - "coverm filter --bam-files ${out_folder}/${sample_name}_GCF_000184185_sorted.sam --min-read-percent-identity 99.9 --min-read-aligned-percent 95 --threads 16 -o ${out_folder}/${sample_name}_GCF_000184185.bam\n", - "samtools view -O SAM -o ${out_folder}/${sample_name}_no_GCF_000184185_sorted.sam ${out_folder}/${sample_name}_no_inserts.bam\n", + "minimap2 -x map-hifi -t 16 -a --MD --eqx -o ${out_folder}/${sample_name}_GCF_000184185.sam ${db_folder}/GCF_000184185.1_ASM18418v1_genomic_chroso.fna ${out_folder}/${sample_name}_no_plasmid.fastq\n", + "samtools view -bS -@ 8.0 ${out_folder}/${sample_name}_no_plasmid.fastq | samtools sort -@ 8.0 -O bam -o ${out_folder}/${sample_name}_GCF_000184185_sorted.bam\n", + "coverm filter --bam-files ${out_folder}/${sample_name}_GCF_000184185_sorted.bam --min-read-percent-identity 99.9 --min-read-aligned-percent 95 --threads 16 -o ${out_folder}/${sample_name}_GCF_000184185.bam\n", + "samtools view -O SAM -o ${out_folder}/${sample_name}_no_GCF_000184185_sorted.sam ${out_folder}/${sample_name}_GCF_000184185.bam\n", "awk '{print $1}' ${out_folder}/${sample_name}_no_GCF_000184185_sorted.sam > ${out_folder}/${sample_name}_GCF_000184185_reads_filtered.txt\n", - "seqkit grep -v -f ${out_folder}/${sample_name}_GCF_000184185_reads_filtered.txt ${out_folder}/${sample_name}_GCF_000184185.fastq | gz > ${out_folder}/filtered/${fn}\n", - "awk 'BEGIN {FS=OFS=\"\\t\"}; {print $1,$3}'", + "seqkit grep -v -f ${out_folder}/${sample_name}_GCF_000184185_reads_filtered.txt ${out_folder}/${sample_name}_no_plasmid.fastq | gzip > ${out_folder}/filtered/${fn}\n", + "\n", + f"touch {out_dir}/completed_${{SLURM_ARRAY_TASK_ID}}.log", ] self.assertEqual(obs_main, exp_main) @@ -355,24 +367,20 @@ def test_syndna(self): "#SBATCH -J me_my-job-id\n", "#SBATCH -p qiita\n", "#SBATCH -N 1\n", - "#SBATCH -n 16\n", - "#SBATCH --time 1-00:00:00\n", - "#SBATCH --mem 120G\n", - f"#SBATCH -o {out_dir}/merge/logs/%x-%A_%a.out\n", - f"#SBATCH -e {out_dir}/merge/logs/%x-%A_%a.err\n", + "#SBATCH -n 1\n", + "#SBATCH --time 14400\n", + "#SBATCH --mem 4G\n", + f"#SBATCH -o {out_dir}/finish/logs/%x-%A_%a.out\n", + f"#SBATCH -e {out_dir}/finish/logs/%x-%A_%a.err\n", "\n", "source ~/.bashrc\n", "set -e\n", f"{CONDA_ENVIRONMENT}\n", f"cd {out_dir}/\n", "\n", - f"biom_merge_pacbio --base {out_dir} --type syndna\n", + f"biom_merge_pacbio --base {out_dir}/syndna --merge-type syndna\n", "\n", - f'# find {out_dir}/coverages/ -iname "*.cov" > {out_dir}/cov_files.txt\n', - f"# micov consolidate --paths {out_dir}/cov_files.txt --lengths ${{len_map}} --output {out_dir}/coverages.tgz\n", "\n", - "# cd alignment\n", - "# tar -cvf ../alignment.tar *.sam.xz\n", "\n", f"finish_qp_pacbio https://test.test.edu/ my-job-id {out_dir}", ]