From 2f84dd02787abffa6d39efbc50c82c92d1c87528 Mon Sep 17 00:00:00 2001 From: kclem Date: Fri, 10 Dec 2021 16:39:41 -0500 Subject: [PATCH] Fastq_output report inserted bases When the `--fastq_output` parameter is provided, the inserted bases will be written to the output fastq file. Previously, a string like "DEL= INS=78(1) SUB= " would indicate a 1bp insertion at site 78. This update outputs strings like "DEL= INS=78(1+G) SUB= " with a plus character followed by the inserted bases. --- CRISPResso2/CRISPRessoCORE.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/CRISPResso2/CRISPRessoCORE.py b/CRISPResso2/CRISPRessoCORE.py index c369ab14..fe1d7438 100644 --- a/CRISPResso2/CRISPRessoCORE.py +++ b/CRISPResso2/CRISPRessoCORE.py @@ -690,7 +690,12 @@ def process_fastq_write_out(fastq_input, fastq_output, variantCache, ref_names, payload=new_variant['variant_'+best_match_name] del_inds.append([str(x[0][0])+"("+str(x[1])+")" for x in zip(payload['deletion_coordinates'], payload['deletion_sizes'])]) - ins_inds.append([str(x[0][0])+"("+str(x[1])+")" for x in zip(payload['insertion_coordinates'], payload['insertion_sizes'])]) + + ins_vals = [] + for ins_coord,ins_size in zip(payload['insertion_coordinates'],payload['insertion_sizes']): + ins_start = payload['ref_positions'].index(ins_coord[0]) + ins_vals.append(payload['aln_seq'][ins_start:ins_start+ins_size]) + ins_inds.append([str(x[0][0])+"("+str(x[1])+"+"+x[2]+")" for x in zip(payload['insertion_coordinates'], payload['insertion_sizes'], ins_vals)]) sub_inds.append(payload['substitution_positions']) edit_strings.append('D'+str(int(payload['deletion_n']))+';I'+str(int(payload['insertion_n']))+';S'+str(int(payload['substitution_n'])))