Skip to content

Commit

Permalink
Fix bug in find_indels_substitutions
Browse files Browse the repository at this point in the history
This bug occurred when there was a deletion at the end of a sequence, and was
thus not properly accounted for.
  • Loading branch information
Colelyman authored and kclem committed Jan 11, 2022
1 parent 7212f87 commit f6ef62c
Showing 1 changed file with 48 additions and 29 deletions.
77 changes: 48 additions & 29 deletions CRISPResso2/CRISPRessoCOREResources.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -78,35 +78,54 @@ def find_indels_substitutions(read_seq_al, ref_seq_al, _include_indx):
start_insertion = idx - 1
current_insertion_size += 1

substitution_n = len(substitution_positions)

#the remainder of positions are with reference to the original reference sequence indexes we calculated above
all_deletion_positions=[]
deletion_positions=[]
deletion_coordinates=[]
deletion_sizes=[]

all_insertion_positions=[]
all_insertion_left_positions=[]
insertion_positions=[]
insertion_coordinates = []
insertion_sizes=[]

include_indx_set = set(_include_indx)
for p in re_find_indels.finditer(read_seq_al):
st,en=p.span()
ref_st = 0
if st-1 > 0:
ref_st = ref_positions[st]
ref_en = idx-1
if en < len(ref_positions):
ref_en = ref_positions[en]
all_deletion_positions.extend(range(ref_st,ref_en))
inc_del_pos = include_indx_set.intersection(range(ref_st,ref_en))
if(len(inc_del_pos)>0):
deletion_positions.extend(range(ref_st,ref_en))
deletion_coordinates.append((ref_st,ref_en))
deletion_sizes.append(en-st)
if read_seq_al[idx_c] == '-' and start_deletion == -1: # this is the first part of a deletion
if idx_c - 1 > 0:
start_deletion = ref_positions[idx_c]
else:
start_deletion = 0
elif read_seq_al[idx_c] != '-' and start_deletion != -1: # this is the end of a deletion
end_deletion = ref_positions[idx_c]
all_deletion_positions.extend(range(start_deletion, end_deletion))
if include_indx_set.intersection(range(start_deletion, end_deletion)):
deletion_positions.extend(range(start_deletion, end_deletion))
deletion_coordinates.append((start_deletion, end_deletion))
deletion_sizes.append(end_deletion - start_deletion)
start_deletion = -1

if start_deletion != -1:
end_deletion = ref_positions[seq_len - 1]
all_deletion_positions.extend(range(start_deletion, end_deletion))
if include_indx_set.intersection(range(start_deletion, end_deletion)):
deletion_positions.extend(range(start_deletion, end_deletion))
deletion_coordinates.append((start_deletion, end_deletion))
deletion_sizes.append(end_deletion - start_deletion)

cdef size_t substitution_n = len(substitution_positions)
cdef size_t deletion_n = sum(deletion_sizes)
cdef size_t insertion_n = sum(insertion_sizes)

return {
'all_insertion_positions': all_insertion_positions,
'all_insertion_left_positions': all_insertion_left_positions,
'insertion_positions': insertion_positions,
'insertion_coordinates': insertion_coordinates,
'insertion_sizes': insertion_sizes,
'insertion_n': insertion_n,

'all_deletion_positions': all_deletion_positions,
'deletion_positions': deletion_positions,
'deletion_coordinates': deletion_coordinates,
'deletion_sizes': deletion_sizes,
'deletion_n': deletion_n,

'all_substitution_positions': all_substitution_positions,
'substitution_positions': substitution_positions,
'all_substitution_values': np.array(all_substitution_values),
'substitution_values': np.array(substitution_values),
'substitution_n': substitution_n,

'ref_positions': ref_positions,
}

deletion_n = np.sum(deletion_sizes)

Expand Down

0 comments on commit f6ef62c

Please sign in to comment.