Skip to content

Commit

Permalink
all ProteinSequence tests pass
Browse files Browse the repository at this point in the history
  • Loading branch information
iskandr committed Oct 20, 2019
1 parent 3ef7894 commit f43d5e6
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 12 deletions.
15 changes: 12 additions & 3 deletions isovar/protein_sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,9 +412,15 @@ def subsequence(self, start_idx, end_idx):
"""
old_length = len(self)

start_idx, end_idx = normalize_base0_range_indices(
start_idx=start_idx,
end_idx=end_idx,
sequence_length=old_length)

amino_acids = self.amino_acids[start_idx:end_idx]

new_length = len(amino_acids)

# if we lose amino acids from the end of the sequence then it
# can't end with a stop codon anymore
ends_with_stop_codon = (
Expand All @@ -428,13 +434,16 @@ def subsequence(self, start_idx, end_idx):
# then the start/end will be clipped to 0. If the mutation is
# to the right of the new subsequence then the start/end will both
# be clipped to the subsequence length
mutation_start_idx = max(0, self.mutation_start_idx - start_idx)
mutation_end_idx = min(new_length, self.mutation_end_idx - start_idx)
mutation_start_idx = \
min(new_length, max(0, self.mutation_start_idx - start_idx))
mutation_end_idx = \
min(new_length, max(0, self.mutation_end_idx - start_idx))

# number of mutant amino acids in the new subsequence
num_mutant_aa = mutation_end_idx - mutation_start_idx
# a deletion is considered a mutant sequence if the amino acids to
# the left and right of it are both present
deletion = (num_mutant_aa == 0) and (0 < start_idx < new_length)
deletion = (num_mutant_aa == 0) and (0 < mutation_start_idx < new_length)
contains_mutation = self.contains_mutation and (
(num_mutant_aa > 0) or deletion
)
Expand Down
83 changes: 74 additions & 9 deletions test/test_protein_sequences.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
from isovar.protein_sequence import ProteinSequence
from isovar.protein_sequence_helpers import sort_protein_sequences

def test_protein_sequence_num_mutant_amino_acids():
# testing that we got the correct number of amino acids (1)
def test_protein_sequence_substitution():
# testing that we got the correct properties in the case
# where "SIINFEKL" was mutated into "SIINFEQL"
p = ProteinSequence(
amino_acids="SIINFEQL",
Expand All @@ -27,13 +27,19 @@ def test_protein_sequence_num_mutant_amino_acids():
ends_with_stop_codon=True,
frameshift=False,
translations=[])
eq_(p.amino_acids, "SIINFEQL")
eq_(len(p), 8)
eq_(p.contains_mutation, True)
eq_(p.contains_deletion, False)
eq_(p.frameshift, False)
eq_(p.ends_with_stop_codon, True)
eq_(p.num_mutant_amino_acids, 1)
eq_(p.num_mutant_amino_acids, len(p.mutant_amino_acids))
eq_(p.mutant_amino_acids, "Q")


def test_protein_sequence_num_mutant_amino_acids_deletion():
# testing that we got the correct number of amino acids (0)
# where "SIINFEKL" was mutated into "SIINFEL"
def test_protein_sequence_deletion():
# testing that we got the correct properties in the case
# where "SIINFEKL" was mutated into "SIINFEL" by deletion of one amino acid
p = ProteinSequence(
amino_acids="SIINFEL",
contains_mutation=True,
Expand All @@ -42,10 +48,17 @@ def test_protein_sequence_num_mutant_amino_acids_deletion():
ends_with_stop_codon=True,
frameshift=False,
translations=[])
eq_(p.amino_acids, "SIINFEL")
eq_(len(p), 7)
eq_(p.num_mutant_amino_acids, 0)
eq_(p.num_mutant_amino_acids, len(p.mutant_amino_acids))
eq_(p.mutant_amino_acids, "")
eq_(p.contains_mutation, True)
eq_(p.contains_deletion, True)
eq_(p.frameshift, False)
eq_(p.ends_with_stop_codon, True)

def test_protein_subsequence_overlaps_mutation():

def test_protein_subsequence_overlaps_substitution():
# testing that we got the correct properties for case where "SIINFEKL" was
# mutated into "SIINFEQL" and then sliced to keep just "FEQL"
p = ProteinSequence(
Expand All @@ -60,10 +73,13 @@ def test_protein_subsequence_overlaps_mutation():
eq_(p2.amino_acids, "FEQL")
eq_(p2.contains_mutation, True)
eq_(p2.contains_deletion, False)
eq_(p2.frameshift, False)
eq_(p2.ends_with_stop_codon, True)
eq_(p2.num_mutant_amino_acids, 1)
eq_(p2.mutant_amino_acids, "Q")
eq_(len(p2), 4)

def test_protein_subsequence_does_not_overlap_mutation():
def test_protein_subsequence_does_not_overlap_substitution():
# testing that we got the correct properties for case where "SIINFEKL" was
# mutated into "SIINFEQL" and then sliced to keep just "SIIN"
p = ProteinSequence(
Expand All @@ -78,8 +94,57 @@ def test_protein_subsequence_does_not_overlap_mutation():
eq_(p2.amino_acids, "SIIN")
eq_(p2.contains_mutation, False)
eq_(p2.contains_deletion, False)
eq_(p2.frameshift, False)
eq_(p2.ends_with_stop_codon, False)
eq_(p2.num_mutant_amino_acids, 0)
eq_(p2.mutant_amino_acids, "")
eq_(len(p2), 4)


def test_protein_subsequence_overlaps_deletion():
    # Scenario: "SIINFEKL" lost its "K" and became "SIINFEL". The deletion
    # is recorded as an empty mutation interval located right after
    # "SIINFE". Slicing off the leading "SIIN" leaves "FEL", which still has
    # residues on both sides of the deletion site, so the subsequence is
    # expected to keep reporting the deletion.
    deletion_site = len("SIINFE")
    full_sequence = ProteinSequence(
        amino_acids="SIINFEL",
        contains_mutation=True,
        mutation_start_idx=deletion_site,
        mutation_end_idx=deletion_site,
        ends_with_stop_codon=True,
        frameshift=False,
        translations=[])
    clipped = full_sequence.subsequence(len("SIIN"), None)

    eq_(len(clipped), 3)

    # (attribute name, expected value) pairs, checked in this order
    expected_properties = [
        ("amino_acids", "FEL"),
        ("mutant_amino_acids", ""),
        ("contains_deletion", True),
        ("contains_mutation", True),
        ("frameshift", False),
        ("ends_with_stop_codon", True),
        ("num_mutant_amino_acids", 0),
    ]
    for attribute_name, expected_value in expected_properties:
        eq_(getattr(clipped, attribute_name), expected_value)


def test_protein_subsequence_does_not_overlap_deletion():
    # Scenario: "SIINFEKL" lost its "K" and became "SIINFEL". The deletion
    # is recorded as an empty mutation interval located right after
    # "SIINFE". Keeping only the prefix "SIINFE" drops the residue to the
    # right of the deletion site, so the subsequence is expected to report
    # neither a deletion nor a mutation, and it can no longer end with a
    # stop codon because the tail was trimmed.
    deletion_site = len("SIINFE")
    full_sequence = ProteinSequence(
        amino_acids="SIINFEL",
        contains_mutation=True,
        mutation_start_idx=deletion_site,
        mutation_end_idx=deletion_site,
        ends_with_stop_codon=True,
        frameshift=False,
        translations=[])
    clipped = full_sequence.subsequence(None, len("SIINFE"))

    eq_(len(clipped), 6)

    # (attribute name, expected value) pairs, checked in this order
    expected_properties = [
        ("amino_acids", "SIINFE"),
        ("contains_deletion", False),
        ("contains_mutation", False),
        ("frameshift", False),
        ("ends_with_stop_codon", False),
        ("mutant_amino_acids", ""),
        ("num_mutant_amino_acids", 0),
    ]
    for attribute_name, expected_value in expected_properties:
        eq_(getattr(clipped, attribute_name), expected_value)

def test_sort_protein_sequences():
protseq_most_reads = make_dummy_protein_sequence(
Expand Down

0 comments on commit f43d5e6

Please sign in to comment.