Skip to content

Commit

Permalink
Merge pull request #203 from hammerlab/stoploss-with-extra-codons
Browse files Browse the repository at this point in the history
Adding aa_ref argument to StopLoss for variants which delete codons before stop
  • Loading branch information
iskandr committed Dec 5, 2016
2 parents 23f7dd5 + 0af2084 commit a811b1e
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 22 deletions.
36 changes: 36 additions & 0 deletions test/test_effect_annotation_errors.py
Expand Up @@ -242,3 +242,39 @@ def test_issue193_SNV_stop_gain_in_ZNF45_not_deletion():
effect_class=PrematureStop,
modifies_coding_sequence=True,
modifies_protein_sequence=True)

def test_issue202_stoploss_deletes_two_amino_acids():
    """
    Regression test for https://github.com/hammerlab/varcode/issues/202

    Variant: chr1 100484693 . TTCATCTGA CCC
    Transcript: ENSMUST00000086738

    The end of that transcript looks like:
        TTC ATC TGA ACT
         F   I   *   T
    and this mutation will cause the location plus downstream to become
        PTIVWSSGPLF(...)

    The annotation that varcode gave when the issue was reported:
        StopLoss
        * aa_mutation_start_offset = 1292
        * aa_ref="*"
        * aa_alt="PTIVWSS(...)"

    The annotation requested in the issue:
        StopLoss
        * aa_mutation_start_offset = 1290
        * aa_ref="FI*"
        * aa_alt="PTIVWSS(...)"

    Note that the expectation checked below is aa_ref='FI', i.e. the two
    deleted residues without the trailing '*' shown in the issue text.
    """
    variant = Variant('chr1', 100484693, 'TTCATCTGA', 'CCC', 'GRCm38')
    # Collect the expected annotation in one place so the assertion call
    # below reads as "this variant should produce exactly this effect".
    expected_annotation = dict(
        transcript_id='ENSMUST00000086738',
        effect_class=StopLoss,
        modifies_coding_sequence=True,
        modifies_protein_sequence=True,
        aa_ref='FI',
        aa_alt='PTIVWSSGPLFCRGFHLFFFSFF',
    )
    expect_effect(variant, **expected_annotation)
52 changes: 35 additions & 17 deletions varcode/effects/effect_classes.py
Expand Up @@ -577,12 +577,14 @@ def __init__(
Insertion of premature stop codon, possibly preceded by a substitution
of `aa_ref` amino acids for `aa_alt` alternative residues.
"""
assert "*" not in aa_ref, \
("Unexpected aa_ref = '%s', should only include amino acids "
"before the new stop codon.") % aa_ref
assert "*" not in aa_alt, \
("Unexpected aa_ref = '%s', should only include amino acids "
"before the new stop codon.") % aa_alt
# Neither the reference nor the alternate residues preceding the new stop
# codon may themselves contain a stop ("*"). Each message names the
# argument that actually failed the check, so the error is not misleading.
if "*" in aa_ref:
    raise ValueError(
        ("Unexpected aa_ref = '%s', should only include amino acids "
         "before the new stop codon.") % aa_ref)
if "*" in aa_alt:
    raise ValueError(
        ("Unexpected aa_alt = '%s', should only include amino acids "
         "before the new stop codon.") % aa_alt)
KnownAminoAcidChange.__init__(
self,
variant,
Expand All @@ -592,12 +594,13 @@ def __init__(
aa_alt=aa_alt)
self.stop_codon_offset = aa_mutation_start_offset + len(aa_alt)

assert self.stop_codon_offset < len(transcript.protein_sequence), \
("Premature stop codon cannot be at position %d"
" since the original protein of %s has length %d") % (
self.stop_codon_offset,
transcript,
len(transcript.protein_sequence))
if self.stop_codon_offset >= len(transcript.protein_sequence):
raise ValueError(
("Premature stop codon cannot be at position %d"
" since the original protein of %s has length %d") % (
self.stop_codon_offset,
transcript,
len(transcript.protein_sequence)))

@property
def short_description(self):
Expand All @@ -617,23 +620,38 @@ def __init__(
self,
variant,
transcript,
extended_protein_sequence):
aa_mutation_start_offset = len(transcript.protein_sequence)
aa_ref,
aa_alt):
# StopLoss assumes that we deleted some codons ending with a
# stop codon
if "*" in aa_ref:
raise ValueError(
"StopLoss aa_ref '%s' should not contain '*'" % (
aa_ref,))
if len(aa_alt) == 0:
raise ValueError(
"If no amino acids added by StopLoss then it should be Silent")
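# aa_ref holds only the amino acids deleted before the stop codon (no "*"),
# so the mutation starts len(aa_ref) residues before the end of the protein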
n_ref_amino_acids = len(aa_ref)
protein_length = len(transcript.protein_sequence)
aa_mutation_start_offset = protein_length - n_ref_amino_acids
KnownAminoAcidChange.__init__(
self,
variant,
transcript,
aa_mutation_start_offset=aa_mutation_start_offset,
aa_ref="*",
aa_alt=extended_protein_sequence)
aa_alt=aa_alt,
aa_ref=aa_ref)

@property
def extended_protein_sequence(self):
"""Deprecated name for aa_alt"""
return self.aa_alt

@property
def short_description(self):
return "p.*%d%s (stop-loss)" % (
return "p.%s*%d%s (stop-loss)" % (
self.aa_ref,
self.aa_mutation_start_offset + 1,
self.extended_protein_sequence)

Expand Down
13 changes: 9 additions & 4 deletions varcode/effects/effect_prediction_coding_frameshift.py
Expand Up @@ -79,12 +79,16 @@ def create_frameshift_effect(
alt=mutant_protein_suffix)
n_unchanged_amino_acids = len(unchanged_amino_acids)
offset_to_first_different_amino_acid = mutated_codon_index + n_unchanged_amino_acids
# miraculously, this frameshift left the protein unchanged,
# most likely by turning one stop codon into another stop codon
if n_unchanged_amino_acids == 0:
aa_ref = ""
else:
aa_ref = original_protein_sequence[-n_unchanged_amino_acids:]
if offset_to_first_different_amino_acid >= original_protein_length:
# frameshift is either extending the protein or leaving it unchanged
if len(mutant_protein_suffix) == 0:
# miraculously, this frameshift left the protein unchanged,
# most likely by turning one stop codon into another stop codon
aa_ref = original_protein_sequence[-n_unchanged_amino_acids:]

return Silent(
variant=variant,
transcript=transcript,
Expand All @@ -97,7 +101,8 @@ def create_frameshift_effect(
return StopLoss(
variant=variant,
transcript=transcript,
extended_protein_sequence=mutant_protein_suffix)
aa_ref=aa_ref,
aa_alt=mutant_protein_suffix)
# original amino acid at the mutated codon before the frameshift occurred
aa_ref = original_protein_sequence[offset_to_first_different_amino_acid]

Expand Down
3 changes: 2 additions & 1 deletion varcode/effects/effect_prediction_coding_in_frame.py
Expand Up @@ -233,7 +233,8 @@ def predict_in_frame_coding_effect(
return StopLoss(
variant,
transcript,
extended_protein_sequence=aa_alt)
aa_ref=aa_ref,
aa_alt=aa_alt)
elif n_aa_alt == 0:
return Deletion(
variant,
Expand Down

0 comments on commit a811b1e

Please sign in to comment.