Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding aa_ref argument to StopLoss for variants which delete codons before stop #203

Merged
merged 6 commits into from
Dec 5, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
36 changes: 36 additions & 0 deletions test/test_effect_annotation_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,3 +242,39 @@ def test_issue193_SNV_stop_gain_in_ZNF45_not_deletion():
effect_class=PrematureStop,
modifies_coding_sequence=True,
modifies_protein_sequence=True)

def test_issue202_stoploss_deletes_two_amino_acids():
    """
    Regression test for https://github.com/hammerlab/varcode/issues/202

    Variant: chr1 100484693 . TTCATCTGA CCC
    Transcript: ENSMUST00000086738

    According to the issue report, the end of that transcript looks like:

        TTC ATC TGA ACT
         F   I   *   T

    and the mutation causes this location plus downstream sequence to
    become:

        PTIVWSSGPLF(...)

    The annotation reported in the issue was:

        StopLoss
          * aa_mutation_start_offset = 1292
          * aa_ref="*"
          * aa_alt="PTIVWSS(...)"

    The issue asks for the two residues deleted ahead of the stop codon
    to be part of the annotation, i.e. a start offset of 1290 and a
    reference of "FI*". This test checks aa_ref without the trailing
    stop codon, so the expected reference residues are "FI".
    """
    stop_loss_variant = Variant(
        'chr1', 100484693, 'TTCATCTGA', 'CCC', 'GRCm38')
    # Collect the expected annotation in one place, then hand it to the
    # shared assertion helper as keyword arguments.
    expected_annotation = dict(
        transcript_id='ENSMUST00000086738',
        effect_class=StopLoss,
        modifies_coding_sequence=True,
        modifies_protein_sequence=True,
        aa_ref='FI',
        aa_alt='PTIVWSSGPLFCRGFHLFFFSFF')
    expect_effect(stop_loss_variant, **expected_annotation)
52 changes: 35 additions & 17 deletions varcode/effects/effect_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -577,12 +577,14 @@ def __init__(
Insertion of premature stop codon, possibly preceded by a substitution
of `aa_ref` amino acids for `aa_alt` alternative residues.
"""
assert "*" not in aa_ref, \
("Unexpected aa_ref = '%s', should only include amino acids "
"before the new stop codon.") % aa_ref
assert "*" not in aa_alt, \
("Unexpected aa_ref = '%s', should only include amino acids "
"before the new stop codon.") % aa_alt
if "*" in aa_ref:
raise ValueError(
("Unexpected aa_ref = '%s', should only include amino acids "
"before the new stop codon.") % aa_ref)
if "*" in aa_alt:
raise ValueError(
("Unexpected aa_ref = '%s', should only include amino acids "
"before the new stop codon.") % aa_alt)
KnownAminoAcidChange.__init__(
self,
variant,
Expand All @@ -592,12 +594,13 @@ def __init__(
aa_alt=aa_alt)
self.stop_codon_offset = aa_mutation_start_offset + len(aa_alt)

assert self.stop_codon_offset < len(transcript.protein_sequence), \
("Premature stop codon cannot be at position %d"
" since the original protein of %s has length %d") % (
self.stop_codon_offset,
transcript,
len(transcript.protein_sequence))
if self.stop_codon_offset >= len(transcript.protein_sequence):
raise ValueError(
("Premature stop codon cannot be at position %d"
" since the original protein of %s has length %d") % (
self.stop_codon_offset,
transcript,
len(transcript.protein_sequence)))

@property
def short_description(self):
Expand All @@ -617,23 +620,38 @@ def __init__(
self,
variant,
transcript,
extended_protein_sequence):
aa_mutation_start_offset = len(transcript.protein_sequence)
aa_ref,
aa_alt):
# StopLoss assumes that we deleted some codons ending with a
# stop codon
if "*" in aa_ref:
raise ValueError(
"StopLoss aa_ref '%s' should not contain '*'" % (
aa_ref,))
if len(aa_alt) == 0:
raise ValueError(
"If no amino acids added by StopLoss then it should be Silent")
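# aa_ref holds only the residues removed ahead of the stop codon (it may
# not contain '*'), so the change begins len(aa_ref) positions before the
# end of the transcript's protein sequence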
n_ref_amino_acids = len(aa_ref)
protein_length = len(transcript.protein_sequence)
aa_mutation_start_offset = protein_length - n_ref_amino_acids
KnownAminoAcidChange.__init__(
self,
variant,
transcript,
aa_mutation_start_offset=aa_mutation_start_offset,
aa_ref="*",
aa_alt=extended_protein_sequence)
aa_alt=aa_alt,
aa_ref=aa_ref)

@property
def extended_protein_sequence(self):
    """
    Deprecated alias kept for older callers: returns the same value
    as the `aa_alt` attribute.
    """
    alt_residues = self.aa_alt
    return alt_residues

@property
def short_description(self):
return "p.*%d%s (stop-loss)" % (
return "p.%s*%d%s (stop-loss)" % (
self.aa_ref,
self.aa_mutation_start_offset + 1,
self.extended_protein_sequence)

Expand Down
13 changes: 9 additions & 4 deletions varcode/effects/effect_prediction_coding_frameshift.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,12 +79,16 @@ def create_frameshift_effect(
alt=mutant_protein_suffix)
n_unchanged_amino_acids = len(unchanged_amino_acids)
offset_to_first_different_amino_acid = mutated_codon_index + n_unchanged_amino_acids
# miraculously, this frameshift left the protein unchanged,
# most likely by turning one stop codon into another stop codon
if n_unchanged_amino_acids == 0:
aa_ref = ""
else:
aa_ref = original_protein_sequence[-n_unchanged_amino_acids:]
if offset_to_first_different_amino_acid >= original_protein_length:
# frameshift is either extending the protein or leaving it unchanged
if len(mutant_protein_suffix) == 0:
# miraculously, this frameshift left the protein unchanged,
# most likely by turning one stop codon into another stop codon
aa_ref = original_protein_sequence[-n_unchanged_amino_acids:]

return Silent(
variant=variant,
transcript=transcript,
Expand All @@ -97,7 +101,8 @@ def create_frameshift_effect(
return StopLoss(
variant=variant,
transcript=transcript,
extended_protein_sequence=mutant_protein_suffix)
aa_ref=aa_ref,
aa_alt=mutant_protein_suffix)
# original amino acid at the mutated codon before the frameshift occurred
aa_ref = original_protein_sequence[offset_to_first_different_amino_acid]

Expand Down
3 changes: 2 additions & 1 deletion varcode/effects/effect_prediction_coding_in_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,8 @@ def predict_in_frame_coding_effect(
return StopLoss(
variant,
transcript,
extended_protein_sequence=aa_alt)
aa_ref=aa_ref,
aa_alt=aa_alt)
elif n_aa_alt == 0:
return Deletion(
variant,
Expand Down