Skip to content

Commit

Permalink
changed filtering to include silent effects' transcripts
Browse files Browse the repository at this point in the history
  • Loading branch information
iskandr committed Jun 16, 2019
1 parent 1bc475b commit 6a97df2
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 28 deletions.
67 changes: 44 additions & 23 deletions isovar/effect_prediction.py
Expand Up @@ -14,18 +14,24 @@

from __future__ import print_function, division, absolute_import

from varcode import EffectCollection
from varcode.effects.effect_classes import ExonicSpliceSite, CodingMutation

from .logging import get_logger

logger = get_logger(__name__)


def effect_in_coding_sequence(effect):
if type(effect) is ExonicSpliceSite:
return effect_in_coding_sequence(effect.alternate_effect)
else:
return isinstance(effect, CodingMutation)

def predicted_effects_for_variant(
variant,
transcript_id_whitelist=None,
only_coding_transcripts=False,
drop_silent_and_noncoding=False,
only_coding_effects=False,
require_mutant_protein_sequence=False):
"""
For a given variant, return its set of predicted effects. Optionally
Expand All @@ -42,9 +48,9 @@ def predicted_effects_for_variant(
only_coding_transcripts : bool
If True, then only return effects on protein coding transcripts
drop_silent_and_noncoding : bool
If True, drop effects which aren't predicted to change the protein
sequence.
only_coding_effects : bool
If True, drop effects which aren't in the coding sequence, even
if they are silent.
require_mutant_protein_sequence : bool
Drop effects for which we can't predict what the new protein sequence
Expand All @@ -53,18 +59,22 @@ def predicted_effects_for_variant(
Returns a varcode.EffectCollection object
"""
effects = variant.effects(raise_on_error=False)
n_total_effects = len(effects)
logger.info("Predicted total %d effects for variant %s" % (
n_total_effects,
variant))

# effects filtered by allowed transcripts
effects_filtered_by_transcript = []

for effect in effects:
has_transcript = hasattr(effect, 'transcript') and effect.transcript is not None
transcript = getattr(effect, 'transcript', None)

if (only_coding_transcripts and not (
has_transcript and
transcript.complete and
transcript.is_protein_coding)):
transcript_is_coding = (
has_transcript and
transcript.complete and
transcript.is_protein_coding)
if only_coding_transcripts and not transcript_is_coding:
continue
elif transcript_id_whitelist and not has_transcript:
continue
Expand All @@ -77,21 +87,23 @@ def predicted_effects_for_variant(
effects_filtered_by_transcript.append(effect)

effects = effects.clone_with_new_elements(effects_filtered_by_transcript)

n_total_effects = len(effects)
logger.info("Predicted total %d effects for variant %s" % (
logger.info(
"Keeping %d/%d effects which have associated coding transcripts for %s: %s",
len(effects),
n_total_effects,
variant))
variant,
effects)

if drop_silent_and_noncoding:
nonsynonymous_coding_effects = effects.drop_silent_and_noncoding()
if only_coding_effects:
effects = effects.clone_with_new_elements([
e for e in effects if effect_in_coding_sequence(e)
])
logger.info(
"Keeping %d/%d effects which affect protein coding sequence for %s: %s",
len(nonsynonymous_coding_effects),
"Keeping %d/%d effects in coding sequence for %s: %s",
len(effects),
n_total_effects,
variant,
nonsynonymous_coding_effects)
effects = nonsynonymous_coding_effects
effects)

if require_mutant_protein_sequence:
effects_with_mut_sequence = [
Expand Down Expand Up @@ -134,17 +146,26 @@ def top_varcode_effect(variant, transcript_id_whitelist=None):
def reference_coding_transcripts_for_variant(
variant,
transcript_id_whitelist=None,
drop_silent_and_noncoding=False):
only_coding_effects=True):
"""
For a given variant, find all the transcripts which overlap the
variant and for which it has a predictable effect on the amino acid
sequence of the protein.
Parameters
----------
variant : varcode.Variant
transcript_id_whitelist : set or None
only_coding_effects : bool
Exclude non-coding effects such as intronic
"""
predicted_effects = predicted_effects_for_variant(
variant=variant,
transcript_id_whitelist=transcript_id_whitelist,
only_coding_transcripts=True,
drop_silent_and_noncoding=drop_silent_and_noncoding,
require_mutant_protein_sequence=True)
only_coding_effects=True,
require_mutant_protein_sequence=False)
return [effect.transcript for effect in predicted_effects]

2 changes: 0 additions & 2 deletions isovar/reference_context.py
Expand Up @@ -13,8 +13,6 @@
# limitations under the License.

from __future__ import print_function, division, absolute_import
from collections import OrderedDict, defaultdict


from .reference_coding_sequence_key import ReferenceCodingSequenceKey
from .logging import get_logger
Expand Down
2 changes: 1 addition & 1 deletion isovar/reference_context_helpers.py
Expand Up @@ -14,7 +14,7 @@

from __future__ import print_function, division, absolute_import

from collections import OrderedDict, defaultdict
from collections import defaultdict


from .effect_prediction import reference_coding_transcripts_for_variant
Expand Down
4 changes: 2 additions & 2 deletions test/test_effect_prediction.py
Expand Up @@ -20,12 +20,12 @@ def test_reference_coding_transcripts_outside_of_gene():
transcripts_without_drop = \
reference_coding_transcripts_for_variant(
intergenic_variant,
drop_silent_and_noncoding=False)
only_coding_effects=False)
eq_(len(transcripts_without_drop), 0)

transcripts_with_drop = \
reference_coding_transcripts_for_variant(
intergenic_variant,
drop_silent_and_noncoding=True)
only_coding_effects=True)
eq_(len(transcripts_with_drop), 0)

0 comments on commit 6a97df2

Please sign in to comment.