Skip to content

Commit

Permalink
Variant at CDS end, test cases added for HGVS notation
Browse files Browse the repository at this point in the history
  • Loading branch information
pcingola committed Mar 27, 2018
1 parent f8d12bb commit 447f965
Show file tree
Hide file tree
Showing 12 changed files with 141 additions and 16 deletions.
5 changes: 4 additions & 1 deletion src/main/java/org/snpeff/codons/CodonTable.java
Expand Up @@ -13,6 +13,9 @@
*/
public class CodonTable {

public static final String TERMINATION_CODON = "Ter";
public static final String TERMINATION_CODON_1 = "*";

private static HashMap<String, String> aa3letter;

/**
Expand Down Expand Up @@ -108,7 +111,7 @@ public String aa(String codons, boolean fullProteinSequence) {
}

public String aaThreeLetterCode(char aa) {
if (aa == '*') return "Ter"; // Termination codon. Used to be "*" (see reference http://www.hgvs.org/mutnomen/standards.html#aalist)
if (aa == '*') return TERMINATION_CODON; // Termination codon. Used to be "*" (see reference http://www.hgvs.org/mutnomen/standards.html#aalist)
String aa3 = aa3letter.get(Character.toString(aa).toUpperCase());
if (aa3 == null) return "???";
return aa3;
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/snpeff/interval/Variant.java
Expand Up @@ -411,7 +411,7 @@ public int lengthChange() {
* Return the change (always compared to 'referenceStrand')
*/
public String netChange(boolean reverseStrand) {
    // Deletions have an empty 'alt', so their net change is the deleted 'ref' sequence;
    // every other variant reports its 'alt'
    String change = isDel() ? ref : alt;

    // Need reverse-WC? Only when the caller asks for the reverse strand
    return reverseStrand ? GprSeq.reverseWc(change) : change;
}

Expand Down
Expand Up @@ -173,7 +173,7 @@ public void codonChange() {

if (cdsBaseInExon < 0) cdsBaseInExon = 0;

// Get codon number and index within codon (where seqChage is pointing)
// Get codon number and index within codon
if (codonStartNum < 0) {
codonStartNum = (firstCdsBaseInExon + cdsBaseInExon) / CODON_SIZE;
codonStartIndex = (firstCdsBaseInExon + cdsBaseInExon) % CODON_SIZE;
Expand Down Expand Up @@ -242,6 +242,13 @@ protected String codonsRef(int numCodons) {
return codon;
}

/**
 * Create a variant effect for a marker using the codon state currently held
 * by this object (reference/alternative codons, codon number and codon index).
 * The impact is taken from the effect type itself.
 *
 * @param marker Genomic marker affected by this variant (e.g. exon, transcript, etc)
 * @param effectType Effect type; also provides the impact via 'effectImpact()'
 * @param allowReplace Can another variant effect replace this one?
 * @return The VariantEffect returned by the full 'effect(...)' overload
 */
protected VariantEffect effect(Marker marker, EffectType effectType, boolean allowReplace) {
    return effect( //
            marker, effectType, effectType.effectImpact() // What is affected, and how severely
            , codonsRef, codonsAlt // Codons before / after the change
            , codonStartNum, codonStartIndex // Where the change starts
            , allowReplace //
    );
}
Expand Down
Expand Up @@ -17,6 +17,19 @@ public CodonChangeIns(Variant seqChange, Transcript transcript, VariantEffects c
returnNow = true; // An insertion can only affect one exon
}

@Override
public void codonChange() {
    int insertionPos = variant.getEnd();

    // An insertion sitting exactly on either CDS boundary is a special case
    boolean atCdsEdge = (insertionPos == transcript.getCdsStart()) //
            || (insertionPos == transcript.getCdsEnd());

    if (atCdsEdge) {
        codonChangeCdsEdge();
    } else {
        // Anywhere else: regular codon change processing
        super.codonChange();
    }
}

/**
* Analyze insertions in this transcript.
* Add changeEffect to 'changeEffect'
Expand Down Expand Up @@ -74,6 +87,58 @@ protected boolean codonChange(Exon exon) {
return true;
}

/**
 * Handle an insertion located exactly on one edge of the CDS: right before
 * the CDS' left side or right after the CDS' right side.
 *
 * E.g.:
 *     [M P D E E M D D P N P A ...
 *     ^ Insertion here, just before the start codon
 *
 *     ... P D E E M D D P N P *]
 *                              ^ Insertion here, right after the stop codon
 *
 * In either case the insertion has no coding effect, so a 'before CDS start'
 * or 'after CDS end' effect is added on the exon found at that position.
 *
 * @throws RuntimeException if the insertion is on neither CDS edge
 */
protected void codonChangeCdsEdge() {
    // CDS boundaries in genomic (left / right) order, regardless of strand
    int start = transcript.getCdsStart();
    int end = transcript.getCdsEnd();
    int leftEdge = Math.min(start, end);
    int rightEdge = Math.max(start, end);

    int insPos = variant.getStart(); // Insertions have coordinates 'start = end'
    Exon exon = transcript.findExon(insPos);

    // Guard: the caller is expected to invoke this only for CDS edge positions
    boolean onLeftEdge = (insPos == leftEdge);
    if (!onLeftEdge && insPos != rightEdge) throw new RuntimeException("This should never happen!");

    // Plus strand: left edge = CDS start, right edge = CDS end.
    // Minus strand: the other way around.
    boolean atCdsStart = (onLeftEdge == transcript.isStrandPlus());

    if (atCdsStart) {
        codonStartNum = 0;
        effect(exon, EffectType.FRAME_SHIFT_BEFORE_CDS_START, false);
    } else {
        codonStartNum = transcript.protein().length();
        effect(exon, EffectType.FRAME_SHIFT_AFTER_CDS_END, false);
    }
}

/**
* Get new (modified) codons
*/
Expand All @@ -94,5 +159,4 @@ protected String codonsAlt() {

return codonsNew;
}

}
25 changes: 18 additions & 7 deletions src/main/java/org/snpeff/snpEffect/EffectType.java
Expand Up @@ -76,6 +76,8 @@ public enum EffectType {

// Modifiers
// Order: Highest impact first
, FRAME_SHIFT_BEFORE_CDS_START(EffectImpact.MODIFIER) //
, FRAME_SHIFT_AFTER_CDS_END(EffectImpact.MODIFIER) //
, UTR_5_PRIME(EffectImpact.MODIFIER) //
, UTR_3_PRIME(EffectImpact.MODIFIER) //
, REGULATION(EffectImpact.MODIFIER) //
Expand All @@ -100,14 +102,10 @@ public enum EffectType {
, NONE(EffectImpact.MODIFIER) //
;

private final EffectImpact effectImpact;

/**
 * Create an effect type, storing the impact given for it
 * @param effectImpact Impact assigned to this effect type
 */
private EffectType(EffectImpact effectImpact) {
this.effectImpact = effectImpact;
}

static Map<String, EffectType> so2efftype = new HashMap<>();

private final EffectImpact effectImpact;

/**
* Parse a string to an EffectType
*/
Expand Down Expand Up @@ -150,6 +148,7 @@ static void so2efftype(EffFormatVersion formatVersion, Variant variant) {
oldSo2efftype.put("inframe_insertion", EffectType.CODON_INSERTION);
oldSo2efftype.put("inframe_deletion", EffectType.CODON_DELETION);
oldSo2efftype.put("transcript", EffectType.TRANSCRIPT);
oldSo2efftype.put("non_canonical_start_codon", EffectType.SYNONYMOUS_START);

// Add terms if not already in the map
for (String so : oldSo2efftype.keySet()) {
Expand All @@ -158,6 +157,10 @@ static void so2efftype(EffFormatVersion formatVersion, Variant variant) {

}

/**
 * Create an effect type, storing the impact given for it
 * @param effectImpact Impact assigned to this effect type
 */
private EffectType(EffectImpact effectImpact) {
this.effectImpact = effectImpact;
}

/**
* Return effect impact
*/
Expand Down Expand Up @@ -234,6 +237,8 @@ public EffectType getGeneRegion() {
case SYNONYMOUS_CODING:
case SYNONYMOUS_START:
case FRAME_SHIFT:
case FRAME_SHIFT_AFTER_CDS_END:
case FRAME_SHIFT_BEFORE_CDS_START:
case CODON_CHANGE:
case CODON_INSERTION:
case CODON_CHANGE_PLUS_CODON_INSERTION:
Expand Down Expand Up @@ -353,6 +358,12 @@ public String toSequenceOntology(EffFormatVersion formatVersion, Variant variant
case FRAME_SHIFT:
return "frameshift_variant";

case FRAME_SHIFT_BEFORE_CDS_START:
return "start_retained_variant";

case FRAME_SHIFT_AFTER_CDS_END:
return "stop_retained_variant";

case GENE:
return "gene_variant";

Expand Down Expand Up @@ -459,7 +470,7 @@ public String toSequenceOntology(EffFormatVersion formatVersion, Variant variant
return "stop_retained_variant";

case SYNONYMOUS_START:
return "initiator_codon_variant" + formatVersion.separator() + "non_canonical_start_codon";
return "start_retained_variant";

case TRANSCRIPT:
return "non_coding_transcript_variant";
Expand Down
9 changes: 7 additions & 2 deletions src/main/java/org/snpeff/snpEffect/HgvsProtein.java
@@ -1,5 +1,6 @@
package org.snpeff.snpEffect;

import org.snpeff.codons.CodonTable;
import org.snpeff.interval.Transcript;
import org.snpeff.interval.VariantBnd;
import org.snpeff.util.Gpr;
Expand Down Expand Up @@ -216,7 +217,10 @@ protected boolean isDuplication() {
* Is this a frameShift variant?
*/
boolean isFs() {
    // A variant counts as a frameshift for the regular FRAME_SHIFT effect and
    // also for the two CDS-edge effect types (before CDS start / after CDS end)
    return variantEffect.hasEffectType(EffectType.FRAME_SHIFT) //
            || variantEffect.hasEffectType(EffectType.FRAME_SHIFT_BEFORE_CDS_START) //
            || variantEffect.hasEffectType(EffectType.FRAME_SHIFT_AFTER_CDS_END) //
    ;
}

/**
Expand Down Expand Up @@ -337,7 +341,8 @@ protected String posFs() {
// Sanity check: Longer than protein?
Transcript tr = variantEffect.getTranscript();
String protSeq = tr.protein();
if (codonNum >= protSeq.length()) return null;
if (codonNum > protSeq.length()) return null;
if (codonNum == protSeq.length()) return aaCode(CodonTable.TERMINATION_CODON_1) + codonNum;

// NOTE: the changes observed should be described on protein level and not try to
// incorporate any knowledge regarding the change at DNA-level.
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/snpeff/snpEffect/VariantEffect.java
Expand Up @@ -514,7 +514,7 @@ public String getEffectTypeString(boolean useSeqOntology, boolean useFirstEffect
Collections.sort(effectTypes);

// More than one effect? Check for repeats
Set<String> added = ((effectTypes.size() > 1) && (!useFirstEffect) ? new HashSet<String>() : null);
Set<String> added = ((effectTypes.size() > 1) && (!useFirstEffect) ? new HashSet<>() : null);

// Create string
for (EffectType et : effectTypes) {
Expand Down
Expand Up @@ -57,4 +57,13 @@ public void test_05_hgvs_frameshift() {
checkHgvs("testHg19Chr19", path("hgvs_frameshifts_syn_chr19.vcf"), 2);
}

/**
 * HGVS notation check for frameshifts at the end of the CDS
 */
@Test
public void test_06_hgvs_frameshift() {
    Gpr.debug("Test");

    String genome = "testHg3775Chr2";
    String vcfName = "hgvs_frameshifts_cds_end.vcf";
    checkHgvs(genome, path(vcfName), 6);
}

}
@@ -0,0 +1,12 @@
package org.snpeff.snpEffect.testCases.integration;

/**
 * Integration test case class that declares no test methods of its own;
 * everything it has is inherited from TestCasesIntegrationBase
 */
public class TestCasesIntegrationZzz extends TestCasesIntegrationBase {

    public TestCasesIntegrationZzz() {
        // The superclass no-argument constructor is invoked implicitly
    }

}
4 changes: 2 additions & 2 deletions test_04_TestCasesEff.csv
Expand Up @@ -3,8 +3,8 @@

Name , Value
Genome , testHg3770Chr22
Date , 2018-02-08 11:01
SnpEff_version , SnpEff 4.3u (build 2018-02-08 10:57), by Pablo Cingolani
Date , 2018-03-26 14:18
SnpEff_version , SnpEff 4.3u (build 2018-03-26 14:13), by Pablo Cingolani
Command_line_arguments , SnpEff -csvStats test_04_TestCasesEff.csv testHg3770Chr22 tests/integration/eff/eff_sort.vcf
Warnings , 1185
Number_of_lines_in_input_file, 1205
Expand Down
7 changes: 7 additions & 0 deletions tests/integration/hgvsFrameShift/hgvs_frameshifts_cds_end.vcf
@@ -0,0 +1,7 @@
#CHROM POS ID REF ALT QUAL FILTER INFO
2 37480319 . A AT . . TR=ENST00000234179;HGVSP=p.Ter891fs;HGVSC=c.2673dupA
2 37480320 . T TA . . TR=ENST00000234179;HGVSP=p.Ter891fs;HGVSC=c.2672_2673insT
2 37480321 . T TA . . TR=ENST00000234179;HGVSP=p.Ter891fs;HGVSC=c.2671dupT
2 37480322 . A AT . . TR=ENST00000234179;HGVSP=p.Ter891fs;HGVSC=c.2670_2671insA
2 37480323 . A AT . . TR=ENST00000234179;HGVSP=p.Ter891fs;HGVSC=c.2669_2670insA
2 37480324 . G GA . . TR=ENST00000234179;HGVSP=p.Pro890fs;HGVSC=c.2668_2669insT
7 changes: 7 additions & 0 deletions tests/integration/zzz/hgvs_frameshifts_cds_end.vcf
@@ -0,0 +1,7 @@
#CHROM POS ID REF ALT QUAL FILTER INFO
2 37480319 . A AT . . TR=ENST00000234179;HGVSP=p.Ter891fs;HGVSC=c.2673dupA
2 37480320 . T TA . . TR=ENST00000234179;HGVSP=p.Ter891fs;HGVSC=c.2672_2673insT
2 37480321 . T TA . . TR=ENST00000234179;HGVSP=p.Ter891fs;HGVSC=c.2671dupT
2 37480322 . A AT . . TR=ENST00000234179;HGVSP=p.Ter891fs;HGVSC=c.2670_2671insA
2 37480323 . A AT . . TR=ENST00000234179;HGVSP=p.Ter891fs;HGVSC=c.2669_2670insA
2 37480324 . G GA . . TR=ENST00000234179;HGVSP=p.Pro890fs;HGVSC=c.2668_2669insT

0 comments on commit 447f965

Please sign in to comment.