From 43ed1cfea441e8229ea5f156ae6e397074a98410 Mon Sep 17 00:00:00 2001 From: sol Date: Wed, 8 Jul 2015 12:20:28 -0400 Subject: [PATCH] this addresses issue #10 in which some splice junctions recieve "null" labels, which caused the enumerate function to run incorrectly. This issue labeling junctions occured because the aligner (STAR) assigned the read a strand flag (XS) that conflicted with the strand implied by the sequencing protocol. This patch modifies IsoSCM such that splice junctions from reads with conflicting strand information are omitted from assembled transcript models. --- IsoSCM/src/executable/IsoSCM.java | 2 +- IsoSCM/src/processing/FindSpliceJunctions.java | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/IsoSCM/src/executable/IsoSCM.java b/IsoSCM/src/executable/IsoSCM.java index 77519c3..1e8d937 100644 --- a/IsoSCM/src/executable/IsoSCM.java +++ b/IsoSCM/src/executable/IsoSCM.java @@ -218,7 +218,7 @@ else if(jc.getParsedCommand().equals("assemble")){ SAMFileReader sfr = new SAMFileReader(bamFile); logger.info("tabulating splice junctions..."); BEDWriter sj_bw = new BEDWriter(IO.bufferedPrintstream(splice_junction_bed)); - FindSpliceJunctions.tabulateSpliceJunctions(sfr, sj_bw); + FindSpliceJunctions.tabulateSpliceJunctions(sfr, strandedness, sj_bw); sj_bw.close(); logger.info("counting junction supporting reads..."); diff --git a/IsoSCM/src/processing/FindSpliceJunctions.java b/IsoSCM/src/processing/FindSpliceJunctions.java index fa5330d..9b1d634 100644 --- a/IsoSCM/src/processing/FindSpliceJunctions.java +++ b/IsoSCM/src/processing/FindSpliceJunctions.java @@ -114,7 +114,7 @@ public static List splice_junctions(SAMRecord sr){ return splice_junctions; } - public static StrandedGenomicIntervalTree> tabulateSpliceJunctions(SAMFileReader sfr, BEDWriter bw){ + public static StrandedGenomicIntervalTree> tabulateSpliceJunctions(SAMFileReader sfr, Strandedness strandedness, BEDWriter bw){ Map referenceLengths = BAMTools.referenceSequenceLengths(sfr.getFileHeader()); StrandedGenomicIntervalTree> splice_junctions = new StrandedGenomicIntervalTree>(); @@ -130,6 +130,9 @@ public static StrandedGenomicIntervalTree> tabulateSpliceJunc SAMRecord sr = sri.next(); if(sr.getCigarString().indexOf('N') == -1) continue; + + if(strandedness!=Strandedness.unstranded && sr.getAttribute("XS")!=null && BAMTools.strand(sr, strandedness)!=sr.getCharacterAttribute("XS")) + continue; Cigar cigar = sr.getCigar(); int alignmentPosition = sr.getAlignmentStart(); @@ -869,6 +872,9 @@ public MapCounter instantiate(Object... objects) { String chr = sr.getReferenceName(); int alignment_start = sr.getAlignmentStart(); + if(strandedness!=Strandedness.unstranded && sr.getAttribute("XS")!=null && BAMTools.strand(sr, strandedness)!=sr.getCharacterAttribute("XS")) + continue; + if(prev_chr!=null && !chr.equals(prev_chr)){ writeAll(strand_count_5p_splice,strand_count_5p_span, prev_chr, true,gw); // IMPORTANT: corrected error when last read aligned to the chromosome is spliced