diff --git a/IsoSCM/src/executable/ConfigurationIO.java b/IsoSCM/src/executable/ConfigurationIO.java index d31713f..c15d8b4 100644 --- a/IsoSCM/src/executable/ConfigurationIO.java +++ b/IsoSCM/src/executable/ConfigurationIO.java @@ -67,7 +67,7 @@ public static void writeConfiguration(CompareCommand command, File configuration * @throws JAXBException */ public static CompareCommand readCompareConfiguration(File configuration_xml) throws JAXBException{ - JAXBContext jaxbc = JAXBContext.newInstance(AssembleCommand.class); + JAXBContext jaxbc = JAXBContext.newInstance(CompareCommand.class); Unmarshaller um = jaxbc.createUnmarshaller(); diff --git a/IsoSCM/src/executable/DiffCommand.java b/IsoSCM/src/executable/DiffCommand.java new file mode 100644 index 0000000..09aca6f --- /dev/null +++ b/IsoSCM/src/executable/DiffCommand.java @@ -0,0 +1,34 @@ +package executable; + +import java.io.File; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlRootElement; + +import com.beust.jcommander.Parameter; + +@XmlRootElement(name="CompareCommand") +@XmlAccessorType(XmlAccessType.PROPERTY) +public class DiffCommand{ + + + @Parameter(names="-x", description="XML configuration file from the 'compare' step", converter=FileConverter.class, required=true) + File compareXml; + + @Parameter(names="-G", description="Ensembl format GTF to which the assembled exons will be compared", required=true) + String refGtf; + + public File getAssemblyXml() { + return compareXml; + } + + @XmlElement + public void setAssemblyXml(File assemblyXml) { + this.compareXml = assemblyXml; + } + + +} + diff --git a/IsoSCM/src/executable/IsoSCM.java b/IsoSCM/src/executable/IsoSCM.java index 8b37c9c..77519c3 100644 --- a/IsoSCM/src/executable/IsoSCM.java +++ b/IsoSCM/src/executable/IsoSCM.java @@ -163,6 +163,7 @@ public static void main(String[] args) throws IOException, JAXBException { AssembleCommand assemble = new AssembleCommand(); SegmentCommand segment = new SegmentCommand(); CompareCommand compare = new CompareCommand(); + DiffCommand diff = new DiffCommand(); EnumerateCommand enumerate = new EnumerateCommand(); HelpCommand help = new HelpCommand(); @@ -170,6 +171,7 @@ public static void main(String[] args) throws IOException, JAXBException { jc.addCommand("segment", segment); jc.addCommand("compare", compare); jc.addCommand("enumerate", enumerate); + jc.addCommand("diff", diff); jc.addCommand("-h", help); try{ jc.parse(args); @@ -614,6 +616,11 @@ else if(jc.getParsedCommand().equals("compare")){ logger.info("compare comleted."); } + else if(jc.getParsedCommand().equals("diff")){ + CompareCommand compareConfiguration = ConfigurationIO.readCompareConfiguration(diff.compareXml); + File compareGtf = FileUtils.getFile(compareConfiguration.dir,Util.sprintf("%s.gtf",compareConfiguration.base)); + processing.DiffReference.diff(diff.refGtf, compareGtf); + } } catch(ParameterException e){ e.printStackTrace(System.out); diff --git a/IsoSCM/src/processing/DiffReference.java b/IsoSCM/src/processing/DiffReference.java new file mode 100644 index 0000000..43a729a --- /dev/null +++ b/IsoSCM/src/processing/DiffReference.java @@ -0,0 +1,102 @@ +package processing; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.PrintStream; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.commons.lang.StringUtils; + +import tools.AnnotatedRegion; +import tools.IntervalTools; +import tools.StrandedGenomicIntervalTree; +import tools.GTFTools.AnnotationParser; +import tools.ParseGTF.TranscriptIterator; +import util.Util; + +public class DiffReference { + + public static void labelExons(AnnotationParser annotation){ + + for(AnnotatedRegion transcript : annotation.transcripts){ + List exons = IntervalTools.SelectIntervals(annotation.exons, transcript.chr, transcript.start, transcript.end, transcript.strand, "transcript_id", transcript.getAttribute("transcript_id")); + + if(exons.size()>1){ + Collections.sort(exons,IntervalTools.AnnotatedRegionComparator(true, !transcript.isNegativeStrand())); + List internal = exons.subList(1, exons.size()-1); + for(AnnotatedRegion r : internal){ + r.addAttribute("exon_type", "internal_exon"); + } + exons.get(0).addAttribute("exon_type", "5p_exon"); + exons.get(exons.size()-1).addAttribute("exon_type", "3p_exon"); + } + else{ + exons.get(0).addAttribute("exon_type", "unspliced"); + } + } + + + } + + + public static void diff(String gtfFile, File compareGtf) throws FileNotFoundException { + TranscriptIterator loci = new TranscriptIterator(compareGtf); + AnnotationParser ap = new AnnotationParser(gtfFile); + + labelExons(ap); + + PrintStream out = System.out; + + out.printf("exon\tlocus_id\tmatch_5p_3p\tmatch_5p_3p_type\tmatch_5p\tmatch_5p_type\tmatch_3p\tmatch_3p_type\toverlaps\toverlaps_type\n"); + for(AnnotatedRegion r : loci){ + if(r.annotation.equals("locus")){ + String locus_id = (String) r.getAttribute("id"); + + Set matched5p = new HashSet(); + Set matched3p = new HashSet(); + Set matched5p3p = new HashSet(); + Set overlappingIds = new HashSet(); + Set overlappingTypes = new HashSet(); + Set matched5pTypes= new HashSet(); + Set matched3pTypes= new HashSet(); + Set matched5p3pTypes = new HashSet(); + for(AnnotatedRegion e : ap.exons.overlappingRegions(r.chr, r.start, r.end, r.strand)){ + String transcriptId = (String) e.getAttribute("transcript_id"); + String exon_type = (String) e.getAttribute("exon_type"); + + overlappingIds.add(transcriptId); + overlappingTypes.add(exon_type); + + if(r.get5Prime()==e.get5Prime()){ + matched5p.add(transcriptId); + matched5pTypes.add(exon_type); + } + if(r.get3Prime()==e.get3Prime()){ + matched3p.add(transcriptId); + matched3pTypes.add(exon_type); + } + if(r.get5Prime()==e.get5Prime() && r.get3Prime()==e.get3Prime()){ + matched5p3p.add(transcriptId); + matched5p3pTypes.add(exon_type); + } + } + + out.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n", r, locus_id, + StringUtils.join(matched5p3p,","), + StringUtils.join(matched5p3pTypes,","), + StringUtils.join(matched5p,","), + StringUtils.join(matched5pTypes,","), + StringUtils.join(matched3p,","), + StringUtils.join(matched3pTypes,","), + StringUtils.join(overlappingIds,","), + StringUtils.join(overlappingTypes,",") + ); + } + } + + } +}