Skip to content

Commit

Permalink
added a command to compare exons from an isoscm assembly with reference
Browse files Browse the repository at this point in the history
gene models, requested in issue #7
  • Loading branch information
shenkers committed Jun 2, 2015
1 parent af9ef48 commit 86d3819
Show file tree
Hide file tree
Showing 3 changed files with 189 additions and 0 deletions.
34 changes: 34 additions & 0 deletions IsoSCM/src/executable/DiffCommand.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package executable;

import java.io.File;

import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlRootElement;

import com.beust.jcommander.Parameter;

@XmlRootElement(name="CompareCommand")
@XmlAccessorType(XmlAccessType.PROPERTY)
public class DiffCommand{


@Parameter(names="-x1", description="XML configuration file from the assembly step", converter=FileConverter.class, required=true)
File assemblyXml;

@Parameter(names="-G", description="Ensembl format GTF to which the assembled exons will be compared")
String refGtf;

public File getAssemblyXml() {
return assemblyXml;
}

@XmlElement
public void setAssemblyXml(File assemblyXml) {
this.assemblyXml = assemblyXml;
}


}

9 changes: 9 additions & 0 deletions IsoSCM/src/executable/IsoSCM.java
Original file line number Diff line number Diff line change
Expand Up @@ -163,13 +163,15 @@ public static void main(String[] args) throws IOException, JAXBException {
AssembleCommand assemble = new AssembleCommand();
SegmentCommand segment = new SegmentCommand();
CompareCommand compare = new CompareCommand();
DiffCommand diff = new DiffCommand();
EnumerateCommand enumerate = new EnumerateCommand();
HelpCommand help = new HelpCommand();

jc.addCommand("assemble", assemble);
jc.addCommand("segment", segment);
jc.addCommand("compare", compare);
jc.addCommand("enumerate", enumerate);
jc.addCommand("diff", diff);
jc.addCommand("-h", help);
try{
jc.parse(args);
Expand Down Expand Up @@ -614,6 +616,13 @@ else if(jc.getParsedCommand().equals("compare")){

logger.info("compare comleted.");
}
else if(jc.getParsedCommand().equals("diff")){
AssembleCommand assemblyConfiguration = ConfigurationIO.readAssemblyConfiguration(diff.assemblyXml);
File assemblyGtf = new File(Util.sprintf("%s/%s.gtf",assemblyConfiguration.dir, assemblyConfiguration.base));
File assemblyUnsplicedGtf = new File(Util.sprintf("%s/%s.unspliced.gtf",assemblyConfiguration.dir, assemblyConfiguration.base));
processing.DiffReference.diff(diff.refGtf, assemblyGtf, assemblyUnsplicedGtf);

}
}
catch(ParameterException e){
e.printStackTrace(System.out);
Expand Down
146 changes: 146 additions & 0 deletions IsoSCM/src/processing/DiffReference.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
package processing;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.lang.StringUtils;

import tools.AnnotatedRegion;
import tools.IntervalTools;
import tools.StrandedGenomicIntervalTree;
import tools.GTFTools.AnnotationParser;
import tools.ParseGTF.TranscriptIterator;
import util.Util;

public class DiffReference {

public static void labelExons(AnnotationParser annotation){

for(AnnotatedRegion transcript : annotation.transcripts){
List<AnnotatedRegion> exons = IntervalTools.SelectIntervals(annotation.exons, transcript.chr, transcript.start, transcript.end, transcript.strand, "transcript_id", transcript.getAttribute("transcript_id"));

if(exons.size()>1){
Collections.sort(exons,IntervalTools.AnnotatedRegionComparator(true, !transcript.isNegativeStrand()));
List<AnnotatedRegion> internal = exons.subList(1, exons.size()-1);
for(AnnotatedRegion r : internal){
r.addAttribute("exon_type", "internal_exon");
}
exons.get(0).addAttribute("exon_type", "5p_exon");
exons.get(exons.size()-1).addAttribute("exon_type", "3p_exon");
}
else{
exons.get(0).addAttribute("exon_type", "unspliced");
}
}


}


public static void diff(String gtfFile, File assemblyGtf, File assemblyUnsplicedGtf) throws FileNotFoundException {
TranscriptIterator spliced = new TranscriptIterator(assemblyGtf);
TranscriptIterator unspliced = new TranscriptIterator(assemblyUnsplicedGtf);
AnnotationParser ap = new AnnotationParser(gtfFile);

labelExons(ap);
PrintStream out = System.out;
out.printf("exon\ttype\tmatch_5p_3p\tmatch_5p_3p_type\tmatch_5p\tmatch_5p_type\tmatch_3p\tmatch_3p_type\toverlaps\toverlaps_type\n");
for(AnnotatedRegion r : spliced){
if(r.annotation.equals("exon")){
String type = (String) r.getAttribute("type");

Set<String> matched5p = new HashSet<String>();
Set<String> matched3p = new HashSet<String>();
Set<String> matched5p3p = new HashSet<String>();
Set<String> overlappingIds = new HashSet<String>();
Set<String> overlappingTypes = new HashSet<String>();
Set<String> matched5pTypes= new HashSet<String>();
Set<String> matched3pTypes= new HashSet<String>();
Set<String> matched5p3pTypes = new HashSet<String>();
for(AnnotatedRegion e : ap.exons.overlappingRegions(r.chr, r.start, r.end, r.strand)){
String transcriptId = (String) e.getAttribute("transcript_id");
String exon_type = (String) e.getAttribute("exon_type");

overlappingIds.add(transcriptId);
overlappingTypes.add(exon_type);

if(r.get5Prime()==e.get5Prime()){
matched5p.add(transcriptId);
matched5pTypes.add(exon_type);
}
if(r.get3Prime()==e.get3Prime()){
matched3p.add(transcriptId);
matched3pTypes.add(exon_type);
}
if(r.get5Prime()==e.get5Prime() && r.get3Prime()==e.get3Prime()){
matched5p3p.add(transcriptId);
matched5p3pTypes.add(exon_type);
}
}

out.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n", r, type,
StringUtils.join(matched5p3p,","),
StringUtils.join(matched5p3pTypes,","),
StringUtils.join(matched5p,","),
StringUtils.join(matched5pTypes,","),
StringUtils.join(matched3p,","),
StringUtils.join(matched3pTypes,","),
StringUtils.join(overlappingIds,","),
StringUtils.join(overlappingTypes,",")
);
}
}
for(AnnotatedRegion r : unspliced){
if(r.annotation.equals("exon")){
String type = "unspliced";

Set<String> matched5p = new HashSet<String>();
Set<String> matched3p = new HashSet<String>();
Set<String> matched5p3p = new HashSet<String>();
Set<String> overlappingIds = new HashSet<String>();
Set<String> overlappingTypes = new HashSet<String>();
Set<String> matched5pTypes= new HashSet<String>();
Set<String> matched3pTypes= new HashSet<String>();
Set<String> matched5p3pTypes = new HashSet<String>();
for(AnnotatedRegion e : ap.exons.overlappingRegions(r.chr, r.start, r.end, r.strand)){
String transcriptId = (String) e.getAttribute("transcript_id");
String exon_type = (String) e.getAttribute("exon_type");

overlappingIds.add(transcriptId);
overlappingTypes.add(exon_type);

if(r.get5Prime()==e.get5Prime()){
matched5p.add(transcriptId);
matched5pTypes.add(exon_type);
}
if(r.get3Prime()==e.get3Prime()){
matched3p.add(transcriptId);
matched3pTypes.add(exon_type);
}
if(r.get5Prime()==e.get5Prime() && r.get3Prime()==e.get3Prime()){
matched5p3p.add(transcriptId);
matched5p3pTypes.add(exon_type);
}
}

out.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n", r, type,
StringUtils.join(matched5p3p,","),
StringUtils.join(matched5p3pTypes,","),
StringUtils.join(matched5p,","),
StringUtils.join(matched5pTypes,","),
StringUtils.join(matched3p,","),
StringUtils.join(matched3pTypes,","),
StringUtils.join(overlappingIds,","),
StringUtils.join(overlappingTypes,",")
);
}
}

}
}

0 comments on commit 86d3819

Please sign in to comment.