Skip to content

Commit

Permalink
added a command to compare results from the 'compare' command with
Browse files Browse the repository at this point in the history
reference
gene models, requested in issue #7
  • Loading branch information
shenkers committed Jun 2, 2015
1 parent af9ef48 commit a7f4686
Show file tree
Hide file tree
Showing 4 changed files with 144 additions and 1 deletion.
2 changes: 1 addition & 1 deletion IsoSCM/src/executable/ConfigurationIO.java
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ public static void writeConfiguration(CompareCommand command, File configuration
* @throws JAXBException
*/
public static CompareCommand readCompareConfiguration(File configuration_xml) throws JAXBException{
JAXBContext jaxbc = JAXBContext.newInstance(AssembleCommand.class);
JAXBContext jaxbc = JAXBContext.newInstance(CompareCommand.class);

Unmarshaller um = jaxbc.createUnmarshaller();

Expand Down
34 changes: 34 additions & 0 deletions IsoSCM/src/executable/DiffCommand.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package executable;

import java.io.File;

import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlRootElement;

import com.beust.jcommander.Parameter;

@XmlRootElement(name="CompareCommand")
@XmlAccessorType(XmlAccessType.PROPERTY)
public class DiffCommand{


@Parameter(names="-x", description="XML configuration file from the 'compare' step", converter=FileConverter.class, required=true)
File compareXml;

@Parameter(names="-G", description="Ensembl format GTF to which the assembled exons will be compared", required=true)
String refGtf;

public File getAssemblyXml() {
return compareXml;
}

@XmlElement
public void setAssemblyXml(File assemblyXml) {
this.compareXml = assemblyXml;
}


}

7 changes: 7 additions & 0 deletions IsoSCM/src/executable/IsoSCM.java
Original file line number Diff line number Diff line change
Expand Up @@ -163,13 +163,15 @@ public static void main(String[] args) throws IOException, JAXBException {
AssembleCommand assemble = new AssembleCommand();
SegmentCommand segment = new SegmentCommand();
CompareCommand compare = new CompareCommand();
DiffCommand diff = new DiffCommand();
EnumerateCommand enumerate = new EnumerateCommand();
HelpCommand help = new HelpCommand();

jc.addCommand("assemble", assemble);
jc.addCommand("segment", segment);
jc.addCommand("compare", compare);
jc.addCommand("enumerate", enumerate);
jc.addCommand("diff", diff);
jc.addCommand("-h", help);
try{
jc.parse(args);
Expand Down Expand Up @@ -614,6 +616,11 @@ else if(jc.getParsedCommand().equals("compare")){

logger.info("compare comleted.");
}
else if(jc.getParsedCommand().equals("diff")){
CompareCommand compareConfiguration = ConfigurationIO.readCompareConfiguration(diff.compareXml);
File compareGtf = FileUtils.getFile(compareConfiguration.dir,Util.sprintf("%s.gtf",compareConfiguration.base));
processing.DiffReference.diff(diff.refGtf, compareGtf);
}
}
catch(ParameterException e){
e.printStackTrace(System.out);
Expand Down
102 changes: 102 additions & 0 deletions IsoSCM/src/processing/DiffReference.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
package processing;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.lang.StringUtils;

import tools.AnnotatedRegion;
import tools.IntervalTools;
import tools.StrandedGenomicIntervalTree;
import tools.GTFTools.AnnotationParser;
import tools.ParseGTF.TranscriptIterator;
import util.Util;

public class DiffReference {

public static void labelExons(AnnotationParser annotation){

for(AnnotatedRegion transcript : annotation.transcripts){
List<AnnotatedRegion> exons = IntervalTools.SelectIntervals(annotation.exons, transcript.chr, transcript.start, transcript.end, transcript.strand, "transcript_id", transcript.getAttribute("transcript_id"));

if(exons.size()>1){
Collections.sort(exons,IntervalTools.AnnotatedRegionComparator(true, !transcript.isNegativeStrand()));
List<AnnotatedRegion> internal = exons.subList(1, exons.size()-1);
for(AnnotatedRegion r : internal){
r.addAttribute("exon_type", "internal_exon");
}
exons.get(0).addAttribute("exon_type", "5p_exon");
exons.get(exons.size()-1).addAttribute("exon_type", "3p_exon");
}
else{
exons.get(0).addAttribute("exon_type", "unspliced");
}
}


}


public static void diff(String gtfFile, File compareGtf) throws FileNotFoundException {
TranscriptIterator loci = new TranscriptIterator(compareGtf);
AnnotationParser ap = new AnnotationParser(gtfFile);

labelExons(ap);

PrintStream out = System.out;

out.printf("exon\tlocus_id\tmatch_5p_3p\tmatch_5p_3p_type\tmatch_5p\tmatch_5p_type\tmatch_3p\tmatch_3p_type\toverlaps\toverlaps_type\n");
for(AnnotatedRegion r : loci){
if(r.annotation.equals("locus")){
String locus_id = (String) r.getAttribute("id");

Set<String> matched5p = new HashSet<String>();
Set<String> matched3p = new HashSet<String>();
Set<String> matched5p3p = new HashSet<String>();
Set<String> overlappingIds = new HashSet<String>();
Set<String> overlappingTypes = new HashSet<String>();
Set<String> matched5pTypes= new HashSet<String>();
Set<String> matched3pTypes= new HashSet<String>();
Set<String> matched5p3pTypes = new HashSet<String>();
for(AnnotatedRegion e : ap.exons.overlappingRegions(r.chr, r.start, r.end, r.strand)){
String transcriptId = (String) e.getAttribute("transcript_id");
String exon_type = (String) e.getAttribute("exon_type");

overlappingIds.add(transcriptId);
overlappingTypes.add(exon_type);

if(r.get5Prime()==e.get5Prime()){
matched5p.add(transcriptId);
matched5pTypes.add(exon_type);
}
if(r.get3Prime()==e.get3Prime()){
matched3p.add(transcriptId);
matched3pTypes.add(exon_type);
}
if(r.get5Prime()==e.get5Prime() && r.get3Prime()==e.get3Prime()){
matched5p3p.add(transcriptId);
matched5p3pTypes.add(exon_type);
}
}

out.printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n", r, locus_id,
StringUtils.join(matched5p3p,","),
StringUtils.join(matched5p3pTypes,","),
StringUtils.join(matched5p,","),
StringUtils.join(matched5pTypes,","),
StringUtils.join(matched3p,","),
StringUtils.join(matched3pTypes,","),
StringUtils.join(overlappingIds,","),
StringUtils.join(overlappingTypes,",")
);
}
}

}
}

0 comments on commit a7f4686

Please sign in to comment.