>();
+ clusterList.add(readsLengthRange);
+
+ for(int x=0;x tempcluster = new ArrayList();
+ for(int y=0;k[x][y]!=-1 && ym[i]){
+ diff[i] = list[m[i]][a];
+ }
+ else{
+ diff[i]=0;
+ }
+ }
+
+ int val=0;
+ double temp=diff[0];
+ for(int i=0;i<2;++i){
+ if(diff[i] t = new ArrayList() ;
+
+ for(int i=0;i<2;++i){
+ m[i]=0; // initializing means to 0
+ }
+
+ //int cnt=0;
+ for(int i=0;i<2;++i){
+
+ for(int j=0;j1){
+ ArrayList s = new ArrayList();
+
+ for(int x1 = 0; x1t.get(x2)){
+ sum = sum+(list[t.get(x2)][t.get(x1)]*list[t.get(x2)][t.get(x1)]);
+ }
+ else{
+ sum = sum+0;
+ }
+ }
+ //System.out.println(sum);
+ s.add(sum/t.size());
+ }
+ //System.out.println(s.size());
+ double min=s.get(0);
+ int d2 = 0;
+ for(int x3=1;x3s.get(x3)){
+ min = s.get(x3);
+ d2 = x3;
+ }
+ }
+
+
+ m[i]=t.get(d2);
+ s.clear();
+ t.clear();
+ }else{
+ m[i]=t.get(0);
+ }
+
+ }
+ }
+
+//This checks if previous k ie. tempk and current k are same.Used as terminating case.
+ static int VerifyEqual(int n, int k[][], int tempk[][]){
+ for(int i=0;i<2;++i){
+ for(int j=0;j parentMap = new HashMap<>();
+
+ for (int i = 1; i <= m; ++i) {
+ d[i][0] = i;
+ }
+
+ for (int j = 1; j <= n; ++j) {
+ d[0][j] = j;
+ }
+
+
+
+
+ for (int j = 1; j <= n; ++j) {
+ for (int i = 1; i <= m; ++i) {
+ final int delta = (s.charAt(j - 1) == z.charAt(i - 1)) ? 0 : 1;
+
+ double tentativeDistance = d[i - 1][j] +0.5;//gap penalty
+ EditOperation editOperation = EditOperation.INSERT;
+
+ if (tentativeDistance > d[i][j - 1] +0.5) {
+ tentativeDistance = d[i][j - 1] +0.5;
+ editOperation = EditOperation.DELETE;
+ }
+
+ if (tentativeDistance > d[i - 1][j - 1] + delta) {
+ tentativeDistance = d[i - 1][j - 1] + delta;
+ editOperation = EditOperation.SUBSTITUTE;
+ }
+
+ d[i][j] = tentativeDistance;
+
+
+ switch (editOperation) {
+ case SUBSTITUTE:
+ parentMap.put(new Point(i, j), new Point(i - 1, j - 1));
+ break;
+
+ case INSERT:
+ parentMap.put(new Point(i, j), new Point(i - 1, j));
+ break;
+
+ case DELETE:
+ parentMap.put(new Point(i, j), new Point(i, j - 1));
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ final StringBuilder topLineBuilder = new StringBuilder(n + m);
+ final StringBuilder bottomLineBuilder = new StringBuilder(n + m);
+ final StringBuilder editSequenceBuilder = new StringBuilder(n + m);
+ Point current = new Point(m, n);
+
+ while (true) {
+ Point predecessor = parentMap.get(current);
+
+ if (predecessor == null) {
+ break;
+ }
+
+ if (current.x != predecessor.x && current.y != predecessor.y) {
+ final char schar = s.charAt(predecessor.y);
+ final char zchar = z.charAt(predecessor.x);
+
+ topLineBuilder.append(schar);
+ bottomLineBuilder.append(zchar);
+ editSequenceBuilder.append(schar != zchar ?
+ EditOperation.SUBSTITUTE :
+ EditOperation.MATCH);
+ } else if (current.x != predecessor.x) {
+ topLineBuilder.append(GAP);
+ bottomLineBuilder.append(z.charAt(predecessor.x));
+ editSequenceBuilder.append(EditOperation.INSERT);
+ } else {
+ topLineBuilder.append(s.charAt(predecessor.y));
+ bottomLineBuilder.append(GAP);
+ editSequenceBuilder.append(EditOperation.DELETE);
+ }
+
+ current = predecessor;
+ }
+
+ // Remove the last characters that correspond to the very beginning
+ // of the alignments and edit sequence (since the path reconstruction
+ // proceeds from the "end" to the "beginning" of the distance matrix.
+ topLineBuilder .deleteCharAt(topLineBuilder.length() - 1);
+ bottomLineBuilder .deleteCharAt(bottomLineBuilder.length() - 1);
+ editSequenceBuilder.deleteCharAt(editSequenceBuilder.length() - 1);
+
+ // Our result data is backwards, reverse them.
+ topLineBuilder .reverse();
+ bottomLineBuilder .reverse();
+ editSequenceBuilder.reverse();
+
+ return new EditDistanceResult(d[m][n],
+ editSequenceBuilder.toString(),
+ topLineBuilder.toString(),
+ bottomLineBuilder.toString());
+ }
+
+
+
+ public static void main(String[] args) throws IOException {
+ EditDistanceResult result = compute("ACTAGGTTA", "TAAGGCTCAAT");
+ System.out.println("Distance: " + result.getDistance());
+ System.out.println("Edit sequence: " + result.getEditSequence());
+ System.out.println("Alignment:");
+ System.out.println(result.getTopAlignmentRow());
+ System.out.println(result.getBottomAlignmentRow());
+
+
+ }
+
+
+
+
+}
diff --git a/src/dev/java/japsadev/bio/hts/clustering/WriteClusterResultOnFile.java b/src/dev/java/japsadev/bio/hts/clustering/WriteClusterResultOnFile.java
new file mode 100644
index 0000000..fe5c98a
--- /dev/null
+++ b/src/dev/java/japsadev/bio/hts/clustering/WriteClusterResultOnFile.java
@@ -0,0 +1,73 @@
+package japsadev.bio.hts.clustering;
+
+import java.util.ArrayList;
+import java.io.*;
+
+import japsa.seq.SequenceOutputStream;
+import japsadev.bio.hts.clustering.PairDistance;
+import japsadev.bio.hts.clustering.GettingTreadsFromFasta;
+import japsa.seq.*;
+
+/**
+ * @author buvan.suji
+ *
+ */
+
+public class WriteClusterResultOnFile {
+
+ public static void writeOnFile(ArrayList>
+ list, Sequence cons1, Sequence cons2, String FileName ) throws Exception{
+
+ File file = new File("ClusterResult_"+FileName+".fasta");
+ FileWriter fw = new FileWriter(file.getAbsoluteFile());
+ BufferedWriter bw = new BufferedWriter(fw);
+
+
+ ArrayList list1 = new ArrayList();
+ list1 = list.get(0);
+
+ bw.write("Minimum Read Length: "+ list1.get(0));
+ bw.newLine();
+ bw.write("Maximum Read Length: "+ list1.get(1));
+ bw.newLine();
+ bw.write("Estimated Time: "+ list1.get(2));
+ bw.newLine();
+ bw.newLine();
+ bw.write("The consensus sequence of cluster1: ");
+ bw.newLine();
+ bw.write(""+cons1);
+ bw.newLine();
+ bw.newLine();
+ bw.write("The C1 members are: ");
+ bw.newLine();
+ for(int x=1;x tempList = new ArrayList();
+ tempList = list.get(x);
+
+ for(int y=0;y vertices = (HashSet) graph.getVertices();
+//
+// //display the initial setup- all vertices adjacent to each other
+// for(Vertex vertex:vertices){
+// System.out.println(vertex);
+//
+// for(int j = 0; j < vertex.getNeighborCount(); j++){
+// System.out.println(vertex.getNeighbor(j));
+// }
+//
+// System.out.println();
+// }
+//
+// Node source=new Node(graph.getVertex("108"),false),
+// dest=new Node(graph.getVertex("201"),true);
+// int distance=2962;
+//
+// ArrayList paths=graph.DFS(source, dest, distance);
+// if(!paths.isEmpty()){
+// System.out.println("Paths found ("+distance+"):");
+// for(Path p:paths)
+// System.out.println(p.toString() + " d=" + p.getDeviation() );
+// }
+// else
+// System.out.println("Path not found ("+distance+")!");
+
+ }catch (Exception e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+
+// Vertex v1= new Vertex("EDGE_1_length_1000_cov_9"),
+// v2= new Vertex("EDGE_2_length_200_cov_22"),
+// v3= new Vertex("EDGE_3_length_300_cov_12"),
+// v4= new Vertex("EDGE_4_length_500_cov_33"),
+// v5= new Vertex("EDGE_5_length_600_cov_21");
+// graph.addVertex(v1, false);
+// graph.addVertex(v2, false);
+// graph.addVertex(v3, false);
+// graph.addVertex(v4, false);
+// graph.addVertex(v5, false);
+//
+// graph.addEdge(v1, v2, true, true);
+// graph.addEdge(v1, v2, false, false);
+//
+// graph.addEdge(v2, v1, true, true);
+// graph.addEdge(v2, v3, true, true);
+// graph.addEdge(v2, v1, false, false);
+// graph.addEdge(v2, v4, false, false);
+//
+// graph.addEdge(v3, v4, true, true);
+// graph.addEdge(v3, v2, false, false);
+//
+// graph.addEdge(v4, v5, true, true);
+// graph.addEdge(v4, v2, true, true);
+// graph.addEdge(v4, v3, false, false);
+// graph.addEdge(v4, v5, false, false);
+//
+// graph.addEdge(v5, v4, false, false);
+// graph.addEdge(v5, v4, true, true);
+//
+// graph.printStats();
+//
+// Path p1=new Path(graph, "1+,2+,3+"),
+// p2=new Path(graph, "3+,4+");
+// graph.reduce(p1);
+// graph.printStats();
+// graph.reduce(p2);
+// graph.printStats();
+ }
+}
+
+
diff --git a/src/dev/java/japsadev/bio/hts/metagenome/Edge.java b/src/dev/java/japsadev/bio/hts/metagenome/Edge.java
new file mode 100644
index 0000000..99cb9a3
--- /dev/null
+++ b/src/dev/java/japsadev/bio/hts/metagenome/Edge.java
@@ -0,0 +1,180 @@
+package japsadev.bio.hts.metagenome;
+
+/**
+ * This class models an bidirected Edge in my Graph implementation.
+ * An Edge contains two vertices and a weight (distance between them).
+ * A certain edge (v1,v2) can take one among 4 types: ++, --, +- and -+. Each
+ * type corresponds to the way we read the DNA sequence in each read when traversing
+ * this edge.
+ * For example: v1->---<-v2 or (v1,v2)+- spells out (v1 v2') and/or (v2 v1') as in SPAdes output.
+ * This class also deviates from the expectations of the Comparable interface
+ * in that a return value of 0 does not indicate that this.equals(other). The
+ * equals() method only compares the vertices, while the compareTo() method
+ * compares the edge weights. This provides more efficient implementation for
+ * checking uniqueness of edges, as well as the fact that two edges of equal weight
+ * should be considered equitably in a path finding or spanning tree algorithm.
+ *
+ * @author Son Nguyen
+ * @date August 20, 2016
+ */
+public class Edge implements Comparable {
+
+ private Vertex one, two;
+ private boolean dOne, dTwo;
+ private int weight;
+
+ /**
+ *
+ * @param one The first vertex in the Edge
+ * @param two The second vertex in the Edge
+ */
+ public Edge(Vertex one, Vertex two, boolean d1, boolean d2){
+ this(one, two, d1, d2, -127);
+ }
+
+ /**
+ *
+ * @param one The first vertex in the Edge
+ * @param two The second vertex of the Edge
+ * @param weight The weight of this Edge
+ */
+ public Edge(Vertex one, Vertex two, boolean dOne, boolean dTwo, int weight){
+ //this.one = (one.getLabel().compareTo(two.getLabel()) <= 0) ? one : two;
+ //this.two = (this.one == one) ? two : one;
+ this.one=one;
+ this.two=two;
+ this.weight = weight;
+ this.dOne=dOne;
+ this.dTwo=dTwo;
+ }
+
+
+ /**
+ *
+ * @param current
+ * @return The neighbor of current along this Edge
+ */
+ public Vertex getNeighbor(Vertex current){
+ if(!(current.equals(one) || current.equals(two))){
+ return null;
+ }
+
+ return (current.equals(one)) ? two : one;
+ }
+ /**
+ * Return the same Edge but reading the other way around
+ * just swap the order of its vertices upside down
+ * @param
+ * @return the identical Edge
+ */
+ public Edge getReversedRead(){
+ return new Edge(this.two, this.one, !this.dTwo, !this.dOne, this.weight);
+ }
+ /**
+ *
+ * @param current
+ * @return The direction to spell *current* along this Edge
+ */
+ public boolean getDirection(Vertex current){
+ assert (current.equals(one) || current.equals(two)):"Vertex doesn't belong to this Edge!";
+
+ return (current.equals(one)) ? dOne : !dTwo;
+ }
+
+ /**
+ *
+ * @return Vertex this.one
+ */
+ public Vertex getOne(){
+ return this.one;
+ }
+
+ /**
+ *
+ * @return Vertex this.two
+ */
+ public Vertex getTwo(){
+ return this.two;
+ }
+
+ /**
+ *
+ * @return boolean this.dOne
+ */
+ public boolean getDOne(){
+ return this.dOne;
+ }
+
+ /**
+ *
+ * @return boolean this.dTwo
+ */
+ public boolean getDTwo(){
+ return this.dTwo;
+ }
+ /**
+ *
+ * @return int The weight of this Edge
+ */
+ public int getWeight(){
+ return this.weight;
+ }
+
+
+ /**
+ *
+ * @param weight The new weight of this Edge
+ */
+ public void setWeight(int weight){
+ this.weight = weight;
+ }
+
+
+ /**
+ * Note that the compareTo() method deviates from
+ * the specifications in the Comparable interface. A
+ * return value of 0 does not indicate that this.equals(other).
+ * The equals() method checks the Vertex endpoints, while the
+ * compareTo() is used to compare Edge weights
+ *
+ * @param other The Edge to compare against this
+ * @return int this.weight - other.weight
+ */
+ public int compareTo(Edge other){
+ return this.weight - other.weight;
+ }
+
+ /**
+ *
+ * @return String A String representation of this Edge
+ */
+ public String toString(){
+ return "({" + one + (dOne?">":"<") + ", " + two + (dTwo?">":"<") + "}, " + weight + ")";
+ }
+
+ /**
+ *
+ * @return int The hash code for this Edge
+ */
+ public int hashCode(){
+ return (one.getLabel() + (dOne?"":"'") + two.getLabel() + (dTwo?"":"'")).hashCode();
+ }
+
+ /**
+ *
+ * @param other The Object to compare against this
+ * @return true iff other is an Edge with the same Vertices as this
+ */
+ public boolean equals(Object other){
+ if(!(other instanceof Edge)){
+ return false;
+ }
+
+ Edge e = (Edge)other;
+
+ return (e.one.equals(this.one) && e.two.equals(this.two) && (e.getDOne()==this.dOne) && (e.getDTwo()==this.dTwo))
+ || (e.one.equals(this.two) && e.two.equals(this.one) && (e.getDOne()!=this.dOne) && (e.getDTwo()!=this.dTwo));
+ }
+}
+
+
diff --git a/src/dev/java/japsadev/bio/hts/metagenome/Graph.java b/src/dev/java/japsadev/bio/hts/metagenome/Graph.java
new file mode 100644
index 0000000..d419852
--- /dev/null
+++ b/src/dev/java/japsadev/bio/hts/metagenome/Graph.java
@@ -0,0 +1,389 @@
+package japsadev.bio.hts.metagenome;
+
+import java.io.IOException;
+import java.util.*;
+import japsa.seq.Alphabet;
+import japsa.seq.FastaReader;
+import japsa.seq.Sequence;
+import japsa.seq.SequenceReader;
+
+/**
+ * This class models a simple, bidirected graph using an
+ * incidence list representation. Vertices are identified
+ * uniquely by their labels, and only unique vertices are allowed.
+ * At most one unique Edge per vertex pair is allowed in this Graph.
+ *
+ * @author Son Nguyen
+ * @date August 20, 2016
+ */
+public class Graph {
+
+ private HashMap vertices;
+ private HashMap edges;
+ private int kmer;
+
+ static final int TOLERATE=500;
+
+
+ public Graph(){
+ this.vertices = new HashMap();
+ this.edges = new HashMap();
+ setKmerSize(127);//default kmer size used by SPAdes to assembly MiSeq data
+ }
+
+
+ public Graph(String graphFile) throws IOException{
+ this();
+ //1. next iterate over again to read the connections
+ SequenceReader reader = new FastaReader(graphFile);
+ Sequence seq;
+ int shortestLen = 10000;
+ while ((seq = reader.nextSequence(Alphabet.DNA())) != null){
+ if(seq.length() 1){
+ String[] nbList = adjList[1].split(",");
+ for(int i=0; i < nbList.length; i++){
+ // create list of bridges here (distance=-kmer overlapped)
+ String neighbor = nbList[i];
+ boolean dir2=neighbor.contains("'")?false:true;
+ neighbor=neighbor.replaceAll("[^a-zA-Z0-9_.]", "").trim();
+
+ Vertex nbVertex=new Vertex(neighbor);
+ if(getVertex(nbVertex.getLabel())!=null)
+ nbVertex=getVertex(nbVertex.getLabel());
+
+ addVertex(nbVertex, false);
+
+ addEdge(current, nbVertex, dir1, dir2);
+ }
+ }
+
+ }
+ //rough estimation of kmer used
+ if((shortestLen-1) != getKmerSize())
+ setKmerSize(shortestLen-1);
+
+ reader.close();
+ }
+ /**
+ * This constructor accepts an ArrayList and populates
+ * this.vertices. If multiple Vertex objects have the same label,
+ * then the last Vertex with the given label is used.
+ *
+ * @param vertices The initial Vertices to populate this Graph
+ */
+ public Graph(ArrayList vertices){
+ this.vertices = new HashMap();
+ this.edges = new HashMap();
+
+ for(Vertex v: vertices){
+ this.vertices.put(v.getLabel(), v);
+ }
+ setKmerSize(127);//default kmer size used by SPAdes to assembly MiSeq data
+
+ }
+
+ public int getKmerSize(){
+ return this.kmer;
+ }
+ public void setKmerSize(int kmer){
+ this.kmer=kmer;
+ Path.setK(kmer);
+ }
+ /**
+ * This method adds am edge between Vertices one and two
+ * and their corresponding direction of weight kmer,
+ * if no Edge between these Vertices already exists in the Graph.
+ *
+ * @param one The first vertex to add
+ * @param two The second vertex to add
+ * @param d1 The direction on the side of vertex one
+ * @param d2 The direction on the side of vertex two
+ * @return true iff no Edge relating one and two exists in the Graph
+ */
+ public boolean addEdge(Vertex one, Vertex two, boolean d1, boolean d2){
+ return addEdge(one, two, d1, d2, -kmer);
+ }
+
+
+ /**
+ * Accepts two vertices, their directions and a weight, and adds the edge
+ * ({one, two}, {d1, d2}, weight) iff no Edge relating one and two
+ * exists in the Graph.
+ *
+ * @param one The first Vertex of the Edge
+ * @param two The second Vertex of the Edge
+ * @param d1 The direction on the side of vertex one
+ * @param d2 The direction on the side of vertex two
+ * @param weight The weight of the Edge
+ * @return true iff no Edge already exists in the Graph
+ */
+ public boolean addEdge(Vertex one, Vertex two, boolean d1, boolean d2, int weight){
+
+ //ensures the Edge is not in the Graph
+ Edge e = new Edge(one, two, d1, d2, weight);
+ if(edges.containsKey(e.hashCode()) || edges.containsKey(e.getReversedRead().hashCode())){
+ return false;
+ }
+
+ //and that the Edge isn't already incident to one of the vertices
+ else if(one.containsNeighbor(e) || two.containsNeighbor(e.getReversedRead())){
+ return false;
+ }
+
+ edges.put(e.hashCode(), e);
+ one.addNeighbor(e);
+ two.addNeighbor(e.getReversedRead());
+ return true;
+ }
+
+ /**
+ *
+ * @param e The Edge to look up
+ * @return true iff this Graph contains the Edge e
+ */
+ public boolean containsEdge(Edge e){
+ if(e.getOne() == null || e.getTwo() == null){
+ return false;
+ }
+
+ return this.edges.containsKey(e.hashCode())
+ || this.edges.containsKey(e.getReversedRead().hashCode());
+ }
+
+
+ /**
+ * This method removes the specified Edge from the Graph,
+ * including as each vertex's incidence neighborhood.
+ *
+ * @param e The Edge to remove from the Graph
+ * @return Edge The Edge removed from the Graph
+ */
+ public Edge removeEdge(Edge e){
+ e.getOne().removeNeighbor(e);
+ e.getTwo().removeNeighbor(e.getReversedRead());
+ Edge rmEdge = this.edges.remove(e.hashCode());
+ if (rmEdge==null)
+ rmEdge = this.edges.remove(e.getReversedRead().hashCode());
+ return rmEdge;
+ }
+
+ /**
+ *
+ * @param vertex The Vertex to look up
+ * @return true iff this Graph contains vertex
+ */
+ public boolean containsVertex(Vertex vertex){
+ return this.vertices.get(vertex.getLabel()) != null;
+ }
+
+ /**
+ *
+ * @param label The specified Vertex label
+ * @return Vertex The Vertex with the specified label
+ */
+ public Vertex getVertex(String label){
+ return vertices.get(label);
+ }
+
+ /**
+ * This method adds a Vertex to the graph. If a Vertex with the same label
+ * as the parameter exists in the Graph, the existing Vertex is overwritten
+ * only if overwriteExisting is true. If the existing Vertex is overwritten,
+ * the Edges incident to it are all removed from the Graph.
+ *
+ * @param vertex
+ * @param overwriteExisting
+ * @return true iff vertex was added to the Graph
+ */
+ public boolean addVertex(Vertex vertex, boolean overwriteExisting){
+ Vertex current = this.vertices.get(vertex.getLabel());
+ if(current != null){
+ if(!overwriteExisting){
+ return false;
+ }
+
+ while(current.getNeighborCount() > 0){
+ this.removeEdge(current.getNeighbor(0));
+ }
+ }
+
+
+ vertices.put(vertex.getLabel(), vertex);
+ return true;
+ }
+
+ /**
+ *
+ * @param label The label of the Vertex to remove
+ * @return Vertex The removed Vertex object
+ */
+ public Vertex removeVertex(String label){
+ Vertex v = vertices.remove(label);
+
+ while(v.getNeighborCount() > 0){
+ this.removeEdge(v.getNeighbor((0)));
+ }
+
+ return v;
+ }
+
+ /**
+ *
+ * @return Set All Graph's Vertex objects
+ */
+ public Set getVertices(){
+ return new HashSet(this.vertices.values());
+ }
+
+ /**
+ *
+ * @return Set The Edges of this graph
+ */
+ public Set getEdges(){
+ return new HashSet(this.edges.values());
+ }
+
+ /**
+ * Find a path between two nodes within a given distance
+ */
+ public ArrayList DFS(Node source, Node dest, int distance){
+ System.out.println("Looking for path between " + source.toString() + " to " + dest.toString() + " with distance " + distance);
+ Path tmp = new Path();
+ ArrayList retval = new ArrayList();
+ tmp.addNode(source);
+
+ //traverse(tmp, dest, retval, distance+source.getSeq().length()+dest.getSeq().length());
+ traverse(tmp, dest, retval, distance);
+
+ return retval;
+ }
+
+ public void traverse(Path path, Node dest, ArrayList curResult, int distance){
+ Node source=path.getEnd();
+ assert source!=null:"Path null fault!";
+
+ ArrayList nList = source.getVertex().getNeighbors();
+ for(Edge e:nList){
+ if(e.getDOne()==source.getDirection()){
+ path.addNode(e.getTwo(), e.getDTwo());
+
+ if(e.getTwo()==dest.getVertex() && e.getDTwo()==dest.getDirection() && Math.abs(distance+getKmerSize()) < TOLERATE){
+
+ Path curPath=curResult.isEmpty()?new Path():curResult.get(0), //the best path saved among all possible paths from the list curResult
+ tmpPath=new Path();
+ tmpPath.setComp(path.getNodes());
+ tmpPath.setDeviation(Math.abs(distance+getKmerSize()));
+ if( Math.abs(distance+getKmerSize()) < curPath.getDeviation() )
+ curResult.add(0, tmpPath);
+ else
+ curResult.add(tmpPath);
+
+ System.out.println("Hit added: "+path+"(candidate deviation: "+Math.abs(distance+getKmerSize())+")");
+ }else{
+ int newDistance=distance-e.getTwo().getSequence().length()+getKmerSize();
+ if (newDistance+getKmerSize()<-TOLERATE){
+ System.out.println("Stop following path with distance "+newDistance+" already! : "+path);
+ }else
+ traverse(path, dest, curResult, newDistance);
+ }
+ path.removeLast();
+ }
+ }
+ }
+ /**
+ *
+ * @param p Path to be grouped as a virtually vertex
+ */
+ public void reduce(Path p){
+ Vertex comp=new Vertex(p);
+ //add the new composite Vertex to the graph
+ addVertex(comp, true);
+ //remove unique nodes on p
+ ArrayList tobeRemoved=new ArrayList();
+ for(Node n:p.getNodes()){
+ if(n.getVertex().isUnique())
+ tobeRemoved.add(n.getVertex().getLabel());
+ }
+
+ Node start = p.getStart(),
+ end = p.getEnd();
+ //set neighbors of the grouped Vertex
+ for(Edge e:start.getVertex().getNeighbors()){
+ if(e.getDOne()!=start.getDirection()){
+ //comp.addNeighbor(e);
+ addEdge(comp,e.getTwo(),e.getDOne(),e.getDTwo());
+ }
+ }
+ for(Edge e:end.getVertex().getNeighbors()){
+ if(e.getDOne()!=end.getDirection())
+ //comp.addNeighbor(e);
+ addEdge(comp,e.getTwo(),e.getDOne(),e.getDTwo());
+ }
+ for(String lab:tobeRemoved)
+ removeVertex(lab);
+ //TODO: remove bubbles...
+ }
+ /**
+ *
+ * @param v Vertex to be reverted (1-level reverting)
+ */
+ public void revert(Vertex v){
+ //TODO: revert to initial status by extracting a complex vertex into its initial components
+ Path p=v.getSubComps();
+ if(!containsVertex(v)||p==null) return;
+ //add back all vertices first
+ for(Node n:p.getNodes())
+ addVertex(n.getVertex(), false);
+ //then add back all neighbor edges of this composite vertex
+ for(Edge e:v.getNeighbors())
+ addEdge(v,e.getTwo(),e.getDOne(),e.getDTwo());
+ //finally add back all edges from the path
+ Node prev=p.getStart();
+ for(Node cur:p.getNodes()){
+ if(cur==p.getStart())
+ continue;
+ else{
+ addEdge(prev.getVertex(),cur.getVertex(),prev.getDirection(),cur.getDirection());
+ prev=cur;
+ }
+ }
+
+ //remove the original composite vertex
+ removeVertex(v.getLabel());
+ }
+ public void printStats(){
+ System.out.println(vertices.size() + " vertices:");
+ for(String label:vertices.keySet())
+ System.out.print(label+", ");
+ System.out.println();
+ System.out.println(edges.size() + " edges:");
+ for(Edge e:edges.values()){
+ System.out.print(e.toString()+", ");
+ }
+ System.out.println();
+
+ }
+}
+
+
diff --git a/src/dev/java/japsadev/bio/hts/metagenome/Node.java b/src/dev/java/japsadev/bio/hts/metagenome/Node.java
new file mode 100644
index 0000000..d6c1036
--- /dev/null
+++ b/src/dev/java/japsadev/bio/hts/metagenome/Node.java
@@ -0,0 +1,34 @@
+package japsadev.bio.hts.metagenome;
+
+import japsa.seq.Alphabet;
+import japsa.seq.Sequence;
+
+public class Node{
+ Vertex v;
+ boolean dir;
+ Node(Vertex v, boolean dir){
+ this.v=v;
+ this.dir=dir;
+ }
+ public Vertex getVertex(){
+ return v;
+ }
+ public void setVertex(Vertex v){
+ this.v = v;
+ }
+ public boolean getDirection(){
+ return dir;
+ }
+ public void setDirection(boolean dir){
+ this.dir=dir;
+ }
+ public Node getRC(){
+ return new Node(v,!dir);
+ }
+ public Sequence getSeq(){
+ return dir?v.getSequence():Alphabet.DNA.complement(v.getSequence());
+ }
+ public String toString(){
+ return v.getLabel()+ (dir?"+":"-");
+ }
+}
diff --git a/src/dev/java/japsadev/bio/hts/metagenome/Path.java b/src/dev/java/japsadev/bio/hts/metagenome/Path.java
new file mode 100644
index 0000000..5474435
--- /dev/null
+++ b/src/dev/java/japsadev/bio/hts/metagenome/Path.java
@@ -0,0 +1,167 @@
+package japsadev.bio.hts.metagenome;
+
+import java.util.ArrayList;
+import japsa.seq.Alphabet;
+import japsa.seq.Sequence;
+import japsa.seq.SequenceBuilder;
+
+public class Path implements Comparable{
+ public static int kmer=127;
+ ArrayList nodes;
+ //Graph graph;
+ int length, deviation; //how this path differ to long read data (todo: by multiple-alignment??)
+ public Path(){
+ this.nodes=new ArrayList();
+ //graph=new Graph();
+ length=0;
+ deviation=Integer.MAX_VALUE;
+ }
+
+// public Path(Graph graph){
+// this();
+// associate(graph);
+// }
+ public static void setK(int kmer){
+ Path.kmer=kmer;
+ }
+ public Path(Path p){
+ this();
+ //this(p.graph);
+ for(Node node:p.nodes)
+ this.nodes.add(node);
+ this.length=p.length;
+ }
+ /*
+ * @param String: a path as in contigs.paths of SPAdes output
+ * For example: 1+,2-,3+
+ */
+ public Path(Graph graph, String paths){
+ this();
+ //this(graph);
+ paths=paths.replace(";", ""); //optimized it!
+ String[] comps = paths.split(",");
+ for(int i=0; i getNodes(){
+ return nodes;
+ }
+ public void setComp(ArrayList nodes){
+ this.length=0;
+ for(Node node:nodes)
+ this.addNode(node);
+ }
+
+ public Path rc(){
+ //Path retval=new Path(graph);
+ Path retval=new Path();
+ for(Node node:nodes){
+ retval.nodes.add(0, node.getRC());
+ }
+ return retval;
+ }
+
+ public String toString(){
+ return "P"+getID();
+ }
+ public String getID(){
+ String retval="";
+ for(Node node:nodes){
+ retval+=node.toString();
+ }
+ return retval.trim();
+ }
+ public Node removeLast(){
+ Node retval=nodes.remove(nodes.size()-1);
+ length-=retval.getSeq().length()-kmer;
+ return retval;
+ }
+
+ public Sequence spelling(){
+ SequenceBuilder seq = new SequenceBuilder(Alphabet.DNA16(), 1024*1024, this.toString());
+
+ for(int i=0;i neighborhood;
+ private String fullName, label;
+ private double coverage;
+ private Sequence seq=null;
+ //a sub-graph (path) is equivalent to a Vertex recursively
+ private Path components=null;
+ public Vertex(){
+ fullName="";
+ label="";
+ coverage=0;
+ this.neighborhood = new ArrayList();
+ this.seq=new Sequence(Alphabet.DNA5(), 0);
+ }
+ /**
+ *
+ * @param name The unique label associated with this Vertex
+ */
+ public Vertex(String name){
+ this.fullName=name;
+ this.label=extractID(name);
+ this.coverage=extractCoverage(name);
+ this.neighborhood = new ArrayList();
+ this.seq=new Sequence(Alphabet.DNA5(), 0);
+ }
+
+ public Vertex(String name, Sequence seq){
+ this(name);
+ this.seq = seq;
+ }
+
+ public Vertex(Path p){
+ this();
+ fullName=label=p.getID();
+ components=p;
+ coverage=p.averageCov();
+ }
+ /**
+ *
+ * @return If this vertex is unique (only based on its degree)
+ */
+ public boolean isUnique(){
+ return (getNeighborCount() <= 2);
+ }
+ public Path getSubComps(){
+ return components;
+ }
+ /**
+ * Extract ID from name: EDGE_xx_length_yy_cov_zz;
+ * @param name The name of Edge in assembly graph that correspond to this Vertex
+ */
+ private String extractID(String name){
+ return name.split("_")[1];
+ }
+ /**
+ * Extract coverage from name: EDGE_xx_length_yy_cov_zz;
+ * @param name The name of Edge in assembly graph that correspond to this Vertex
+ * @return coverage value
+ */
+ private double extractCoverage(String name){
+ double res=1.0;
+ try{
+ res=Double.parseDouble(name.split("_")[5]);
+ }catch(Exception e){
+ e.printStackTrace();
+ }
+ return res;
+ }
+ public double getCoverage(){
+ return coverage;
+ }
+ /**
+ * This method adds an Edge to the incidence neighborhood of this graph iff
+ * the edge is not already present.
+ *
+ * @param edge The edge to add
+ */
+ public void addNeighbor(Edge edge){
+ if(this.neighborhood.contains(edge)){
+ return;
+ }
+ this.neighborhood.add(edge);
+ }
+
+
+ /**
+ *
+ * @param other The edge for which to search
+ * @return true iff other is contained in this.neighborhood
+ */
+ public boolean containsNeighbor(Edge other){
+ return this.neighborhood.contains(other);
+ }
+
+ /**
+ *
+ * @param index The index of the Edge to retrieve
+ * @return Edge The Edge at the specified index in this.neighborhood
+ */
+ public Edge getNeighbor(int index){
+ return this.neighborhood.get(index);
+ }
+
+
+ /**
+ *
+ * @param index The index of the edge to remove from this.neighborhood
+ * @return Edge The removed Edge
+ */
+ Edge removeNeighbor(int index){
+ return this.neighborhood.remove(index);
+ }
+
+ /**
+ *
+ * @param e The Edge to remove from this.neighborhood
+ */
+ public void removeNeighbor(Edge e){
+ this.neighborhood.remove(e);
+ }
+
+
+ /**
+ *
+ * @return int The number of neighbors of this Vertex
+ */
+ public int getNeighborCount(){
+ return this.neighborhood.size();
+ }
+ /**
+ *
+ * @return String The label of this Vertex
+ */
+ public String getLabel(){
+ return this.label;
+ }
+ /**
+ *
+ * @return String The full name of this Vertex
+ */
+ public String getName(){
+ return this.fullName;
+ }
+ /**
+ *
+ * @param Sequence A sequence
+ */
+ public void setSequence(Sequence seq){
+ this.seq = seq;
+ }
+ /**
+ *
+ * @return Sequence The sequence of this Vertex
+ */
+ public Sequence getSequence(){
+ return this.seq;
+ }
+ /**
+ *
+ * @return String A String representation of this Vertex
+ */
+ public String toString(){
+ return "Vertex " + label;
+ }
+
+ /**
+ *
+ * @return The hash code of this Vertex's label
+ */
+ public int hashCode(){
+ return this.label.hashCode();
+ }
+
+ /**
+ *
+ * @param other The object to compare
+ * @return true iff other instanceof Vertex and the two Vertex objects have the same label
+ */
+ public boolean equals(Object other){
+ if(!(other instanceof Vertex)){
+ return false;
+ }
+
+ Vertex v = (Vertex)other;
+ return this.label.equals(v.label);
+ }
+
+ /**
+ *
+ * @return ArrayList A copy of this.neighborhood. Modifying the returned
+ * ArrayList will not affect the neighborhood of this Vertex
+ */
+ public ArrayList getNeighbors(){
+ return new ArrayList(this.neighborhood);
+ }
+
+}
+
+
diff --git a/src/dev/java/japsadev/bio/hts/newscarf/Alignment.java b/src/dev/java/japsadev/bio/hts/newscarf/Alignment.java
new file mode 100644
index 0000000..41d04fe
--- /dev/null
+++ b/src/dev/java/japsadev/bio/hts/newscarf/Alignment.java
@@ -0,0 +1,157 @@
+package japsadev.bio.hts.newscarf;
+
+import java.util.ArrayList;
+
+import htsjdk.samtools.Cigar;
+import htsjdk.samtools.CigarElement;
+import htsjdk.samtools.SAMRecord;
+import japsa.seq.Sequence;
+
+public class Alignment implements Comparable {
+ public final static int OVERHANG_THRES=1000;
+ public final static int GOOD_QUAL=60;
+
+ public static int MIN_QUAL=30; //TODO: reduce this by doing self-correction
+
+ int alignLength, quality;
+
+ public String readID;
+ BidirectedNode node;
+
+ public int refStart, refEnd; //1-based position on ref of the start and end of the alignment
+
+ //Position on read of the start and end of the alignment (using the direction of read)
+ //readStart map to refStart, readEnd map to refEnd.
+ //readStart < readEnd if strand = true, else readStart > readEnd
+ public int readStart = 0, readEnd = 0;
+
+ //read length
+ public int readLength = 0;
+
+ public boolean strand = true;//positive
+ public boolean prime = true;//primary alignment
+ public boolean goodMargin = false;
+ public boolean useful = false;
+ //SAMRecord mySam;
+
+ ArrayList alignmentCigars = new ArrayList();
+
+
+ //public int readLeft, readRight, readAlign, refLeft, refRight, refAlign;
+ //left and right are in the direction of the reference sequence
+
+ public Alignment(SAMRecord sam, BidirectedNode node) {
+// readID = Integer.parseInt(sam.getReadName().split("_")[0]);
+ readID = sam.getReadName();
+ quality = sam.getMappingQuality();
+ prime=!sam.getNotPrimaryAlignmentFlag();
+ this.node = node;
+
+ refStart = sam.getAlignmentStart();
+ refEnd = sam.getAlignmentEnd();
+
+ Cigar cigar = sam.getCigar();
+ boolean enterAlignment = false;
+ //////////////////////////////////////////////////////////////////////////////////
+
+ for (final CigarElement e : cigar.getCigarElements()) {
+ alignmentCigars.add(e);
+ final int length = e.getLength();
+ switch (e.getOperator()) {
+ case H :
+ case S :
+ case P : //pad is a kind of clipped
+ if (enterAlignment)
+ readEnd = readLength;
+ readLength += length;
+ break; // soft clip read bases
+ case I :
+ case M :
+ case EQ :
+ case X :
+ if (!enterAlignment){
+ readStart = readLength + 1;
+ enterAlignment = true;
+ }
+ readLength += length;
+ break;
+ case D :
+ case N :
+ if (!enterAlignment){
+ readStart = readLength + 1;
+ enterAlignment = true;
+ }
+ break;
+ default : throw new IllegalStateException("Case statement didn't deal with cigar op: " + e.getOperator());
+ }//case
+ }//for
+ if (readEnd == 0)
+ readEnd = readLength;
+ //these temporary variable to determine usefulness
+ int readLeft = readStart -1;
+ int readRight = readLength - readEnd;
+
+ int refLeft = refStart - 1;
+ int refRight = ((Sequence) node.getAttribute("seq")).length() - refEnd;
+
+ alignLength = refEnd + 1 - refStart;
+ if (sam.getReadNegativeStrandFlag()){
+ strand = false;
+ //need to convert the alignment position on read the correct direction
+ readStart = 1 + readLength - readStart;
+ readEnd = 1 + readLength - readEnd;
+ }
+
+ if (
+ (readLeft < OVERHANG_THRES || refLeft < OVERHANG_THRES) &&
+ (readRight < OVERHANG_THRES || refRight < OVERHANG_THRES)
+ )
+ goodMargin=true;
+
+ if ( goodMargin
+ //prime && //TODO: should be separated as another attribute for further consideration??
+ && alignLength > BidirectedGraph.getKmerSize() //FIXME:
+ && quality >= MIN_QUAL
+ )
+ useful = true;
+
+ }
+
+
+ public int readAlignmentStart(){
+ return Math.min(readStart,readEnd);
+
+ }
+
+ public int readAlignmentEnd(){
+ return Math.max(readStart,readEnd);
+ }
+
+ public String toString() {
+ return node.getAttribute("name")
+ + ": " + refStart
+ + " -> " + refEnd
+ + " / " + ((Sequence) node.getAttribute("seq")).length()
+ + " map to "
+ + readID
+ + ": " + readStart
+ + " -> " + readEnd
+ + " / " + readLength
+ + ", strand: " + (strand?"+":"-")
+ + ", prime: " + (prime?"yes":"no")
+ + ", margin: " + (goodMargin?"good":"bad");
+ }
+ public static boolean isOverlap(Alignment alg1, Alignment alg2){
+ boolean retval=false;
+
+
+ return retval;
+ }
+ /* (non-Javadoc)
+ * @see java.lang.Comparable#compareTo(java.lang.Object)
+ */
+ @Override
+ public int compareTo(Alignment o) {
+ return readAlignmentStart() - o.readAlignmentStart();
+ }
+}
diff --git a/src/dev/java/japsadev/bio/hts/newscarf/BidirectedEdge.java b/src/dev/java/japsadev/bio/hts/newscarf/BidirectedEdge.java
new file mode 100644
index 0000000..0e262e5
--- /dev/null
+++ b/src/dev/java/japsadev/bio/hts/newscarf/BidirectedEdge.java
@@ -0,0 +1,176 @@
+package japsadev.bio.hts.newscarf;
+
+import org.graphstream.graph.Edge;
+import org.graphstream.graph.Node;
+import org.graphstream.graph.implementations.AbstractEdge;
+import org.graphstream.graph.implementations.AbstractNode;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import japsa.seq.Sequence;
+
+public class BidirectedEdge extends AbstractEdge{
+ protected boolean dir0, dir1;//true: outward, false: inward
+ private int length=-BidirectedGraph.getKmerSize();//length of the edge (distance between tips of seqs represented by 2 nodes)
+ private BidirectedPath path; //path that represents this edge. Should be a list if metagenome/multiploidy
+
+
+ private static final Logger LOG = LoggerFactory.getLogger(BidirectedEdge.class);
+
+ protected BidirectedEdge(String id, AbstractNode src, AbstractNode dst, boolean dir0, boolean dir1) {
+ // id fuck off!!! we'll make one for ourselves
+ this(src,dst,dir0,dir1);
+ }
+ protected BidirectedEdge(AbstractNode src, AbstractNode dst, boolean dir0, boolean dir1) {
+ //this(createID(src,dst,dir0,dir1), src, dst);
+
+ super(createID(src,dst,dir0,dir1),src,dst,false);
+ this.dir0=dir0;
+ this.dir1=dir1;
+
+ path = new BidirectedPath();
+ path.setRoot(src);
+ path.add(this);
+ }
+
+ /* param id must have the form %s[o/i]%s[o/i], e.g. [1+2-]o[3]i
+ * the constructor will translate the id to the direction property
+ * of the bidirected edge
+ */
+// protected BidirectedEdge(String id, AbstractNode source, AbstractNode dest){
+// super(id, source, dest, false);
+// String pattern = "^\\[([0-9\\+\\-]*)\\]([oi])\\[([0-9\\+\\-]*)\\]([oi])$";
+// // Create a Pattern object
+// Pattern r = Pattern.compile(pattern);
+// // Now create matcher object.
+// Matcher m = r.matcher(id);
+// String leftID, rightID,
+// leftDir, rightDir;
+// if(m.find()){
+// leftID=m.group(1);
+// leftDir=m.group(2);
+// rightID=m.group(3);
+// rightDir=m.group(4);
+// if(source.getId().equals(leftID)){
+// dir0=(leftDir.equals("o")?true:false);
+// dir1=(rightDir.equals("o")?true:false);
+// }else if(source.getId().equals(rightID)){
+// dir0=(rightDir.equals("o")?true:false);
+// dir1=(leftDir.equals("o")?true:false);
+// }else{
+// System.err.println("ID does not match");
+// System.exit(1);
+// }
+// } else{
+// System.err.println("Illegal ID for a bidirected edge (id must have the form node_id[o/i]node_id[o/i])");
+// System.exit(1);
+// }
+//
+// }
+
+ /*
+ * To adapt Graph class from GraphStream library (called by newInstance())
+ */
+ protected BidirectedEdge(String id, AbstractNode source, AbstractNode dest){
+ super(id, source, dest, false);
+
+ assert (source.getGraph() == dest.getGraph()):"Nodes come from different graph " + source.getGraph().getId() + " and " + dest.getGraph().getId();
+ BidirectedGraph g = (BidirectedGraph) source.getGraph();
+ path = new BidirectedPath(g,id);
+ BidirectedNode n0 = (BidirectedNode) path.getRoot(),
+ n1 = (BidirectedNode) path.peekNode();
+
+ BidirectedEdge firstEdge = (BidirectedEdge) path.getEdgePath().get(0),
+ lastEdge = (BidirectedEdge) path.peekEdge();
+ if(n0.getId().equals(source.getId())){
+ dir0 = firstEdge.getDir(n0);
+ dir1 = lastEdge.getDir(n1);
+ }else if(n0.getId().equals(dest.getId())){
+ dir1 = firstEdge.getDir(n0);
+ dir0 = lastEdge.getDir(n1);
+ path = path.getReversedComplemented();
+ }else{
+ LOG.error("Path {} conflicts with src={} dst={}!", id, source.getId(), dest.getId());
+ System.exit(1);
+ }
+ }
+
+// public static String createID(AbstractNode source, AbstractNode dst, boolean dir0, boolean dir1){
+// String srcDes = "["+source.getId()+"]"+(dir0 ? "o":"i"),
+// dstDes = "["+dst.getId()+"]"+(dir1 ? "o":"i");
+// if(srcDes.compareTo(dstDes)<0)
+// return String.format("%s%s", srcDes, dstDes);
+// else
+// return String.format("%s%s", dstDes, srcDes);
+// }
+
+
+ //should have smt like this
+ public static BidirectedEdge makeEdge (AbstractNode src, AbstractNode dst, BidirectedPath path) {
+ return new BidirectedEdge(path.getId(), src, dst);
+ }
+
+// public BidirectedEdge getReversedComplemented(){
+// BidirectedEdge retval = makeEdge(getNode1(),getNode0(),path.getReversedComplemented());
+//
+// return retval;
+// }
+
+ public void setPath(BidirectedPath path){
+ this.path = path;
+ //here set the new length due to that path
+ Node curNode = path.getRoot();
+ for(Edge e:path.getEdgePath()){
+ curNode = e.getOpposite(curNode);
+ if(curNode == path.peekNode())
+ break;
+ else{
+ length+=((Sequence)curNode.getAttribute("seq")).length()-BidirectedGraph.getKmerSize();
+ }
+ }
+ }
+ public BidirectedPath getPath(){
+ return path;
+ }
+
+ @Override
+ public String toString() {
+ return String.format("%s:%s-%s-%s-%s", getId(), source, (dir0?">":"<"), (dir1?"<":">"), target);
+ }
+
+ public void setDir0(boolean dir){
+ this.dir0=dir;
+ }
+ public void setDir1(boolean dir){
+ this.dir1=dir;
+ }
+ public boolean getDir0(){
+ return dir0;
+ }
+ public boolean getDir1(){
+ return dir1;
+ }
+ //if kmer!=127 we need to set initial lengths of edges again (easy+tedious way)
+ public void changeKmerSize(int kmer){
+ length=-kmer;
+ }
+ public int getLength(){
+ return length;
+ }
+
+ //TODO: include the case of tandem repeats
+ public boolean getDir(AbstractNode node){
+ assert node==getSourceNode()||node==getTargetNode():"Node " + node.getId() + " does not belong to this edge src=" + getSourceNode().getId() + " dst=" + getTargetNode().getId();
+ return node==getSourceNode()?getDir0():getDir1();
+ }
+
+
+ public static String createID(AbstractNode source, AbstractNode dst, boolean dir0, boolean dir1){
+ String srcDes = source.getId(),
+ dstDes = dst.getId();
+ if(srcDes.compareTo(dstDes)<0)
+ return String.format("%s%s,%s%s", srcDes, (dir0?"+":"-"), dstDes, (dir1?"-":"+"));
+ else
+ return String.format("%s%s,%s%s", dstDes, (dir1?"+":"-"), srcDes, (dir0?"-":"+"));
+ }
+}
diff --git a/src/dev/java/japsadev/bio/hts/newscarf/BidirectedGraph.java b/src/dev/java/japsadev/bio/hts/newscarf/BidirectedGraph.java
new file mode 100644
index 0000000..a3556d3
--- /dev/null
+++ b/src/dev/java/japsadev/bio/hts/newscarf/BidirectedGraph.java
@@ -0,0 +1,586 @@
+package japsadev.bio.hts.newscarf;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+
+import org.graphstream.graph.*;
+import org.graphstream.graph.implementations.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import japsa.seq.Alphabet;
+import japsa.seq.FastaReader;
+import japsa.seq.Sequence;
+import japsa.seq.SequenceReader;
+
+
+public class BidirectedGraph extends AdjacencyListGraph{
+ static int kmer=127;
+ static final int TOLERATE=500;
+ static final int D_LIMIT=10000;
+ static final int S_LIMIT=50;
+
+ private static final Logger LOG = LoggerFactory.getLogger(BidirectedGraph.class);
+
+ // *** Constructors ***
+ /**
+ * Creates an empty graph.
+ *
+ * @param id
+ * Unique identifier of the graph.
+ * @param strictChecking
+ * If true any non-fatal error throws an exception.
+ * @param autoCreate
+ * If true (and strict checking is false), nodes are
+ * automatically created when referenced when creating a edge,
+ * even if not yet inserted in the graph.
+ * @param initialNodeCapacity
+ * Initial capacity of the node storage data structures. Use this
+ * if you know the approximate maximum number of nodes of the
+ * graph. The graph can grow beyond this limit, but storage
+ * reallocation is expensive operation.
+ * @param initialEdgeCapacity
+ * Initial capacity of the edge storage data structures. Use this
+ * if you know the approximate maximum number of edges of the
+ * graph. The graph can grow beyond this limit, but storage
+ * reallocation is expensive operation.
+ */
+ public BidirectedGraph(String id, boolean strictChecking, boolean autoCreate,
+ int initialNodeCapacity, int initialEdgeCapacity) {
+ super(id, strictChecking, autoCreate);
+ // All we need to do is to change the node & edge factory
+ setNodeFactory(new NodeFactory() {
+ public BidirectedNode newInstance(String id, Graph graph) {
+ return new BidirectedNode((AbstractGraph) graph, id);
+ }
+ });
+
+ setEdgeFactory(new EdgeFactory() {
+ public BidirectedEdge newInstance(String id, Node src, Node dst, boolean directed) { //stupid??
+ return new BidirectedEdge(id, (AbstractNode)src, (AbstractNode)dst);
+ }
+ });
+
+ }
+
+ /**
+ * Creates an empty graph with default edge and node capacity.
+ *
+ * @param id
+ * Unique identifier of the graph.
+ * @param strictChecking
+ * If true any non-fatal error throws an exception.
+ * @param autoCreate
+ * If true (and strict checking is false), nodes are
+ * automatically created when referenced when creating a edge,
+ * even if not yet inserted in the graph.
+ */
+ public BidirectedGraph(String id, boolean strictChecking, boolean autoCreate) {
+ this(id, strictChecking, autoCreate, DEFAULT_NODE_CAPACITY,
+ DEFAULT_EDGE_CAPACITY);
+ }
+
+ /**
+ * Creates an empty graph with strict checking and without auto-creation.
+ *
+ * @param id
+ * Unique identifier of the graph.
+ */
+ public BidirectedGraph(String id) {
+ this(id, true, false, 10000, 100000);
+ }
+
+ public BidirectedGraph(){
+ this("Assembly graph",true,false, 10000, 100000);
+ setKmerSize(127);//default kmer size used by SPAdes to assembly MiSeq data
+ }
+
+ //just to make AbstractGraph.removeEdge(AbstractEdge, boolean, boolean, boolean) visible
+ protected void removeEdgeDup(AbstractEdge edge, boolean graphCallback,
+ boolean sourceCallback, boolean targetCallback) {
+ this.removeEdge(edge, graphCallback, sourceCallback, targetCallback);
+
+ }
+ protected BidirectedEdge addEdge(AbstractNode src, AbstractNode dst, boolean dir0, boolean dir1){
+ BidirectedEdge tmp = addEdge(BidirectedEdge.createID(src, dst, dir0, dir1), src, dst);
+// String s1=tmp.toString();
+// //tmp.setDir0(dir0);
+// //tmp.setDir1(dir1);
+// if(!s1.equals(tmp.toString()))
+// System.out.println(s1 + " ---> " + tmp);
+ return tmp;
+ }
+
+ public String printEdgesOfNode(BidirectedNode node){
+ Iterator ins = getNode(node.getId()).getEnteringEdgeIterator(),
+ outs = getNode(node.getId()).getLeavingEdgeIterator();
+ String retval=node.getId() + ": IN={";
+ while(ins.hasNext())
+ retval += ins.next().getId() + " ";
+ retval+="}; OUT={";
+ while(outs.hasNext())
+ retval += outs.next().getId() + " ";
+ retval+="}";
+ return retval;
+ }
+ /**********************************************************************************
+ * ****************************Algorithms go from here*****************************
+ */
+ //TODO: read from ABySS assembly graph (graph of final contigs, not like SPAdes)
+ private static double aveCov; //TODO: replaced with more accurate method
+
+ public void loadFromFile(String graphFile) throws IOException{
+ setAutoCreate(true);
+ setStrict(false);
+ //1. next iterate over again to read the connections
+ SequenceReader reader = new FastaReader(graphFile);
+ Sequence seq;
+ int shortestLen = 10000;
+ int totReadLen=0, totGenomeLen=0;
+ while ((seq = reader.nextSequence(Alphabet.DNA())) != null){
+ if(seq.length() 1){
+ String[] nbList = adjList[1].split(",");
+ for(int i=0; i < nbList.length; i++){
+ String neighbor = nbList[i];
+ // note that the direction is read reversely in the dest node
+ boolean dir1=neighbor.contains("'")?true:false;
+ neighbor=neighbor.replaceAll("[^a-zA-Z0-9_.]", "").trim();
+
+ String neighborID = neighbor.split("_")[1];
+ AbstractNode nbr = addNode(neighborID);
+ nbr.setAttribute("cov", Double.parseDouble(neighbor.split("_")[5]));
+
+ addEdge(node, nbr, dir0, dir1);
+ //e.addAttribute("ui.label", e.getId());
+ }
+ }
+
+ }
+
+ //rough estimation of kmer used
+ if((shortestLen-1) != getKmerSize()){
+ setKmerSize(shortestLen-1);
+ for(Edge e:getEdgeSet()){
+ ((BidirectedEdge)e).changeKmerSize(kmer);
+ }
+ }
+
+ aveCov = totReadLen/totGenomeLen;
+ reader.close();
+
+ }
+
+
+ public static int getKmerSize(){
+ return BidirectedGraph.kmer;
+ }
+ public static void setKmerSize(int kmer){
+ BidirectedGraph.kmer=kmer;
+ }
+
+
+ /*
+ * Read paths from contigs.path and reduce the graph
+ */
+// public void readPathsFromSpades(String paths) throws IOException{
+//
+// BufferedReader pathReader = new BufferedReader(new FileReader(paths));
+//
+// String s;
+// //Read contigs from contigs.paths and refer themselves to contigs.fasta
+// boolean flag=false;
+// while((s=pathReader.readLine()) != null){
+// if(s.contains("NODE")){
+// flag=s.contains("'")?false:true;
+// continue;
+// }else if(flag){
+// BidirectedPath path=new BidirectedPath(this, s);
+//// System.out.println("Using path to reduce: " + path.getId());
+//// System.out.println("Before reduce => Node: " + getNodeCount() + " Edge: " + getEdgeCount());
+//
+//// AbstractNode comp=
+//
+// this.reduce(path);
+//
+//
+//// if(comp!=null){
+//// System.out.println("Reverting node: " + comp.getId());
+//// revert(comp);
+//// System.out.println("After revert => Node: " + getNodeCount() + " Edge: " + getEdgeCount());
+////
+//// }
+// }
+//
+//
+// }
+// pathReader.close();
+// }
+
+// /**
+// *
+// * @param p Path to be grouped as a virtually vertex
+// */
+// public AbstractNode reduce(BidirectedPath p){
+// //do nothing if the path has only one node
+// if(p==null || p.getEdgeCount()<1)
+// return null;
+//
+// //now only work with path containing more than 2 unique nodes
+// int uniqueCount=0;
+// for(Node n:p.getEachNode()){
+// if(isUnique(n))
+// uniqueCount++;
+// }
+// if(uniqueCount < 2)
+// {
+// System.out.println("ignore path with less than 1 unique contig!");
+// return null;
+// }
+// //add the new composite Node to the graph
+// //compare id from sense & anti-sense to get the unique one
+// AbstractNode comp = addNode(p.getId().compareTo(p.getReversedComplemented().getId())>0?
+// p.getReversedComplemented().getId():p.getId());
+//
+// comp.addAttribute("path", p);
+// comp.addAttribute("seq", p.spelling());
+// comp.addAttribute("ui.label", comp.getId());
+// comp.setAttribute("ui.style", "text-offset: -10;");
+// comp.setAttribute("ui.class", "marked");
+// try { Thread.sleep(100); } catch (Exception e) {}
+//
+// //store unique nodes on p for removing
+// ArrayList tobeRemoved=new ArrayList();
+// for(Node n:p.getEachNode()){
+// if(isUnique(n))
+// tobeRemoved.add(n.getId());
+// }
+// BidirectedNode start = (BidirectedNode) p.getRoot(),
+// end = (BidirectedNode) p.peekNode();
+// boolean startDir = ((BidirectedEdge) p.getEdgePath().get(0)).getDir(start),
+// endDir = ((BidirectedEdge) p.peekEdge()).getDir(end);
+// //set neighbors of the composite Node
+// Iterator startEdges = startDir?start.getEnteringEdgeIterator():start.getLeavingEdgeIterator(),
+// endEdges = endDir?end.getEnteringEdgeIterator():end.getLeavingEdgeIterator();
+// while(startEdges.hasNext()){
+// BidirectedEdge e = (BidirectedEdge) startEdges.next();
+// BidirectedNode opNode = e.getOpposite(start);
+// boolean opDir = e.getDir(opNode);
+// //Edge tmp=
+// addEdge(BidirectedEdge.createID(comp, opNode, false, opDir), comp, opNode);//always into start node
+// //System.out.println("From " + start.getId() + ": " + tmp.getId() + " added!");
+// }
+//
+// while(endEdges.hasNext()){
+// BidirectedEdge e = (BidirectedEdge) endEdges.next();
+// BidirectedNode opNode = e.getOpposite(end);
+// boolean opDir = e.getDir(opNode);
+// //Edge tmp=
+// addEdge(BidirectedEdge.createID(comp, opNode, true, opDir), comp, opNode);//always out of end node
+//
+// //System.out.println("From " + end.getId() + ": " + tmp.getId() + " added!");
+//
+// }
+//
+// for(String nLabel:tobeRemoved){
+// //System.out.println("About to remove " + nLabel);
+// removeNode(nLabel);
+// }
+//
+// //TODO: remove bubbles...
+// return comp;
+// }
+//
+//
+// /**
+// *
+// * @param v Node to be reverted (1-level reverting)
+// */
+// public void revert(AbstractNode v){
+// System.out.println("Reverting...");
+// Path p=v.getAttribute("path");
+// if(p==null) return;
+//
+// BidirectedNode start = (BidirectedNode) p.getRoot(),
+// end = (BidirectedNode) p.peekNode();
+// boolean startDir = ((BidirectedEdge) p.getEdgePath().get(0)).getDir(start),
+// endDir = ((BidirectedEdge) p.peekEdge()).getDir(end);
+//
+// //add back all neighbor edges of this composite vertex
+// Iterator startEdges = v.getEnteringEdgeIterator(),
+// endEdges = v.getLeavingEdgeIterator();
+// //add back all nodes from the path
+// for(Node n:p.getNodeSet()){
+// if(getNode(n.getId())!=null)
+// continue;
+// Node tmp = addNode(n.getId());
+// tmp.addAttribute("seq", (japsa.seq.Sequence)n.getAttribute("seq"));
+// tmp.addAttribute("name", (String)n.getAttribute("name"));
+// tmp.addAttribute("path", (BidirectedPath)n.getAttribute("path"));
+//
+// //System.out.println("Adding back edge "+tmp.getId());
+// }
+// while(startEdges.hasNext()){
+// BidirectedEdge e = (BidirectedEdge) startEdges.next();
+// BidirectedNode opNode = e.getOpposite(v);
+// boolean opDir = e.getDir(opNode);
+// //Edge tmp =
+// addEdge(BidirectedEdge.createID(start, opNode, !startDir, opDir), start, opNode);
+// //System.out.println("Adding back edge "+tmp.getId());
+// }
+//
+// while(endEdges.hasNext()){
+// BidirectedEdge e = (BidirectedEdge) endEdges.next();
+// BidirectedNode opNode = e.getOpposite(v);
+// boolean opDir = e.getDir(opNode);
+// //Edge tmp =
+// addEdge(BidirectedEdge.createID(end, opNode, !endDir, opDir), end, opNode);
+// //System.out.println("Adding back edge "+tmp.getId());
+// }
+//
+// //add back all edges from the path
+// for(Edge e:p.getEdgeSet()){
+// //Edge tmp =
+// addEdge(e.getId(), e.getSourceNode().getId(), e.getTargetNode().getId());
+// //System.out.println("Adding back edge "+tmp.getId());
+// }
+// //finally remove the composite node
+// removeNode(v);
+// }
+
+ /*
+ * This function deduces a full path in this graph between 2 nodes aligned with a long read
+ */
+ protected ArrayList getClosestPath(Alignment from, Alignment to, int distance){
+ BidirectedNode srcNode = from.node,
+ dstNode = to.node;
+ System.out.println("Looking for path between " + srcNode.getId() + " to " + dstNode.getId() + " with distance " + distance);
+ BidirectedPath tmp = new BidirectedPath();
+ ArrayList possiblePaths = new ArrayList(),
+ retval = new ArrayList();
+ tmp.setRoot(srcNode);
+
+ //traverse(tmp, dest, retval, distance+source.getSeq().length()+dest.getSeq().length());
+ traverse(tmp, dstNode, possiblePaths, distance, from.strand, to.strand, 0);
+ //only get the best ones
+ if(possiblePaths.isEmpty()){
+ //if a path couldn't be found between 2 dead-ends but alignments quality are insane high
+ //FIXME: return a pseudo path having an nanopore edge
+ if(isUnique(srcNode) && isUnique(dstNode) && srcNode.getDegree() == 1 && dstNode.getDegree()==1 &&
+ Math.min(from.quality, to.quality) >= Alignment.GOOD_QUAL)
+ {
+ BidirectedEdge pseudoEdge = new BidirectedEdge(srcNode, dstNode, from.strand, to.strand);
+ //TODO: save the corresponding content of long reads to this edge
+ pseudoEdge.setAttribute("pseudo", distance);
+ tmp.add(pseudoEdge);
+ retval.add(tmp);
+ System.out.println("pseudo path from " + srcNode.getId() + " to " + dstNode.getId());
+// HybridAssembler.promptEnterKey();
+ return retval;
+ }else
+ return null;
+ }
+ double bestScore=possiblePaths.get(0).getDeviation();
+ for(int i=0;i curResult,
+ int distance, boolean srcDir, boolean dstDir, int stepCount)
+ {
+ //stop if it's going too far!
+ if(stepCount >= S_LIMIT)
+ return;
+
+ BidirectedNode currentNode=(BidirectedNode) path.peekNode();
+ BidirectedEdge currentEdge;
+ boolean curDir;//direction to the next node, = ! previous'
+
+ Iterator ite;
+ if(path.size() <= 1) //only root
+ curDir=srcDir;//re-check
+ else{
+ currentEdge = (BidirectedEdge) path.peekEdge();
+ curDir = !((BidirectedEdge) currentEdge).getDir(currentNode);
+ }
+ ite=curDir?currentNode.getLeavingEdgeIterator():currentNode.getEnteringEdgeIterator();
+
+ while(ite.hasNext()){
+ BidirectedEdge e = ite.next();
+ path.add(e);
+
+ int toTarget=Math.abs(distance-e.getLength());
+ if(e.getOpposite(currentNode)==dst && e.getDir(dst)!=dstDir && toTarget < TOLERATE){
+ BidirectedPath curPath=curResult.isEmpty()?new BidirectedPath():curResult.get(0), //the best path saved among all possible paths from the list curResult
+ tmpPath=new BidirectedPath(path);
+ tmpPath.setDeviation(toTarget);
+ if( toTarget < curPath.getDeviation() )
+ curResult.add(0, tmpPath);
+ else
+ curResult.add(tmpPath);
+
+ System.out.println("Hit added: "+path.getId()+"(candidate deviation: "+toTarget+")");
+ }else{
+ int newDistance = distance - ((Sequence) e.getOpposite(currentNode).getAttribute("seq")).length() - e.getLength();
+// System.out.println("adding edge: " + e.getId() + " length=" + e.getLength() +" -> distance=" + newDistance);
+ if (newDistance - e.getLength() < -TOLERATE){
+ System.out.println("Stop go to edge " + e.getPath() + " from path with distance "+newDistance+" already! : "+path.getId());
+ }else
+ traverse(path, dst, curResult, newDistance, srcDir, dstDir, stepCount++);
+ }
+ path.popNode();
+
+ }
+ }
+
+ /*
+ * Find a path based on list of Alignments
+ */
+ public BidirectedPath pathFinding(ArrayList alignments) {
+ if(alignments.size()<=1)
+ return null;
+
+ System.out.println("=================================================");
+ for(Alignment alg:alignments)
+ System.out.println("\t"+alg.toString());
+ System.out.println("=================================================");
+ //First bin the alignments into different overlap regions
+ //only considering useful alignments
+ HashMap allAlignments = new HashMap();
+ ArrayList joinPaths = new ArrayList();
+
+ for(Alignment alg:alignments){
+ if(alg.useful){
+ Range range = new Range(alg.readAlignmentStart(),alg.readAlignmentEnd());
+ allAlignments.put(range, alg);
+ }
+ }
+ //now get all the bin in order
+ List baseRanges=new ArrayList(allAlignments.keySet());
+ List> rangeGroups = MetaRange.getOverlappingGroups(baseRanges);
+
+ if(rangeGroups.size() < 2)
+ return null;
+
+ System.out.println("Binning ranges: ");
+ for(List group : rangeGroups){
+ System.out.println(group);
+ }
+
+ //iterate all alignments in adjacent bins to find correct path
+ ArrayList curGroup = new ArrayList(),
+ nextGroup = new ArrayList();
+ List curRanges = rangeGroups.get(0);
+ for(Range r:curRanges)
+ curGroup.add(allAlignments.get(r));
+
+ for(int i=1; i nextRanges = rangeGroups.get(i);
+ nextGroup = new ArrayList();
+
+ for(Range r:nextRanges)
+ nextGroup.add(allAlignments.get(r));
+
+ ArrayList allPaths = new ArrayList();
+
+
+ for(Alignment curAlg:curGroup){
+ for(Alignment nextAlg:nextGroup){
+ int distance = nextAlg.readAlignmentStart()-curAlg.readAlignmentEnd();
+ if(distance bridges = getClosestPath(curAlg, nextAlg, distance);
+ if(bridges!=null)
+ allPaths.addAll(bridges);
+ }
+ }
+ }
+ //join all paths from previous to the new ones
+ //TODO:optimize it
+ if(joinPaths.isEmpty())
+ joinPaths=allPaths;
+ else{
+ System.out.println("=====Current list of paths: " + joinPaths);
+ System.out.println("=====Join to list of paths: " + allPaths);
+
+ for(BidirectedPath p:joinPaths)
+ for(BidirectedPath e:allPaths)
+ p.join(e);
+
+ }
+ curGroup=nextGroup;
+ }
+
+ for(BidirectedPath path:joinPaths)
+ System.out.println("A member Path: " + path.toString() + " deviation: " + path.getDeviation());
+ if(joinPaths.isEmpty())
+ return null;
+ else
+ return joinPaths.get(0);
+ }
+
+ /*
+ * Important function: determine if a node is able to be removed or not
+ * TODO: re-implement it based on statistics of coverage also
+ * 1. pick the least coverage ones among a path as the base
+ * 2. global base
+ */
+ public static boolean isUnique(Node node){
+ boolean res = false;
+
+ if(node.getDegree()<=2){ // not always true, e.g. unique node in a repetitive component
+ Sequence seq = node.getAttribute("seq");
+ if(seq.length() > 10000 || node.getNumber("cov")/aveCov < 1.3)
+ res=true;
+ }
+
+// if(res)
+// LOG.info(node.getAttribute("name") + " with coverage " + node.getNumber("cov") + " is a marker!");
+// else
+// LOG.info(node.getAttribute("name") + " with coverage " + node.getNumber("cov") + " is NOT a marker!");
+
+ return res;
+ }
+ /* need more powerful function:
+ * A-statistics?
+ * Mixture of Poisson distributions??? Kalman filter idea...
+ */
+// public static boolean isUnique(Node node, double cov){
+// boolean res = false;
+// if(node.getDegree()<=2 || Math.abs(node.getAttribute("cov" )) < cov){
+//// if(((Sequence)node.getAttribute("seq")).length() > 5000 || node.getDegree()==0)
+// res=true;
+// }
+//
+// return res;
+// }
+}
diff --git a/src/dev/java/japsadev/bio/hts/newscarf/BidirectedNode.java b/src/dev/java/japsadev/bio/hts/newscarf/BidirectedNode.java
new file mode 100644
index 0000000..0da20a4
--- /dev/null
+++ b/src/dev/java/japsadev/bio/hts/newscarf/BidirectedNode.java
@@ -0,0 +1,301 @@
+package japsadev.bio.hts.newscarf;
+
+import java.security.AccessControlException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+import org.graphstream.graph.Edge;
+import org.graphstream.graph.Node;
+import org.graphstream.graph.implementations.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+/**
+ * Similar to {@link AdjacencyListNode}
+ *
+ */
+public class BidirectedNode extends AbstractNode {
+ protected int COPY=1;
+
+ protected static final int INITIAL_EDGE_CAPACITY;
+ protected static final double GROWTH_FACTOR = 1.1;
+
+ private static final Logger LOG = LoggerFactory.getLogger(BidirectedNode.class);
+
+ static {
+ String p = "org.graphstream.graph.node.initialEdgeCapacity";
+ int initialEdgeCapacity = 32;
+ try {
+ initialEdgeCapacity = Integer.valueOf(System.getProperty(p, "32"));
+ } catch (AccessControlException e) {
+ }
+ INITIAL_EDGE_CAPACITY = initialEdgeCapacity;
+ }
+ //edges are bidirected, here are 4 sub-types (name based on direction of the arrow relative to the corresponding node):
+ //note that neighbor edges here will treat their root node as *left* node, the opposite node as *right* node
+ protected static final byte OO_EDGE = 0b00; // src-->--<--dst
+ protected static final byte OI_EDGE = 0b01; // src-->-->--dst
+ protected static final byte IO_EDGE = 0b10; // src--<--<--dst
+ protected static final byte II_EDGE = 0b11; // src--<-->--dst
+
+ protected BidirectedEdge[] edges;//fast access to edges knowing direction from src
+ protected int oStart, degree;
+
+ protected HashMap> neighborMap; //fast access to edges knowing dst
+ // *** Constructor ***
+
+ protected BidirectedNode(AbstractGraph graph, String id) {
+ super(graph, id);
+ edges = new BidirectedEdge[INITIAL_EDGE_CAPACITY];
+ oStart = degree = 0;
+ neighborMap = new HashMap>(
+ 4 * INITIAL_EDGE_CAPACITY / 3 + 1);
+ }
+
+ // *** Helpers ***
+
+ protected byte edgeType(BidirectedEdge e) {
+ //return (byte) (e.getDir((AbstractNode) e.getOpposite(this))?0:1 + (e.getDir(this)?0:1)<<1); //cool but less efficient
+ BidirectedNode opposite = e.getOpposite(this);
+ if(e.getDir(this))
+ if(e.getDir(opposite))
+ return OO_EDGE;
+ else
+ return OI_EDGE;
+ else
+ if(e.getDir(opposite))
+ return IO_EDGE;
+ else
+ return II_EDGE;
+ }
+
+ @SuppressWarnings("unchecked")
+ protected BidirectedEdge locateEdge(Node opposite, byte type) {
+ List l = neighborMap.get(opposite);
+ if (l == null)
+ return null;
+
+ for (BidirectedEdge e : l) {
+ if(type==edgeType(e))
+ return e;
+ }
+ return null;
+ }
+
+ protected void removeEdge(int i) {
+// System.out.print("From node " + this.getId() + " remove edge number " + i);
+// System.out.println(" from a list of edges: ");
+// for(int j=0;j l = neighborMap.get(opposite);
+ l.remove(edges[i]);
+ if (l.isEmpty())
+ neighborMap.remove(opposite);
+ //remove from the array
+ if (i >= oStart) {
+ edges[i] = edges[--degree];
+ edges[degree] = null;
+ return;
+ }
+
+ edges[i] = edges[--oStart];
+ edges[oStart] = edges[--degree];
+ edges[degree] = null;
+
+ }
+
+ // *** Callbacks ***
+
+ @Override
+ protected boolean addEdgeCallback(AbstractEdge edge) {
+ //LOG.info("Adding edge callback " + edge.getId() + " from graph " + getGraph().getId());
+ AbstractNode opposite = edge.getOpposite(this);
+ List l = neighborMap.get(opposite);
+ if (l == null) {
+ l = new LinkedList();
+ neighborMap.put(opposite, l);
+ }
+ l.add((BidirectedEdge) edge);
+
+ // resize edges if necessary
+ if (edges.length == degree) {
+ BidirectedEdge[] tmp = new BidirectedEdge[(int) (GROWTH_FACTOR * edges.length) + 1];
+ System.arraycopy(edges, 0, tmp, 0, edges.length);
+ Arrays.fill(edges, null);
+ edges = tmp;
+ }
+
+ byte type = edgeType((BidirectedEdge) edge);
+
+ if (type <= OI_EDGE) {
+ edges[degree++] = (BidirectedEdge) edge;
+ return true;
+ }
+
+ edges[degree++] = edges[oStart];
+ edges[oStart++] = (BidirectedEdge) edge;
+ return true;
+ }
+
+ @Override
+ protected void removeEdgeCallback(AbstractEdge edge) {
+ //LOG.info("Removing edge callback " + edge.getId() + " from graph " + getGraph().getId());
+
+ // locate the edge first
+ byte type = edgeType((BidirectedEdge) edge);
+ int i = 0;
+ if (type <= OI_EDGE)
+ i = oStart;
+ while (i <= degree && edges[i] != edge)
+ i++;
+ if(i < degree){ //only remove iff edge is found
+ removeEdge(i);
+ }
+ }
+
+ @Override
+ protected void clearCallback() {
+ Arrays.fill(edges, 0, degree, null);
+ oStart = degree = 0;
+ }
+
+ // *** Access methods ***
+
+ @Override
+ public int getDegree() {
+ return degree;
+ }
+
+ @Override
+ public int getInDegree() {
+ return oStart;
+ }
+
+ @Override
+ public int getOutDegree() {
+ return degree - oStart;
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public T getEdge(int i) {
+ if (i < 0 || i >= degree)
+ throw new IndexOutOfBoundsException("Node \"" + this + "\""
+ + " has no edge " + i);
+ return (T) edges[i];
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public T getEnteringEdge(int i) {
+ if (i < 0 || i >= getInDegree())
+ throw new IndexOutOfBoundsException("Node \"" + this + "\""
+ + " has no entering edge " + i);
+ return (T) edges[i];
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public T getLeavingEdge(int i) {
+ if (i < 0 || i >= getOutDegree())
+ throw new IndexOutOfBoundsException("Node \"" + this + "\""
+ + " has no edge " + i);
+ return (T) edges[oStart + i];
+ }
+
+ // FIXME: I must override these stupid functions, let's just return random edge among 4 types!!!
+ @SuppressWarnings("unchecked")
+ @Override
+ public T getEdgeBetween(Node node) {
+ return (T) locateEdge(node, IO_EDGE);
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public T getEdgeFrom(Node node) {
+ return (T) locateEdge(node, OO_EDGE);
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public T getEdgeToward(Node node) {
+ return (T) locateEdge(node, II_EDGE);
+ }
+
+ // *** Iterators ***
+
+ protected class EdgeIterator implements Iterator {
+ protected int iPrev, iNext, iEnd;
+ //0:in, 1:out, other(2):all
+ protected EdgeIterator(int ori) {
+ iPrev = -1;
+ iNext = 0;
+ iEnd = degree;
+ if (ori==0)
+ iEnd = oStart;
+ else if(ori==1)
+ iNext = oStart;
+
+// System.out.println("Iterator " + ori + " of " + getId() + " from " + iNext + " to " + iEnd + " of");
+// for(int i=0;i= iEnd)
+ throw new NoSuchElementException();
+ iPrev = iNext++;
+ return (T) edges[iPrev];
+ }
+
+ public void remove() {
+ if (iPrev == -1)
+ throw new IllegalStateException();
+ AbstractEdge e = edges[iPrev];
+ // do not call the callback because we already know the index
+ //graph.removeEdge(e);
+ ((BidirectedGraph)graph).removeEdgeDup(e, true, e.getSourceNode() != BidirectedNode.this,
+ e.getTargetNode() != BidirectedNode.this);
+ removeEdge(iPrev);
+ iNext = iPrev;
+ iPrev = -1;
+ iEnd--;
+
+ }
+ }
+
+ @Override
+ public Iterator getEdgeIterator() {
+ return new EdgeIterator(2);
+ }
+
+ @Override
+ public Iterator getEnteringEdgeIterator() {
+ return new EdgeIterator(0);
+ }
+
+ @Override
+ public Iterator getLeavingEdgeIterator() {
+ return new EdgeIterator(1);
+ }
+
+}
diff --git a/src/dev/java/japsadev/bio/hts/newscarf/BidirectedPath.java b/src/dev/java/japsadev/bio/hts/newscarf/BidirectedPath.java
new file mode 100644
index 0000000..0ba5f55
--- /dev/null
+++ b/src/dev/java/japsadev/bio/hts/newscarf/BidirectedPath.java
@@ -0,0 +1,191 @@
+package japsadev.bio.hts.newscarf;
+
+import japsa.seq.Alphabet;
+import japsa.seq.Sequence;
+import japsa.seq.SequenceBuilder;
+
+
+import java.util.List;
+
+import org.graphstream.graph.Edge;
+import org.graphstream.graph.Node;
+import org.graphstream.graph.Path;
+import org.graphstream.graph.implementations.AbstractNode;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class BidirectedPath extends Path{
+ int deviation; //how this path differ to long read data (todo: by multiple-alignment??)
+ private double coverage=-1; //representative coverage of this path (basically the lowest cov from its nodes)
+ private static final Logger LOG = LoggerFactory.getLogger(BidirectedPath.class);
+
+ @Override
+ public void add(Edge edge) {
+ super.add(edge);
+
+ double newCoverage = peekNode().getNumber("cov");
+ if(coverage<=0)
+ coverage=newCoverage;
+ else
+ coverage=Math.min(coverage, newCoverage);
+
+ }
+
+ public BidirectedPath(){
+ super();
+ }
+ public BidirectedPath(BidirectedPath p){
+ super();
+ if(p!=null && !p.empty()){
+ setRoot(p.getRoot());
+ for(Edge e:p.getEdgePath())
+ add(e);
+ }
+ deviation=p.deviation;
+ }
+
+ //This constructor is only used to load in contigs.path from SPAdes
+ //So no recursive path here (path contains all primitive edges)
+ public BidirectedPath(BidirectedGraph graph, String paths){
+ super();
+ paths=paths.replace(";", ","); //optimized it!
+ String[] comps = paths.split(",");
+ if(comps.length<1)
+ return;
+ String curID = comps[0], nextID;
+ boolean curDir = curID.contains("+")?true:false,
+ nextDir;
+ BidirectedNode curNode = graph.getNode(curID.substring(0,curID.length()-1)),
+ nextNode;
+ setRoot(curNode);
+ for(int i=1; i edges = this.getEdgePath();
+ for(int i = edges.size()-1; i>=0; i--)
+ rcPath.add(edges.get(i));
+ return rcPath;
+ }
+ //It is not really ID because Path doesn't need an ID
+ public String getId(){
+ //need to make the Id unique for both sense and antisense spelling???
+ BidirectedNode curNode = (BidirectedNode) getRoot();
+ if(getEdgeCount()<1)
+ return curNode.getId();
+
+ String retval=curNode.getId(),
+ curDir=((BidirectedEdge) getEdgePath().get(0)).getDir(curNode)?"+":"-";
+ retval+=curDir;
+ for(Edge e:getEdgePath()){
+ curNode=e.getOpposite(curNode);
+ retval+=","+curNode.getId();
+ curDir=((BidirectedEdge) e).getDir(curNode)?"-":"+"; //note that curNode is target node
+ retval+=curDir;
+ }
+
+ return retval.trim();
+ }
+
+ public String toString(){
+ return "(" + getId() + ")";
+ }
+
+ public Sequence spelling(){
+
+ BidirectedNode curNode = (BidirectedNode) getRoot();
+ Sequence curSeq = curNode.getAttribute("seq");
+
+ if(getEdgeCount()<1)
+ return curSeq;
+
+ SequenceBuilder seq = new SequenceBuilder(Alphabet.DNA16(), 1024*1024, this.toString());
+ boolean curDir=((BidirectedEdge) getEdgePath().get(0)).getDir(curNode);
+ curSeq = curDir?curSeq:Alphabet.DNA.complement(curSeq);
+
+ seq.append(curSeq.subSequence(0, curSeq.length()-BidirectedGraph.getKmerSize()));
+ for(Edge edge:getEdgePath()){
+ for(Edge e:((BidirectedEdge) edge).getPath().getEdgePath()){
+ curNode=e.getOpposite(curNode);
+ curSeq= curNode.getAttribute("seq");
+ curDir=!((BidirectedEdge) e).getDir(curNode);
+ curSeq = curDir?curSeq:Alphabet.DNA.complement(curSeq);
+
+ seq.append(curSeq.subSequence(0, curSeq.length()-(curNode==peekNode()?
+ 0:BidirectedGraph.getKmerSize())));
+ }
+ }
+ return seq.toSequence();
+ }
+ /*
+ * Add a path to the current path. The path to be added must start with the last node
+ * of the current path.
+ */
+ public void join(BidirectedPath bridge) {
+ if(bridge==null || bridge.size() <=1)
+ return;
+ if(bridge.getRoot() != peekNode()){
+ LOG.error("Cannot join path with disagreed first node " + bridge.getRoot().getId());
+ return;
+ }
+ if(((BidirectedEdge) bridge.getEdgePath().get(0)).getDir((AbstractNode) bridge.getRoot())
+ == ((BidirectedEdge) peekEdge()).getDir((AbstractNode) peekNode())){
+ LOG.error("Conflict direction from the first node " + bridge.getRoot().getId());
+ return;
+ }
+ //TODO: need a way to check coverage consistent
+
+
+ for(Edge e:bridge.getEdgePath()){
+ add(e);
+ }
+
+ coverage=Math.min(coverage, bridge.coverage);
+ }
+
+ public int getDeviation(){
+ return this.deviation;
+ }
+ public void setDeviation(int deviation){
+ this.deviation=deviation;
+ }
+
+ public double getCoverage(){
+ return coverage;
+ }
+ /**
+ *
+ * @return average depth of this path
+ */
+// public double averageCov(){
+// int len=0;
+// double res=0;
+// for(Node n:getNodePath()){
+// if(BidirectedGraph.isUnique(n)){
+// Sequence seq = (Sequence) n.getAttribute("seq");
+// double cov = Double.parseDouble(seq.getName().split("_")[5]);
+// len+=(n==getRoot())?seq.length():seq.length()-BidirectedGraph.getKmerSize();
+// res+=seq.length()*cov;
+// }
+// }
+// return res/len;
+// }
+
+ public int length() {
+ int retval = 0;
+ for(Node n:getNodePath()){
+ Sequence seq = (Sequence) n.getAttribute("seq");
+ retval+=(n==getRoot())?seq.length():seq.length()-BidirectedGraph.getKmerSize();
+ }
+ return retval;
+ }
+}
diff --git a/src/dev/java/japsadev/bio/hts/newscarf/GraphExplore.java b/src/dev/java/japsadev/bio/hts/newscarf/GraphExplore.java
new file mode 100644
index 0000000..7f85338
--- /dev/null
+++ b/src/dev/java/japsadev/bio/hts/newscarf/GraphExplore.java
@@ -0,0 +1,127 @@
+package japsadev.bio.hts.newscarf;
+
+import java.io.IOException;
+import java.util.Iterator;
+import org.graphstream.graph.*;
+import japsa.seq.Sequence;
+
+
+public class GraphExplore {
+
+ public static String spadesFolder="/home/sonhoanghguyen/Projects/scaffolding/data/spades_3.7/"; //imb desktop
+// public static String spadesFolder="/home/hoangnguyen/workspace/data/spades/"; //sony
+// public static String spadesFolder="/home/s_hoangnguyen/Projects/scaffolding/test-graph/spades/"; //dell
+
+
+ public static void main(String args[]) {
+ try {
+ new GraphExplore();
+ } catch (IOException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+
+
+ }
+
+ public GraphExplore() throws IOException{
+ //System.setProperty("org.graphstream.ui.renderer", "org.graphstream.ui.j2dviewer.J2DGraphRenderer");
+// String sample="EcK12S-careful";
+// String sample="Kp2146-careful";
+// String sample="meta-careful";
+ String sample="cp_S5";
+
+ HybridAssembler ass = new HybridAssembler(spadesFolder+sample+"/assembly_graph.fastg");
+ BidirectedGraph graph= ass.simGraph;
+// graph.addAttribute("ui.quality");
+// graph.addAttribute("ui.antialias");
+ graph.addAttribute("ui.stylesheet", styleSheet);
+ graph.addAttribute("ui.default.title", "New real-time hybrid assembler");
+
+
+ graph.display();
+
+ System.out.println("Node: " + graph.getNodeCount() + " Edge: " + graph.getEdgeCount());
+
+
+ for (Node node : graph) {
+// node.addAttribute("ui.label", node.getId());
+// node.setAttribute("ui.style", "text-offset: -10;");
+ if(BidirectedGraph.isUnique(node))
+ node.setAttribute("ui.class", "marked");
+ }
+
+ //explore(graph.getNode("A"));
+
+ /*
+ * Testing reduce function
+ */
+ try {
+ HybridAssembler.promptEnterKey();
+ ass.reduceFromSPAdesPaths(spadesFolder+sample+"/contigs.paths");
+ HybridAssembler.promptEnterKey();
+ ass.assembly(spadesFolder+sample+"/assembly_graph.sam");
+
+ } catch (IOException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+
+ //TODO: thorough cleaning... should have flag dead for each node
+// boolean dead=true;
+// while(dead){
+// dead=false;
+// for (Node node : graph) {
+// if((node.getDegree() < 2)
+//// || (node.getDegree()==0 && ((Sequence)node.getAttribute("seq")).length() < 1000)
+// ){
+// graph.removeNode(node);
+// dead=true;
+// }
+//
+// }
+// }
+ System.out.println("Node: " + graph.getNodeCount() + " Edge: " + graph.getEdgeCount());
+
+ /*
+ * Testing BidirectedEdge id pattern
+ */
+// String pattern = "^\\[([0-9\\+\\-]*)\\]([oi])\\[([0-9\\+\\-]*)\\]([oi])$";
+// // Create a Pattern object
+// Pattern r = Pattern.compile(pattern);
+// // Now create matcher object.
+// String id="[3]i[4+8+]o";
+// Matcher m = r.matcher(id);
+//
+// if(m.find()){
+// System.out.println(m.group(1)+"|"+m.group(2)+"|"+m.group(3)+"|"+m.group(4));
+// } else
+// System.out.println("Fuck");
+ }
+
+ public void explore(Node source) {
+ Iterator extends Node> k = source.getBreadthFirstIterator();
+
+ while (k.hasNext()) {
+ Node next = k.next();
+ next.setAttribute("ui.class", "marked");
+ sleep();
+ }
+ }
+
+ protected void sleep() {
+ try { Thread.sleep(1000); } catch (Exception e) {}
+ }
+
+ protected String styleSheet =
+ "node {" +
+ " fill-color: black;" +
+ "}" +
+ "node.marked {" +
+ " fill-color: red;" +
+ "}" +
+ "edge.marked {" +
+ " fill-color: red;" +
+ "}";
+
+}
\ No newline at end of file
diff --git a/src/dev/java/japsadev/bio/hts/newscarf/GraphTest.java b/src/dev/java/japsadev/bio/hts/newscarf/GraphTest.java
new file mode 100644
index 0000000..9d231c1
--- /dev/null
+++ b/src/dev/java/japsadev/bio/hts/newscarf/GraphTest.java
@@ -0,0 +1,54 @@
+package japsadev.bio.hts.newscarf;
+
+import java.io.File;
+import java.io.PrintWriter;
+import java.util.Collections;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.graphstream.graph.Graph;
+import org.graphstream.graph.implementations.AdjacencyListGraph;
+
+import htsjdk.samtools.*;
+
+public class GraphTest {
+ public static void main(String[] args) throws Exception {
+// System.setProperty("org.graphstream.ui.renderer",
+// "org.graphstream.ui.j2dviewer.J2DGraphRenderer");
+// Graph g = new AdjacencyListGraph("g");
+// g.addNode("A").addAttribute("xyz", new double[] { 0, 0 });
+// g.addNode("B").addAttribute("xyz", new double[] { 10, 10 });
+//
+// g.addEdge("AB", "A", "B", false)
+// .addAttribute(
+// "ui.points",
+// (Object) new double[] { 0, 0, 0, 0, 5, 0, 5, 10, 0, 10,
+// 10, 0 });
+//
+// g.addAttribute("ui.stylesheet", "edge {shape: polyline; }"); // or shape: cubic-curve
+//
+// g.display(false);
+
+// String data="/home/hoangnguyen/workspace/data/spades/EcK12S-careful/assembly_graph.fastg"; //sony
+//
+// final SamFileValidator validator=new SamFileValidator(new PrintWriter(System.out),8000);
+// validator.setIgnoreWarnings(true);
+// validator.setVerbose(true,1000);
+// validator.setErrorsToIgnore(Collections.singletonList(SAMValidationError.Type.MISSING_READ_GROUP));
+// SamReaderFactory factory=SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
+// SamReader samReader=factory.open(new File(data));
+// boolean ischeck=false;
+// SAMRecordIterator iter = samReader.iterator();
+// System.out.println(iter.next());
+//
+// samReader.close();
+
+ Pattern versionPattern = Pattern.compile("^Version:\\s(\\d+\\.\\d+\\.\\d+).*");
+ String line="Version: 0.7.5a-r405";
+ Matcher matcher =versionPattern.matcher(line);
+ if (matcher.find()){
+ System.out.println(line);
+ System.out.println(matcher.group(1));
+ }
+ }
+}
diff --git a/src/dev/java/japsadev/bio/hts/newscarf/HybridAssembler.java b/src/dev/java/japsadev/bio/hts/newscarf/HybridAssembler.java
new file mode 100644
index 0000000..f25af64
--- /dev/null
+++ b/src/dev/java/japsadev/bio/hts/newscarf/HybridAssembler.java
@@ -0,0 +1,308 @@
+package japsadev.bio.hts.newscarf;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Scanner;
+
+import org.graphstream.graph.Edge;
+import org.graphstream.graph.Node;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SAMRecordIterator;
+import htsjdk.samtools.SamInputResource;
+import htsjdk.samtools.SamReader;
+import htsjdk.samtools.SamReaderFactory;
+import htsjdk.samtools.ValidationStringency;
+
+public class HybridAssembler {
+ private static final Logger LOG = LoggerFactory.getLogger(HybridAssembler.class);
+
+// final BidirectedGraph origGraph;
+ public BidirectedGraph simGraph; //original and simplified graph should be separated, no???
+
+ public HybridAssembler(){
+// origGraph=new BidirectedGraph("batch");
+ simGraph=new BidirectedGraph("real");
+ }
+
+
+ public HybridAssembler(String graphFile) throws IOException{
+ this();
+// origGraph.loadFromFile(graphFile);
+ simGraph.loadFromFile(graphFile);
+ }
+
+
+ public void assembly(String bamFile) throws IOException{
+ SamReaderFactory.setDefaultValidationStringency(ValidationStringency.SILENT);
+
+ SamReader reader;
+ if ("-".equals(bamFile))
+ reader = SamReaderFactory.makeDefault().open(SamInputResource.of(System.in));
+ else
+ reader = SamReaderFactory.makeDefault().open(new File(bamFile));
+
+ SAMRecordIterator iter = reader.iterator();
+
+ String readID = "";
+ //ReadFilling readFilling = null;
+ ArrayList samList = new ArrayList();;// alignment record of the same read;
+ BidirectedPath p = new BidirectedPath();
+ while (iter.hasNext()) {
+ SAMRecord rec = iter.next();
+ if (rec.getReadUnmappedFlag())
+ continue;
+// if (rec.getMappingQuality() < qual)
+// continue;
+
+ String refID = rec.getReferenceName().split("_")[1];
+ Alignment myRec = new Alignment(rec, simGraph.getNode(refID)); //FIXME: optimize
+
+ //////////////////////////////////////////////////////////////////
+ // make list of alignments of the same (Nanopore) read.
+
+ //not the first occurrance
+ if (!readID.equals("") && !readID.equals(myRec.readID)) {
+ //Collections.sort(samList);
+ //p=origGraph.pathFinding(samList);
+ p=simGraph.pathFinding(samList); // the graph MUST be the same as from new Alignment(...)
+
+ if(p!=null)
+ System.out.println("Final path found: " + p.getId());
+ reduce(p);
+// reduce2(p);
+ samList = new ArrayList();
+ //readID = myRec.readID;
+ }
+ readID = myRec.readID;
+ samList.add(myRec); // FIXME: (optimize) insert sort here
+
+ }// while
+ iter.close();
+
+ //outOS.close();
+ reader.close();
+
+ }
+ /*
+ * Read paths from contigs.path and reduce the graph
+ */
+ public void reduceFromSPAdesPaths(String paths) throws IOException{
+
+ BufferedReader pathReader = new BufferedReader(new FileReader(paths));
+
+ String s="", curpath="";
+ //Read contigs from contigs.paths and refer themselves to contigs.fasta
+ boolean flag=false;
+ while((s=pathReader.readLine()) != null){
+ if(s.contains("NODE")){
+ if(flag){
+ BidirectedPath path=new BidirectedPath(simGraph, curpath);
+ reduce(path);
+// reduce2(path);
+ }
+ flag=s.contains("'")?false:true;
+ curpath=new String();
+ continue;
+ }else if(flag){
+ curpath+=s;
+ }
+
+
+ }
+ pathReader.close();
+ }
+
+ /**
+ * Another reduce that doesn't remove the unique nodes
+ * Instead redundant edges are removed on a path way
+ * @param p Path to simplify the graph (from origGraph)
+ * @param target Subjected graph for the simplification
+ */
+ private void reduce(BidirectedPath p){
+ //do nothing if the path has only one node
+ if(p==null || p.getEdgeCount()<1)
+ return;
+
+ //loop over the edges of path (like spelling())
+ BidirectedNode markerNode = null,
+ curNodeFromSimGraph = (BidirectedNode) p.getRoot();
+
+ BidirectedPath curPath= null;
+ boolean markerDir=true, curDir;
+
+ if(BidirectedGraph.isUnique(curNodeFromSimGraph)){
+ markerNode=curNodeFromSimGraph;
+ markerDir=((BidirectedEdge) p.getEdgePath().get(0)).getDir(markerNode);
+ curPath = new BidirectedPath();
+ curPath.setRoot(curNodeFromSimGraph);
+ }
+
+
+ //search for an unique node as the marker.
+ ArrayList tobeRemoved = new ArrayList(),
+ tobeAdded = new ArrayList();
+ for(Edge e:p.getEdgePath()){
+
+ curNodeFromSimGraph=e.getOpposite(curNodeFromSimGraph);
+
+// curNodeFromSimGraph = simGraph.getNode(curNodeFromOrigGraph.getId()); //change back to Node belong to simGraph (instead of origGraph)
+ curDir=((BidirectedEdge) e).getDir(curNodeFromSimGraph);
+
+ if(BidirectedGraph.isUnique(curNodeFromSimGraph)){
+
+ if(markerNode!=null){
+ //this is when we have 1 jumping path (both ends are markers)
+ curPath.add(e);
+// LOG.info("Processing path {} with marker {}:{}:{} and curNode {}:{}:{}", curPath.getId(), markerNode.getId(), markerDir?"out":"in", markerNode.getGraph().getId(), curNodeFromSimGraph.getId(), curDir?"out":"in", curNodeFromSimGraph.getGraph().getId());
+ //create an edge connect markerNode to curNode with curPath
+ //Edge reducedEdge = simGraph.addEdge(markerNode, curNodeFromSimGraph, markerDir, curDir);
+ BidirectedEdge reducedEdge = new BidirectedEdge(markerNode, curNodeFromSimGraph, markerDir, curDir);
+
+// if(reducedEdge!=null)
+// reducedEdge.addAttribute("path", new BidirectedPath(curPath));
+
+ tobeAdded.add(reducedEdge);
+
+ //loop over curPath to find out edges needed to be removed
+ Node n0 = curPath.getRoot(),
+ n1 = null;
+ for(Edge ep:curPath.getEdgePath()){
+ n1 = ep.getOpposite(n0);
+ if(!BidirectedGraph.isUnique(n0) == BidirectedGraph.isUnique(n1)){
+ tobeRemoved.add((BidirectedEdge)ep);
+ }
+
+// if(!BidirectedGraph.isUnique(n1)){
+// n1.setAttribute("cov", n1.getNumber("cov")-markerNode.getNumber("cov"));
+// LOG.info("...coverage of " + n1.getAttribute("name") + " now is " + n1.getNumber("cov"));
+// }
+
+ n0=n1;
+ }
+
+ }
+
+
+ markerNode=curNodeFromSimGraph;
+ markerDir=!curDir; //in-out, out-in
+ curPath= new BidirectedPath();
+ curPath.setRoot(curNodeFromSimGraph);
+ }
+ else{
+ if(markerNode!=null){
+ curPath.add(e);
+ }
+ }
+
+ }
+
+ //remove appropriate edges
+ for(BidirectedEdge e:tobeRemoved){
+ LOG.info("REMOVING EDGE " + e.getId() + " from " + e.getNode0().getGraph().getId() + "-" + e.getNode1().getGraph().getId());
+ LOG.info("before: \n\t" + simGraph.printEdgesOfNode(e.getNode0()) + "\n\t" + simGraph.printEdgesOfNode(e.getNode1()));
+ simGraph.removeEdge(e.getId());
+ LOG.info("after: \n\t" + simGraph.printEdgesOfNode(e.getNode0()) + "\n\t" + simGraph.printEdgesOfNode(e.getNode1()));
+ }
+
+ //add appropriate edges
+ for(BidirectedEdge e:tobeAdded){
+ LOG.info("ADDING EDGE " + e.getId()+ " from " + e.getNode0().getGraph().getId() + "-" + e.getNode1().getGraph().getId());
+ LOG.info("before: \n\t" + simGraph.printEdgesOfNode(e.getNode0()) + "\n\t" + simGraph.printEdgesOfNode(e.getNode1()));
+
+ Edge reducedEdge = simGraph.addEdge(e.getSourceNode(),e.getTargetNode(),e.getDir0(),e.getDir1());
+ if(reducedEdge!=null){
+// reducedEdge.addAttribute("ui.label", reducedEdge.getId());
+// reducedEdge.setAttribute("ui.style", "text-offset: -10; text-alignment: along;");
+ reducedEdge.addAttribute("isReducedEdge", true);
+ reducedEdge.setAttribute("ui.class", "marked");
+// reducedEdge.addAttribute("layout.weight", 10);
+ }
+ LOG.info("after: \n\t" + simGraph.printEdgesOfNode(e.getNode0()) + "\n\t" + simGraph.printEdgesOfNode(e.getNode1()));
+
+ }
+
+ }
+ /**
+ * Another reduce that doesn't need to know unique contig
+ * @param p Path to simplify the graph (from origGraph)
+ * @param target Subjected graph for the simplification
+ */
+ private void reduce2(BidirectedPath p){
+ //do nothing if the path has only one node
+ if(p==null || p.getEdgeCount()<1)
+ return;
+ double coverage = p.getCoverage();
+ //loop over the edges of path (like spelling())
+ BidirectedNode firstNode = (BidirectedNode) p.getRoot(),
+ lastNode = (BidirectedNode) p.peekNode();
+
+ boolean firstDir=((BidirectedEdge)p.getEdgePath().get(0)).getDir(firstNode),
+ lastDir=((BidirectedEdge)p.peekEdge()).getDir(lastNode);
+
+ //search for an unique node as the marker.
+ ArrayList tobeRemoved = new ArrayList();
+ BidirectedNode curNode = firstNode;
+ boolean curDir;
+ for(Edge e:p.getEdgePath()){
+
+ double curCoverage=curNode.getNumber("cov"),
+ nextCoverage=e.getOpposite(curNode).getNumber("cov");
+ //if current node has the same coverage as path coverage
+ if(covLeft(curCoverage, coverage)==0 || covLeft(nextCoverage, coverage)==0){
+ tobeRemoved.add((BidirectedEdge) e);
+ }
+
+ curNode=e.getOpposite(curNode);
+ }
+
+ //remove appropriate edges
+ for(BidirectedEdge e:tobeRemoved){
+ LOG.info("REMOVING EDGE " + e.getId() + " from " + e.getNode0().getGraph().getId() + "-" + e.getNode1().getGraph().getId());
+ LOG.info("before: \n\t" + simGraph.printEdgesOfNode(e.getNode0()) + "\n\t" + simGraph.printEdgesOfNode(e.getNode1()));
+ simGraph.removeEdge(e.getId());
+ LOG.info("after: \n\t" + simGraph.printEdgesOfNode(e.getNode0()) + "\n\t" + simGraph.printEdgesOfNode(e.getNode1()));
+ }
+
+ //add appropriate edges
+ Edge reducedEdge = simGraph.addEdge(firstNode,lastNode,firstDir,lastDir);
+
+
+ }
+ private double covLeft(double cov, double pathCov){
+ double retval=0;
+ //TODO: need statistics here...
+ if((cov-pathCov)/pathCov > .2){
+ retval=cov-pathCov;
+ }
+ return retval;
+ }
+
+
+ @SuppressWarnings("resource")
+ public static void promptEnterKey(){
+ System.out.println("Press \"ENTER\" to continue...");
+ Scanner scanner = new Scanner(System.in);
+ scanner.nextLine();
+ }
+
+ protected void sleep() {
+ try { Thread.sleep(1000); } catch (Exception e) {}
+ }
+
+ public static void main(String[] argv) throws IOException{
+ HybridAssembler hbAss = new HybridAssembler(GraphExplore.spadesFolder+"EcK12S-careful/assembly_graph.fastg");
+ //For SAM file, run bwa first on the edited assembly_graph.fastg by running:
+ //awk -F '[:;]' -v q=\' 'BEGIN{flag=0;}/^>/{if(index($1,q)!=0) flag=0; else flag=1;}{if(flag==1) print $1;}' ../EcK12S-careful/assembly_graph.fastg > Eck12-careful.fasta
+ //TODO: need to make this easier
+
+ hbAss.assembly(GraphExplore.spadesFolder+"EcK12S-careful/assembly_graph.sam");
+
+ }
+
+}
diff --git a/src/dev/java/japsadev/bio/hts/newscarf/MetaRange.java b/src/dev/java/japsadev/bio/hts/newscarf/MetaRange.java
new file mode 100644
index 0000000..95259e6
--- /dev/null
+++ b/src/dev/java/japsadev/bio/hts/newscarf/MetaRange.java
@@ -0,0 +1,140 @@
+package japsadev.bio.hts.newscarf;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * A meta range is composed of sub ranges. Its left endpoint is
+ * the leftmost point of its subranges and its right endpoint is
+ * the rightmost point of its subranges.
+ */
+public class MetaRange extends Range{
+
+ /**
+ * meta Ranges are composed of lists of subRanges.
+ */
+ private List subranges;
+
+ public MetaRange(Range initialSubRange){
+ super(initialSubRange.getLeft(),initialSubRange.getRight());
+ this.subranges = new ArrayList();
+ this.subranges.add(initialSubRange);
+ }
+
+ public void join(MetaRange other){
+ verifyHomo(other);
+
+ this.setLeft(Math.min(this.getLeft(),other.getLeft()));
+ this.setRight(Math.max(this.getRight(),other.getRight()));
+ this.subranges.addAll(other.getSubRanges());
+
+ other.invalidate();
+ }
+
+ public void setSubRanges(List subRanges){
+ this.subranges = subRanges;
+ }
+
+ public List getSubRanges(){
+ return this.subranges;
+ }
+
+ private void invalidate(){
+ this.setSubRanges(null);
+ this.setLeft(0);
+ this.setRight(0);
+ }
+
+ public boolean isInvalid(){
+ return this.getSubRanges()==null;
+ }
+
+ /**
+ * This code is retrieved from StackOverFlow: credits to augray :).
+ * From here on, the term "overlap" actually means ranges that
+ * belong to the same homo-group
+ *
+ * Given a list of Ranges, returns a list of Range groups where
+ * the Ranges in the groups all overlap. So if A overlaps with B and
+ * B with C, then A,B,and C will be returned in a group. Supposing Ranges
+ * D and E share nothing with A, B, or C, but do share with each other, D and E
+ * will be returned as a separate group from A,B,C.
+ * @param baseRanges
+ * @return
+ */
+ public static List> getOverlappingGroups(List baseRanges){
+ List baseMetaRanges = toMetaRanges(baseRanges);
+
+ List mergedMetaRanges = getMergedMetaRanges(baseMetaRanges);
+
+ List> RangeGroups = metaRangesToGroups(mergedMetaRanges);
+ return RangeGroups;
+ }
+
+
+
+ private static List getMergedMetaRanges(
+ List metaRanges) {
+ if(metaRanges.isEmpty()){
+ return metaRanges;
+ }
+ //order the MetaRanges by their starting point.
+ Collections.sort(metaRanges);
+
+ //go through and merge the overlapping meta Ranges.
+ //This relies on the logic that if Range i overlaps with
+ //an Range that started before it, then once all the Ranges
+ //before i have been merged, Range i will have a starting point
+ //consecutive to the starting point of the the preceeding Range.
+ for(int i=0; i< metaRanges.size()-1; i++){
+ MetaRange thisRange = metaRanges.get(i);
+ MetaRange nextRange = metaRanges.get(i+1);
+
+ if(thisRange.isHomo(nextRange)){
+ nextRange.join(thisRange);
+ }
+ }
+
+ List resultRanges = new ArrayList();
+
+ //All Ranges from the original list either:
+ //(a) didn't overlap with any others
+ //(b) overlapped with others and were chosen to represent the merged group or
+ //(c) overlapped with others, were represented in the group in another
+ // MetaRange, and then marked as invalid.
+ //Go through and only add the valid Ranges to be returned.
+
+ for(MetaRange i : metaRanges){
+ if(!i.isInvalid()){
+ resultRanges.add(i);
+ }
+ }
+ return resultRanges;
+ }
+
+ /**
+ * Convert a list of MetaRanges into groups of Ranges.
+ * @param mergedMetaRanges
+ * @return
+ */
+ private static List> metaRangesToGroups(
+ List mergedMetaRanges) {
+ List> groups = new ArrayList<>(mergedMetaRanges.size());
+ for(MetaRange metaRange : mergedMetaRanges){
+ groups.add(metaRange.getSubRanges());
+ }
+ return groups;
+ }
+
+ private static List toMetaRanges(
+ List baseRanges) {
+ ArrayList metaRanges = new ArrayList