In [1]:
from bdsg.bdsg import HashGraph
from bdsg.bdsg import SnarlDistanceIndex
from bdsg.bdsg import PackedGraph

#### STEP 1: IMPORT THE DATA 
1. The graph in PackedGraph format (produced with `vg convert -p`)
2. The distance index for the snarl/chain tree

In [2]:
# Input files for this notebook, given as relative paths:
# the serialized graph and the serialized snarl distance index.
graph_path: str = 'test/test_graph.vg'
index_path: str = 'test/test_idx.dist'

In [3]:
# Load the graph stored at graph_path into an empty PackedGraph.
graph = PackedGraph()
graph.deserialize(graph_path)


# Load the snarl distance index stored at index_path.
idx = SnarlDistanceIndex()
idx.deserialize(index_path)

##### TEST 1: CHECK HOW THE TREE STRUCTURE WORKS. 
1. In theory the root of the tree is going to be a chain.
2. I iterate over its children and, whenever I find a snarl, I check whether it is a leaf snarl.
3. The children of a snarl are chains. If none of those chains has a snarl among its children, the snarl is a leaf snarl.
4. Otherwise (the chains contain other snarls), I keep descending until I find the leaf snarls.


Which functions do I need?
1. From a chain, traverse its children.
2. If I find a snarl, iterate over its children (chain(s)).
3. If the children of the snarl's chains are all nodes, append the snarl to the list;
4. Else go to 2.

In [4]:
# Net handle returned by the index for the root of the tree
# (not referenced again by the cells shown in this notebook).
root_handle = idx.get_root()

In [5]:
# Snarl net handles that snarl_iteratee accepts as leaf snarls.
leaf_snarls = [] 
# Set to True by check_for_snarl whenever a visited child is a snarl.
contains_child_snarls = False
# Incremented by check_for_snarl for every visited child that is a node.
num_nodes = 0

def check_for_snarl(child_net_handle):
    """Callback for `idx.for_each_child` that updates the shared scan state.

    Sets the module-level flag `contains_child_snarls` when the visited child
    is a snarl; otherwise, when the child is a node, bumps the module-level
    counter `num_nodes`. Any other kind of child leaves the state untouched.

    Always returns True.
    """
    global contains_child_snarls, num_nodes

    if idx.is_snarl(child_net_handle):
        contains_child_snarls = True
        return True

    if idx.is_node(child_net_handle):
        num_nodes = num_nodes + 1
    return True

# THIS FUNCTION TAKES A SNARL. FOR EACH CHILD (CHAIN) OF THE SNARL, CHECK THEIR CHILDREN.
# IF NO ONE HAS A SNARL (AND THE SNARL IS SMALL ENOUGH), THE SNARL IS A LEAF
def snarl_iteratee(handle, max_nodes=10):
    """Append `handle` to `leaf_snarls` when it is a small leaf snarl.

    The children of `handle` are collected first; then every child of each of
    those children is passed to `check_for_snarl`, which records in the
    module-level `contains_child_snarls` / `num_nodes` whether a nested snarl
    was seen and how many nodes were visited.

    Args:
        handle: net handle of the snarl being visited.
        max_nodes: the snarl is kept only if fewer than this many nodes were
            counted below it (defaults to 10, the value previously hard-coded).

    Returns:
        Always True.
        NOTE(review): presumably True tells the traversal to continue — confirm
        against the libbdsg documentation.
    """
    # Both names must be declared global: check_for_snarl updates the
    # module-level variables, so resetting a *local* `num_nodes` here would
    # leave the real counter accumulating across snarls and make the size
    # check below compare a constant 0.
    global contains_child_snarls, num_nodes
    contains_child_snarls = False
    num_nodes = 0

    snarl_children = []
    # list.append returns None, so `or True` makes the callback return True.
    idx.for_each_child(handle, lambda y: snarl_children.append(y) or True)

    for s_c in snarl_children:
        idx.for_each_child(s_c, check_for_snarl)

    if not contains_child_snarls and num_nodes < max_nodes:
        leaf_snarls.append(handle)
    return True
# Walk the decomposition with snarl_iteratee as the first callback; the other two
# callbacks do nothing and just return True.
# NOTE(review): presumably the callbacks are (snarl, chain, node) in that order — confirm against libbdsg.
idx.traverse_decomposition(snarl_iteratee, lambda x: True, lambda y: True)
# Heuristic variant: a snarl that has more than one child is treated as a leaf.
# Intuition: an internal snarl has a single child, namely a chain made of nodes
# and nested snarl(s). The author is not sure this always holds.
def snarl_inf(handle):
    """Append `handle` to `leaf_snarls_inf` when the snarl has at least two children.

    Always returns True.
    """
    children = []

    def _collect(child):
        # Gather every child and return True after each one.
        children.append(child)
        return True

    idx.for_each_child(handle, _collect)

    if len(children) >= 2:
        leaf_snarls_inf.append(handle)
    return True


# Second pass over the decomposition, this time with the child-count heuristic.
leaf_snarls_inf = []
idx.traverse_decomposition(snarl_inf, lambda _x: True, lambda _y: True)

print('printing out')
# Snarls found by the first method, together with their boundary nodes.
for el in leaf_snarls:
    print(idx.net_handle_as_string(el))
    start_bound = idx.get_start_bound(el)
    end_bound = idx.get_end_bound(el)

    # Resolve each boundary to a graph handle once, then inspect id and orientation.
    start_handle = idx.get_handle(start_bound, graph)
    end_handle = idx.get_handle(end_bound, graph)
    print(f"Start Bound ID: {graph.get_id(start_handle)}, is_reverse: {graph.get_is_reverse(start_handle)}")
    print(f"End Bound ID: {graph.get_id(end_handle)}, is_reverse: {graph.get_is_reverse(end_handle)}")

# Snarls found by the heuristic, for comparison with the list above.
for el in leaf_snarls_inf:
    print(idx.net_handle_as_string(el))

printing out
simple snarl 6rev->5revtraversing start->end
Start Bound ID: 6, is_reverse: True
End Bound ID: 5, is_reverse: True
snarl 5rev->3revtraversing start->end
Start Bound ID: 5, is_reverse: True
End Bound ID: 3, is_reverse: True
simple snarl 6rev->5revtraversing start->end
snarl 5rev->3revtraversing start->end


#### STEP 2: GENERATE THE ANCHOR DICTIONARY
1. Traverse the SNARL TREE, using ```index.traverse_decomposition```
2. When detecting a LEAF SNARL, PASS IT TO THE SNARL CONSTRUCTION
3.  When detecting a LEAF SNARL:
    1. The snarl has to contain fewer than X = 10 elements; X is a parameter.
    2. The number of paths passing through the snarl has to be > MIN and < MAX; both are parameters.
    3. For each path in the snarl, … (TODO: this step is unfinished in the notes)

In [6]:
# Add the local ./assembler directory to the import path so that the
# `anchor` module can be imported from it.
import sys
sys.path.append('./assembler')
from anchor import SnarlAnchor

In [7]:
# NOTE(review): 10 is presumably the maximum snarl size X described in STEP 2 — confirm against SnarlAnchor.
anchoring = SnarlAnchor(10)

In [8]:
# Hand the same graph and distance-index files loaded above to the anchoring object.
anchoring.build_graph(graph_path, index_path)

In [9]:
# process_snarls prints a trace of the snarls it visits and returns a list whose
# entries are indexed below as el[0] (snarl net handle) and el[1] (its child net handles).
leaf_snarl_net_handles: list = anchoring.process_snarls()

Visiting snarl 2rev->1revtraversing start->end
Children: node 6rev has #nodes: 1 and has_snarls is: False
Children: simple snarl 6rev->5revtraversing start->end has #nodes: 1 and has_snarls is: True
Children: node 5rev has #nodes: 2 and has_snarls is: True
Children: snarl 5rev->3revtraversing start->end has #nodes: 2 and has_snarls is: True
Children: node 3rev has #nodes: 3 and has_snarls is: True
Visiting simple snarl 6rev->5revtraversing start->end
Children: node 9fd has #nodes: 1 and has_snarls is: False
Children: node 10fd has #nodes: 2 and has_snarls is: False
Visiting snarl 5rev->3revtraversing start->end
Children: node 4fd has #nodes: 1 and has_snarls is: False
Children: node 8fd has #nodes: 2 and has_snarls is: False
Children: node 7fd has #nodes: 3 and has_snarls is: False


In [16]:
# One line with the snarl, then one line listing its children, each followed by a comma.
for el in leaf_snarl_net_handles:
    print(idx.net_handle_as_string(el[0]))
    print("".join(idx.net_handle_as_string(child) + "," for child in el[1]))

simple snarl 6rev->5revtraversing start->end
node 9fd,node 10fd,
snarl 5rev->3revtraversing start->end
node 4fd,node 8fd,node 7fd,


In [11]:
# Show the raw value: a list of (snarl net handle, [child net handles]) tuples.
leaf_snarl_net_handles

[(<bdsg.handlegraph.net_handle_t at 0x7fb8b40888b0>,
  [<bdsg.handlegraph.net_handle_t at 0x7fb8b4056830>,
   <bdsg.handlegraph.net_handle_t at 0x7fb8b4057270>]),
 (<bdsg.handlegraph.net_handle_t at 0x7fb8b4056d70>,
  [<bdsg.handlegraph.net_handle_t at 0x7fb8b4055e30>,
   <bdsg.handlegraph.net_handle_t at 0x7fb8b4057530>,
   <bdsg.handlegraph.net_handle_t at 0x7fb8b4056a30>])]

In [12]:
# Print the chains, snarls and nodes of the decomposition as seen by the anchoring object.
anchoring.print_tree_structure()

Chain: chain 2rev->1revtraversing start->end
Node: node 2rev
Snarl: snarl 2rev->1revtraversing start->end
Chain: chain 6rev->3revtraversing start->end
Node: node 6rev
Snarl: simple snarl 6rev->5revtraversing start->end
Chain: node 9fd pretending to be a chain in a simple snarl
Node: node 9fd
Chain: node 10fd pretending to be a chain in a simple snarl
Node: node 10fd
Node: node 5rev
Snarl: snarl 5rev->3revtraversing start->end
Chain: node 4fd pretending to be a chain
Node: node 4fd
Chain: node 8fd pretending to be a chain
Node: node 8fd
Chain: node 7fd pretending to be a chain
Node: node 7fd
Node: node 3rev
Node: node 1rev
