In [14]:
import gfapy

In [15]:
G = gfapy.Gfa(version='gfa2').from_file('../data/pangenome-graphs/DRB1-3123_unsorted.gfa')

In [16]:
def get_neighbors(seg):
    neighbors = []
    for edge in seg.edges:
        if edge.from_segment == seg:
            neighbors.append(edge.to_segment)
        else:
            neighbors.append(edge.from_segment)
    return neighbors

def DFS(G, start):
    visited = set()
    stack = [start]
    while stack:
        vertex = stack.pop()
        if vertex not in visited:
            visited.add(vertex)
            stack.extend(set(get_neighbors(vertex)) - visited)
    return visited

def connected_components(G):
    visited = set()
    components = []
    for segment in G.segments:
        if segment not in visited:
            component = DFS(G, segment)
            visited.update(component)
            components.append(component)
    return components

def get_all_paths_of_len_k(G, seg, k):
    paths = []
    def DFS_path(G, seg, k, path):
        if k == 0:
            paths.append(path)
            return
        for neighbor in get_neighbors(seg):
            if neighbor not in path:
                DFS_path(G, neighbor, k-1, path + [neighbor])
    DFS_path(G, seg, k, [seg])
    return paths

In [18]:
import networkx as nx

In [21]:
def dfa_to_nx(G):
    G_nx = nx.DiGraph()
    for node in G.segments:
        G_nx.add_node(node.name)
    for edge in G.edges:
        G_nx.add_edge(edge.from_segment.name, edge.to_segment.name)
    return G_nx

G_nx = dfa_to_nx(G)

# print useful info
print("Number of nodes: ", G_nx.number_of_nodes())
print("Number of edges: ", G_nx.number_of_edges())

Number of nodes:  3214
Number of edges:  4378
Number of connected components:  1


In [48]:
# print all the edges
print("Edges: ", G_nx.edges())

Edges:  [('1', '2'), ('2', '3'), ('2', '2222'), ('3', '4'), ('4', '5'), ('4', '2223'), ('5', '6'), ('6', '7'), ('6', '2224'), ('7', '8'), ('8', '9'), ('8', '2225'), ('9', '10'), ('10', '11'), ('10', '2226'), ('11', '12'), ('12', '13'), ('12', '2227'), ('13', '14'), ('14', '15'), ('14', '2228'), ('15', '16'), ('16', '17'), ('16', '2229'), ('17', '18'), ('18', '19'), ('18', '2230'), ('19', '20'), ('20', '21'), ('20', '2231'), ('21', '22'), ('22', '23'), ('22', '2233'), ('23', '24'), ('24', '25'), ('24', '2234'), ('25', '26'), ('26', '27'), ('26', '2235'), ('27', '28'), ('28', '29'), ('28', '2236'), ('29', '30'), ('30', '31'), ('30', '2237'), ('31', '32'), ('31', '2238'), ('32', '33'), ('33', '34'), ('33', '2239'), ('34', '35'), ('35', '36'), ('35', '2240'), ('36', '37'), ('37', '38'), ('37', '2241'), ('38', '39'), ('39', '40'), ('39', '2242'), ('40', '41'), ('40', '2243'), ('41', '42'), ('42', '43'), ('42', '2244'), ('43', '44'), ('44', '45'), ('44', '2245'), ('45', '46'), ('46', '47'), 

In [56]:
# check if the graph has a cycle
print("Is Acyclic?: ", nx.is_directed_acyclic_graph(G_nx))

Is Acyclic?:  False


In [57]:
# minimal spanning tree of the graph
print("Minimum Spanning Tree: ", nx.minimum_spanning_tree(G_nx).edges())

NetworkXNotImplemented: not implemented for directed type

# Idea

1. (?) Bisogna convertire il pangenome graph 
2. Calcolare il Min Spanning Tree (MST) del grafo. Da questo possiamo indurre un ordinamento parziale, da questo possiamo estrarne uno totale (topological sort).
3. In qusto poso posso rietichettare i nodi in modo che il nodo con etichetta 0 sia il nodo con grado entrante 0.


