In [1]:
import gfapy
import networkx as nx

In [4]:
def add_nodes_to_graph(G: gfapy.Gfa):
    """
    Builds a new networkx directed graph holding one node per segment of G.

    Parameters:
    - G: The original graph with segments containing name and sequence attributes.

    Returns:
    - A networkx DiGraph (without edges) whose node IDs are the segment names
      converted to int, each node storing its segment's sequence in the
      'sequence' attribute.
    """

    graph = nx.DiGraph()
    for segment in G.segments:
        # Adding an already-present ID only refreshes its attributes, so the
        # last segment seen for a given name decides the stored sequence.
        graph.add_node(int(segment.name), sequence=segment.sequence)

    return graph

def add_oriented_edges(G, G_nx):
    """
    Adds the edges of graph G to G_nx, duplicating reverse-oriented endpoints.

    For each edge in G, an endpoint whose orientation is "-" is represented by a
    newly created node in G_nx that stores the sequence of the original segment
    unchanged; every other endpoint uses the segment name converted to int as
    its node ID. New node IDs continue from the largest integer node ID in use
    (starting at 1 when G_nx is empty).

    Parameters:
    - G: The original graph. Segments provide name and sequence; edges provide
      from_segment, to_segment, from_orient and to_orient.
    - G_nx: The networkx graph to which the oriented edges will be added.
      It is modified in place; nothing is returned.
    """

    nodes_dict = {segment.name: segment.sequence for segment in G.segments}

    # Largest node ID currently in use. It is maintained incrementally so the
    # node set is scanned once, not once per reverse-oriented edge.
    last_id = max((int(n) for n in G_nx.nodes), default=0)

    for edge in G.edges:
        from_name = edge.from_segment.name
        to_name = edge.to_segment.name

        if edge.from_orient == "-" and edge.to_orient == "-":
            source, target = last_id + 1, last_id + 2
            G_nx.add_node(source, sequence=nodes_dict[from_name])
            G_nx.add_node(target, sequence=nodes_dict[to_name])

        elif edge.from_orient == "-" and edge.to_orient == "+":
            source, target = last_id + 1, int(to_name)
            G_nx.add_node(source, sequence=nodes_dict[from_name])

        elif edge.from_orient == "+" and edge.to_orient == "-":
            source, target = int(from_name), last_id + 1
            G_nx.add_node(target, sequence=nodes_dict[to_name])

        else:
            source, target = int(from_name), int(to_name)

        G_nx.add_edge(source, target)
        # add_edge may create an endpoint that was not in G_nx before, so both
        # endpoints take part in the running maximum.
        last_id = max(last_id, source, target)

def process_graph(G):
    """
    Converts a GFA graph into a networkx graph in two steps: the segments
    become nodes first, then the oriented edges are added on top.

    Parameters:
    - G: A GFA graph with segments and edges.

    Returns:
    - The networkx graph produced by add_nodes_to_graph after
      add_oriented_edges has added the edges to it.
    """

    converted = add_nodes_to_graph(G)
    # add_oriented_edges works in place on the graph it is given.
    add_oriented_edges(G, converted)
    return converted

def print_info(G):
    """
    Prints summary information about a networkx directed graph.

    Parameters:
    - G: A networkx directed graph; its name attribute is printed as the graph name.

    Printed lines: graph name, number of nodes, number of edges, whether the
    graph is a DAG, number of nodes with in-degree 1, and the average in-degree.
    For a graph without nodes the average in-degree is reported as 0.0 instead
    of raising ZeroDivisionError.
    """
    num_nodes = len(G.nodes)
    # In-degrees are collected once and reused for both statistics below.
    in_degrees = [G.in_degree(node) for node in G.nodes]
    average_in_degree = sum(in_degrees) / num_nodes if num_nodes else 0.0

    # print the name of the graph
    print(f"Graph name: {G.name}")
    # Number of nodes and edges
    print(f"Number of nodes: {num_nodes}")
    print(f"Number of edges: {len(G.edges)}")
    # Report whether the graph is a DAG (cycles themselves are not counted)
    print(f"Is DAG: {nx.is_directed_acyclic_graph(G)}")
    # print the number of nodes with in-degree 1
    print(f"Number of nodes with in-degree 1: {len([degree for degree in in_degrees if degree == 1])}")
    # print the average in-degree
    print(f"Average in-degree: {average_in_degree}")


In [None]:
# Load the GFA files with gfapy.
# NOTE(review): G3 is read from exactly the same path as G2 — confirm whether a
# different file was intended for G3.
G1 = gfapy.Gfa.from_file("../../data/pangenome-graphs/DRB1-3123_unsorted.gfa")
G2 = gfapy.Gfa.from_file("../../data/pangenome-graphs/chrX.pan.fa.a2fb268.4030258.6a1ecc2.smooth.gfa")
G3 = gfapy.Gfa.from_file("../../data/pangenome-graphs/chrX.pan.fa.a2fb268.4030258.6a1ecc2.smooth.gfa")

In [5]:
# Load the DRB1 GFA file, convert it with process_graph, give the resulting
# networkx graph a name, and print its summary.
G1 = gfapy.Gfa.from_file("../../data/pangenome-graphs/DRB1-3123_unsorted.gfa")
G1_nx = process_graph(G1)
G1_nx.name = "DRB1-3123_unsorted"

print_info(G1_nx)

Graph name: DRB1-3123_unsorted
Number of nodes: 3845
Number of edges: 4380
Is DAG: False
Number of nodes with in-degree 1: 2690
Average in-degree: 1.1391417425227568


In [6]:
# Load the chrX GFA file, convert it with process_graph, and give the
# resulting networkx graph a name.
G2 = gfapy.Gfa.from_file("../../data/pangenome-graphs/chrX.pan.fa.a2fb268.4030258.6a1ecc2.smooth.gfa")
G2_nx = process_graph(G2)
G2_nx.name = "chrX.pan.fa.a2fb268.4030258.6a1ecc2.smooth"

In [None]:
# Print the summary of the converted DRB1 graph.
print_info(G1_nx)

In [None]:
# One empty list per DFS edge category.
# NOTE(review): presumably meant to collect classified edges; nothing in the
# visible code fills it — confirm where it is used.
edge_types = {"Back": [], "Forward": [], "Cross": [], "Tree": []}

In [None]:
class Graph:
	# instance variables
	def __init__(self, v, e):
		# v is the number of nodes/vertices
		self.time = 0
		self.traversal_array = []
		self.v = v
		# e is the number of edge
		self.e = e
		# adj. list for graph
		self.graph_list = [[] for _ in range(v)]

	# function to print adj list
	def print_graph_list(self):
		print("Adjacency List Representation:")
		for i in range(self.v):
			print(i, "-->", *self.graph_list[i])
		print()

	# function the get number of edges
	def number_of_edges(self):
		return self.e

	# function for dfs
	def dfs(self):
		self.visited = [False]*self.v
		self.start_time = [0]*self.v
		self.end_time = [0]*self.v

		for node in range(self.v):
			if not self.visited[node]:
				self.traverse_dfs(node)
		print()
		print("DFS Traversal: ", self.traversal_array)
		print()

	def traverse_dfs(self, node):
		# mark the node visited
		self.visited[node] = True
		# add the node to traversal
		self.traversal_array.append(node)
		# get the starting time
		self.start_time[node] = self.time
		# increment the time by 1
		self.time += 1
		# traverse through the neighbours
		for neighbour in self.graph_list[node]:
			# if a node is not visited
			if not self.visited[neighbour]:
				# marks the edge as tree edge
				print('Tree Edge:', str(node)+'-->'+str(neighbour))
				# dfs from that node
				self.traverse_dfs(neighbour)
			else:
				# when the parent node is traversed after the neighbour node
				if self.start_time[node] > self.start_time[neighbour] and self.end_time[node] < self.end_time[neighbour]:
					print('Back Edge:', str(node)+'-->'+str(neighbour))
				# when the neighbour node is a descendant but not a part of tree
				elif self.start_time[node] < self.start_time[neighbour] and self.end_time[node] > self.end_time[neighbour]:
					print('Forward Edge:', str(node)+'-->'+str(neighbour))
				# when parent and neighbour node do not have any ancestor and a descendant relationship between them
				elif self.start_time[node] > self.start_time[neighbour] and self.end_time[node] > self.end_time[neighbour]:
					print('Cross Edge:', str(node)+'-->'+str(neighbour))
			self.end_time[node] = self.time
			self.time += 1