In [38]:
import networkx as nx

In [39]:
def build_digraph_from_file(file_path):
    """Build a node-weighted directed graph from a tab-separated text file.

    Expected layout of the file:

    * line 1: the number of nodes ``n``;
    * next ``n`` lines: ``<node_id><TAB><weight>``;
    * every remaining line: ``<source_id><TAB><target_id>`` (one directed edge).

    Blank lines in the edge section (for example a trailing empty line at the
    end of the file) are ignored.

    Args:
        file_path: Path of the file to read.

    Returns:
        A ``networkx.DiGraph`` whose node ids are ints, each carrying a float
        ``weight`` attribute.

    Raises:
        ValueError: If the header is not an integer, the file ends before all
            announced node lines were read, or a line does not hold exactly
            two tab-separated fields of the expected numeric type.
    """
    # Create an empty directed graph
    G = nx.DiGraph()

    with open(file_path, 'r') as file:
        # Read the number of nodes (first line)
        num_nodes = int(file.readline().strip())

        # Read the next num_nodes lines to add nodes with weights
        for position in range(num_nodes):
            line = file.readline()
            if not line:
                # readline() returns '' only at end of file.
                raise ValueError(
                    f"Expected {num_nodes} node lines but the file ended "
                    f"after {position}."
                )
            node_id, node_weight = line.strip().split('\t')
            G.add_node(int(node_id), weight=float(node_weight))

        # Read the remaining lines to add edges
        for line in file:
            line = line.strip()
            if not line:
                # Tolerate blank lines instead of failing on the unpack below.
                continue
            node1, node2 = line.split('\t')
            G.add_edge(int(node1), int(node2))

    return G

In [40]:
def compute_node_out(G, node_id):
    """Return the sorted, duplicate-free 'out' values of ``node_id``.

    Every value found in the 'out' list of any predecessor is shifted by the
    node's own 'weight'; the shifted values are deduplicated and returned in
    ascending order. Each predecessor must already carry an 'out' attribute.
    A node without predecessors yields an empty list.
    """
    shift = G.nodes[node_id]['weight']
    parents = list(G.predecessors(node_id))

    # A set comprehension merges the parents' lists and removes duplicates
    # in a single pass.
    shifted = {
        value + shift
        for parent in parents
        for value in G.nodes[parent]['out']
    }
    return sorted(shifted)

def optimal_father(G, node_id):
    """Record on ``node_id`` which of its successors has the longest 'out' list.

    The chosen successor id is written to the node's 'optimal_father'
    attribute. Ties go to the successor encountered first, and the attribute
    is ``None`` when the node has no successors. Nothing is returned.
    """
    attrs = G.nodes[node_id]
    attrs['optimal_father'] = None

    longest = -1
    for candidate in tuple(G.successors(node_id)):
        size = len(G.nodes[candidate]['out'])
        # Strict comparison keeps the earliest candidate on equal sizes.
        if size > longest:
            longest = size
            attrs['optimal_father'] = candidate

def offset(G, node_id):
    """Map each 'out' value of ``node_id`` to its position in the father's 'out'.

    The father is the node stored in the 'optimal_father' attribute of
    ``node_id``. For every value ``v`` in the node's 'out' list, the position
    of ``v + father_weight`` inside the father's 'out' list is looked up.

    The lookup is done by *adding* the father's weight, never by subtracting
    it from the father's values: the addition repeats the operation that
    produced the father's list, so it matches bit-for-bit even with float
    weights, whereas ``(v + w) - w == v`` does not always hold for floats.

    Args:
        G: Graph whose nodes carry 'out', 'weight' and 'optimal_father'.
        node_id: Node to compute the index list for; its 'optimal_father'
            must refer to an existing node.

    Returns:
        A list of int positions into the father's 'out' list, in the order of
        the node's own 'out' list (ascending when that list is sorted). Values
        with no counterpart in the father's list are skipped.

    Raises:
        KeyError: If a required node or attribute is missing (including when
            'optimal_father' is ``None``).
    """
    father_id = G.nodes[node_id]['optimal_father']
    father_weight = G.nodes[father_id]['weight']
    father_out = G.nodes[father_id]['out']
    node_out = G.nodes[node_id]['out']

    # value -> first position in father_out; replaces a repeated linear scan.
    position_of = {}
    for position, value in enumerate(father_out):
        position_of.setdefault(value, position)

    indexes = []
    for value in node_out:
        position = position_of.get(value + father_weight)
        if position is not None:
            indexes.append(position)

    return indexes

def process_graph(G, root_id=0):
    """Annotate every node of ``G`` with 'out', 'optimal_father' and 'offset'.

    The graph is modified in place:

    1. Isolated nodes are removed (a message is printed when any were found).
    2. The root gets ``out = [0]`` and its weight is forced to 0.
    3. Every other node gets its 'out' list from ``compute_node_out``, visited
       in topological order so predecessors are always processed first.
    4. Every node gets an 'optimal_father' (``optimal_father``) and an
       'offset' (``offset``), the latter being ``None`` when the node has no
       optimal father.

    Args:
        G: A directed acyclic ``networkx.DiGraph`` with a 'weight' attribute
            on every node.
        root_id: Id of the root node. Defaults to 0.

    Raises:
        KeyError: If ``root_id`` is not in the graph once isolated nodes have
            been removed.
    """
    # remove isolated nodes
    isolated = list(nx.isolates(G))
    if isolated:
        G.remove_nodes_from(isolated)
        print('Removed isolated nodes')

    if root_id not in G:
        raise KeyError(
            f"Root node {root_id!r} is not in the graph "
            "(missing, or removed because it was isolated)."
        )

    # Initialise the root: its 'out' is the list [0] and its weight is 0.
    G.nodes[root_id]['out'] = [0]
    G.nodes[root_id]['weight'] = 0

    # Skip the root by id rather than by position: a topological order does
    # not guarantee that the root comes first when other nodes also have no
    # predecessors.
    for node in list(nx.topological_sort(G)):
        if node == root_id:
            continue
        G.nodes[node]['out'] = compute_node_out(G, node)
    for node in G.nodes():
        optimal_father(G, node)
    for node in G.nodes():
        if G.nodes[node]['optimal_father'] is not None:
            G.nodes[node]['offset'] = offset(G, node)
        else:
            G.nodes[node]['offset'] = None

In [41]:
# Load the graph from disk, annotate it, then report its size.
G = build_digraph_from_file('dag.tsv')
process_graph(G)
for label, count in (("Nodes:", G.number_of_nodes()), ("Edges:", G.number_of_edges())):
    print(label, count)

Nodes: 31
Edges: 136


In [42]:
class Node:
    """Plain record for one node of the compact DAG.

    Stores ``node_id``, ``weight``, ``successor_id`` and ``offset`` exactly as
    passed in; the last two default to ``None``.
    """

    def __init__(self, node_id, weight, successor_id=None, offset=None):
        self.node_id, self.weight = node_id, weight
        self.successor_id, self.offset = successor_id, offset

    def __repr__(self):
        # One "name=value" fragment per attribute, joined in a fixed order.
        fields = (
            f"id={self.node_id}",
            f"weight={self.weight}",
            f"successor_id={self.successor_id}",
            f"offset={self.offset}",
        )
        return "Node(" + ", ".join(fields) + ")"

class DAG:
    """Collection of ``Node`` objects keyed by id, with value lookup via ``query``."""

    def __init__(self):
        # node_id -> Node
        self.nodes = {}

    def add_node(self, node_id, weight, successor_id=None, offset=None):
        """Create a ``Node`` and register it under ``node_id``.

        Raises:
            ValueError: If ``node_id`` is already registered.
        """
        if node_id in self.nodes:
            raise ValueError(f"Node with id {node_id} already exists.")
        new_node = Node(node_id, weight, successor_id, offset)
        self.nodes[node_id] = new_node

    def remove_node(self, node_id):
        """Delete the node registered under ``node_id``.

        Other nodes that name it as their successor are not updated.

        Raises:
            ValueError: If ``node_id`` is not registered.
        """
        if node_id not in self.nodes:
            raise ValueError(f"Node with id {node_id} does not exist.")
        del self.nodes[node_id]

    def get_node(self, node_id):
        """Return the node registered under ``node_id``, or ``None``."""
        return self.nodes.get(node_id)

    def __repr__(self):
        return '\n'.join(str(node) for node in self.nodes.values())

    # Query functions
    def query(self, node_id, index):
        """Return the value at position ``index`` for ``node_id``.

        Starting from the node, successor links are followed until a node
        without a successor is reached. At each hop ``index`` is translated
        through the current node's ``offset`` list and the weight of the node
        being moved to is accumulated. The result is the final node's
        ``offset[index]`` minus the accumulated weights; for a node without a
        successor this is simply ``offset[index]``.

        Raises:
            ValueError: If ``node_id``, or any successor id met along the
                chain, is not registered.
            IndexError: If ``index`` is out of range at any hop.
        """
        current_node = self.get_node(node_id)
        if current_node is None:
            raise ValueError(f"Node with id {node_id} does not exist.")

        sum_weights = 0
        while current_node.successor_id is not None:
            index = current_node.offset[index]
            successor_id = current_node.successor_id
            current_node = self.get_node(successor_id)
            if current_node is None:
                # Dangling link, e.g. the successor was removed afterwards.
                raise ValueError(f"Node with id {successor_id} does not exist.")
            sum_weights += current_node.weight
        return current_node.offset[index] - sum_weights

In [43]:
dag = DAG()

# Nodes with an optimal father keep only their index list ('offset');
# nodes without one keep their explicit 'out' list in the same slot.
for node in G.nodes():
    attrs = G.nodes[node]
    father = attrs['optimal_father']
    payload = attrs['out'] if father is None else attrs['offset']
    dag.add_node(node, attrs['weight'], father, payload)

print(dag)

Node(id=0, weight=0, successor_id=1, offset=[0])
Node(id=1, weight=6.0, successor_id=26, offset=[4])
Node(id=2, weight=6.0, successor_id=23, offset=[2])
Node(id=3, weight=5.0, successor_id=24, offset=[2])
Node(id=4, weight=2.0, successor_id=28, offset=[0])
Node(id=5, weight=2.0, successor_id=27, offset=[0])
Node(id=6, weight=6.0, successor_id=29, offset=[4])
Node(id=7, weight=3.0, successor_id=27, offset=[1, 3])
Node(id=8, weight=3.0, successor_id=28, offset=[3, 6, 7])
Node(id=9, weight=8.0, successor_id=27, offset=[5])
Node(id=10, weight=10.0, successor_id=28, offset=[13])
Node(id=11, weight=1.0, successor_id=28, offset=[1, 4, 7, 8, 9, 14])
Node(id=12, weight=3.0, successor_id=30, offset=[1, 8])
Node(id=13, weight=1.0, successor_id=23, offset=[0, 3])
Node(id=14, weight=5.0, successor_id=25, offset=[7, 11, 15])
Node(id=15, weight=2.0, successor_id=27, offset=[2, 5, 6])
Node(id=16, weight=7.0, successor_id=26, offset=[6, 7, 9, 10, 11, 12, 13, 17, 18])
Node(id=17, weight=5.0, successor_i

In [45]:
# Sanity-check DAG.query: for every node, the value recovered at index 0
# must equal the first element of the explicit 'out' list stored on G.
for node in G.nodes():
    recovered = dag.query(node, 0)
    expected = G.nodes[node]['out'][0]
    print(recovered == expected)

True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
