In [45]:
import networkx as nx
import matplotlib.pyplot as plt
import random
import sys
from pyEliasFano import EliasFano, UniformlyPartitionedEliasFano

In [46]:
MIN_PER_RANK = 1  # Nodes/Rank: How 'fat' the DAG should be.
MAX_PER_RANK = 5
MIN_RANKS = 3     # Ranks: How 'tall' the DAG should be.
MAX_RANKS = 5
PERCENT = 30      # Chance of having an Edge.

In [47]:
def generate_dag(min_w, max_w, total_nodes): # min_w, max_w: node values range, total_nodes: total number of nodes
    random.seed()  # Initialize the random number generator

    G = nx.DiGraph()

    current_nodes = 0 # Total number of nodes in the graph
    ranks = [] # Number of nodes in each rank

    # Generate ranks with nodes until the total number of nodes is reached
    while current_nodes < total_nodes:
        new_nodes = min(MAX_PER_RANK, total_nodes - current_nodes) # type: ignore # Number of nodes in the new rank
        ranks.append(new_nodes) # Add the new rank to the list of ranks
        current_nodes += new_nodes # Update the total number of nodes

    nodes = 1 # Total number of nodes in the graph starts from 1

    for rank in ranks:
        for k in range(rank):
            # Assign a random weight to each new node
            node_weight = random.randint(min_w, max_w)
            G.add_node(nodes + k, weight=node_weight)

        # Edges from old nodes ('nodes') to new ones ('rank').
        for j in range(nodes - 1): # Adjusted to start from 0
            for k in range(rank):
                if random.randint(0, 99) < PERCENT: # type: ignore # Randomly decide if there is an edge between the nodes
                    G.add_edge(j + 1, k + nodes) # Adjusted to start from 1

        nodes += rank  # Accumulate into old node set.

    # remove isolated nodes
    G.remove_nodes_from(list(nx.isolates(G)))

    root_id = 0 # Root node is 0

    roots = [node for node in G.nodes() if G.in_degree(node) == 0] # Find the root nodes
    for _ in roots:
        G.add_edge(root_id, _) # Add an edge from the root node to each root node

    # add an attribute 'out' to the root node as a set containing 0
    G.nodes[root_id]['out'] = []
    G.nodes[root_id]['out'].append(0)
    G.nodes[root_id]['weight'] = 0

    return G

def compute_node_out(G, node_id):
    node_weight = G.nodes[node_id]['weight']
    predecessors = list(G.predecessors(node_id))

    node_out = []
    for predecessor in predecessors:
        predecessor_out = G.nodes[predecessor]['out']
        # Extend the list instead of union
        node_out.extend(predecessor_out)

    # Ensure uniqueness and sort
    node_out = sorted(set([x + node_weight for x in node_out]))
    return node_out


def optimal_father(G, node_id):
    G.nodes[node_id]['optimal_father'] = None
    successors = list(G.successors(node_id))
    if len(successors) == 0:
        return

    max_out = -1
    for successor in successors:
        successor_out = G.nodes[successor]['out']
        if len(successor_out) > max_out:
            max_out = len(successor_out)
            G.nodes[node_id]['optimal_father'] = successor

def offset(G, node_id):
    optimal_father = G.nodes[node_id]['optimal_father']
    father_weight = G.nodes[optimal_father]['weight']
    father_out = G.nodes[optimal_father]['out']
    node_out = G.nodes[node_id]['out']

    offset = [] #list of indexes of the father_out that are in node_out
    for i in range(len(father_out)):
        if father_out[i] - father_weight in node_out:
            offset.append(i)

    return offset


def process_graph(G):
    for node in list(nx.topological_sort(G))[1::]: # the root is already initialized with (0)
        G.nodes[node]['out'] = compute_node_out(G, node)
    for node in G.nodes():
        optimal_father(G, node)
    for node in G.nodes():
        if G.nodes[node]['optimal_father'] is not None:
            G.nodes[node]['offset'] = offset(G, node)
        else:
            G.nodes[node]['offset'] = None

In [48]:
def compressed_graph(G) -> nx.DiGraph:
    G_compressed = G.copy()
    for node in G_compressed.nodes():
            if G_compressed.nodes[node]['offset'] is not None:
                offset = G_compressed.nodes[node]['offset']
            ef0 = EliasFano(offset)
            G_compressed.nodes[node]['offset'] = ef0
    return G_compressed

In [49]:
G = generate_dag(1, 10, 30)
process_graph(G)
for node in G.nodes():
    print(f"Node {node}: {G.nodes[node]}")

Node 1: {'weight': 2, 'out': [2], 'optimal_father': 30, 'offset': [0]}
Node 2: {'weight': 3, 'out': [3], 'optimal_father': 22, 'offset': [2]}
Node 3: {'weight': 1, 'out': [1], 'optimal_father': 22, 'offset': [0]}
Node 4: {'weight': 6, 'out': [6], 'optimal_father': 29, 'offset': [3]}
Node 5: {'weight': 4, 'out': [4], 'optimal_father': 26, 'offset': [2]}
Node 6: {'weight': 3, 'out': [4, 6, 9], 'optimal_father': 27, 'offset': [0, 2, 4]}
Node 7: {'weight': 1, 'out': [2], 'optimal_father': 26, 'offset': [0]}
Node 8: {'weight': 5, 'out': [7], 'optimal_father': 29, 'offset': [4]}
Node 9: {'weight': 1, 'out': [3], 'optimal_father': 17, 'offset': [2]}
Node 10: {'weight': 2, 'out': [5, 8], 'optimal_father': 29, 'offset': [2, 5]}
Node 11: {'weight': 4, 'out': [6, 7], 'optimal_father': 29, 'offset': [3, 4]}
Node 12: {'weight': 7, 'out': [9, 12, 15], 'optimal_father': 21, 'offset': [5, 8, 10]}
Node 13: {'weight': 2, 'out': [5, 6, 8, 11], 'optimal_father': None, 'offset': None}
Node 14: {'weight': 3

In [50]:
G_compressed = compressed_graph(G)
for node in G_compressed.nodes():
    print(f"Node {node}: {G_compressed.nodes[node]}")

Node 1: {'weight': 2, 'out': [2], 'optimal_father': 30, 'offset': <pyEliasFano.EliasFano.EliasFano object at 0x70e7cc602cb0>}
Node 2: {'weight': 3, 'out': [3], 'optimal_father': 22, 'offset': <pyEliasFano.EliasFano.EliasFano object at 0x70e7cc6026b0>}
Node 3: {'weight': 1, 'out': [1], 'optimal_father': 22, 'offset': <pyEliasFano.EliasFano.EliasFano object at 0x70e7cc6029b0>}
Node 4: {'weight': 6, 'out': [6], 'optimal_father': 29, 'offset': <pyEliasFano.EliasFano.EliasFano object at 0x70e7cc602050>}
Node 5: {'weight': 4, 'out': [4], 'optimal_father': 26, 'offset': <pyEliasFano.EliasFano.EliasFano object at 0x70e7cc600190>}
Node 6: {'weight': 3, 'out': [4, 6, 9], 'optimal_father': 27, 'offset': <pyEliasFano.EliasFano.EliasFano object at 0x70e7cc6015d0>}
Node 7: {'weight': 1, 'out': [2], 'optimal_father': 26, 'offset': <pyEliasFano.EliasFano.EliasFano object at 0x70e7cc603520>}
Node 8: {'weight': 5, 'out': [7], 'optimal_father': 29, 'offset': <pyEliasFano.EliasFano.EliasFano object at 0x7

In [51]:
class Node:
    def __init__(self, node_id, weight, successor_id=None, offset=None):
        self.node_id = node_id
        self.weight = weight
        self.successor_id = successor_id
        self.offset = offset

    def __repr__(self):
        return (f"Node(id={self.node_id}, weight={self.weight}, "
                f"successor_id={self.successor_id}, offset={self.offset})")

class DAG:
    def __init__(self):
        self.nodes = {}

    def add_node(self, node_id, weight, successor_id=None, offset=None):
        if node_id in self.nodes:
            raise ValueError(f"Node with id {node_id} already exists.")
        new_node = Node(node_id, weight, successor_id, offset)
        self.nodes[node_id] = new_node

    def remove_node(self, node_id):
        if node_id not in self.nodes:
            raise ValueError(f"Node with id {node_id} does not exist.")
        del self.nodes[node_id]

    def get_node(self, node_id):
        return self.nodes.get(node_id)

    def __repr__(self):
        return '\n'.join(str(node) for node in self.nodes.values())

    # Query functions
    def query(self, node_id, index):
        node = self.get_node(node_id)
        if node is None:
            raise ValueError(f"Node with id {node_id} does not exist.")
        if node.successor_id is None:
            return node.offset.select(index) # offset is an EliasFano object, select is a method of EliasFano that returns the index-th element

        sum_weights = 0
        current_node = node
        while current_node.successor_id is not None:
            index = current_node.offset.select(index)
            current_node = self.get_node(current_node.successor_id)
            sum_weights += current_node.weight
        return current_node.offset.select(index) - sum_weights  # Use current_node directly

In [52]:
dag = DAG()
for node in G_compressed.nodes():
    # if the node has a successor, then do not represent its explicit set out, but just the offset
    if G_compressed.nodes[node]['optimal_father'] is not None:
        dag.add_node(node, G_compressed.nodes[node]['weight'], G_compressed.nodes[node]['optimal_father'], G_compressed.nodes[node]['offset'])
    # if the node has no successor, then represent its explicit set out
    else:
        out = G_compressed.nodes[node]['out']
        ef0 = EliasFano(out)
        dag.add_node(node, G_compressed.nodes[node]['weight'], None, ef0)

print(dag)

Node(id=1, weight=2, successor_id=30, offset=<pyEliasFano.EliasFano.EliasFano object at 0x70e7cc602cb0>)
Node(id=2, weight=3, successor_id=22, offset=<pyEliasFano.EliasFano.EliasFano object at 0x70e7cc6026b0>)
Node(id=3, weight=1, successor_id=22, offset=<pyEliasFano.EliasFano.EliasFano object at 0x70e7cc6029b0>)
Node(id=4, weight=6, successor_id=29, offset=<pyEliasFano.EliasFano.EliasFano object at 0x70e7cc602050>)
Node(id=5, weight=4, successor_id=26, offset=<pyEliasFano.EliasFano.EliasFano object at 0x70e7cc600190>)
Node(id=6, weight=3, successor_id=27, offset=<pyEliasFano.EliasFano.EliasFano object at 0x70e7cc6015d0>)
Node(id=7, weight=1, successor_id=26, offset=<pyEliasFano.EliasFano.EliasFano object at 0x70e7cc603520>)
Node(id=8, weight=5, successor_id=29, offset=<pyEliasFano.EliasFano.EliasFano object at 0x70e7cc6011e0>)
Node(id=9, weight=1, successor_id=17, offset=<pyEliasFano.EliasFano.EliasFano object at 0x70e7cc601390>)
Node(id=10, weight=2, successor_id=29, offset=<pyEliasF

In [53]:
# for every node of the DAG, return the size in bits of the offset and compare it with the size in bits of the explicit set out. Use sys.getsizeof() to get the size in bytes of the EliasFano object
for node in dag.nodes.values():
    offset_size = sys.getsizeof(node.offset)
    if node.successor_id is None:
        out_size = sys.getsizeof(node.offset)
    else:
        out_size = sys.getsizeof(G_compressed.nodes[node.node_id]['out'])
    print(f"Node {node.node_id}: offset size: {offset_size}, out size: {out_size}")

Node 1: offset size: 48, out size: 72
Node 2: offset size: 48, out size: 72
Node 3: offset size: 48, out size: 72
Node 4: offset size: 48, out size: 72
Node 5: offset size: 48, out size: 72
Node 6: offset size: 48, out size: 88
Node 7: offset size: 48, out size: 72
Node 8: offset size: 48, out size: 72
Node 9: offset size: 48, out size: 72
Node 10: offset size: 48, out size: 72
Node 11: offset size: 48, out size: 72
Node 12: offset size: 48, out size: 88
Node 13: offset size: 48, out size: 48
Node 14: offset size: 48, out size: 88
Node 15: offset size: 48, out size: 88
Node 16: offset size: 48, out size: 88
Node 17: offset size: 48, out size: 48
Node 18: offset size: 48, out size: 120
Node 19: offset size: 48, out size: 152
Node 20: offset size: 48, out size: 120
Node 21: offset size: 48, out size: 48
Node 22: offset size: 48, out size: 184
Node 23: offset size: 48, out size: 120
Node 24: offset size: 48, out size: 72
Node 25: offset size: 48, out size: 48
Node 26: offset size: 48, out

In [54]:
for node in list(nx.topological_sort(G_compressed)):
    for i in range(len(G_compressed.nodes[node]['out'])):
        print(dag.query(node, i), G_compressed.nodes[node]['out'][i])

0 0
2 2
3 3
1 1
6 6
4 4
7 7
3 3
2 2
7 7
9 9
4 4
6 6
9 9
5 5
8 8
6 6
9 9
10 10
6 6
7 7
5 5
6 6
8 8
11 11
9 9
12 12
15 15
10 10
12 12
14 14
17 17
5 5
7 7
9 9
10 10
11 11
13 13
14 14
15 15
16 16
17 17
18 18
19 19
7 7
8 8
10 10
11 11
14 14
15 15
16 16
17 17
18 18
19 19
20 20
7 7
8 8
9 9
12 12
13 13
15 15
16 16
18 18
20 20
21 21
23 23
3 3
4 4
5 5
7 7
8 8
10 10
11 11
13 13
15 15
16 16
18 18
12 12
14 14
16 16
18 18
19 19
21 21
24 24
10 10
11 11
13 13
14 14
15 15
17 17
18 18
20 20
22 22
23 23
25 25
4 4
5 5
6 6
8 8
9 9
10 10
11 11
12 12
13 13
14 14
16 16
17 17
19 19
3 3
4 4
5 5
6 6
7 7
8 8
9 9
10 10
11 11
12 12
13 13
15 15
17 17
18 18
20 20
12 12
13 13
14 14
15 15
16 16
17 17
18 18
19 19
20 20
21 21
23 23
24 24
25 25
26 26
27 27
28 28
29 29
14 14
15 15
16 16
17 17
19 19
20 20
21 21
22 22
23 23
24 24
25 25
26 26
27 27
28 28
29 29
31 31
34 34
9 9
10 10
11 11
12 12
13 13
14 14
15 15
16 16
17 17
18 18
19 19
20 20
21 21
22 22
23 23
24 24
25 25
26 26
6 6
7 7
8 8
9 9
10 10
11 11
12 12
13 13
14 14
15 1

In [59]:
sys.getsizeof(G_compressed)

48