In [9]:
import random
import networkx as nx
import matplotlib.pyplot as plt

In [10]:
# Tunable parameters for the random DAG generator.
# NOTE(review): MIN_PER_RANK, MIN_RANKS and MAX_RANKS are not referenced by the
# code visible in this notebook; only MAX_PER_RANK and PERCENT are read by
# generate_dag -- confirm before relying on them.
MIN_PER_RANK = 1  # Nodes/Rank: How 'fat' the DAG should be.
MAX_PER_RANK = 5  # Largest number of nodes generate_dag puts into one rank.
MIN_RANKS = 3     # Ranks: How 'tall' the DAG should be.
MAX_RANKS = 5
PERCENT = 30      # Chance of having an Edge, in percent (compared against randint(0, 99)).

def generate_dag(min_w, max_w, total_nodes, seed=None):
    """Build a random ranked DAG with weighted nodes and a single root.

    Nodes 1..total_nodes are laid out in consecutive ranks of at most
    MAX_PER_RANK nodes. Every (older node, newer node) pair is connected with
    probability PERCENT/100, so edges always point from a lower id to a higher
    id and the graph is acyclic. Nodes left without any edge are dropped, then
    an artificial root (id 0, weight 0) is connected to every node that has no
    incoming edge.

    Args:
        min_w: Smallest node weight (inclusive).
        max_w: Largest node weight (inclusive).
        total_nodes: Number of nodes generated before isolated nodes are
            removed; the returned graph may therefore contain fewer.
        seed: Optional seed for the module-level random generator. The default
            None seeds from system entropy, as the function always did; pass a
            fixed value to get a reproducible graph.

    Returns:
        A networkx.DiGraph in which every node carries a 'weight' attribute and
        a 'ranges' attribute initialised to the placeholder [(0, 0)]. The root
        node 0 is always present, even when no other node survives.
    """
    random.seed(seed)

    G = nx.DiGraph()

    # Split total_nodes into rank sizes: full ranks of MAX_PER_RANK, with the
    # remainder (if any) in the last rank.
    ranks = []
    remaining = total_nodes
    while remaining > 0:
        rank_size = min(MAX_PER_RANK, remaining)
        ranks.append(rank_size)
        remaining -= rank_size

    next_id = 1  # Id of the first node of the rank being created (ids start at 1).

    for rank_size in ranks:
        new_ids = range(next_id, next_id + rank_size)

        # Create the nodes of this rank, each with a random weight.
        for node in new_ids:
            G.add_node(node, weight=random.randint(min_w, max_w))

        # Randomly connect every node of the previous ranks to the new nodes.
        for old in range(1, next_id):
            for new in new_ids:
                if random.randint(0, 99) < PERCENT:
                    G.add_edge(old, new)

        next_id += rank_size  # The new rank now belongs to the "old" node set.

    # Drop nodes that ended up with no edges at all.
    G.remove_nodes_from(list(nx.isolates(G)))

    root_id = 0

    # Collect the current sources BEFORE the root exists, otherwise the root
    # itself (in-degree 0) would be picked up and receive a self-loop.
    sources = [node for node, degree in G.in_degree() if degree == 0]

    # Create the root explicitly: relying on add_edge to create it fails with a
    # KeyError below whenever there are no sources (e.g. every node was
    # isolated, which always happens when total_nodes <= MAX_PER_RANK).
    G.add_node(root_id, weight=0)
    G.add_edges_from((root_id, node) for node in sources)

    # Every node starts with the "no ranges yet" placeholder.
    for node in G.nodes():
        G.nodes[node]['ranges'] = [(0, 0)]

    return G

In [11]:
def draw_graph(G):
    """Draw G with a spring layout, labelling each node with its weight.

    All nodes are drawn in sky blue; the first node with no incoming edges
    (the root, for graphs built by generate_dag) is redrawn in red. If the
    graph has no such node (e.g. it is empty), the highlight is skipped
    instead of raising IndexError.

    Args:
        G: A networkx directed graph whose nodes may carry a 'weight'
            attribute; nodes without it are drawn unlabelled.
    """
    plt.figure(figsize=(10, 10))
    pos = nx.spring_layout(G, iterations=100, scale=4, k=0.5, center=(0,0))
    nx.draw(G, pos, with_labels=False, node_size=500, font_size=10, node_color='skyblue')

    # Labels are drawn separately so that they show the weight, not the node id.
    node_labels = nx.get_node_attributes(G, 'weight')
    nx.draw_networkx_labels(G, pos, labels=node_labels)

    # First node with in-degree 0, or None when there is none.
    root = next((node for node, degree in G.in_degree() if degree == 0), None)
    if root is not None:
        nx.draw_networkx_nodes(G, pos, nodelist=[root], node_color='red', node_size=500)

    plt.show()

In [12]:
# def merge_ranges_and_remove_none(ranges):

#     sorted_ranges = sorted(ranges, key = lambda x: x[0])
#     merged_ranges = [sorted_ranges[0]]
#     for current_range in sorted_ranges[1:]:
#         if current_range[0] <= merged_ranges[-1][1] + 1:
#             merged_ranges[-1] = (merged_ranges[-1][0], max(merged_ranges[-1][1], current_range[1]))
#         else:
#             merged_ranges.append(current_range)
#     return [current_range for current_range in merged_ranges if current_range[0] != 0 or current_range[1] != 0]

def merge_ranges_and_remove_none(ranges):
    """Merge ranges that share the same end and drop the (0, 0) placeholder.

    Two ranges are merged iff they have the same second entry and the later
    one (in sorted order) starts no further than one past that end; the merged
    range keeps the smaller start. Ranges with different ends are never
    merged, even if they overlap.

    Unlike a comparison against only the previous sorted range, this also
    merges same-end ranges that are separated by another range in sorted
    order, e.g. [(1, 5), (2, 4), (3, 5)] -> [(1, 5), (2, 4)].

    Args:
        ranges: Iterable of (start, end) pairs. None or an empty iterable
            yields an empty result.

    Returns:
        A list of (start, end) tuples sorted by (start, end), with merged
        ranges collapsed and every (0, 0) entry removed.
    """
    merged = []
    seen_ends = set()  # Ends of the ranges already kept.

    for start, end in sorted(ranges or ()):
        # Input is sorted, so any kept range with this end has a start that is
        # <= the current one: the current range is absorbed by it.
        if end in seen_ends and start <= end + 1:
            continue
        seen_ends.add(end)
        merged.append((start, end))

    # (0, 0) is the "no range" placeholder and is never reported.
    return [(s, e) for s, e in merged if s or e]


def compute_and_associate_ranges(G, node_id):
    """Compute the ranges of a node from the ranges stored on its predecessors.

    For every range stored on a predecessor, with end value `e`, the node gets
    the candidate range (e + 1, e + weight), where weight is the node's own
    'weight' attribute. The candidates are then merged/deduplicated with
    merge_ranges_and_remove_none.

    The graph is not modified; the caller decides whether to store the result.

    Args:
        G: A networkx directed graph whose nodes carry 'weight' and 'ranges'
            attributes.
        node_id: The node whose ranges are computed.

    Returns:
        The list of (start, end) ranges of the node; empty if the node has no
        predecessors.
    """
    predecessors = list(G.predecessors(node_id))
    node_attrs = G.nodes[node_id]

    candidates = [
        (pred_range[1] + 1, pred_range[1] + node_attrs['weight'])
        for predecessor in predecessors
        for pred_range in G.nodes[predecessor]['ranges']
    ]

    # Merge once over all candidates rather than re-sorting the accumulated
    # list after every predecessor; all candidates have the same width, so the
    # result is the same.
    return merge_ranges_and_remove_none(candidates)

def print_node_ranges(G):
    """Print the topological generations of G, then the ranges of every node.

    Args:
        G: A networkx directed acyclic graph whose nodes all carry a 'ranges'
            attribute.

    Returns:
        None; the report is written to standard output.
    """
    # First section: one line per topological generation.
    for level, members in enumerate(nx.topological_generations(G)):
        print(f"Level {level}: {members}")

    print("\nNode ranges:\n")

    # Second section: every node (in graph insertion order) with its ranges.
    for name, attributes in G.nodes(data=True):
        print(f"Node: {name}")
        for span in attributes['ranges']:
            print(f"\tRange: {span}")

def process_graph(G, print_output=False) -> nx.DiGraph:
    """Compute and store the ranges of every non-root node of G, in place.

    Args:
        G: A networkx DAG whose first node in topological order is the root
            (node 0); nodes carry 'weight' and 'ranges' attributes.
        print_output: When True, print the generations and node ranges once
            everything has been computed.

    Returns:
        The same graph object, with 'range_count' and 'ranges' filled in.
    """
    # Upper bound on the number of ranges per node, stored as 'range_count'.
    range_counter(G)

    # Topological order guarantees that predecessors are processed first.
    ordering = list(nx.topological_sort(G))
    non_root_nodes = ordering[1:]  # The first entry is the root (0).
    for current in non_root_nodes:
        G.nodes[current]['ranges'] = compute_and_associate_ranges(G, current)

    if print_output:
        print_node_ranges(G)

    return G

def remove_half_ranges(G) -> nx.DiGraph:
    """Return a copy of G whose even generations (2, 4, ...) have no ranges.

    Generation 0 (the root) and generation 1 are left untouched; from
    generation 2 onwards, every second topological generation has its
    'ranges' attribute reset to the placeholder [(0, 0)].

    Args:
        G: A networkx DAG whose nodes carry a 'ranges' attribute.

    Returns:
        A copy of G with the ranges of the selected generations reset; G
        itself keeps its ranges.
    """
    G_half = G.copy()

    # Store the per-node 'range_count' on the copy.
    range_counter(G_half)

    levels = list(nx.topological_generations(G_half))
    for depth in range(2, len(levels), 2):
        for member in levels[depth]:
            G_half.nodes[member]['ranges'] = [(0,0)]

    return G_half

def get_node_ranges(G, node_id):
    """Return the ranges of a node, computing them if they are not stored.

    A non-root node whose 'ranges' attribute is the placeholder [(0, 0)] has
    no stored ranges. Its ranges are then derived from its predecessors. A
    predecessor may itself be a placeholder node (predecessors can sit in any
    earlier topological generation, not only the previous one), so placeholder
    ancestors are resolved first; using their (0, 0) placeholder as if it were
    a real range would give wrong results.

    The graph is not modified: resolved ranges live only in a local cache.
    G must be acyclic.

    Args:
        G: A networkx DAG whose nodes carry 'weight' and 'ranges' attributes;
            node 0 is the root.
        node_id: The node whose ranges are requested.

    Returns:
        The list of (start, end) ranges associated with the node.
    """
    def is_placeholder(node):
        # The root legitimately holds (0, 0); it is never recomputed.
        return node != 0 and G.nodes[node]['ranges'] == [(0,0)]

    if not is_placeholder(node_id):
        print("Node already has ranges, returning them...")
        return G.nodes[node_id]['ranges']

    print("Node has no ranges, computing ranges...")

    resolved = {}      # node -> ranges computed during this call
    stack = [node_id]  # explicit stack: no recursion limit on deep graphs
    while stack:
        current = stack[-1]
        if current in resolved:
            stack.pop()
            continue

        predecessors = list(G.predecessors(current))

        # Placeholder predecessors must be resolved before `current`.
        pending = [p for p in predecessors
                   if is_placeholder(p) and p not in resolved]
        if pending:
            stack.extend(pending)
            continue

        weight = G.nodes[current]['weight']
        candidates = []
        for predecessor in predecessors:
            if predecessor in resolved:
                predecessor_ranges = resolved[predecessor]
            else:
                predecessor_ranges = G.nodes[predecessor]['ranges']
            for predecessor_range in predecessor_ranges:
                candidates.append((predecessor_range[1] + 1,
                                   predecessor_range[1] + weight))

        resolved[current] = merge_ranges_and_remove_none(candidates)
        stack.pop()

    return resolved[node_id]

def range_counter(G):
    """Store on every node a 'range_count' attribute.

    The root (node 0) gets a count of 1; every other node gets the sum of the
    counts of its predecessors. Nodes are visited in topological order so that
    predecessor counts are always available.

    Args:
        G: A networkx DAG whose first node in topological order is node 0.

    Returns:
        None; the graph is updated in place.
    """
    G.nodes[0]['range_count'] = 1  # Base case: the root counts as one range.

    ordering = list(nx.topological_sort(G))
    for current in ordering[1:]:  # The first entry is the root (0).
        G.nodes[current]['range_count'] = sum(
            G.nodes[parent]['range_count'] for parent in G.predecessors(current)
        )

Dato che posso unire due range se e solo se i massimi sono uguali (tenendo il minimo dei due minimi), ottengo tantissimi range contigui. Un modo per occupare meno spazio potrebbe essere, per tutti i range con massimi contigui, salvare una sola tupla che ha come minimo il minimo dei minimi e come massimo un intervallo che va dal massimo del primo range al massimo dell'ultimo range. In questo modo, se ho n range contigui, al posto di memorizzare n tuple di 2 elementi (ovvero 2*n elementi) posso memorizzare solo 3 elementi: (min, [max1, max2]).

Per valori piccoli dei nodi, questa cosa diventa molto efficiente.

Se invece ad ogni nodo associo un peso random con un range di valori molto ampio, diventa raro che ci siano tanti nodi con range contigui e perdiamo l'efficienza di questo metodo. 

In [13]:
# Generate a random DAG (weights in 1..10, 20 nodes before pruning), compute
# the ranges of every node and print them.
G = process_graph(
    generate_dag(min_w=1, max_w=10, total_nodes=20),
    print_output=True,
)

Level 0: [0]
Level 1: [1, 2, 3, 4, 5, 9]
Level 2: [6, 7, 8, 10]
Level 3: [11, 12, 15, 13, 14, 18]
Level 4: [17, 16, 19, 20]

Node ranges:

Node: 1
	Range: (1, 9)
Node: 2
	Range: (1, 6)
Node: 3
	Range: (1, 2)
Node: 4
	Range: (1, 2)
Node: 5
	Range: (1, 9)
Node: 6
	Range: (3, 6)
Node: 7
	Range: (3, 8)
Node: 8
	Range: (3, 4)
	Range: (10, 11)
Node: 9
	Range: (1, 2)
Node: 10
	Range: (7, 16)
	Range: (10, 19)
Node: 11
	Range: (5, 11)
	Range: (9, 15)
	Range: (10, 16)
	Range: (12, 18)
Node: 12
	Range: (3, 9)
	Range: (5, 11)
	Range: (9, 15)
	Range: (12, 18)
Node: 13
	Range: (5, 6)
	Range: (12, 13)
	Range: (17, 18)
	Range: (20, 21)
Node: 14
	Range: (3, 4)
	Range: (9, 10)
	Range: (10, 11)
	Range: (17, 18)
	Range: (20, 21)
Node: 15
	Range: (3, 4)
	Range: (5, 6)
	Range: (9, 10)
	Range: (10, 11)
	Range: (12, 13)
Node: 16
	Range: (5, 9)
	Range: (7, 11)
	Range: (9, 13)
	Range: (10, 14)
	Range: (12, 16)
	Range: (14, 18)
	Range: (16, 20)
	Range: (17, 21)
	Range: (19, 23)
	Range: (22, 26)
Node: 17
	Range: 

Rimuoviamo metà dei range dal grafo, salvando i range solo un livello sì e un livello no, in ordine di generazione topologica. In questo modo, per un nodo a cui non ho associato un range, posso calcolarlo usando _solo_ i suoi predecessori, a patto che questi abbiano i range esplicitamente computati (attenzione: nelle generazioni topologiche è garantito che almeno un predecessore stia nel livello precedente, ma gli altri possono trovarsi in un qualunque livello più in alto, anche non computato). Sotto questa ipotesi il costo dell'operazione dipende dal numero di predecessori del nodo e dal numero di range associati a ognuno di essi (calcolare un singolo range richiede poi due operazioni atomiche). Quindi il costo totale per calcolare i range di un singolo nodo in un livello non computato è O(|n.predecessors| + |numero totale di range nell'insieme dei predecessori|).

> TODO: raffinare la complessità


**NOTA:** Se scelgo questo approccio di pre-computare un livello sì e un livello no, dovrò anche memorizzare delle informazioni riguardanti i predecessori dei nodi nei livelli non computati. Questo non dovrebbe occupare troppo spazio, in quanto per ogni nodo di un livello non computato dovrò salvare solo i suoi predecessori, un'informazione facilmente comprimibile.

In [14]:
# Build a copy of G in which every second generation (2, 4, ...) has its ranges
# reset to the (0, 0) placeholder, then print the resulting ranges.
G_half = remove_half_ranges(G)
print_node_ranges(G_half)

Level 0: [0]
Level 1: [1, 2, 3, 4, 5, 9]
Level 2: [6, 7, 8, 10]
Level 3: [11, 12, 15, 13, 14, 18]
Level 4: [17, 16, 19, 20]

Node ranges:

Node: 1
	Range: (1, 9)
Node: 2
	Range: (1, 6)
Node: 3
	Range: (1, 2)
Node: 4
	Range: (1, 2)
Node: 5
	Range: (1, 9)
Node: 6
	Range: (0, 0)
Node: 7
	Range: (0, 0)
Node: 8
	Range: (0, 0)
Node: 9
	Range: (1, 2)
Node: 10
	Range: (0, 0)
Node: 11
	Range: (5, 11)
	Range: (9, 15)
	Range: (10, 16)
	Range: (12, 18)
Node: 12
	Range: (3, 9)
	Range: (5, 11)
	Range: (9, 15)
	Range: (12, 18)
Node: 13
	Range: (5, 6)
	Range: (12, 13)
	Range: (17, 18)
	Range: (20, 21)
Node: 14
	Range: (3, 4)
	Range: (9, 10)
	Range: (10, 11)
	Range: (17, 18)
	Range: (20, 21)
Node: 15
	Range: (3, 4)
	Range: (5, 6)
	Range: (9, 10)
	Range: (10, 11)
	Range: (12, 13)
Node: 16
	Range: (0, 0)
Node: 17
	Range: (0, 0)
Node: 18
	Range: (3, 12)
	Range: (17, 26)
	Range: (20, 29)
Node: 19
	Range: (0, 0)
Node: 20
	Range: (0, 0)
Node: 0
	Range: (0, 0)


In [15]:
# List the topological generations (levels) of G, one per line.
gen = nx.topological_generations(G)

for i, generation in enumerate(gen):
    print(f"Level {i}: {generation}")

Level 0: [0]
Level 1: [1, 2, 3, 4, 5, 9]
Level 2: [6, 7, 8, 10]
Level 3: [11, 12, 15, 13, 14, 18]
Level 4: [17, 16, 19, 20]


In [16]:
# For each topological generation of G, print its nodes followed by the
# 'range_count' stored on every node by range_counter (run by process_graph).
gen = nx.topological_generations(G)

for i, generation in enumerate(gen):
    print(f"Level {i}: {generation}\n")
    for node in generation:
        print(f"Node: {node}, Range count: {G.nodes[node]['range_count']}")
    print("\n")

Level 0: [0]

Node: 0, Range count: 1


Level 1: [1, 2, 3, 4, 5, 9]

Node: 1, Range count: 1
Node: 2, Range count: 1
Node: 3, Range count: 1
Node: 4, Range count: 1
Node: 5, Range count: 1
Node: 9, Range count: 1


Level 2: [6, 7, 8, 10]

Node: 6, Range count: 1
Node: 7, Range count: 1
Node: 8, Range count: 3
Node: 10, Range count: 2


Level 3: [11, 12, 15, 13, 14, 18]

Node: 11, Range count: 5
Node: 12, Range count: 6
Node: 15, Range count: 7
Node: 13, Range count: 5
Node: 14, Range count: 5
Node: 18, Range count: 3


Level 4: [17, 16, 19, 20]

Node: 17, Range count: 26
Node: 16, Range count: 20
Node: 19, Range count: 27
Node: 20, Range count: 6


