### Problem generator
Problem generator as given by the professor, together with the default N values.

In [18]:
# Problem sizes N that the experiment loops in this notebook iterate over.
N_VALUES = [5, 10, 20, 100, 500, 1000]
# Seed handed to problem() by the solvers, so every run sees the same instances.
SEED = 42

import random

def problem(N, seed=None):
    """Generate a random collection of integer lists for a given size N.

    The global ``random`` generator is re-seeded with ``seed`` first, so the
    same ``(N, seed)`` pair always yields the same instance.

    Args:
        N: Size of the universe; list elements are drawn from ``0..N-1``.
        seed: Value passed to ``random.seed`` (``None`` lets ``random`` pick
            its own seed).

    Returns:
        A list containing between ``N`` and ``5 * N`` lists. Each inner list
        holds the distinct values obtained from between ``N // 5`` and
        ``N // 2`` random draws.
    """
    random.seed(seed)
    # The number of lists is drawn before any list content, exactly as the
    # outermost iterable of a comprehension would be evaluated first.
    how_many = random.randint(N, N * 5)
    subsets = []
    for _ in range(how_many):
        # Number of draws for this list comes first, then the draws themselves.
        draws = random.randint(N // 5, N // 2)
        members = set()
        for _ in range(draws):
            members.add(random.randint(0, N - 1))
        subsets.append(list(members))
    return subsets

### Greedy solution
Greedy solution as given by the professor.

In [19]:
import logging

def greedy(N):
    """Run the baseline greedy set-cover heuristic for size N and log the result.

    The lists produced by ``problem(N, seed=SEED)`` are visited from shortest
    to longest. A list is added to the solution only when it contains at
    least one element that is not covered yet. The search stops as soon as
    every value in ``0..N-1`` is covered.

    Args:
        N: Problem size; the goal is to cover ``set(range(N))``.

    Returns:
        None. The outcome is reported through ``logging``: an INFO record with
        the total weight ``w`` (sum of the chosen lists' lengths), the bloat
        relative to ``N`` and the number of lists visited, plus a DEBUG record
        with the chosen lists. If the lists cannot cover the goal a WARNING is
        logged instead.
    """
    goal = set(range(N))
    covered = set()
    solution = []
    nodes = 0
    all_lists = sorted(problem(N, seed=SEED), key=len)

    # Plain iteration replaces ``pop(0)``: no O(n) shift per visited list and
    # no IndexError when the lists run out before the goal is covered.
    for x in all_lists:
        if covered == goal:
            break
        nodes += 1
        candidate = set(x)
        # ``<=`` rather than ``<``: a list whose elements are *all* already
        # covered (including one equal to ``covered``, or an empty list)
        # contributes nothing and would only inflate the weight.
        if not candidate <= covered:
            solution.append(x)
            covered |= candidate

    if covered != goal:
        logging.warning(
            f"Greedy found no solution for N={N}: {len(goal - covered)} values left uncovered (Nodes visited: {nodes})"
        )
        return

    weight = sum(len(chosen) for chosen in solution)
    # Guard the degenerate N == 0 case, where the bloat ratio is undefined.
    bloat = (weight - N) / N * 100 if N else 0.0
    logging.info(
        f"Greedy solution for N={N}: w={weight} (bloat={bloat:.0f}%, Nodes visited: {nodes})"
    )
    logging.debug(f"{solution}")

In [20]:
# Lower the root logger threshold to INFO so the summary records emitted by
# greedy() are not filtered out, then run the heuristic for every problem size.
logging.getLogger().setLevel(logging.INFO)
for N in N_VALUES:
    greedy(N)

INFO:root:Greedy solution for N=5: w=5 (bloat=0%, Nodes visited: 13)
INFO:root:Greedy solution for N=10: w=13 (bloat=30%, Nodes visited: 14)
INFO:root:Greedy solution for N=20: w=46 (bloat=130%, Nodes visited: 14)
INFO:root:Greedy solution for N=100: w=332 (bloat=232%, Nodes visited: 23)
INFO:root:Greedy solution for N=500: w=2162 (bloat=332%, Nodes visited: 28)
INFO:root:Greedy solution for N=1000: w=4652 (bloat=365%, Nodes visited: 27)


### "longest first" Greedy
Greedy solution as in the professor's version, except that it starts from the longest random list.
It generally performs slightly worse and is noticeably better in only one case, N = 20. That is probably a random consequence of the list generation.

In [21]:
def longest_greedy(N):
    """Greedy set-cover variant that starts from the longest list.

    The lists produced by ``problem(N, seed=SEED)`` are sorted by length. The
    longest one is visited first; the remaining lists are then visited from
    shortest to longest. A list is added to the solution only when it
    contains at least one element that is not covered yet. The search stops
    as soon as every value in ``0..N-1`` is covered.

    Args:
        N: Problem size; the goal is to cover ``set(range(N))``.

    Returns:
        None. The outcome is reported through ``logging``: an INFO record with
        the total weight ``w`` (sum of the chosen lists' lengths), the bloat
        relative to ``N`` and the number of lists visited. If the lists cannot
        cover the goal a WARNING is logged instead.
    """
    goal = set(range(N))
    covered = set()
    solution = []
    nodes = 0
    all_lists = sorted(problem(N, seed=SEED), key=len)

    # Longest list first, then the rest in ascending length order. Building
    # the visit order up front avoids repeated O(n) ``pop(0)`` calls and the
    # IndexError they raise once the lists are exhausted.
    visit_order = all_lists[-1:] + all_lists[:-1]
    for x in visit_order:
        if covered == goal:
            break
        nodes += 1
        candidate = set(x)
        # ``<=`` rather than ``<``: a list whose elements are *all* already
        # covered (including one equal to ``covered``, or an empty list)
        # contributes nothing and would only inflate the weight.
        if not candidate <= covered:
            solution.append(x)
            covered |= candidate

    if covered != goal:
        logging.warning(
            f"Alternate greedy version found no solution for N={N}: {len(goal - covered)} values left uncovered (Nodes visited: {nodes})"
        )
        return

    weight = sum(len(chosen) for chosen in solution)
    # Guard the degenerate N == 0 case, where the bloat ratio is undefined.
    bloat = (weight - N) / N * 100 if N else 0.0
    logging.info(
        f"Alternate greedy version for N={N}: w={weight} (bloat={bloat:.0f}%, Nodes visited: {nodes})"
    )


# Lower the root logger threshold to INFO so the summary records emitted by
# longest_greedy() are not filtered out, then run it for every problem size.
logging.getLogger().setLevel(logging.INFO)
for N in N_VALUES:
   longest_greedy(N)

INFO:root:Alternate greedy version for N=5: w=5 (bloat=0%, Nodes visited: 5)
INFO:root:Alternate greedy version for N=10: w=14 (bloat=40%, Nodes visited: 15)
INFO:root:Alternate greedy version for N=20: w=36 (bloat=80%, Nodes visited: 15)
INFO:root:Alternate greedy version for N=100: w=340 (bloat=240%, Nodes visited: 19)
INFO:root:Alternate greedy version for N=500: w=2187 (bloat=337%, Nodes visited: 28)
INFO:root:Alternate greedy version for N=1000: w=4699 (bloat=370%, Nodes visited: 28)


### Different solution
Another possible approach, based on Dijkstra's shortest-path search.
Note: this is incomplete.

In [22]:
class Graph:
    """Weighted graph over vertices ``0..num_vert-1`` stored as a square matrix.

    ``edges[u][v]`` holds the weight between ``u`` and ``v``; every cell
    starts at ``-1``. ``visited`` starts as an empty list.
    """

    def __init__(self, num_vert):
        """Create a graph with ``num_vert`` vertices and all cells set to -1."""
        self.visited = []
        self.v = num_vert
        # One fresh row per vertex so that rows never alias each other.
        self.edges = [[-1] * num_vert for _ in range(num_vert)]

    def add_edge(self, u, v, weight):
        """Store ``weight`` for the pair in both directions (u->v, then v->u)."""
        for src, dst in ((u, v), (v, u)):
            self.edges[src][dst] = weight

def create_graph(graph, problem_list):
    """Populate ``graph`` with an edge for every ordered pair of vertices.

    For each pair ``(i, j)`` this calls
    ``graph.add_edge(i, j, len(problem_list[j]))``, with ``i`` in the outer
    loop and ``j`` in the inner loop.

    NOTE(review): with the symmetric ``Graph.add_edge`` defined in this file,
    later calls overwrite earlier ones, so the stored weight for a pair ends
    up as the length of the list with the *smaller* index. Confirm whether
    that is the intended cost model.

    Args:
        graph: Object exposing an integer vertex count ``v`` and an
            ``add_edge(u, v, weight)`` method.
        problem_list: Sequence with at least ``graph.v`` sized items; item
            ``j`` supplies the weight used for vertex ``j``.
    """
    # ``graph.v`` is a vertex *count*, so it must be wrapped in range();
    # iterating over the bare integer raises TypeError.
    for i in range(graph.v):
        for j in range(graph.v):
            graph.add_edge(i, j, len(problem_list[j]))

In [23]:
from queue import PriorityQueue

def problem_start(N):
    """Build the graph used by the Dijkstra-based approach for size N.

    Generates the instance with ``problem(N, seed=SEED)``, orders its lists
    by ascending length, creates a ``Graph`` with one vertex per list and
    fills it through ``create_graph``.

    Args:
        N: Problem size forwarded to ``problem``.

    Returns:
        A ``(graph, lists)`` tuple, where ``lists`` is the length-sorted
        instance the graph was built from.
    """
    lists_by_length = problem(N, seed=SEED)
    lists_by_length.sort(key=len)

    graph = Graph(len(lists_by_length))
    create_graph(graph, lists_by_length)
    return graph, lists_by_length

def dijkstra(graph, problem, start_vertex):
    """Compute the cheapest path cost from ``start_vertex`` to every vertex.

    Args:
        graph: Object exposing a vertex count ``v``, a square weight matrix
            ``edges`` in which ``-1`` means "no edge", and a ``visited``
            attribute. ``visited`` is replaced by the list of vertices in the
            order this run settled them.
        problem: Unused; kept so existing callers keep working.
        start_vertex: Index of the source vertex.

    Returns:
        Dict mapping each vertex index to its cost from ``start_vertex``;
        vertices that cannot be reached keep ``float('inf')``.
    """
    D = {v: float('inf') for v in range(graph.v)}
    D[start_vertex] = 0

    # Start each run from a clean slate. Reusing the vertices recorded by a
    # previous run would make every vertex look "already visited" and leave
    # all distances at infinity.
    graph.visited = []
    # Local set gives O(1) membership tests instead of scanning a list.
    settled = set()

    pq = PriorityQueue()
    pq.put((0, start_vertex))

    while not pq.empty():
        _, current_vertex = pq.get()
        # A vertex can be queued several times with decreasing costs; only the
        # first (cheapest) pop matters, later ones are stale.
        if current_vertex in settled:
            continue
        settled.add(current_vertex)
        graph.visited.append(current_vertex)

        for neighbor in range(graph.v):
            distance = graph.edges[current_vertex][neighbor]
            if distance == -1 or neighbor in settled:
                continue
            new_cost = D[current_vertex] + distance
            if new_cost < D[neighbor]:
                D[neighbor] = new_cost
                pq.put((new_cost, neighbor))
    return D