Copyright **`(c)`** 2024 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

# Set Cover problem

See: https://en.wikipedia.org/wiki/Set_cover_problem

In [33]:
from random import random, seed
from itertools import product
import numpy as np

from icecream import ic

## Reproducible Initialization

If you want to get reproducible results, use `rng` (and restart the kernel); for non-reproducible ones, use `np.random`.

In [34]:
UNIVERSE_SIZE = 10000
NUM_SETS = 1000
DENSITY = 0.1

rng = np.random.Generator(np.random.PCG64([UNIVERSE_SIZE, NUM_SETS, int(10_000 * DENSITY)]))

In [35]:
# DON'T EDIT THESE LINES!

SETS = np.random.random((NUM_SETS, UNIVERSE_SIZE)) < DENSITY
for s in range(UNIVERSE_SIZE):
    if not np.any(SETS[:, s]):
        SETS[np.random.randint(NUM_SETS), s] = True
COSTS = np.pow(SETS.sum(axis=1), 1.1)

## Helper Functions

In [36]:
def valid(solution):
    """Checks wether solution is valid (ie. covers all universe)"""
    return np.all(np.logical_or.reduce(SETS[solution]))


def cost(solution):
    """Returns the cost of a solution (to be minimized)"""
    return COSTS[solution].sum()

## Have Fun!

In [37]:
# A dumb solution of "all" sets
solution = np.full(NUM_SETS, True)
valid(solution), cost(solution)

(np.True_, np.float64(1995860.2608245336))

In [38]:
# A random solution with random 50% of the sets
solution = rng.random(NUM_SETS) < .5
valid(solution), cost(solution)

(np.True_, np.float64(1047404.859237449))

# Greedy Algorithm
feasible with small numbers 
(UNIVERSE_SIZE = 10000,
NUM_SETS = 1000,
DENSITY = 0.1)

(source: I used the algorithm from this website https://www.geeksforgeeks.org/greedy-approximate-algorithm-for-set-cover-problem/)

In [39]:
def set_cover_greedy(SETS, COSTS):
    universe = set(range(SETS.shape[1])) #all elements
    covered = set()                      #covered elements
    selected = []                        #indexes of selected subsets

    while covered!=universe:
        best_subset = None
        best_ratio = float("inf")

        for i in range(len(SETS)):
            subset = SETS[i]                            
            subset_elements = set(np.where(subset)[0]) #for each subset
            new_elements = subset_elements - covered   #I take the uncovered elements

            if new_elements:                  #if there are uncovered elements  
                current_cost = COSTS[i]       #I compute the ration cost/coverage
                coverage = len(new_elements)
                ratio = current_cost/coverage

                if ratio < best_ratio:        #if the ratio is the best to this moment
                    best_ratio = ratio        #I choose the current as best subset
                    best_subset = i

        if best_subset is not None:
            selected.append(best_subset)
            covered.update(set(np.where(SETS[best_subset])[0]))
        else:
            raise RuntimeError("No valid subset found. The problem may not be solvable with these sets and costs.")
        
        ic(valid(selected), cost(selected))
    
    return selected

In [40]:
solution = set_cover_greedy(SETS=SETS, COSTS=COSTS)
ic(valid(solution), cost(solution))

ic| valid(selected): np.False_
    cost(selected): np.float64(1802.9941688882707)
ic| valid(selected): np.False_
    cost(selected): np.float64(3756.5953452790673)
ic| valid(selected): np.False_
    cost(selected): np.float64(5721.152220468346)
ic| valid(selected): np.False_
    cost(selected): np.float64(7644.107221659979)
ic| valid(selected): np.False_
    cost(selected): np.float64(9718.522636328327)
ic| valid(selected): np.False_
    cost(selected): np.float64(11650.229188488529)
ic| valid(selected): np.False_
    cost(selected): np.float64(13658.66417826467)
ic| valid(selected): np.False_
    cost(selected): np.float64(15667.099168040811)
ic| valid(selected): np.False_
    cost(selected): np.float64(17662.361483009692)
ic| valid(selected): np.False_
    cost(selected): np.float64(19769.83918174949)
ic| valid(selected): np.False_
    cost(selected): np.float64(21771.686849124988)
ic| valid(selected): np.False_
    cost(selected): np.float64(23729.669636562503)
ic| valid(selected): 

(np.True_, np.float64(87154.36795453311))