In [40]:
from itertools import product
from random import random, randint, shuffle, seed
import numpy as np
from scipy import sparse

In [41]:
def make_set_covering_problem(num_points, num_sets, density):
    """Returns a sparse array where rows are sets and columns are the covered items"""
    seed(num_points*2654435761+num_sets+density)
    sets = sparse.lil_array((num_sets, num_points), dtype=bool)
    for s, p in product(range(num_sets), range(num_points)):
        if random() < density:
            sets[s, p] = True
    for p in range(num_points):
        sets[randint(0, num_sets-1), p] = True
    return sets

In [42]:
# Small instance (20 sets x 10 points) printed to inspect the data layout.
sets = make_set_covering_problem(10, 20, 0.3)
print(sets)  # sparse listing: one "(set, point)  True" line per membership
print(sets.shape[0])  # number of sets (rows)
# Row 0 as a dense boolean mask. Dense indexing is used instead of `getrow`,
# which newer SciPy releases no longer provide on sparse arrays.
print(sets.toarray()[0])

  (0, 1)	True
  (0, 5)	True
  (0, 9)	True
  (1, 0)	True
  (1, 4)	True
  (1, 5)	True
  (1, 7)	True
  (1, 8)	True
  (2, 0)	True
  (2, 3)	True
  (2, 4)	True
  (2, 5)	True
  (3, 1)	True
  (3, 4)	True
  (3, 5)	True
  (3, 6)	True
  (4, 4)	True
  (5, 5)	True
  (5, 8)	True
  (6, 1)	True
  (6, 8)	True
  (7, 0)	True
  (7, 3)	True
  (7, 4)	True
  (7, 8)	True
  (7, 9)	True
  (8, 0)	True
  (8, 1)	True
  (8, 2)	True
  (8, 6)	True
  (9, 7)	True
  (10, 0)	True
  (10, 1)	True
  (10, 3)	True
  (10, 4)	True
  (10, 8)	True
  (11, 1)	True
  (11, 2)	True
  (11, 3)	True
  (11, 5)	True
  (11, 6)	True
  (11, 7)	True
  (11, 8)	True
  (11, 9)	True
  (12, 1)	True
  (12, 7)	True
  (12, 9)	True
  (13, 1)	True
  (13, 2)	True
  (13, 4)	True
  (14, 2)	True
  (14, 5)	True
  (14, 6)	True
  (14, 7)	True
  (14, 8)	True
  (15, 3)	True
  (15, 5)	True
  (15, 6)	True
  (15, 7)	True
  (15, 8)	True
  (16, 1)	True
  (16, 2)	True
  (16, 3)	True
  (16, 4)	True
  (16, 6)	True
  (16, 7)	True
  (17, 0)	True
  (17, 1)	True
  (17, 5)	T

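I start from a boolean array of all `False` (no point covered yet) and, at each step, try every set to find the one that gives the best improvement. To compare candidates I simply count the number of covered elements after adding each set. The best set is added to the solution, and the process repeats until every point is covered (a greedy strategy).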

In [43]:
def set_cover(sets, num_points):
    """Greedy set cover: repeatedly pick the set covering the most uncovered points.

    Args:
        sets: SciPy sparse container of shape ``(num_sets, num_points)``; a
            nonzero entry at ``[s, p]`` means set ``s`` contains point ``p``.
        num_points: Number of points to cover; must equal ``sets.shape[1]``.

    Returns:
        List of row indices of the chosen sets, in the order they were picked.
        Ties are resolved in favour of the lowest row index.

    Raises:
        ValueError: If ``num_points`` does not match the number of columns of
            ``sets``, or if some point belongs to no set (no cover exists).
    """
    if sets.shape[1] != num_points:
        raise ValueError(
            f"sets has {sets.shape[1]} columns but num_points is {num_points}"
        )

    # CSR gives direct access to each row's column indices and a fast
    # matrix-vector product; stored zeros are dropped so that `indices`
    # lists exactly the members of each set.
    incidence = sets.tocsr().astype(bool)
    incidence.eliminate_zeros()
    # Integer copy: a boolean product would only say "any", not "how many".
    counts = incidence.astype(np.int64)

    covered = np.zeros(num_points, dtype=bool)
    chosen_sets = []  # indices of the chosen sets, in pick order

    while not covered.all():
        # gains[s] = number of still-uncovered points that set s would add.
        gains = counts @ (~covered).astype(np.int64)
        # argmax returns the first maximum, i.e. the lowest index on ties.
        best_set = int(np.argmax(gains)) if gains.size else -1
        if best_set < 0 or gains[best_set] == 0:
            # No set adds anything new: looping further would never terminate.
            raise ValueError("the given sets cannot cover all points")

        chosen_sets.append(best_set)
        row_start = incidence.indptr[best_set]
        row_end = incidence.indptr[best_set + 1]
        covered[incidence.indices[row_start:row_end]] = True

    return chosen_sets

In [45]:
num_points = 1000  # problem size; try [100, 1_000, 5_000]
num_sets = num_points  # square instance: as many candidate sets as points
density = [.3, .7]  # membership probabilities to run
for d in density:
    sets = make_set_covering_problem(num_points, num_sets, d)
    chosen_sets = set_cover(sets, num_points)
    # Sanity check: the union of the chosen rows must contain every point.
    assert sets.toarray()[chosen_sets].any(axis=0).all(), "chosen sets do not cover all points"
    print("Chosen sets:", chosen_sets)

Chosen sets: [714, 404, 991, 572, 736, 951, 153, 212, 113, 6]
Chosen sets: [414, 105, 46, 15]
