Copyright **`(c)`** 2024 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

# Set Cover problem

See: https://en.wikipedia.org/wiki/Set_cover_problem

In [1]:
from random import random, seed
from itertools import product
import numpy as np

from icecream import ic

## Reproducible Initialization

If you want to get reproducible results, use `rng` (and restart the kernel); for non-reproducible ones, use `np.random`.

In [2]:
UNIVERSE_SIZE = 100_000
NUM_SETS = 10_000
DENSITY = 0.3 #how dense are the sets, how many elements are covered by each set


rng = np.random.Generator(np.random.PCG64([UNIVERSE_SIZE, NUM_SETS, int(10_000 * DENSITY)]))

In [3]:
# DON'T EDIT THESE LINES!

SETS = np.random.random((NUM_SETS, UNIVERSE_SIZE)) < DENSITY
for s in range(UNIVERSE_SIZE):
    if not np.any(SETS[:, s]):
        SETS[np.random.randint(NUM_SETS), s] = True
COSTS = np.pow(SETS.sum(axis=1), 1.1)

## Helper Functions

In [4]:
def valid(solution):
    """Checks wether solution is valid (ie. covers all universe)"""
    return np.all(np.logical_or.reduce(SETS[solution]))


def cost(solution):
    """Returns the cost of a solution (to be minimized)"""
    return COSTS[solution].sum()

def tweak(solution: np.ndarray)->np.ndarray:
    new_solution = solution.copy()
    i = rng.integers(0, NUM_SETS)
    new_solution[i] = not new_solution[i]
    return new_solution
"""
def tweak(solution):
    
    new_solution = solution.copy()
    # remove a random number of sets that is a percentage of the total number of sets
    for _ in range(int(NUM_SETS * 0.1)):
        v = np.random.randint(NUM_SETS)
        new_solution[v] = False
    return new_solution
def tweak2(solution):
    
    new_solution = solution.copy()
    # remove a random number of sets that is a percentage of the total number of sets
    n = int(NUM_SETS*0.05)
    if n==0: #slightly better with 0.05
        n=1
    for _ in range(n):
        v = np.random.randint(NUM_SETS)
        new_solution[v] = not new_solution[v]
    return new_solution
"""

'\ndef tweak(solution):\n    \n    new_solution = solution.copy()\n    # remove a random number of sets that is a percentage of the total number of sets\n    for _ in range(int(NUM_SETS * 0.1)):\n        v = np.random.randint(NUM_SETS)\n        new_solution[v] = False\n    return new_solution\ndef tweak2(solution):\n    \n    new_solution = solution.copy()\n    # remove a random number of sets that is a percentage of the total number of sets\n    n = int(NUM_SETS*0.05)\n    if n==0: #slightly better with 0.05\n        n=1\n    for _ in range(n):\n        v = np.random.randint(NUM_SETS)\n        new_solution[v] = not new_solution[v]\n    return new_solution\n'

In [5]:
def fitness(solution: np.ndarray):
    return (valid(solution), -cost(solution))

## Have Fun!

In [6]:
# A dumb solution of "all" sets
solution = np.full(NUM_SETS, True)
valid(solution), cost(solution)
#If a value is covered just by one set, we don't need to hill climb, we can take it 


(np.True_, np.float64(841045518.9525087))

In [7]:
# A random solution with random 50% of the sets
solution = rng.random(NUM_SETS) < .5
valid(solution), cost(solution)

(np.True_, np.float64(423487690.3441791))

In [8]:
solution = np.full(NUM_SETS, True)
ic(fitness(solution))

for steps in range(100_000):
    new_solution = tweak(solution)
    if fitness(new_solution) > fitness(solution):
        solution = new_solution
        ic(fitness(solution))
ic(fitness(solution))
"""
solution = np.full(NUM_SETS, True)
#Let's take all of the sets
valid(solution), cost(solution)
num_it=1000
#while(num_it>0):
    #num_it-=1
    #new_solution = tweak(solution)
    #if valid(new_solution) and cost(new_solution) < cost(solution):
        #ic(cost(new_solution))
        #solution = new_solution
#num_it=10000
valid(solution), cost(solution)
while(num_it>0):
    num_it-=1
    new_solution = tweak2(solution)
    if valid(new_solution) and np.exp(-(cost(solution)-cost(new_solution))/t)>p:
        ic(cost(new_solution))
        solution = new_solution
valid(solution), cost(solution)
"""

ic| fitness(solution): (np.True_, np.float64(-841045518.9525087))
ic| fitness(solution): (np.True_, np.float64(-840961834.1149039))
ic| fitness(solution): (np.True_, np.float64(-840878158.5248156))
ic| fitness(solution): (np.True_, np.float64(-840794834.2714214))
ic| fitness(solution): (np.True_, np.float64(-840711466.8786577))
ic| fitness(solution): (np.True_, np.float64(-840627569.3225565))
ic| fitness(solution): (np.True_, np.float64(-840543782.7561438))
ic| fitness(solution): (np.True_, np.float64(-840459299.2003136))
ic| fitness(solution): (np.True_, np.float64(-840376132.080305))
ic| fitness(solution): (np.True_, np.float64(-840292533.5492378))


KeyboardInterrupt: 