A state is composed of a set of 'taken' tiles and a set of 'not-taken' tiles, written as {T} {N}.
The cost of moving to a new state is based on how many points the new state covers compared with the current one.
The distance of a state is the number of points that still need to be covered.
The A* algorithm searches for a set cover that uses the minimum number of tiles. In this implementation the function f(n), which computes the priority of a state, is the sum of the state's cost and its distance. The search stops when a solution is reached (which corresponds to the optimal one).

Credits: https://github.com/squillero/computational-intelligence/blob/master/2023-24/set-covering.ipynb

TODO: minimum overlapping tiles in the optimal solution

In [6]:
from random import random
from functools import reduce
from collections import namedtuple
from queue import PriorityQueue

import numpy as np

In [7]:
# Number of points that must be covered, and number of candidate tiles.
PROBLEM_SIZE = 50
NUM_SETS = 100

# Each tile is a boolean mask of length PROBLEM_SIZE; a point belongs to a
# tile whenever a fresh random() draw falls below 0.2.
SETS = tuple(
    np.fromiter(
        (random() < 0.2 for _ in range(PROBLEM_SIZE)),
        dtype=bool,
        count=PROBLEM_SIZE,
    )
    for _ in range(NUM_SETS)
)

# A search state: indices of the tiles already taken and of those still available.
State = namedtuple("State", "taken not_taken")

In [8]:
def goal_check(state):
    """Return whether the tiles in ``state.taken`` together cover every point.

    The masks ``SETS[i]`` of all taken tiles are OR-ed together, starting
    from an all-False mask of length ``PROBLEM_SIZE``; the state is a goal
    when every entry of the resulting mask is True.
    """
    covered = np.zeros(PROBLEM_SIZE, dtype=bool)
    for tile_index in state.taken:
        covered = np.logical_or(covered, SETS[tile_index])
    return np.all(covered)

def cost(state, next_state):
    """Return ``PROBLEM_SIZE`` minus the points covered by both states, summed.

    The number of points covered by ``next_state.taken`` and the number
    covered by ``state.taken`` are each computed from the tile masks in
    ``SETS``, added together, and subtracted from ``PROBLEM_SIZE``.

    NOTE(review): the two coverage counts are added, not subtracted, so the
    result shrinks as either state covers more points and becomes negative
    once the combined count exceeds ``PROBLEM_SIZE`` -- confirm this is the
    intended cost.
    """

    def covered_points(tile_indices):
        # Count the points covered by the union of the given tiles.
        mask = np.zeros(PROBLEM_SIZE, dtype=bool)
        for tile_index in tile_indices:
            mask = np.logical_or(mask, SETS[tile_index])
        return mask.sum()

    covered_after = covered_points(next_state.taken)
    covered_before = covered_points(state.taken)
    return PROBLEM_SIZE - (covered_after + covered_before)

def distance(state):
    """Return how many points are not yet covered by ``state.taken``.

    The masks of the taken tiles are OR-ed together and the number of True
    entries is subtracted from ``PROBLEM_SIZE``; a state with no taken
    tiles therefore has distance ``PROBLEM_SIZE``.
    """
    covered = np.zeros(PROBLEM_SIZE, dtype=bool)
    for tile_index in state.taken:
        covered = np.logical_or(covered, SETS[tile_index])
    return PROBLEM_SIZE - covered.sum()

In [9]:
# Sanity check: if taking every tile does not cover all points, no subset can.
every_tile_taken = State(set(range(NUM_SETS)), set())
assert goal_check(every_tile_taken), "Problem not solvable"

In [10]:
# Best-first search: states are popped in increasing order of
# cost(parent, child) + distance(child) until a covering state is found.
frontier = PriorityQueue()
state = State(set(), set(range(NUM_SETS)))
frontier.put((distance(state), state))

# Taken-sets that have already been expanded. The same set of tiles can be
# reached by taking them in different orders; expanding it again would only
# push the same children with the same priorities, so such pops are skipped.
expanded = set()

counter = 0  # number of distinct states expanded
_, current_state = frontier.get()
while not goal_check(current_state):
    taken_key = frozenset(current_state.taken)
    if taken_key not in expanded:
        expanded.add(taken_key)
        counter += 1
        for action in current_state.not_taken:
            # Move one tile from the not-taken set to the taken set.
            new_state = State(
                current_state.taken | {action},
                current_state.not_taken - {action},
            )
            frontier.put(
                (cost(current_state, new_state) + distance(new_state), new_state)
            )
    if frontier.empty():
        # PriorityQueue.get() blocks indefinitely on an empty queue, so fail
        # loudly instead of hanging when no covering state can be reached.
        raise RuntimeError("Frontier exhausted before every point was covered")
    _, current_state = frontier.get()

print(f"Solved in {counter} steps ({len(current_state.taken)} tiles)")
print(current_state)

Solved in 6 steps (6 tiles)
State(taken={96, 5, 41, 47, 49, 56}, not_taken={0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 48, 50, 51, 52, 53, 54, 55, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 97, 98, 99})
