In [61]:
from random import random
from functools import reduce
from collections import namedtuple
from queue import PriorityQueue, SimpleQueue, LifoQueue
import numpy as np

In [62]:
PROBLEM_SIZE = 50
NUM_SETS = 80

SETS = tuple(np.array([random() < .3 for _ in range(PROBLEM_SIZE)]) for _ in range(NUM_SETS))
State = namedtuple('State', ['taken', 'not_taken'])


In [63]:
def goal_check(state):
     #The function checks whether all elements of a set are covered by the selected subsets. 
     #Uses the reduce function with the np.logical_or operator to combine all the selected subsets into a single Boolean array. 
     #Then, use the np.all function to check whether all elements of the set are covered. 
     #If all elements are covered, the function returns True, otherwise it returns False.
    return np.all(reduce(np.logical_or, [SETS[i] for i in state.taken], np.array([False for _ in range(PROBLEM_SIZE)])))

In [64]:
assert goal_check(State(set(range(NUM_SETS)), set())), "Probelm not solvable"

In [65]:
def overlap(i, taken):
    # Calcola la sovrapposizione tra l'insieme i e gli insiemi già selezionati
    overlap = sum(np.logical_and(SETS[i], np.any([SETS[j] for j in taken], axis=0)))
    return overlap


In [69]:
def g(state):
     return len(state.taken)

def covered(state):
    return reduce(
        np.logical_or,  #or function applied
        [SETS[i] for i in state.taken],     #between each already taken SETS 
        np.array([False for _ in range(PROBLEM_SIZE)]), #and a vector full of FALSE
    )



# my own h function - DENSITY concept has been added
def h(state):
    already_covered = covered(state)        #boolean vector
    if np.all(already_covered):             #if all the elements are equal to True then the problem is already solved
        return 0
    missing_size = PROBLEM_SIZE - sum(already_covered)  #number of sets not covered yet
    
    #calculate the density of each set as the number of uncovered elements it can cover divided by its total coverage
    #an OVERLAP factor is added to Calculate the overlap between set i and every already selected set. 
    #Finally, the sum function adds up all the true values (which represent an overlap) to get the total number of overlapped elements. Less overlap is better.
    densities = [(i, sum(np.logical_and(SETS[i], np.logical_not(already_covered))) / (1 + overlap(i, state.taken)) / sum(SETS[i])) for i in state.not_taken]
        
    #order the sets in descending order with respect to the density
    candidates = [SETS[i] for i, _ in sorted(densities, key=lambda x: x[1], reverse=True)]
    
    taken = 1
    while sum(sum(candidates[i]) for i in range(taken)) < missing_size:   #take the first "n_taken" elements from the candidates vector and verify if the sum of the elements covered by them is sufficient to reach the goal state
        taken += 1
    return taken






In [67]:
def a_f(state):
    return g(state) + h(state)

In [68]:

####  My own A* APPROACH  where the h function takes the optimistic info from the DENSITY of each set ####

#This heuristic is optimistic because it assumes that every set selected in the future will be able to cover the maximum possible number of uncovered elements 
#(i.e., the sets with the highest density). 
#Therefore, it should work well with the A* algorithm.

#set DENSITY is computed as the ratio between n_stil_uncovered_elements_coverable_by_the_set / n_total_elements_coverable_by_the_set

frontier = PriorityQueue() # we use a PriorityQueue because we want to define a cost funzion a_f = g(state) + h(state)
initial_state = State(set(), set(range(NUM_SETS)))
frontier.put((a_f(initial_state),initial_state)) #the first state is the one with no taken sets

counter = 0 #counter used just to count the number of occurrencies needed to solve the problem
_,current_state = frontier.get()  #start the resolution taking the first element from the frontier queue
while not goal_check(current_state):    #iterate until the problem is not resolved
    counter += 1
    for action in current_state[1]: #an ACTION is represented as the activity of taking one set from 
        # The ^ operator in Python is a bitwise XOR (exclusive OR) operator. It returns True if and only if its arguments differ (one is True, the other is False)
        #so here it equals to take an action (set) from not_taken and put it into taken
        # new_state = State(current_state.taken | {action}, current_state.not_taken - {action}) -> this would be the same
        new_state = State(current_state.taken ^ {action}, current_state.not_taken ^ {action})
        a_funct = a_f(new_state)
        #it puts all the states generated into the frontier queue
        frontier.put((a_funct,new_state))
    
    #endly it takes one state at time and analyze its condition (if can be considered a goal state in the while above there)
    _,current_state = frontier.get()
    #print("h cost: " , a_f(current_state) - g(current_state))
    
print(f"Solved in {counter:,} steps")
print(a_f(current_state))
print(current_state)

Solved in 210 steps
4
State(taken={8, 74, 30, 14}, not_taken={0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 75, 76, 77, 78, 79})


In [70]:
#######  PROFESSOR h3 function comparison ##########

def h(state):
    already_covered = covered(state)
    if np.all(already_covered):
        return 0
    missing_size = PROBLEM_SIZE - sum(already_covered)
    candidates = sorted((sum(np.logical_and(s, np.logical_not(already_covered))) for s in SETS), reverse=True)
    taken = 1
    while sum(candidates[:taken]) < missing_size:
        taken += 1
    return taken


####  A STAR APPROACH  with ordered version####

frontier = PriorityQueue() # we use a PriorityQueue because we want to define a cost funzion a_f = g(state) + h(state)
initial_state = State(set(), set(range(NUM_SETS)))
frontier.put((a_f(initial_state),initial_state)) #the first state is the one with no taken sets

counter = 0 #counter used just to count the number of occurrencies needed to solve the problem
_,current_state = frontier.get()  #start the resolution taking the first element from the frontier queue
while not goal_check(current_state):    #iterate until the problem is not resolved
    counter += 1
    for action in current_state[1]: #an ACTION is represented as the activity of taking one set from 
        # The ^ operator in Python is a bitwise XOR (exclusive OR) operator. It returns True if and only if its arguments differ (one is True, the other is False)
        #so here it equals to take an action (set) from not_taken and put it into taken
        # new_state = State(current_state.taken | {action}, current_state.not_taken - {action}) -> this would be the same
        new_state = State(current_state.taken ^ {action}, current_state.not_taken ^ {action})
        a_funct = a_f(new_state)
        #it puts all the states generated into the frontier queue
        frontier.put((a_funct,new_state))
    
    #endly it takes one state at time and analyze its condition (if can be considered a goal state in the while above there)
    _,current_state = frontier.get()
    #print("h cost: " , a_f(current_state) - g(current_state))
    
print(f"Solved in {counter:,} steps")
print(a_f(current_state))
print(current_state)

Solved in 792 steps
4
State(taken={8, 74, 14, 30}, not_taken={0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 75, 76, 77, 78, 79})


In [38]:
#BREADHT FIRST


frontier = PriorityQueue() # we use a PriorityQueue because we want to define a cost funzion a_f = g(state) + h(state)
initial_state = State(set(), set(range(NUM_SETS)))
frontier.put((g(initial_state),initial_state)) #the first state is the one with no taken sets

counter = 0 #counter used just to count the number of occurrencies needed to solve the problem
_,current_state = frontier.get()  #start the resolution taking the first element from the frontier queue
while not goal_check(current_state):    #iterate until the problem is not resolved
    counter += 1
    for action in current_state[1]: #an ACTION is represented as the activity of taking one set from 
        # The ^ operator in Python is a bitwise XOR (exclusive OR) operator. It returns True if and only if its arguments differ (one is True, the other is False)
        #so here it equals to take an action (set) from not_taken and put it into taken
        # new_state = State(current_state.taken | {action}, current_state.not_taken - {action}) -> this would be the same
        new_state = State(current_state.taken ^ {action}, current_state.not_taken ^ {action})
        a_funct = g(new_state)
        #it puts all the states generated into the frontier queue
        frontier.put((a_funct,new_state))
    
    #endly it takes one state at time and analyze its condition (if can be considered a goal state in the while above there)
    b_value,current_state = frontier.get()
    
    
print(f"Solved in {counter:,} steps")
print(b_value)
current_state

Solved in 95,752 steps
4


State(taken={33, 18, 4, 30}, not_taken={0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 31, 32, 34, 35, 36, 37, 38, 39})

In [20]:
goal_check(current_state)

True

In [39]:
                                                                #  FINAL CONSIDERATIONS  #

#The heuristic h3(state) might be more efficient if the size of the sets (i.e., the number of elements they cover) is a critical factor in your problem. 
#Since h3(state) prioritizes sets that cover more elements, it might be able to cover all the uncovered elements more quickly.

#On the other hand, my personal heuristic h(state) might be more efficient if the “density” of the sets (i.e., the ratio between the number of uncovered elements they can cover and their total coverage)
#is a critical factor. Since h(state) prioritizes sets with a higher density, it might be able to cover all the uncovered elements using fewer sets.
#The overlap factor added as ulterior denominator is used to encourage the algorithm to prefer solutions with a lower number of overlaps
