In [3]:
!pip install pulp

Collecting pulp
  Downloading PuLP-2.9.0-py3-none-any.whl.metadata (5.4 kB)
Downloading PuLP-2.9.0-py3-none-any.whl (17.7 MB)
   ---------------------------------------- 0.0/17.7 MB ? eta -:--:--
   ---- ----------------------------------- 2.1/17.7 MB 14.7 MB/s eta 0:00:02
   -------------- ------------------------- 6.6/17.7 MB 19.2 MB/s eta 0:00:01
   --------------------- ------------------ 9.7/17.7 MB 17.8 MB/s eta 0:00:01
   --------------------------------- ------ 14.9/17.7 MB 19.6 MB/s eta 0:00:01
   ---------------------------------------- 17.7/17.7 MB 19.9 MB/s eta 0:00:00
Installing collected packages: pulp
Successfully installed pulp-2.9.0



[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [8]:
import random
import time
import pulp
import numpy as np
from collections import defaultdict

# Set random seed for reproducibility
random.seed(42)

# Instance generation function
def generate_set_cover_instance(U_size, num_subsets, min_weight, max_weight, min_set_size, max_set_size):
    universe = set(range(1, U_size + 1))
    subsets = []
    weights = []
    universe_list = sorted(universe)  # Convert the set to a sorted list for sampling
    for _ in range(num_subsets):
        set_size = random.randint(min_set_size, max_set_size)
        subset = set(random.sample(universe_list, set_size))  # Sample from the list
        weight = random.uniform(min_weight, max_weight)
        subsets.append(subset)
        weights.append(weight)
    return universe, subsets, weights


# Greedy set cover function
def greedy_set_cover(universe, subsets, weights):
    covered = set()
    total_weight = 0
    subsets_remaining = list(zip(subsets, weights, range(len(subsets))))
    while covered != universe:
        best_ratio = float('inf')
        best_subset, best_weight, best_index = None, None, None
        for subset, weight, index in subsets_remaining:
            uncovered_elements = subset - covered
            if uncovered_elements:
                ratio = weight / len(uncovered_elements)
                if ratio < best_ratio:
                    best_ratio = ratio
                    best_subset, best_weight, best_index = subset, weight, index
        if best_subset is None:
            break
        total_weight += best_weight
        covered.update(best_subset)
        subsets_remaining = [s for s in subsets_remaining if s[2] != best_index]
    return total_weight, len(covered)

# Optimal set cover function
def optimal_set_cover(universe, subsets, weights):
    prob = pulp.LpProblem('SetCover', pulp.LpMinimize)
    x = [pulp.LpVariable(f'x_{i}', cat='Binary') for i in range(len(subsets))]
    prob += pulp.lpSum([weights[i] * x[i] for i in range(len(subsets))])
    for element in universe:
        prob += pulp.lpSum([x[i] if element in subsets[i] else 0 for i in range(len(subsets))]) >= 1
    prob.solve()
    total_weight = sum(weights[i] for i in range(len(subsets)) if x[i].varValue == 1.0)
    covered_elements = {e for i, subset in enumerate(subsets) if x[i].varValue == 1.0 for e in subset}
    return total_weight, len(covered_elements)

# Simulation function to run multiple instances
def simulate_set_cover(U_size_list, num_subsets_list, min_weight, max_weight, min_set_size, max_set_size, num_trials=5):
    results = defaultdict(list)

    for U_size in U_size_list:
        for num_subsets in num_subsets_list:
            for trial in range(num_trials):
                universe, subsets, weights = generate_set_cover_instance(U_size, num_subsets, min_weight, max_weight, min_set_size, max_set_size)
                
                # Greedy solution
                start_time = time.time()
                greedy_weight, greedy_covered = greedy_set_cover(universe, subsets, weights)
                greedy_time = time.time() - start_time
                
                # Optimal solution (only for small instances)
                if U_size <= 100:
                    start_time = time.time()
                    optimal_weight, optimal_covered = optimal_set_cover(universe, subsets, weights)
                    optimal_time = time.time() - start_time
                else:
                    optimal_weight, optimal_covered, optimal_time = None, None, None

                # Store results
                results['U_size'].append(U_size)
                results['num_subsets'].append(num_subsets)
                results['trial'].append(trial + 1)
                results['greedy_weight'].append(greedy_weight)
                results['greedy_covered'].append(greedy_covered)
                results['greedy_time'].append(greedy_time)
                results['optimal_weight'].append(optimal_weight)
                results['optimal_covered'].append(optimal_covered)
                results['optimal_time'].append(optimal_time)

    return results

# Run the simulation
U_size_list = [10, 50, 100]  # Test small to medium universe sizes
num_subsets_list = [15, 30, 50]  # Vary the number of subsets
min_weight, max_weight = 1, 10
min_set_size, max_set_size = 1, 5

results = simulate_set_cover(U_size_list, num_subsets_list, min_weight, max_weight, min_set_size, max_set_size)

# Display results
import pandas as pd
results_df = pd.DataFrame(results)
print(results_df)


    U_size  num_subsets  trial  greedy_weight  greedy_covered  greedy_time  \
0       10           15      1      13.923483              10     0.000000   
1       10           15      2      20.859047              10     0.000000   
2       10           15      3      27.634934              10     0.000000   
3       10           15      4       6.992965              10     0.000000   
4       10           15      5      14.846972              10     0.000000   
5       10           30      1       7.227078              10     0.000000   
6       10           30      2      10.321715              10     0.000000   
7       10           30      3       6.219115              10     0.000000   
8       10           30      4       5.308234              10     0.000000   
9       10           30      5       8.288729              10     0.000000   
10      10           50      1       6.253246              10     0.000258   
11      10           50      2       3.951081              10   