In [None]:
from paretoCardinalityTeams import *
import matplotlib.cm as cm
import matplotlib.pyplot as plt

# Load the pre-built datasets used by the experiments below.
# Each call is unpacked into four objects: experts, tasks, costs and a graph matrix.
# The second argument picks a dataset variant; the variants differ in size
# (see the logged "Num Experts" / matrix shapes) — TODO confirm its exact meaning
# against import_pickled_datasets.
# NOTE(review): import_pickled_datasets is presumably provided by the star import of
# paretoCardinalityTeams; if it unpickles files, only load data from a trusted source.

# IMDB (variants 1-3)
imdb_experts_1, imdb_tasks_1, imdb_costs_1, imdb_graphmat_1 = import_pickled_datasets('imdb', 1)
imdb_experts_2, imdb_tasks_2, imdb_costs_2, imdb_graphmat_2 = import_pickled_datasets('imdb', 2)
imdb_experts_3, imdb_tasks_3, imdb_costs_3, imdb_graphmat_3 = import_pickled_datasets('imdb', 3)

# Bibsonomy (variants 1-3)
bbsm_experts_1, bbsm_tasks_1, bbsm_costs_1, bbsm_graphmat_1 = import_pickled_datasets('bbsm', 1)
bbsm_experts_2, bbsm_tasks_2, bbsm_costs_2, bbsm_graphmat_2 = import_pickled_datasets('bbsm', 2)
bbsm_experts_3, bbsm_tasks_3, bbsm_costs_3, bbsm_graphmat_3 = import_pickled_datasets('bbsm', 3)

# Freelancer (variants 1-2)
fl_experts_1, fl_tasks_1, fl_costs_1, fl_graphmat_1 = import_pickled_datasets('freelancer', 1)
fl_experts_2, fl_tasks_2, fl_costs_2, fl_graphmat_2 = import_pickled_datasets('freelancer', 2)

2026-01-22 21:22:30,847 |INFO: Imported imdb experts, Num Experts: 200
2026-01-22 21:22:30,848 |INFO: Imported imdb tasks, Num Tasks: 300
2026-01-22 21:22:30,849 |INFO: Imported imdb costs, Num Costs: 200
2026-01-22 21:22:30,853 |INFO: Imported imdb graph matrix, Shape: (1000, 1000)

2026-01-22 21:22:30,854 |INFO: Imported imdb experts, Num Experts: 400
2026-01-22 21:22:30,855 |INFO: Imported imdb tasks, Num Tasks: 300
2026-01-22 21:22:30,856 |INFO: Imported imdb costs, Num Costs: 400
2026-01-22 21:22:30,863 |INFO: Imported imdb graph matrix, Shape: (3000, 3000)

2026-01-22 21:22:30,865 |INFO: Imported imdb experts, Num Experts: 1000
2026-01-22 21:22:30,866 |INFO: Imported imdb tasks, Num Tasks: 300
2026-01-22 21:22:30,868 |INFO: Imported imdb costs, Num Costs: 1000
2026-01-22 21:22:30,879 |INFO: Imported imdb graph matrix, Shape: (4000, 4000)

2026-01-22 21:22:30,881 |INFO: Imported bbsm experts, Num Experts: 250
2026-01-22 21:22:30,882 |INFO: Imported bbsm tasks, Num Tasks: 300
2026-

In [2]:
def _coverage_by_cardinality(k_sol_dict, k_max, carry_forward=False):
    '''
    Build the coverage curve for k = 1..k_max from a per-cardinality solution dict.
    Parameters:
    - k_sol_dict: Mapping from cardinality k to a dict holding a 'Coverage' entry
    - k_max: Largest cardinality to report
    - carry_forward: If True, a missing k reuses the coverage of the last k that was
      present (0 before the first one); if False, a missing k is reported as 0
    Returns:
    - List of k_max coverage values; index i corresponds to k = i + 1
    '''
    curve = []
    last_seen = 0
    for k in range(1, k_max + 1):
        if k in k_sol_dict:
            last_seen = k_sol_dict[k]['Coverage']
            curve.append(last_seen)
        else:
            curve.append(last_seen if carry_forward else 0)
    return curve


def findApproximateParetoSolutions(tasks_list, experts_list, size_univ, numTasks, k_max, numExperts):
    '''
    Run algorithms over multiple tasks, aggregate results, and plot mean +/- std.

    For each of the first numTasks tasks, a fresh paretoCardinalityTeams instance is
    created per algorithm (greedy, top-k, random), the coverage at every cardinality
    1..k_max is collected, and two figures are shown: mean coverage per cardinality
    with a shaded band, and mean total runtime per algorithm with error bars.

    Parameters:
    - tasks_list: List of tasks (each task is a list of required skills)
    - experts_list: List of experts (each expert is a list of skills)
    - size_univ: Size of the universe of skills
    - numTasks: Number of tasks to evaluate (the first numTasks entries of tasks_list)
    - k_max: Maximum team size (cardinality)
    - numExperts: Number of experts to consider from experts_list (a leading slice)

    Returns:
    - None; the results are only plotted.

    Raises:
    - ValueError: if numTasks is smaller than 1 (nothing to aggregate).
    - IndexError: if numTasks exceeds len(tasks_list).
    '''
    # (plot label, solver method name, carry coverage forward over missing k?)
    # The greedy curve carries the last coverage forward because its kSolDict may
    # lack entries for larger k; the baselines report 0 for a missing k.
    algorithms = [
        ("ParetoGreedy", "greedyCardinality", True),
        ("TopK", "top_k", False),
        ("Random", "random_selection", False),
    ]
    algo_names = [label for label, _, _ in algorithms]

    # Fail early with a clear message instead of an opaque np.vstack error later.
    if numTasks < 1:
        raise ValueError("numTasks must be at least 1 to aggregate results")

    # containers across tasks
    all_coverages = {alg: [] for alg in algo_names}
    all_runtimes = {alg: [] for alg in algo_names}

    # iterate tasks; every algorithm gets its own solver instance so runs do not share state
    for task_index in range(numTasks):
        for label, method_name, carry_forward in algorithms:
            solver = paretoCardinalityTeams(task=tasks_list[task_index],
                                            n_experts=experts_list[:numExperts],
                                            size_univ=size_univ,
                                            k_max=k_max)
            # each algorithm returns a 4-tuple whose last element is the runtime
            _, _, _, runTime = getattr(solver, method_name)()
            curve = _coverage_by_cardinality(solver.kSolDict, k_max, carry_forward)
            all_coverages[label].append(np.array(curve, dtype=float))
            all_runtimes[label].append(runTime)

    # compute mean and (half) std across tasks for each algorithm
    mean_coverages = {}
    std_coverages = {}
    for alg in algo_names:
        stacked = np.vstack(all_coverages[alg])  # shape (numTasks, k_max)
        mean_coverages[alg] = np.mean(stacked, axis=0)
        # the shaded band is deliberately half a std wide, as stated in the plot title
        std_coverages[alg] = np.std(stacked, axis=0) * 0.5

    # Plot mean coverage with shaded std band
    colors = cm.magma(np.linspace(0.01, 0.8, len(algo_names)))
    linestyles = ['-', '--', ':']
    markers = ['o', 's', '^']
    cardinalities = range(1, k_max + 1)

    fig, ax = plt.subplots(figsize=(8, 5))
    for i, alg in enumerate(algo_names):
        mean = mean_coverages[alg]
        std = std_coverages[alg]

        ax.plot(cardinalities, mean,
                label=alg,
                color=colors[i],
                linestyle=linestyles[i],
                marker=markers[i],
                markersize=5,
                markeredgewidth=0.8,
                markeredgecolor='k',
                linewidth=1.2,
                zorder=3)
        # coverage is a fraction, so keep the band inside [0, 1]
        ax.fill_between(cardinalities,
                        np.clip(mean - std, 0, 1),
                        np.clip(mean + std, 0, 1),
                        color=colors[i],
                        alpha=0.18,
                        zorder=2)

    ax.set_xlabel('Cardinality (k)')
    ax.set_ylabel('Mean Task Coverage')
    ax.set_title('Mean Coverage across tasks (shaded = ±0.5 std)')
    ax.grid(alpha=0.3)
    ax.legend(fontsize=8)
    plt.show()

    # Plot total runtime per algorithm with mean +/- std bars
    means_rt = [np.mean(all_runtimes[alg]) for alg in algo_names]
    stds_rt = [np.std(all_runtimes[alg]) for alg in algo_names]
    x = np.arange(len(algo_names))
    fig2, ax2 = plt.subplots(figsize=(8, 3))
    ax2.bar(x, means_rt, yerr=stds_rt, capsize=5, color=list(colors))
    ax2.set_xticks(x)
    ax2.set_xticklabels(algo_names, rotation=30, ha='right', fontsize=8)
    ax2.set_ylabel('Total Runtime (s)')
    ax2.set_title('Mean Total Runtime per Algorithm (±std)')
    ax2.grid(axis='y', alpha=0.3)
    plt.tight_layout()
    plt.show()

    return None

### IMDB

In [3]:
# Shared experiment settings; the cells further down reuse numTasks and k_max.
numTasks, k_max = 50, 10

# IMDB, variant 1: only the first 75 experts are passed to the solvers.
numExperts = 75
findApproximateParetoSolutions(
    tasks_list=imdb_tasks_1,
    experts_list=imdb_experts_1,
    size_univ=24,
    numTasks=numTasks,
    k_max=k_max,
    numExperts=numExperts,
)

2026-01-22 21:22:37,802 |INFO: Initialized Pareto Coverage - Cardinality Cost Instance, Task:[0, 1, 2, 3, 4, 5, 6, 7], Num Experts:75, k=10
2026-01-22 21:22:37,804 |INFO: Cardinality Greedy Solution for k_max:[[1, 4, 5, 7, 9], [0, 1, 3, 13, 14], [2, 3, 22], [3, 6]], Coverage:1.000, Runtime = 0.00 seconds
2026-01-22 21:22:37,805 |INFO: Initialized Pareto Coverage - Cardinality Cost Instance, Task:[0, 1, 2, 3, 4, 5, 6, 7], Num Experts:75, k=10
2026-01-22 21:22:37,806 |INFO: Top-k Solution for k_max:[[1, 4, 5, 7, 9], [1, 4, 5, 7, 15], [1, 3, 4, 18], [1, 2, 7, 16], [1, 3, 5], [1, 2, 3, 16], [1, 3, 4], [1, 3, 7], [1, 6, 7, 16], [2, 3, 4]], Coverage:0.875, Runtime = 0.00 seconds
2026-01-22 21:22:37,807 |INFO: Initialized Pareto Coverage - Cardinality Cost Instance, Task:[0, 1, 2, 3, 4, 5, 6, 7], Num Experts:75, k=10
2026-01-22 21:22:41,686 |INFO: Random Selection Solution for k_max:[[1, 3, 17], [1, 4], [1, 8, 10], [1, 4, 5, 7, 9], [5, 7, 16], [1, 3, 13], [1, 15], [3, 8, 15], [4, 7, 15], [7, 

NameError: name 'plt' is not defined

In [None]:
# IMDB, variant 2: same settings as before, with the first 100 experts.
numExperts = 100
findApproximateParetoSolutions(
    tasks_list=imdb_tasks_2,
    experts_list=imdb_experts_2,
    size_univ=24,
    numTasks=numTasks,
    k_max=k_max,
    numExperts=numExperts,
)

### Bibsonomy

In [None]:
# Bibsonomy, variant 1: first 100 experts, skill universe of size 75.
numExperts = 100
findApproximateParetoSolutions(
    tasks_list=bbsm_tasks_1,
    experts_list=bbsm_experts_1,
    size_univ=75,
    numTasks=numTasks,
    k_max=k_max,
    numExperts=numExperts,
)

### Freelancer

In [None]:
# Freelancer, variant 1: first 100 experts, skill universe of size 50.
numExperts = 100
findApproximateParetoSolutions(
    tasks_list=fl_tasks_1,
    experts_list=fl_experts_1,
    size_univ=50,
    numTasks=numTasks,
    k_max=k_max,
    numExperts=numExperts,
)