### Pareto Teams

In [None]:
from pareto import *
from utils import *
import matplotlib.cm as cm

# Load the three objects returned for the 'imdb' dataset, variant 1.
# NOTE(review): the earlier note on this line read
# "[Task 8, numExperts 75, budget=45]", which does not match the values set
# below (task_index 0, numExperts 200) - confirm which configuration is intended.
imdb_experts_1, imdb_tasks_1, imdb_costs_1 = import_pickled_datasets('imdb', 1)

# Position of the task to solve within imdb_tasks_1.
task_index = 0
# Upper bound used when slicing imdb_experts_1 for the experiments.
numExperts = 200

### IMDB

In [None]:
#Initialize Cost Array with (1 + eps) log scale
# Budgets start at min_cost and grow by a factor of (1 + eps) per step, each
# step rounded to 2 decimals; the grid always finishes exactly at max_cost.
eps = 0.2
min_cost, max_cost = 5, 100
cost_arr = [min_cost]

# Walk the grid with a separate running value so that min_cost still holds the
# smallest budget once this cell has run. Every step is derived from the
# previously *rounded* budget, which keeps the grid values unchanged.
growth = 1 + eps
current_cost = min_cost
while current_cost * growth < max_cost:
    next_cost = round(current_cost * growth, 2)
    if next_cost <= current_cost:
        # With a tiny eps the 2-decimal rounding swallows the whole step and
        # the loop would never terminate; fail loudly instead of hanging.
        raise ValueError("eps={} is too small to advance the 2-decimal cost grid".format(eps))
    current_cost = next_cost
    cost_arr.append(current_cost)

# Rounding may lift the last geometric step onto max_cost itself; only append
# max_cost when it is not already the final entry, so no budget is run twice.
if cost_arr[-1] != max_cost:
    cost_arr.append(max_cost)


#Keep track of coverages for different budgets
# One list per algorithm in each dict; the i-th entry of a list belongs to the
# i-th budget that was evaluated from cost_arr.
algo_keys = ("PlainGreedy", "GreedyPlus", "TwoGuessPlainGreedy", "OneGuessGreedyPlus")
imdb1_coverages = {name: [] for name in algo_keys}
imdb1_costs = {name: [] for name in algo_keys}
imdb1_runtimes = {name: [] for name in algo_keys}

In [None]:
# Start every execution of this cell from empty result lists. The plotting
# cell pairs these lists with cost_arr by position, so entries left over from
# an earlier run of this cell would make them longer than cost_arr.
for _results in (imdb1_coverages, imdb1_costs, imdb1_runtimes):
    for _series in _results.values():
        _series.clear()


def _record_run(algo_name, coverage, cost, runtime):
    """Append one algorithm's outcome for the current budget.

    Args:
        algo_name: Key shared by imdb1_coverages, imdb1_costs and imdb1_runtimes.
        coverage: Coverage value reported by the algorithm.
        cost: Cost value reported by the algorithm.
        runtime: Runtime value reported by the algorithm.
    """
    imdb1_coverages[algo_name].append(coverage)
    imdb1_costs[algo_name].append(cost)
    imdb1_runtimes[algo_name].append(runtime)


for budgetVal in cost_arr:
    logging.info("==="*50)
    logging.info("Finding Teams for Budget={}".format(budgetVal))
    logging.info("==="*50)

    #Initialize Pareto teams object
    # NOTE(review): n_experts is truncated to numExperts while costs is passed
    # in full - confirm paretoCoverageCost expects unsliced costs.
    # NOTE(review): size_univ=24 is hard-coded; presumably tied to this
    # dataset - confirm before reusing the cell with other data.
    paretoTeams = paretoCoverageCost(task=imdb_tasks_1[task_index],
                                    n_experts = imdb_experts_1[:numExperts],
                                    costs = imdb_costs_1, size_univ = 24,
                                    budget=budgetVal)

    # Each solver call is unpacked into five values; the last three are stored
    # per budget. The algorithms run in the same order for every budget.

    #Plain Greedy
    plainGreedy_explist, plainGreedy_solskills, plainGreedy_cov, plainGreedy_cost, plainGreedy_time = paretoTeams.plainGreedy()
    _record_run('PlainGreedy', plainGreedy_cov, plainGreedy_cost, plainGreedy_time)

    #Greedy Plus
    greedyPlus_explist, greedyPlus_solskills, greedyPlus_cov, greedyPlus_cost, greedyPlus_time = paretoTeams.greedyPlus()
    _record_run('GreedyPlus', greedyPlus_cov, greedyPlus_cost, greedyPlus_time)

    #Two Guess Plain Greedy
    twoGuessPG_explist, twoGuessPG_solskills, twoGuessPG_cov, twoGuessPG_cost, twoGuessPG_time = paretoTeams.twoGuessPlainGreedy()
    _record_run('TwoGuessPlainGreedy', twoGuessPG_cov, twoGuessPG_cost, twoGuessPG_time)

    #One Guess Greedy Plus
    oneGuessGP_explist, oneGuessGP_solskills, oneGuessGP_cov, oneGuessGP_cost, oneGuessGP_time = paretoTeams.oneGuessGreedyPlus()
    _record_run('OneGuessGreedyPlus', oneGuessGP_cov, oneGuessGP_cost, oneGuessGP_time)

    #Break if all coverages are max
    # Once all four algorithms report a coverage of exactly 1 the remaining
    # budgets are skipped, so the result lists can be shorter than cost_arr.
    if plainGreedy_cov == 1 and greedyPlus_cov == 1 and twoGuessPG_cov == 1 and oneGuessGP_cov == 1:
        break

In [None]:
#Plot performance and runtimes
# Two stacked panels sharing the same budgets on the x-axis: task coverage on
# top, runtime below, one line per algorithm.
# NOTE(review): the x-axis is the budget from cost_arr; the realised costs
# collected in imdb1_costs are not plotted here - confirm that is intended.
algo_names = list(imdb1_coverages.keys())
# One colour per algorithm, so adding or removing an algorithm keeps every
# series visible instead of being silently dropped by zip().
colors = cm.magma(np.linspace(0.01, 0.8, len(algo_names)))

fig, axs= plt.subplots(2, 1, figsize=(10, 8))

for alg, line_color in zip(algo_names, colors):
    coverages = imdb1_coverages[alg]
    runtimes = imdb1_runtimes[alg]
    # The budget loop may stop before cost_arr is exhausted, so each series is
    # paired with only as many budgets as it has recorded values.
    axs[0].plot(cost_arr[:len(coverages)], coverages, '*--', alpha=0.5, label=alg, color=line_color) #Plot coverage vs. cost
    axs[1].plot(cost_arr[:len(runtimes)], runtimes, '*--', alpha=0.7, label=alg, color=line_color) #Plot running times


axs[0].set_title('Coverage vs. Cost')
axs[0].set_ylabel("Task Coverage")
axs[0].set_xlabel("Cost Budget")
axs[0].grid(alpha=0.3)

axs[1].set_title('Runtime')
axs[1].set_ylabel("Runtime (s)")
axs[1].set_xlabel("Cost Budget")
axs[1].grid(alpha=0.3)

# Create a single legend for both subplots (handles taken from the top subplot;
# both panels plot the same algorithms with the same colours)
handles, labels = axs[0].get_legend_handles_labels()
# Keep the legend on one row whatever the number of algorithms.
fig.legend(handles, labels, loc='upper center', ncol=max(1, len(labels)))

plt.tight_layout(rect=[0, 0, 1, 0.97]) # Make room for the legend on top
plt.show()