In [None]:
from paretoKnapsackTeams import *

import shutil
import matplotlib as mpl

# Enable LaTeX text rendering only when the external tools it needs are on PATH;
# otherwise fall back to Matplotlib's built-in text renderer.
# Per Matplotlib's usetex documentation, raster (Agg) output additionally needs
# dvipng, so probing for `latex` alone can enable usetex on a machine where the
# notebook's PNG rendering would then fail.
if shutil.which("latex") and shutil.which("dvipng"):
    mpl.rcParams.update({
        "text.usetex": True,
        "font.family": "serif",
        "text.latex.preamble": r"\usepackage{amsmath}\usepackage{amssymb}"
    })
else:
    mpl.rcParams.update({"text.usetex": False})

# ---- Dataset loading ----
# Every import_pickled_datasets(name, variant) call is unpacked into four
# module-level names: <prefix>_experts_<v>, <prefix>_tasks_<v>,
# <prefix>_costs_<v>, <prefix>_graphmat_<v>.
# NOTE(review): the helper name suggests pickle files are read — only point it
# at trusted local data.

# IMDB (variants 1 and 2 are loaded)
(imdb_experts_1, imdb_tasks_1,
 imdb_costs_1, imdb_graphmat_1) = import_pickled_datasets('imdb', 1)
(imdb_experts_2, imdb_tasks_2,
 imdb_costs_2, imdb_graphmat_2) = import_pickled_datasets('imdb', 2)
# Disabled: import_pickled_datasets('imdb', 3) -> imdb_*_3

# Bibsonomy (variant 1 is loaded)
(bbsm_experts_1, bbsm_tasks_1,
 bbsm_costs_1, bbsm_graphmat_1) = import_pickled_datasets('bbsm', 1)
# Disabled: import_pickled_datasets('bbsm', 2) -> bbsm_*_2
# Disabled: import_pickled_datasets('bbsm', 3) -> bbsm_*_3

# Freelancer (variant 1 is loaded)
(fl_experts_1, fl_tasks_1,
 fl_costs_1, fl_graphmat_1) = import_pickled_datasets('freelancer', 1)
# Disabled: import_pickled_datasets('freelancer', 2) -> fl_*_2
### Single Task Plotting

In [None]:
def findSingleTaskSolutions(tasks_list, experts_list, costs_list,
                             sizeUniverse, numExperts, maxBudget,
                             dataset_name=None, task_index=0, task_indices=None):
    '''
    Run algorithms for one or more single tasks and plot results (no averaging or interpolation).

    One subplot is drawn per selected task, showing coverage against cost for
    ParetoGreedy, F-Greedy, C-Greedy and TopK. The figure is saved as a PDF,
    shown, and a per-algorithm runtime total is logged.

    Parameters
    ----------
    tasks_list : sequence
        Tasks of the dataset; indexed by the selected task indices.
    experts_list : sequence
        Experts of the dataset; only ``experts_list[:numExperts]`` is used.
    costs_list :
        Forwarded unchanged as ``costs`` to ``paretoKnapsackTeams``.
    sizeUniverse :
        Forwarded unchanged as ``size_univ`` to ``paretoKnapsackTeams``.
    numExperts : int
        Number of leading experts to consider.
    maxBudget : float
        Largest budget. C-Greedy and TopK are evaluated on 15 evenly spaced
        budgets from 5 to ``maxBudget``; ParetoGreedy and F-Greedy are run once
        at ``maxBudget``.
    dataset_name : str, optional
        Used in the output file name (spaces become underscores); defaults to
        ``"dataset"``.
    task_index : int, optional
        Single task to run when ``task_indices`` is None.
    task_indices : iterable of int, optional
        Tasks to run. Out-of-range indices are ignored (with a logged warning).

    Raises
    ------
    ValueError
        If none of the requested task indices is valid for ``tasks_list``.

    Side effects
    ------------
    Writes ``<cwd>/../../plots/knapsack/single/<name>_knapsack_single_task.pdf``,
    calls ``plt.show()`` and emits ``logging`` records.
    '''
    # Cost grid (same for all budgets within this task)
    num_steps, min_cost = 15, 5
    cost_arr = np.linspace(min_cost, maxBudget, num_steps)

    algo_names = ["ParetoGreedy", "C-Greedy", "F-Greedy", "TopK"]
    plot_algos = {
        "ParetoGreedy": True,
        "F-Greedy": True,
        "C-Greedy": True,
        "TopK": True
    }
    plot_algo_names = [alg for alg in algo_names if plot_algos.get(alg, False)]

    def sort_pairs(costs, covs):
        # Return (costs, coverages) as float arrays ordered by increasing cost.
        if len(costs) == 0:
            return np.array([]), np.array([])
        pairs = sorted(zip(costs, covs), key=lambda x: x[0])
        return np.array([p[0] for p in pairs], dtype=float), np.array([p[1] for p in pairs], dtype=float)

    def format_points(costs, covs):
        # Human-readable "(cost, coverage)" strings for logging.
        return [f"({c:.2f}, {v:.3f})" for c, v in zip(costs, covs)]

    # Determine which tasks to run
    if task_indices is None:
        task_indices = [task_index]
    requested_indices = list(task_indices)
    num_tasks_total = len(tasks_list)
    task_indices = [i for i in requested_indices if 0 <= i < num_tasks_total]
    dropped_indices = [i for i in requested_indices if not (0 <= i < num_tasks_total)]
    if dropped_indices:
        # Previously these were dropped silently, which hid typos in the call.
        logging.warning("Ignoring out-of-range task indices: %s", dropped_indices)
    if len(task_indices) == 0:
        raise ValueError("No valid task indices provided for this dataset.")

    # Plot settings
    tab10_colors = plt.get_cmap("tab10").colors
    color_map = {
        "TopK": tab10_colors[4],
        "C-Greedy": tab10_colors[1],
        "F-Greedy": tab10_colors[2],
        "ParetoGreedy": tab10_colors[3],
    }
    marker_map = {
        "TopK": "o",
        "F-Greedy": "s",
        "C-Greedy": "^",
        "ParetoGreedy": "X",
    }
    marker_sizes = {
        "ParetoGreedy": 8,
        "F-Greedy": 7,
        "C-Greedy": 7,
        "TopK": 6,
    }
    linestyle_map = {
        "TopK": (0, (1, 1)),
        "F-Greedy": (0, (2, 2)),
        "C-Greedy": (0, (3, 2)),
        "ParetoGreedy": (0, (4, 2)),
    }
    zorder_map = {"ParetoGreedy": 6, "TopK": 5, "F-Greedy": 4, "C-Greedy": 3}
    alpha_map = {"ParetoGreedy": 1.0, "F-Greedy": 0.9, "TopK": 0.9, "C-Greedy": 0.9}

    # \texttt{...} is only meaningful when LaTeX rendering is active; with the
    # plain-text fallback it would be drawn literally, backslash and braces included.
    use_tex = bool(plt.rcParams.get("text.usetex", False))

    # Prepare figure with shared legend: 3 columns, at least 2 rows, and extra
    # rows when more than 6 tasks are requested (the fixed 2x3 grid overflowed).
    n_tasks = len(task_indices)
    n_cols = 3
    n_rows = max(2, (n_tasks + n_cols - 1) // n_cols)
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(16, 4 * n_rows))
    axes = np.array(axes).reshape(-1)
    legend_handles = None
    legend_labels = None
    label_map = {}

    # Per-dataset runtime summary
    runtime_totals = {alg: 0.0 for alg in algo_names}

    for ax_idx, task_idx in enumerate(task_indices):
        ax = axes[ax_idx]
        task = tasks_list[task_idx]

        # Prefix Pareto + Coverage Linear (computed once at full budget)
        paretoTeams_full = paretoKnapsackTeams(task=task,
                                          n_experts=experts_list[:numExperts],
                                          costs=costs_list, size_univ=sizeUniverse,
                                          budget=maxBudget)
        pp1_costs, pp1_coverages, _, pp1_time = paretoTeams_full.prefixParetoGreedy_1Guess()
        cl_costs, cl_coverages, _, cl_time = paretoTeams_full.F_Greedy()

        # Per-budget results
        coverages = {alg: [] for alg in algo_names}
        runtimes = {alg: [] for alg in algo_names}

        for budgetVal in cost_arr:
            paretoTeams = paretoKnapsackTeams(task=task,
                                         n_experts=experts_list[:numExperts],
                                         costs=costs_list, size_univ=sizeUniverse,
                                         budget=budgetVal)

            # One Guess Greedy Plus
            # NOTE(review): the returned team cost is discarded; the point is
            # plotted at the budget value instead — confirm this is intended.
            _, _, ogCov, _, ogTime = paretoTeams.oneGuessGreedyPlus()
            coverages['C-Greedy'].append(ogCov)
            runtimes['C-Greedy'].append(ogTime)

            # Top-k (add experts until budget is hit)
            _, _, tkCov, _, tkTime = paretoTeams.top_k()
            coverages['TopK'].append(tkCov)
            runtimes['TopK'].append(tkTime)

        runtimes['ParetoGreedy'].append(pp1_time)
        runtimes['F-Greedy'].append(cl_time)

        # Sort each full-budget series once; reused for logging and plotting.
        pg_costs, pg_covs = sort_pairs(pp1_costs, pp1_coverages)
        cl_costs_sorted, cl_covs_sorted = sort_pairs(cl_costs, cl_coverages)

        # Log pareto points per algorithm
        logging.info("Task %d pareto points - ParetoGreedy: %s", task_idx, format_points(pg_costs, pg_covs))
        logging.info("Task %d pareto points - F-Greedy: %s", task_idx, format_points(cl_costs_sorted, cl_covs_sorted))
        logging.info("Task %d pareto points - C-Greedy: %s", task_idx, format_points(cost_arr, coverages['C-Greedy']))
        logging.info("Task %d pareto points - TopK: %s", task_idx, format_points(cost_arr, coverages['TopK']))

        series_map = {
            "ParetoGreedy": (pg_costs, pg_covs),
            "F-Greedy": (cl_costs_sorted, cl_covs_sorted),
            "C-Greedy": (cost_arr, np.array(coverages['C-Greedy'], dtype=float)),
            "TopK": (cost_arr, np.array(coverages['TopK'], dtype=float))
        }

        for i, alg in enumerate(plot_algo_names):
            color = color_map.get(alg, tab10_colors[i % len(tab10_colors)])
            marker = marker_map.get(alg, 'o')
            label = rf"\texttt{{{alg}}}" if use_tex else alg
            label_map[alg] = label
            x_vals, y_vals = series_map[alg]
            ax.plot(x_vals, y_vals,
                    # Only the first subplot contributes entries to the shared legend.
                    label=(label if ax_idx == 0 else "_nolegend_"),
                    color=color,
                    linestyle=linestyle_map.get(alg, (0, (1, 1))),
                    marker=marker,
                    markersize=marker_sizes.get(alg, 6),
                    markeredgewidth=1.2,
                    markeredgecolor='k',
                    markerfacecolor=color,
                    linewidth=1.8,
                    alpha=alpha_map.get(alg, 0.9),
                    zorder=zorder_map.get(alg, 1))

        ax.set_title("")
        ax.grid(alpha=0.3)
        ax.tick_params(axis='both', labelsize=24)

        if ax_idx == 0:
            legend_handles, legend_labels = ax.get_legend_handles_labels()

        # Runtime summary for this task
        for alg in algo_names:
            total_runtime = float(np.nansum(np.array(runtimes.get(alg, []), dtype=float)))
            runtime_totals[alg] += total_runtime

    # Hide unused subplots if any
    for ax in axes[n_tasks:]:
        ax.axis('off')

    if legend_handles is not None:
        handle_map = dict(zip(legend_labels, legend_handles))
        legend_order = ["C-Greedy", "F-Greedy", "ParetoGreedy", "TopK"]
        # Build handles and labels together so they stay aligned, and skip any
        # algorithm that was not plotted instead of raising KeyError.
        legend_entries = [
            (handle_map[label_map[alg]], label_map[alg])
            for alg in legend_order
            if alg in label_map and label_map[alg] in handle_map
        ]
        if legend_entries:
            ordered_handles = [handle for handle, _ in legend_entries]
            ordered_labels = [text for _, text in legend_entries]
            fig.legend(ordered_handles, ordered_labels, loc='upper center', ncol=4, fontsize=20, frameon=False)

    # Axis labels: y-label on the first column; x-label on the lowest *visible*
    # subplot of each column (the grid's last row may be entirely hidden).
    for idx in range(n_tasks):
        ax = axes[idx]
        if idx + n_cols >= n_tasks:
            ax.set_xlabel(r'Team cost, $c_\ell$', fontsize=24)
        if idx % n_cols == 0:
            ax.set_ylabel(r'Task coverage, $f$', fontsize=24)

    # Leave the top 10% of the figure free for the shared legend.
    fig.tight_layout(rect=[0.03, 0.03, 1, 0.9])

    # Save figure
    from pathlib import Path
    # Output root is two directory levels above the current working directory.
    base_dir = Path.cwd().resolve().parents[1]
    plots_dir = base_dir / "plots" / "knapsack" / "single"
    plots_dir.mkdir(parents=True, exist_ok=True)
    safe_name = (dataset_name or "dataset").replace(" ", "_")
    out_path = plots_dir / f"{safe_name}_knapsack_single_task.pdf"
    fig.savefig(out_path, bbox_inches="tight")

    plt.show()

    # Runtime summary (seconds)
    runtime_lines = ["Runtime summary (seconds):"]
    for alg in algo_names:
        runtime_lines.append(f"  - {alg}: {runtime_totals[alg]:.3f}")
    logging.info("\n".join(runtime_lines))

## Freelancer-1

In [None]:
# Freelancer variant 1: first six tasks, budgets swept up to 200.
findSingleTaskSolutions(
    tasks_list=fl_tasks_1,
    experts_list=fl_experts_1,
    costs_list=fl_costs_1,
    sizeUniverse=50,
    numExperts=50,
    maxBudget=200,
    dataset_name="Freelancer",
    task_indices=range(6),
)

## IMDB

In [None]:
# IMDB variant 1: first six tasks, budgets swept up to 60.
findSingleTaskSolutions(
    tasks_list=imdb_tasks_1,
    experts_list=imdb_experts_1,
    costs_list=imdb_costs_1,
    sizeUniverse=24,
    numExperts=150,
    maxBudget=60,
    dataset_name="IMDB-1",
    task_indices=range(6),
)

In [None]:
# IMDB variant 2: first six tasks, budgets swept up to 30.
findSingleTaskSolutions(
    tasks_list=imdb_tasks_2,
    experts_list=imdb_experts_2,
    costs_list=imdb_costs_2,
    sizeUniverse=24,
    numExperts=300,
    maxBudget=30,
    dataset_name="IMDB-2",
    task_indices=range(6),
)

## Bibsonomy (Bbsm-1)

In [None]:
# Bibsonomy variant 1: first six tasks, budgets swept up to 30.
findSingleTaskSolutions(
    tasks_list=bbsm_tasks_1,
    experts_list=bbsm_experts_1,
    costs_list=bbsm_costs_1,
    sizeUniverse=75,
    numExperts=250,
    maxBudget=30,
    dataset_name="Bbsm",
    task_indices=range(6),
)