In [None]:
from paretoCardinalityInfluence import *
import matplotlib.cm as cm
import matplotlib.pyplot as plt

# Load the two influence datasets, one after the other.
# import_influence_data (provided by the star import above) returns a pair,
# which is unpacked here into a graph object and a node-cost mapping.

# --- NetHEPT ---
data_path_HEPT = '../../datasets/raw_data/influence/NetHEPT/hep.txt'
G_HEPT, node_costs_HEPT = import_influence_data(data_path_HEPT)

# --- NetPHY ---
data_path_PHY = '../../datasets/raw_data/influence/NetPHY/phy.txt'
G_PHY, node_costs_PHY = import_influence_data(data_path_PHY)

In [None]:
def _sample_live_edge_graphs(G, num_samples):
    '''
    Draw `num_samples` random subgraphs of G, keeping each edge independently.
    Parameters:
    - G: Graph whose edges carry a 'weight' attribute (read as data['weight'])
    - num_samples: Number of subgraphs to draw
    Returns:
    - List of (G_sample, neighbors, connected_components) tuples, where
      neighbors maps a node to the set of its neighbours in the sample and
      connected_components maps a node to the component (set) containing it.
    '''
    graph_samples = []
    for _ in range(num_samples):
        G_sample = nx.Graph()
        neighbors = defaultdict(set)
        # defaultdict() with no factory behaves like a plain dict: looking up
        # a missing node raises KeyError instead of creating an entry.
        connected_components = defaultdict()
        for u, v, data in G.edges(data=True):
            # One uniform draw per edge; the edge survives when the draw is
            # strictly below its weight.
            if np.random.uniform(0, 1) < data['weight']:
                G_sample.add_edge(u, v)
                neighbors[u].add(v)
                neighbors[v].add(u)
        # Only endpoints of surviving edges are ever added to G_sample, so a
        # node that lost all of its edges has no entry in connected_components.
        # NOTE(review): confirm paretoCardinalityInfluence copes with that.
        for component in nx.connected_components(G_sample):
            for node in component:
                connected_components[node] = component
        graph_samples.append((G_sample, neighbors, connected_components))
    return graph_samples


def _influence_curve(k_sol_dict, k_max, carry_forward):
    '''
    Turn a {k: {'Influence': value}} mapping into a dense array for k = 1..k_max.
    Parameters:
    - k_sol_dict: Mapping from cardinality k to a dict holding an 'Influence' entry
    - k_max: Largest cardinality to report
    - carry_forward: If True, a missing k repeats the most recent recorded
      influence (0 before the first one); if False, a missing k is reported as 0
    Returns:
    - Float numpy array of length k_max
    '''
    curve = []
    last_seen = 0
    for k in range(1, k_max + 1):
        if k in k_sol_dict:
            last_seen = k_sol_dict[k]['Influence']
            curve.append(last_seen)
        else:
            curve.append(last_seen if carry_forward else 0)
    return np.array(curve, dtype=float)


def findApproximateParetoSolutionsInfluence(G, node_costs, k_max, num_samples=35, num_runs=10, dataset_name=""):
    '''
    Run algorithms over multiple runs, aggregate results, and plot mean +/- std.

    Every run draws a fresh set of sampled graphs that is shared by all three
    algorithms; each algorithm gets its own paretoCardinalityInfluence instance.
    Two figures are shown (influence vs. cardinality, and total runtime per
    algorithm) and a runtime summary is printed.

    Parameters:
    - G: Graph
    - node_costs: Node costs dict
    - k_max: Maximum seed set size (cardinality)
    - num_samples: Number of Monte Carlo samples (per run)
    - num_runs: Number of independent runs (must be at least 1)
    - dataset_name: Name of the dataset for plotting
    Returns:
    - None
    Raises:
    - ValueError: if num_runs is smaller than 1 (there would be nothing to aggregate)
    '''
    # Without this guard an empty run list only fails later inside np.vstack
    # with an unrelated-looking "need at least one array" ValueError.
    if num_runs < 1:
        raise ValueError(f"num_runs must be at least 1, got {num_runs}")

    # (display name, solver method to call, carry the last influence forward?)
    # ParetoGreedy repeats its latest influence for cardinalities missing from
    # kSolDict, whereas TopK and Random report 0 for them.
    algorithms = [
        ("ParetoGreedy", "greedyCardinality", True),
        ("TopK", "top_k", False),
        ("Random", "random_selection", False),
    ]
    algo_names = [name for name, _, _ in algorithms]

    # containers across runs
    all_influences = {alg: [] for alg in algo_names}
    all_runtimes = {alg: [] for alg in algo_names}

    for _ in range(num_runs):
        # Generate graph samples once to share across algorithms (per run)
        graph_samples = _sample_live_edge_graphs(G, num_samples)

        for alg, method_name, carry_forward in algorithms:
            solver = paretoCardinalityInfluence(G=G,
                                                node_costs=node_costs,
                                                k_max=k_max,
                                                num_samples=num_samples,
                                                graph_samples=graph_samples)
            # Each solver method returns a 3-tuple; only the last element
            # (the runtime) is used here, the solutions are read from kSolDict.
            _, _, runTime = getattr(solver, method_name)()
            all_influences[alg].append(_influence_curve(solver.kSolDict, k_max, carry_forward))
            all_runtimes[alg].append(runTime)

    # compute mean and std across runs for each algorithm
    mean_influences = {}
    std_influences = {}
    for alg in algo_names:
        stacked = np.vstack(all_influences[alg])  # shape (num_runs, k_max)
        mean_influences[alg] = np.mean(stacked, axis=0)
        # The band is drawn at half a standard deviation, matching the title.
        std_influences[alg] = np.std(stacked, axis=0) * 0.5

    # Plot mean influence with shaded std band
    colors = cm.magma(np.linspace(0.01, 0.8, len(algo_names)))
    linestyles = ['-', '--', ':']
    markers = ['o', 's', '^']
    cardinalities = range(1, k_max + 1)

    fig, ax = plt.subplots(figsize=(8, 5))
    for i, alg in enumerate(algo_names):
        mean = mean_influences[alg]
        std = std_influences[alg]

        ax.plot(cardinalities, mean,
                label=alg,
                color=colors[i],
                linestyle=linestyles[i],
                marker=markers[i],
                markersize=5,
                markeredgewidth=0.8,
                markeredgecolor='k',
                linewidth=1.2,
                zorder=3)
        # Lower edge is clipped at 0 so the band never dips below zero influence.
        ax.fill_between(cardinalities,
                        np.clip(mean - std, 0, None),
                        mean + std,
                        color=colors[i],
                        alpha=0.18,
                        zorder=2)

    ax.set_xlabel('Cardinality (k)')
    ax.set_ylabel('Mean Influence Spread')
    title = f'Mean Influence Spread on {dataset_name} (shaded = ±0.5 std)' if dataset_name else 'Mean Influence Spread (shaded = ±0.5 std)'
    ax.set_title(title)
    ax.grid(alpha=0.3)
    ax.legend(fontsize=8)
    plt.show()

    # Plot total runtime per algorithm with mean +/- std bars
    means_rt = [np.mean(all_runtimes[alg]) for alg in algo_names]
    stds_rt = [np.std(all_runtimes[alg]) for alg in algo_names]
    x = np.arange(len(algo_names))
    fig2, ax2 = plt.subplots(figsize=(8, 3))
    ax2.bar(x, means_rt, yerr=stds_rt, capsize=5, color=list(colors))
    ax2.set_xticks(x)
    ax2.set_xticklabels(algo_names, rotation=30, ha='right', fontsize=8)
    ax2.set_ylabel('Total Runtime (s)')
    ax2.set_title('Mean Total Runtime per Algorithm (±std)')
    ax2.grid(axis='y', alpha=0.3)
    plt.tight_layout()
    plt.show()

    # Print runtime summary (reuses the statistics computed for the bar chart)
    print(f"Runtime summary for {dataset_name}:")
    for alg, mean_rt, std_rt in zip(algo_names, means_rt, stds_rt):
        print(f"{alg}: {mean_rt:.2f} ± {std_rt:.2f} seconds")

    return None

### NetHEPT Experiments

In [None]:
# Experiment settings: maximum seed-set size, Monte Carlo samples per run,
# and number of independent runs. These stay module-level globals because the
# NetPHY cell further down reads the same three names.
# NOTE(review): no random seed is set in the visible cells, so the figures
# will differ from one execution to the next.
k_max, num_samples, num_runs = 25, 50, 10

# NetHEPT experiment
findApproximateParetoSolutionsInfluence(
    G_HEPT,
    node_costs_HEPT,
    k_max,
    num_samples=num_samples,
    num_runs=num_runs,
    dataset_name="NetHEPT",
)

### NetPHY Experiments

In [None]:
# NetPHY experiment, using the k_max / num_samples / num_runs values defined
# in the parameters cell of the NetHEPT section.
findApproximateParetoSolutionsInfluence(
    G_PHY,
    node_costs_PHY,
    k_max,
    num_samples=num_samples,
    num_runs=num_runs,
    dataset_name="NetPHY",
)