In [None]:
from paretoCardinalityInfluence import *
import matplotlib.cm as cm
import matplotlib.pyplot as plt

# Load the two influence datasets. For each one: record where its raw file
# lives, then read the graph and the node-cost mapping from that file.
data_path_HEPT = '../../datasets/raw_data/influence/NetHEPT/hep.txt'
G_HEPT, node_costs_HEPT = import_influence_data(data_path_HEPT)

data_path_PHY = '../../datasets/raw_data/influence/NetPHY/phy.txt'
G_PHY, node_costs_PHY = import_influence_data(data_path_PHY)

In [None]:
def findApproximateParetoSolutionsInfluence(G, node_costs, k_max, num_samples=35, dataset_name="", seed=None):
    '''
    Run ParetoGreedy, TopK and Random on a single graph, plot influence
    against cardinality for each, and print their runtimes.

    All three algorithms are given the same pre-drawn graph samples, so their
    influence values are computed on identical randomness.

    Parameters:
    - G: Graph whose edges carry a 'weight' attribute
    - node_costs: Node costs dict (passed through to paretoCardinalityInfluence)
    - k_max: Maximum seed set size (cardinality)
    - num_samples: Number of Monte Carlo samples
    - dataset_name: Name of the dataset for plotting
    - seed: Optional seed for the graph sampling done in this function. When
      None (default) the draws come from NumPy's global random state. It does
      not seed any randomness used inside paretoCardinalityInfluence itself.

    Returns:
    - None. The figure is shown and the runtimes are printed as side effects.
    '''
    # (label used in the plot, method to call on paretoCardinalityInfluence),
    # listed in the order they are run and plotted.
    algorithms = [("ParetoGreedy", "greedyCardinality"),
                  ("TopK", "top_k"),
                  ("Random", "random_selection")]
    algo_names = [label for label, _ in algorithms]

    # Without a seed keep using the global NumPy state, so callers that seed
    # it themselves see the same draws as before.
    rng = np.random if seed is None else np.random.default_rng(seed)

    # Generate graph samples once to share across algorithms
    graph_samples = _sample_influence_graphs(G, num_samples, rng)

    influences = {}
    runtimes = {}
    for alg, method_name in algorithms:
        # Each algorithm gets its own object; its results are read back from
        # that object's kSolDict after the run.
        solver = paretoCardinalityInfluence(G=G,
                                            node_costs=node_costs,
                                            k_max=k_max,
                                            num_samples=num_samples,
                                            graph_samples=graph_samples)
        _, _, runTime = getattr(solver, method_name)()
        influences[alg] = _influence_curve(solver.kSolDict, k_max)
        runtimes[alg] = runTime

    # Plot influence curves
    colors = cm.magma(np.linspace(0.01, 0.8, len(algo_names)))
    linestyles = ['-', '--', ':']
    markers = ['o', 's', '^']
    cardinalities = range(1, k_max + 1)

    fig, ax = plt.subplots(figsize=(8, 5))
    for i, alg in enumerate(algo_names):
        ax.plot(cardinalities, influences[alg],
                label=alg,
                color=colors[i],
                linestyle=linestyles[i],
                marker=markers[i],
                markersize=5,
                markeredgewidth=0.8,
                markeredgecolor='k',
                linewidth=1.2)

    ax.set_xlabel('Cardinality (k)')
    ax.set_ylabel('Influence Spread')
    title = f'Influence Spread on {dataset_name}' if dataset_name else 'Influence Spread'
    ax.set_title(title)
    ax.grid(alpha=0.3)
    ax.legend(fontsize=8)
    plt.show()

    # Print runtimes
    print(f"Runtimes for {dataset_name}:")
    for alg in algo_names:
        print(f"{alg}: {runtimes[alg]:.2f} seconds")

    return None


def _sample_influence_graphs(G, num_samples, rng):
    '''
    Draw num_samples random subgraphs of G.

    Returns a list of (G_sample, neighbors, connected_components) tuples, where
    neighbors maps a node to the set of its neighbours in the sample and
    connected_components maps a node to the node set of its component.
    '''
    samples = []
    for _ in range(num_samples):
        G_sample = nx.Graph()
        neighbors = defaultdict(set)
        for u, v, data in G.edges(data=True):
            # Keep the edge when a uniform draw falls below its 'weight'
            # (presumably the edge's activation probability - confirm).
            if rng.uniform(0, 1) < data['weight']:
                G_sample.add_edge(u, v)
                neighbors[u].add(v)
                neighbors[v].add(u)
        # NOTE: nodes are only added through add_edge, so a node of G that
        # kept no edge in this sample is absent from G_sample and from this
        # lookup.
        connected_components = {}
        for component in nx.connected_components(G_sample):
            for node in component:
                connected_components[node] = component
        samples.append((G_sample, neighbors, connected_components))
    return samples


def _influence_curve(k_sol_dict, k_max):
    '''
    Build the list of influence values for k = 1..k_max from a kSolDict.

    A cardinality with no entry inherits the value of the closest smaller
    cardinality that has one (0 if there is none), so a gap never shows up as
    a drop to zero in the plot.
    '''
    curve = []
    current_influence = 0
    for k in range(1, k_max + 1):
        if k in k_sol_dict:
            current_influence = k_sol_dict[k]['Influence']
        curve.append(current_influence)
    return curve

### NetHEPT Experiments

In [None]:
# Experiment settings. These are notebook globals; the NetPHY cell further
# down reads the same two values.
k_max = 25        # maximum seed set size
num_samples = 50  # number of Monte Carlo samples

# NetHEPT: run the algorithms, show the influence plot, print runtimes
findApproximateParetoSolutionsInfluence(
    G=G_HEPT,
    node_costs=node_costs_HEPT,
    k_max=k_max,
    num_samples=num_samples,
    dataset_name="NetHEPT",
)

### NetPHY Experiments

In [None]:
# NetPHY: same experiment, using the k_max and num_samples set in the
# NetHEPT parameters cell
findApproximateParetoSolutionsInfluence(
    G=G_PHY,
    node_costs=node_costs_PHY,
    k_max=k_max,
    num_samples=num_samples,
    dataset_name="NetPHY",
)