In [None]:
from paretoKnapsackInfluence import *
import matplotlib.cm as cm
import matplotlib.pyplot as plt

# Locations of the raw NetHEPT and NetPHY influence datasets (relative to this notebook)
data_path_HEPT = '../../datasets/raw_data/influence/NetHEPT/hep.txt'
data_path_PHY = '../../datasets/raw_data/influence/NetPHY/phy.txt'

# Load both datasets in order (HEPT, then PHY); each import yields a (graph, node costs) pair
(G_HEPT, node_costs_HEPT), (G_PHY, node_costs_PHY) = map(
    import_influence_data, (data_path_HEPT, data_path_PHY)
)

In [None]:
def _sample_live_edge_graphs(G, num_samples, rng):
    '''
    Draw random subgraphs of G for Monte Carlo influence estimation.
    Every edge (u, v) is kept independently when a uniform draw from [0, 1)
    is smaller than the edge's 'weight' attribute.
    Parameters:
    - G: Graph whose edges carry a 'weight' attribute
    - num_samples: Number of subgraphs to draw
    - rng: Object exposing uniform(low, high) (np.random or a RandomState)
    Returns:
    - List of (G_sample, neighbors, connected_components) tuples, where
      neighbors maps node -> set of adjacent nodes in G_sample and
      connected_components maps node -> the component (set) containing it.
      Nodes that keep no edge are not added to any of the three structures.
    '''
    graph_samples = []
    for _ in range(num_samples):
        G_sample = nx.Graph()
        neighbors = defaultdict(set)
        for u, v, data in G.edges(data=True):
            if rng.uniform(0, 1) < data['weight']:
                G_sample.add_edge(u, v)
                neighbors[u].add(v)
                neighbors[v].add(u)
        # defaultdict() without a factory behaves like a plain dict (missing keys
        # raise KeyError); the type is kept so the solver receives the same object kind.
        connected_components = defaultdict()
        for component in nx.connected_components(G_sample):
            for node in component:
                # All members share the same component set object.
                connected_components[node] = component
        graph_samples.append((G_sample, neighbors, connected_components))
    return graph_samples


def findApproximateParetoSolutionsKnapsackInfluence(G, node_costs, budget, epsilon_val=0.1, num_samples=35, dataset_name="", seed=None):
    '''
    Run SubmodularWithBudget, PlainGreedy and GreedyPlus on a single graph and
    print a one-line summary per algorithm (nothing is plotted here).
    Parameters:
    - G: Graph
    - node_costs: Node costs dict
    - budget: Knapsack budget
    - epsilon_val: Approximation parameter
    - num_samples: Number of Monte Carlo samples
    - dataset_name: Name of the dataset, used in the printed header
    - seed: Optional seed for the graph sampling done in this function.
      None (default) draws from the global np.random state as before; an int
      makes the sampled graphs reproducible without touching the global state.
      NOTE(review): any randomness inside the solver class is not controlled by
      this seed - confirm whether the solver draws random numbers itself.
    Returns:
    - (solutions, influences, costs, runtimes): four dicts keyed by algorithm
      name holding the selected nodes, influence, total cost and runtime.
    '''
    algo_names = ["SubmodularWithBudget", "PlainGreedy", "GreedyPlus"]

    # Generate graph samples once so all algorithms are evaluated on the same samples
    rng = np.random if seed is None else np.random.RandomState(seed)
    graph_samples = _sample_live_edge_graphs(G, num_samples, rng)

    def make_solver():
        # A fresh solver per algorithm, as before, so no state carries over between runs.
        return paretoKnapsackInfluence(G=G,
                                       node_costs=node_costs,
                                       budget=budget,
                                       num_samples=num_samples,
                                       graph_samples=graph_samples)

    # alg -> (solution, influence, cost, runtime)
    results = {}

    # Submodular with Budget returns only (solution, runtime); influence and cost
    # are computed afterwards, so they are not included in the reported runtime.
    solver = make_solver()
    sol, runTime = solver.submodularWithBudget(influence_x=1.0, epsilon_val=epsilon_val)
    results["SubmodularWithBudget"] = (sol,
                                       solver.compute_influence(sol),
                                       sum(node_costs[node] for node in sol),
                                       runTime)

    # Plain Greedy and Greedy Plus already return (solution, influence, cost, runtime)
    results["PlainGreedy"] = make_solver().plainGreedy()
    results["GreedyPlus"] = make_solver().greedyPlus()

    # Split the per-algorithm tuples into the four dicts callers expect
    solutions, influences, costs, runtimes = {}, {}, {}, {}
    for alg in algo_names:
        solutions[alg], influences[alg], costs[alg], runtimes[alg] = results[alg]

    # Print results
    print(f"Results for {dataset_name}:")
    for alg in algo_names:
        print(f"{alg}: Influence={influences[alg]:.3f}, Cost={costs[alg]:.1f}, Runtime={runtimes[alg]:.2f}s, Nodes={len(solutions[alg])}")

    return solutions, influences, costs, runtimes

### NetHEPT Experiments

In [None]:
# Experiment parameters (also used by the NetPHY run)
budget = 50          # knapsack budget
epsilon_val = 0.1    # approximation parameter
num_samples = 50     # number of Monte Carlo samples

# Run all algorithms on NetHEPT
solutions_HEPT, influences_HEPT, costs_HEPT, runtimes_HEPT = findApproximateParetoSolutionsKnapsackInfluence(
    G=G_HEPT,
    node_costs=node_costs_HEPT,
    budget=budget,
    epsilon_val=epsilon_val,
    num_samples=num_samples,
    dataset_name="NetHEPT",
)

### NetPHY Experiments

In [None]:
# Run all algorithms on NetPHY with the budget / epsilon_val / num_samples defined for the NetHEPT run
solutions_PHY, influences_PHY, costs_PHY, runtimes_PHY = findApproximateParetoSolutionsKnapsackInfluence(
    G=G_PHY,
    node_costs=node_costs_PHY,
    budget=budget,
    epsilon_val=epsilon_val,
    num_samples=num_samples,
    dataset_name="NetPHY",
)