In [2]:
import corneto as cn
import pandas as pd
from corneto.methods.steiner import exact_steiner_tree, create_exact_multi_steiner_tree
import os
import json
import numpy as np

# Run configuration read by the functions and script code below.
max_time = 600            # handed to the solver as TimeLimit
mode = "single"           # "single" or "multi"
lambd = 0.01              # default for the `lambd` parameter of single_condition / multi_condition
dataset = "datasets/d1/"  # folder holding dataset_config.json and the graph_sample_{i}.pkl.xz files

def get_prizes(G):
    """Map every vertex in ``G.V`` to its ``"prize"`` attribute (0 when the attribute is absent)."""
    prize_by_vertex = {}
    for vertex in G.V:
        attributes = G.get_attr_vertex(vertex)
        prize_by_vertex[vertex] = attributes.get("prize", 0)
    return prize_by_vertex

def eval_sol(G, s):
    """Score the edge selection ``s`` on graph ``G``.

    Returns a pair ``(fraction, shape)``:
    - ``fraction`` is the sum of prizes over the vertices of the subgraph that
      ``G.edge_subgraph`` builds from the nonzero positions of ``s``, divided by
      the sum of prizes over all vertices of ``G``;
    - ``shape`` is the ``shape`` attribute of that subgraph.
    """
    prize_total = sum(get_prizes(G).values())
    chosen_edge_idx = np.flatnonzero(s)
    subgraph = G.edge_subgraph(chosen_edge_idx)
    prize_selected = sum(get_prizes(subgraph).values())
    fraction = prize_selected / prize_total
    return fraction, subgraph.shape

def single_condition(samples, lambd=lambd):
    """Solve one exact Steiner tree problem per sample, independently of the others.

    Parameters
    ----------
    samples : sequence of graphs
        Graphs whose vertices may carry a ``"prize"`` attribute. The edge-usage
        accumulator is sized from the first sample, so every sample is assumed to
        have the same number of edges -- TODO confirm against the dataset generator.
    lambd : float
        Forwarded to ``exact_steiner_tree`` as ``edge_weights``.

    Returns
    -------
    problems : list
        The solved problem object of every sample, in input order.
    results : list
        The ``eval_sol`` output of every sample, in input order.
    sel : numpy.ndarray
        Per-edge count of the samples whose solution uses that edge.

    Raises
    ------
    ValueError
        If ``samples`` is empty (there is no graph to size the accumulator from).
    """
    if len(samples) == 0:
        raise ValueError("single_condition needs at least one sample graph")

    sel = np.zeros(samples[0].shape[1])
    results = []
    problems = []
    for i, G in enumerate(samples):
        prizes = get_prizes(G)
        total_prizes = sum(prizes.values())
        print(i, G.shape, total_prizes)
        # The second return value is not used here.
        P, _ = exact_steiner_tree(G, prizes, edge_weights=lambd)
        P.solve(solver="GUROBI", IntegralityFocus=1, TimeLimit=max_time, verbosity=0)
        problems.append(P)
        for n, o in zip(["Edge cost", "Prizes"], P.objectives):
            print(f"- {n}:", o.value)
        # Selected edges: an edge counts as used when its flow value is at least 1e-6.
        # Only the first G.shape[1] entries are read; presumably the flow vector has
        # extra entries for edges added by the formulation -- TODO confirm.
        s = (P.expr._flow.value[:G.shape[1]] >= 1e-6).astype(int)
        results.append(eval_sol(G, s))
        sel += s
    return problems, results, sel

def multi_condition(samples, lambd=lambd):
    """Solve a single joint Steiner tree problem covering all samples at once.

    Parameters
    ----------
    samples : sequence of graphs
        One graph per condition; the prizes of sample ``i`` become condition ``i``.
        The problem is built on the last sample only, so all samples are assumed to
        share the same edges -- TODO confirm against the dataset generator.
    lambd : float
        Forwarded to ``create_exact_multi_steiner_tree`` as ``lam``.

    Returns
    -------
    P
        The solved joint problem.
    results : list
        The ``eval_sol`` output of every sample, in input order.
    mc_sel_edges : numpy.ndarray
        Per-edge count of the conditions whose solution uses that edge.

    Raises
    ------
    ValueError
        If ``samples`` is empty.
    """
    if len(samples) == 0:
        raise ValueError("multi_condition needs at least one sample graph")

    prizes_per_condition = {i: get_prizes(G) for i, G in enumerate(samples)}

    # Name the graph the problem is built on explicitly. The previous version used
    # whatever the loop variable still held after the prize loop, i.e. the last sample.
    base_graph = samples[-1]
    P = create_exact_multi_steiner_tree(base_graph, prizes_per_condition, lam=lambd)
    P.solve(solver="GUROBI", verbosity=1, IntegralityFocus=1, TimeLimit=max_time)

    mc_sel_edges = np.zeros(base_graph.shape[1])
    results = []
    for i, G in enumerate(samples):
        # Condition i's flow is stored under the key "flow{i}"; an edge counts as
        # used when its flow value is at least 1e-6.
        s = (P.expr[f"flow{i}"].value[:G.shape[1]] >= 1e-6).astype(int)
        results.append(eval_sol(G, s))
        mc_sel_edges += s

    return P, results, mc_sel_edges

# Read the dataset description stored alongside the graph files.
# The encoding is fixed to UTF-8 (the JSON interchange encoding) so decoding does
# not depend on the platform's locale default.
with open(os.path.join(dataset, "dataset_config.json"), "r", encoding="utf-8") as f:
    config = json.load(f)
    print(config)

# Import graphs: one file per sample, indexed 0 .. num_samples - 1.
# NOTE(review): the .pkl.xz extension suggests cn.Graph.load unpickles these files --
# only point `dataset` at trusted data.
samples = []
for i in range(config["num_samples"]):
    graph_file = os.path.join(dataset, f"graph_sample_{i}.pkl.xz")
    g = cn.Graph.load(graph_file)
    print(g.shape)
    samples.append(g)

# Run the formulation selected by `mode`.
# Note: `P` is one joint problem in "multi" mode but a list of per-sample
# problems in "single" mode.
if mode == "multi":
    P, result, selected_edges = multi_condition(samples)
elif mode == "single":
    P, result, selected_edges = single_condition(samples)
else:
    # Any unrecognised value used to fall through to the single-sample path, so a
    # typo would yield results labelled with a mode that was never actually run.
    raise ValueError(f"Unknown mode {mode!r}; expected 'single' or 'multi'")

# Number of edges used by at least one sample's solution. np.count_nonzero counts
# in C instead of iterating the array element by element with the built-in sum,
# and int() keeps the result a plain Python integer.
total_edges_across_samples = int(np.count_nonzero(selected_edges > 0))
# Mean over samples of the first item of each eval_sol result (the prize fraction).
mean_prop_prizes = np.mean([r[0] for r in result])

# One-row summary of this run: dataset description, run settings and the two metrics.
# Every column is given as a one-element list so the frame has exactly one row
# regardless of the type of the individual values.
df_result = pd.DataFrame({
    'dataset': [dataset],
    'num_samples': [config["num_samples"]],
    'num_nodes': [config["num_nodes"]],
    'num_terminals': [config["num_terminals"]],
    'num_common_nodes': [config["num_common_nodes"]],
    'mode': [mode],
    'lambda': [lambd],
    'max_time': [max_time],
    'mean_prop_score': [mean_prop_prizes],
    'total_edges_across_samples': [total_edges_across_samples]
})
df_result


{'num_nodes': 1000, 'new_edges_per_node': 3, 'num_terminals': 5, 'num_common_nodes': 0, 'num_samples': 10, 'edge_cost': 0.01}
(1000, 2991)
(1000, 2991)
(1000, 2991)
(1000, 2991)
(1000, 2991)
(1000, 2991)
(1000, 2991)
(1000, 2991)
(1000, 2991)
(1000, 2991)
0 (1000, 2991) 38.54826796353882
- Edge cost: 0.16999999999999998
- Prizes: 38.54826796353882
1 (1000, 2991) 28.486967901519794
- Edge cost: 0.16
- Prizes: 28.48696790151979
2 (1000, 2991) 37.96384932310346
- Edge cost: 0.15
- Prizes: 37.96384932310346
3 (1000, 2991) 31.391669582685974
- Edge cost: 0.18
- Prizes: 31.39166958268597
4 (1000, 2991) 27.23134384633558
- Edge cost: 0.16
- Prizes: 27.23134384633558
5 (1000, 2991) 21.84609343735116
- Edge cost: 0.16999999999999998
- Prizes: 21.84609343735116
6 (1000, 2991) 21.57162697714521
- Edge cost: 0.18
- Prizes: 21.57162697714521
7 (1000, 2991) 34.2252197601
- Edge cost: 0.16999999999999998
- Prizes: 34.2252197601
8 (1000, 2991) 31.654939797883372
- Edge cost: 0.17
- Prizes: 31.65493979

In [3]:
# Quick look at the two summary numbers computed at the end of the first cell.
total_edges_across_samples, mean_prop_prizes

(89, 1.0)

In [16]:
# Rebuilds the one-row run summary from the same variables as the first cell.
# Every column is given as a one-element list so the frame has exactly one row
# regardless of the type of the individual values.
df_result = pd.DataFrame({
    'dataset': [dataset],
    'num_samples': [config["num_samples"]],
    'num_nodes': [config["num_nodes"]],
    'num_terminals': [config["num_terminals"]],
    'num_common_nodes': [config["num_common_nodes"]],
    'mode': [mode],
    'lambda': [lambd],
    'max_time': [max_time],
    'mean_prop_score': [mean_prop_prizes],
    'total_edges_across_samples': [total_edges_across_samples]
})
df_result

Unnamed: 0,dataset,num_samples,num_nodes,num_terminals,num_common_nodes,mode,lambda,max_time,mean_prop_score,total_edges_across_samples
0,datasets/d1/,10,1000,5,0,single,0.01,600,1.0,89


In [18]:
# Results file name: "pcst", the dataset folder name, then the run settings,
# joined with underscores.
filename = "_".join([
    "pcst",
    os.path.basename(dataset.strip('/')),
    f"lambda{lambd}",
    f"time{max_time}",
    f"{mode}.csv",
])
filename

'pcst_d1_lambda0.01_time600_single.csv'