In [17]:
import os
import random
import pandas as pd
import networkx as nx
from tqdm import tqdm

In [18]:
# Project-local helper modules, pulled in with wildcard imports.
# NOTE(review): load_network, load_json and create_model are called further
# down but never defined in this notebook, so they presumably come from these
# two modules — confirm which module provides which and prefer explicit imports.
from NetworkGen import *
from GurobiSolver import *

In [19]:
# Directory one level above the current working directory; every other path is built from it
parent_dir = os.path.split(os.getcwd())[0]

# Construct train data set

In [20]:
# Folder layout for the training split (every path hangs off parent_dir)
data = "Train"
# Output folder for the generated CSV files; created if it does not exist yet
data_dir = os.path.join(parent_dir, "Data")
os.makedirs(data_dir, exist_ok=True)
# Input networks of this split
network_dir = os.path.join(parent_dir, "Networks", data)
# Stored model results; the "Model_0" sub-folder is read later for the reference objective values
models_dir = os.path.join(parent_dir, "Models-gp", data)
opt_0_dir = os.path.join(parent_dir, "Models-gp", data, "Model_0")

In [22]:
# Build the labeled training set: one row per arc of every training network,
# holding structural features of the arc plus the label F_r (percentage
# reduction of the maximum flow obtained when that arc is interdicted).
cols = ["id", "k", "arc", "tail_in_degree", "tail_out_degree", "head_in_degree", "head_out_degree", "d_s", "d_t", "C_r", "F_r"]
vals = []
# Relative tolerance for comparing solver objective values (floating point results
# must not be compared with ==)
OBJ_TOL = 1e-6
# Iterate over the train networks; sorted because os.listdir returns entries in
# arbitrary order and the CSV row order should be reproducible
for f in tqdm(sorted(os.listdir(network_dir))):
    # Skip hidden entries (.DS_Store, .ipynb_checkpoints, ...) and sub-directories
    if f.startswith('.') or not os.path.isfile(os.path.join(network_dir, f)):
        continue
    # splitext copes with names containing zero or several dots, unlike f.split('.')
    name = os.path.splitext(f)[0]
    G = load_network(os.path.join(network_dir, name))
    # Get max flow
    opt_0 = load_json(name, opt_0_dir)["ObjVal"]
    # Arcs interdicted so far -> objective value obtained when each one was interdicted.
    # Keys are (str, str) pairs because they are parsed from the variable names.
    z_bar = {}
    # Create the basic model
    m = create_model(G, name, B=1, unit_cost=True)
    for _round in range(G.number_of_edges()):
        # Update and solve the model
        m.update()
        m.optimize()
        # Stop once interdiction no longer lowers the objective below the max flow
        if abs(m.ObjVal - opt_0) <= OBJ_TOL * max(1.0, abs(opt_0)):
            break
        # Find the interdicted arc: first "z[u,v]" variable that is switched on
        # (0.5 threshold so that solver noise such as 1e-9 is not read as "on")
        interdicted = None
        for var in m.getVars():
            if var.VarName.startswith("z[") and var.X > 0.5:
                interdicted = var
                break
        # Nothing was interdicted: the model would stay unchanged, so re-solving is pointless
        if interdicted is None:
            break
        # Variable names look like "z[u,v]"; strip the "z[" prefix and the "]" suffix
        u, v = interdicted.VarName[2:-1].split(',')
        # Add the arc to the collection of interdicted arcs
        z_bar[(u, v)] = m.ObjVal
        # Forbid this arc in the following rounds
        m.addConstr(interdicted == 0)
    # Construct the labeled train set
    net_id, k = name.split('_')[1:]
    N = list(G.nodes())
    source, sink = N[0], N[-1]  # first / last node in the graph's node order
    sink_level = G.nodes[sink]["subset"]
    c = nx.get_edge_attributes(G, 'c')
    c_max = max(c.values())
    for u, v in G.edges:
        # The in-degree of the source tail and the out-degree of the sink head are recorded as 1
        in_degree_u = G.in_degree(u) if u != source else 1
        out_degree_v = G.out_degree(v) if v != sink else 1
        d_s = G.nodes[u]["subset"]/sink_level # tail distance from the source / number of levels
        d_t = (sink_level - G.nodes[u]["subset"])/sink_level # tail distance from the sink / number of levels
        # Reduction in the maximum flow (0 for arcs that were never interdicted).
        # Look up by string key so the match also works when graph nodes are not strings.
        key = (str(u), str(v))
        F_r = ((opt_0 - z_bar[key]) / opt_0) * 100 if key in z_bar else 0
        # Append the values
        vals.append([net_id, k, f"({u},{v})", in_degree_u, G.out_degree(u), G.in_degree(v), out_degree_v, d_s, d_t, c[(u, v)]/c_max, F_r])
# Save the labeled train set
df = pd.DataFrame(vals, columns=cols)
df.to_csv(os.path.join(data_dir, "train.csv"), index=False)

100%|██████████| 20/20 [00:11<00:00,  1.72it/s]


# Construct test data set

In [4]:
# Folder layout for the test split (every path hangs off parent_dir)
data = "Test"
# Output folder for the generated CSV files; created if it does not exist yet
data_dir = os.path.join(parent_dir, "Data")
# Input networks of this split
network_dir = os.path.join(parent_dir, "Networks", data)
os.makedirs(data_dir, exist_ok=True)

In [28]:
# Build the test set: one row per arc of every test network with the same
# structural features as the training set, but without the F_r label.
cols = ["id", "k", "arc", "tail_in_degree", "tail_out_degree", "head_in_degree", "head_out_degree", "d_s", "d_t", "C_r"]
vals = []
# Iterate over the test networks; sorted because os.listdir returns entries in
# arbitrary order and the CSV row order should be reproducible
for f in tqdm(sorted(os.listdir(network_dir))):
    # Skip hidden entries (.DS_Store, .ipynb_checkpoints, ...) and sub-directories
    if f.startswith('.') or not os.path.isfile(os.path.join(network_dir, f)):
        continue
    # splitext copes with names containing zero or several dots, unlike f.split('.')
    name = os.path.splitext(f)[0]
    G = load_network(os.path.join(network_dir, name))
    # Construct the feature rows of the test set
    net_id, k = name.split('_')[1:]
    N = list(G.nodes())
    source, sink = N[0], N[-1]  # first / last node in the graph's node order
    sink_level = G.nodes[sink]["subset"]
    c = nx.get_edge_attributes(G, 'c')
    c_max = max(c.values())
    for u, v in G.edges:
        # The in-degree of the source tail and the out-degree of the sink head are recorded as 1
        in_degree_u = G.in_degree(u) if u != source else 1
        out_degree_v = G.out_degree(v) if v != sink else 1
        d_s = G.nodes[u]["subset"]/sink_level # tail distance from the source / number of levels
        d_t = (sink_level - G.nodes[u]["subset"])/sink_level # tail distance from the sink / number of levels
        # Append the values
        vals.append([net_id, k, f"({u},{v})", in_degree_u, G.out_degree(u), G.in_degree(v), out_degree_v, d_s, d_t, c[(u, v)]/c_max])
# Save the test set
df = pd.DataFrame(vals, columns=cols)
df.to_csv(os.path.join(data_dir, "test.csv"), index=False)

100%|██████████| 53/53 [00:21<00:00,  2.44it/s]
