In [108]:
import os
import pandas as pd
from joblib import load

In [109]:
# Project folders, all resolved relative to the parent of the current working directory
parent_dir = os.path.dirname(os.getcwd())
models_dir, data_dir, predictions_dir = (
    os.path.join(parent_dir, folder)
    for folder in ("Trained Models", "Data", "Predictions")
)
# Only the output folder is created here; no error is raised if it already exists
os.makedirs(predictions_dir, exist_ok=True)

# Import ML models

In [110]:
# Locate the two serialized models inside the models folder
dt_path = os.path.join(models_dir, "dt.joblib")
rf_path = os.path.join(models_dir, "rf.joblib")

# Deserialize them: dt is the decision tree model, rf the random forest model
dt = load(dt_path)
rf = load(rf_path)

# Predict using train data

## Train data preparation

In [111]:
# Read the train data set from the data folder and preview its first rows
train_csv_path = os.path.join(data_dir, "train.csv")
train = pd.read_csv(train_csv_path)
train.head()

Unnamed: 0,id,k,arc,tail_in_degree,tail_out_degree,head_in_degree,head_out_degree,d_s,d_t,C_r,F_r
0,1,1,"(0,1)",1,5,1,2,0.0,1.0,0.75,21.126761
1,1,1,"(0,2)",1,5,1,2,0.0,1.0,0.9,22.535211
2,1,1,"(0,3)",1,5,1,2,0.0,1.0,0.75,21.126761
3,1,1,"(0,4)",1,5,1,2,0.0,1.0,0.75,12.676056
4,1,1,"(0,5)",1,5,1,2,0.0,1.0,0.85,22.535211


In [112]:
# Model input for the train set: everything except id, k, arc and the target variable (F_r).
# NOTE(review): the preview of `train` lists an "Unnamed: 0" header; if that is a real
# CSV column rather than the displayed index, it stays in X_train — confirm.
train_non_feature_cols = ["id", "k", "arc", "F_r"]
X_train = train.drop(train_non_feature_cols, axis="columns")

## Predict the target variable (reduction percentage in max flow)

### Decision tree model


In [113]:
# Output folder for the decision tree's train predictions: <predictions_dir>/dt/Train
dt_train_parts = (predictions_dir, "dt", "Train")
train_dir = os.path.join(*dt_train_parts)
os.makedirs(train_dir, exist_ok=True)  # no error if the folder already exists

In [114]:
# Copy of the train data with the decision tree's predictions in a new "dt" column;
# `train` itself is left untouched
train_dt = train.assign(dt=dt.predict(X_train))

In [115]:
# Save the predictions for each network: one CSV per (id, k) pair, written to train_dir.
# The loop variable is called `network_id` instead of `id`: notebook loop variables are
# globals, so binding `id` here would shadow the builtin id() for the rest of the session.
groups = train_dt.groupby(["id", "k"])
for (network_id, k), group in groups:
    group.to_csv(os.path.join(train_dir, f"network_{network_id}_{k}.csv"), index=False)

### Random forest model

In [116]:
# Output folder for the random forest's train predictions: <predictions_dir>/rf/Train
rf_train_parts = (predictions_dir, "rf", "Train")
train_dir = os.path.join(*rf_train_parts)
os.makedirs(train_dir, exist_ok=True)  # no error if the folder already exists

In [117]:
# Copy of the train data with the random forest's predictions in a new "rf" column;
# `train` itself is left untouched
train_rf = train.assign(rf=rf.predict(X_train))

In [118]:
# Save the predictions for each network: one CSV per (id, k) pair, written to train_dir.
# The loop variable is called `network_id` instead of `id`: notebook loop variables are
# globals, so binding `id` here would shadow the builtin id() for the rest of the session.
groups = train_rf.groupby(["id", "k"])
for (network_id, k), group in groups:
    group.to_csv(os.path.join(train_dir, f"network_{network_id}_{k}.csv"), index=False)

# Predict using test data

## Test data preparation

In [119]:
# Read the test data set from the data folder and preview its first rows
test_csv_path = os.path.join(data_dir, "test.csv")
test = pd.read_csv(test_csv_path)
test.head()

Unnamed: 0,id,k,arc,tail_in_degree,tail_out_degree,head_in_degree,head_out_degree,d_s,d_t,C_r
0,1,1,"(0,1)",1,5,1,2,0.0,1.0,0.25
1,1,1,"(0,2)",1,5,1,2,0.0,1.0,0.15
2,1,1,"(0,3)",1,5,1,2,0.0,1.0,0.2
3,1,1,"(0,4)",1,5,1,2,0.0,1.0,0.75
4,1,1,"(0,5)",1,5,1,2,0.0,1.0,0.65


In [120]:
# Model input for the test set: everything except the id, k and arc columns
test_non_feature_cols = ["id", "k", "arc"]
X_test = test.drop(test_non_feature_cols, axis="columns")

## Predict the target variable (reduction percentage in max flow)

### Decision tree model


In [121]:
# Output folder for the decision tree's test predictions: <predictions_dir>/dt/Test
dt_test_parts = (predictions_dir, "dt", "Test")
test_dir = os.path.join(*dt_test_parts)
os.makedirs(test_dir, exist_ok=True)  # no error if the folder already exists

In [122]:
# Copy of the test data with the decision tree's predictions in a new "dt" column;
# `test` itself is left untouched
test_dt = test.assign(dt=dt.predict(X_test))

In [123]:
# Save the predictions for each network: one CSV per (id, k) pair, written to test_dir.
# The loop variable is called `network_id` instead of `id`: notebook loop variables are
# globals, so binding `id` here would shadow the builtin id() for the rest of the session.
groups = test_dt.groupby(["id", "k"])
for (network_id, k), group in groups:
    group.to_csv(os.path.join(test_dir, f"network_{network_id}_{k}.csv"), index=False)

### Random forest model

### Decision tree model (train data, repeated from the train section)


In [124]:
# Output folder for the decision tree's train predictions: <predictions_dir>/dt/Train
# NOTE(review): this repeats the earlier decision-tree train cell inside the test-data
# section; it looks like a copy-paste leftover — confirm whether it is still needed.
dt_train_parts = (predictions_dir, "dt", "Train")
train_dir = os.path.join(*dt_train_parts)
os.makedirs(train_dir, exist_ok=True)  # no error if the folder already exists

In [125]:
# Copy of the train data with the decision tree's predictions in a new "dt" column
# NOTE(review): same computation as the earlier decision-tree train cell; this repeat
# sits in the test-data section and looks like a copy-paste leftover — confirm.
train_dt = train.assign(dt=dt.predict(X_train))

In [126]:
# Save the predictions for each network: one CSV per (id, k) pair, written to train_dir.
# The loop variable is called `network_id` instead of `id`: notebook loop variables are
# globals, so binding `id` here would shadow the builtin id() for the rest of the session.
# NOTE(review): this repeats the earlier decision-tree train export and writes the same
# file names again; it looks like a copy-paste leftover — confirm.
groups = train_dt.groupby(["id", "k"])
for (network_id, k), group in groups:
    group.to_csv(os.path.join(train_dir, f"network_{network_id}_{k}.csv"), index=False)

### Random forest model

In [127]:
# Output folder for the random forest's train predictions: <predictions_dir>/rf/Train
# NOTE(review): this repeats the earlier random-forest train cell inside the test-data
# section; it looks like a copy-paste leftover — confirm whether it is still needed.
rf_train_parts = (predictions_dir, "rf", "Train")
train_dir = os.path.join(*rf_train_parts)
os.makedirs(train_dir, exist_ok=True)  # no error if the folder already exists

In [128]:
# Copy of the train data with the random forest's predictions in a new "rf" column
# NOTE(review): same computation as the earlier random-forest train cell; this repeat
# sits in the test-data section and looks like a copy-paste leftover — confirm.
train_rf = train.assign(rf=rf.predict(X_train))

In [129]:
# Save the predictions for each network: one CSV per (id, k) pair, written to train_dir.
# The loop variable is called `network_id` instead of `id`: notebook loop variables are
# globals, so binding `id` here would shadow the builtin id() for the rest of the session.
# NOTE(review): this repeats the earlier random-forest train export and writes the same
# file names again; it looks like a copy-paste leftover — confirm.
groups = train_rf.groupby(["id", "k"])
for (network_id, k), group in groups:
    group.to_csv(os.path.join(train_dir, f"network_{network_id}_{k}.csv"), index=False)

In [130]:
# Output folder for the random forest's test predictions: <predictions_dir>/rf/Test
rf_test_parts = (predictions_dir, "rf", "Test")
test_dir = os.path.join(*rf_test_parts)
os.makedirs(test_dir, exist_ok=True)  # no error if the folder already exists

In [131]:
# Copy of the test data with the random forest's predictions in a new "rf" column;
# `test` itself is left untouched
test_rf = test.assign(rf=rf.predict(X_test))

In [132]:
# Save the predictions for each network: one CSV per (id, k) pair, written to test_dir.
# The loop variable is called `network_id` instead of `id`: notebook loop variables are
# globals, so binding `id` here would shadow the builtin id() for the rest of the session.
groups = test_rf.groupby(["id", "k"])
for (network_id, k), group in groups:
    group.to_csv(os.path.join(test_dir, f"network_{network_id}_{k}.csv"), index=False)