In [198]:
# Reload modules automatically
# https://ipython.readthedocs.io/en/stable/config/extensions/autoreload.html
# NOTE: re-running this cell in a live kernel only prints an
# "already loaded" notice from %load_ext (see the cell output); it is harmless.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [199]:
from collections import Counter
import logging

import numpy as np
import torch
from strn_and_rbstness.data import GraphDataset, split
from strn_and_rbstness.helper.utils import accuracy
from strn_and_rbstness.models import create_model
from strn_and_rbstness.train import _train
from common import CSBM, get_sbm_model, add_adversarial_edge

# Configure the root logger so that INFO-level messages are emitted.
logger = logging.getLogger(None)
logger.setLevel("INFO")

## Experiment Code

In [200]:
def _record_by_degree(samples_by_degree, degree, value):
    """Append ``value`` to the sample list kept for ``degree`` (created on first use)."""
    samples_by_degree.setdefault(degree, []).append(value)


def _summarise_by_degree(samples_by_degree, stat_fn):
    """Apply ``stat_fn`` to every non-empty per-degree sample list."""
    return {
        degree: stat_fn(samples)
        for degree, samples in samples_by_degree.items()
        if len(samples) > 0
    }


def _order_by_degree(stat_by_degree, max_deg, missing=-1):
    """Return the statistic for degrees 0..max_deg as a list, ``missing`` where absent."""
    return [stat_by_degree.get(degree, missing) for degree in range(max_deg + 1)]


@torch.no_grad()
def _gnn_separable(model, X_gpu, A_gpu, y_gpu, node):
    """Return ``round(accuracy(...))`` of ``model`` on ``node``.

    The result is used as a truthy "GNN classifies this node correctly" flag.
    The forward pass runs without autograd tracking because it is evaluation only.
    """
    logits = model(X_gpu, A_gpu)
    return round(accuracy(logits, y_gpu, node))


def perform_experiment(n, seed, n_iter=1000, avg_intra_degree=1.5 * 2, avg_inter_degree=0.5 * 2):
    """Train a GNN on one sampled graph and compare its robustness with the Bayes classifier.

    A graph ``(X, A, y)`` is sampled from the model returned by
    ``get_sbm_model(n, avg_intra_degree, avg_inter_degree)`` and a GNN is trained on it.
    Then, ``n_iter`` times, ``csbm.sample_conditional`` draws a graph in which the node
    with index ``n`` is inspected: it is checked whether the Bayes classifier (BC) and the
    GNN classify that node correctly, and adversarial edges are added to it one at a time
    (``add_adversarial_edge``) until neither classifier is correct any more. For each
    classifier the recorded robustness is the number of adversarial edges it withstood
    before its prediction flipped (0 means the first added edge already flipped it),
    grouped by the node's degree before the attack. All statistics are printed.

    The function reads the notebook-level globals ``split_params``, ``model_params``,
    ``train_params``, ``verbosity_params`` and ``device``; the cell defining them must
    have been executed first.

    Parameters
    ----------
    n : int
        Size passed to ``csbm.sample``; also the index of the inspected node.
    seed : int
        Seed for graph sampling, the data split, ``torch`` and ``numpy``.
    n_iter : int
        Number of conditionally sampled nodes to evaluate.
    avg_intra_degree, avg_inter_degree : float
        Forwarded to ``get_sbm_model``.

    Returns
    -------
    None
    """
    # X, A ~ CSBM(n, p, q, mu, cov)
    csbm = get_sbm_model(n, avg_intra_degree, avg_inter_degree)
    X, A, y = csbm.sample(n, seed)

    # Train
    torch.manual_seed(seed)
    np.random.seed(seed)
    split_ids = split(y, split_params, seed)
    X_gpu = torch.tensor(X, dtype=torch.float32, device=device)
    A_gpu = torch.tensor(A, dtype=torch.float32, device=device)
    y_gpu = torch.tensor(y, device=device)
    graph = GraphDataset((X_gpu, A_gpu, y_gpu), split_ids)
    model_params_trn = dict(**model_params,
                            n_features=graph.get_n_features(),
                            n_classes=graph.get_n_classes())
    model = create_model(model_params_trn).to(device)
    _train(model, graph, train_params, verbosity_params, None)

    # Evaluate Robustness of GNN w.r.t. Bayes
    model.eval()

    # Statistics Regarding Bayes & GNN Predictions
    c_acc_bayes = 0 # Count nodes correctly classified by bayes classifier
    c_acc_bayes_deg = Counter()  # Above but for each degree
    c_acc_bayes_structure = 0 # Count nodes separable by structure alone
    c_acc_bayes_structure_deg = Counter() # Above but for each degree
    c_acc_bayes_feature = 0 # Count nodes separable by features alone (degree dependent
                            # doesn't make sense as features independent of connections)
    c_acc_bayes_not_gnn = 0 # Decisions where BC correct but GNN wrong
    c_acc_bayes_not_gnn_deg = Counter() # Above but for each degree
    c_acc_gnn = 0 # Count nodes correctly classified by gnn
    c_acc_gnn_deg = Counter() # Above but for each degree
    c_acc_gnn_not_bayes = 0 # Decisions where GNN correctly says true even though BC violated
    c_acc_gnn_not_bayes_deg = Counter() # Above but for each degree
    c_acc_bayes_gnn = 0 # Count nodes correctly classified by bc & gnn
    c_acc_bayes_gnn_deg = Counter() # Above but for each degree
    c_degree_total = Counter() # Count degrees of all generated nodes
    # Statistics Regarding Bayes & GNN Robustness (degree -> list of robustness values)
    c_bayes_robust = dict() # Degree-dependent robustness BC
    c_gnn_robust = dict() # Degree-dependent robustness GNN
    c_bayes_gnn_robust = dict() # Degree-dependent robustness of GNN w.r.t. BC
    c_bayes_robust_when_both = dict() # Degree-dependent robustness of Bayes on GNN w.r.t. Bayes Nodes
    c_gnn_robust_when_both = dict() # Degree-dependent robustness of GNN on GNN w.r.t. Bayes Nodes
    c_bayes_higher_robust = 0 # Number of times BC is more robust than GNN
    c_gnn_higher_robust = 0 # Number of times GNN is "overly robust"
    c_bayes_gnn_equal_robust = 0 # Number of times GNN has perfect robustness w.r.t. BC

    # Re-seed so the evaluation samples do not depend on how much randomness training consumed.
    torch.manual_seed(seed)
    np.random.seed(seed)
    for _ in range(n_iter):
        # ToDo: Create empty X_, A_, y_ templates & always only fill last row
        X_, A_, y_ = csbm.sample_conditional(n=1, X=X, A=A, y=y)
        deg_n = np.sum(A_[:,n])  # degree of the inspected node before any attack
        c_degree_total[deg_n] += 1

        # Statistics Bayes Classifier
        feature_separable, _ = csbm.feature_separability(X_, y_, [n])
        structure_separable, _ = csbm.structure_separability(A_, y_, [n])
        bayes_separable, _ = csbm.likelihood_separability(X_, A_, y_, [n])
        if bayes_separable:
            c_acc_bayes += 1
            c_acc_bayes_deg[deg_n] += 1
        if structure_separable:
            c_acc_bayes_structure += 1
            c_acc_bayes_structure_deg[deg_n] += 1
        if feature_separable:
            c_acc_bayes_feature += 1

        # Calculate GNN-prediction
        X_gpu = torch.tensor(X_, dtype=torch.float32, device=device)
        A_gpu = torch.tensor(A_, dtype=torch.float32, device=device)
        y_gpu = torch.tensor(y_, device=device)
        gnn_separable = _gnn_separable(model, X_gpu, A_gpu, y_gpu, n)

        # Statistics Prediction
        if gnn_separable:
            c_acc_gnn += 1
            c_acc_gnn_deg[deg_n] += 1
            if bayes_separable:
                c_acc_bayes_gnn += 1
                c_acc_bayes_gnn_deg[deg_n] += 1
            else:
                c_acc_gnn_not_bayes += 1
                c_acc_gnn_not_bayes_deg[deg_n] += 1
        elif bayes_separable:
            c_acc_bayes_not_gnn += 1
            c_acc_bayes_not_gnn_deg[deg_n] += 1

        # Investigate Robustness
        c_robustness = 0  # adversarial edges withstood so far
        bayes_separable_new = 0
        gnn_separable_new = 0
        # "GNN w.r.t. Bayes" statistics only apply to nodes both classifiers get right initially.
        gnn_wrt_bayes_setting = bool(bayes_separable and gnn_separable)
        while bayes_separable or gnn_separable:
            # NOTE(review): the BC check below reads A_, so add_adversarial_edge presumably
            # inserts the edge into A_ in place - confirm; only the GPU copy is updated here.
            j = add_adversarial_edge(n, A_, y_) #ToDo: For speed, calc pot_neighbours once!
            A_gpu[n, j] = 1
            A_gpu[j, n] = 1

            # Robustness of BC
            if bayes_separable:
                bayes_separable_new, _ = csbm.likelihood_separability(X_, A_, y_, [n])
                if not bayes_separable_new:
                    _record_by_degree(c_bayes_robust, deg_n, c_robustness)
                    if gnn_wrt_bayes_setting:
                        _record_by_degree(c_bayes_robust_when_both, deg_n, c_robustness)
            # Robustness of GNN
            if gnn_separable:
                gnn_separable_new = _gnn_separable(model, X_gpu, A_gpu, y_gpu, n)
                if not gnn_separable_new:
                    _record_by_degree(c_gnn_robust, deg_n, c_robustness)
                    if gnn_wrt_bayes_setting:
                        _record_by_degree(c_gnn_robust_when_both, deg_n, c_robustness)
            # Robustness of GNN w.r.t. BC: recorded at the first edge that flips either one.
            if bayes_separable and gnn_separable:
                c_bayes_gnn_robust.setdefault(deg_n, [])
                if not bayes_separable_new and not gnn_separable_new:
                    c_bayes_gnn_equal_robust += 1
                    c_bayes_gnn_robust[deg_n].append(c_robustness)
                elif bayes_separable_new and not gnn_separable_new:
                    c_bayes_higher_robust += 1
                    c_bayes_gnn_robust[deg_n].append(c_robustness)
                elif not bayes_separable_new and gnn_separable_new:
                    c_gnn_higher_robust += 1
                    c_bayes_gnn_robust[deg_n].append(c_robustness)

            bayes_separable = bayes_separable_new
            gnn_separable = gnn_separable_new
            c_robustness += 1

    # Postprocess robustness counts to per-degree summary statistics.
    # The robustness dicts are summarised directly (instead of being indexed with the keys
    # of the accuracy counters), so a degree without recorded samples cannot raise.
    avg_bayes_robust = _summarise_by_degree(c_bayes_robust, np.mean)
    med_bayes_robust = _summarise_by_degree(c_bayes_robust, np.median)
    std_bayes_robust = _summarise_by_degree(c_bayes_robust, np.std)
    max_bayes_robust = _summarise_by_degree(c_bayes_robust, np.max)
    avg_gnn_robust = _summarise_by_degree(c_gnn_robust, np.mean)
    med_gnn_robust = _summarise_by_degree(c_gnn_robust, np.median)
    std_gnn_robust = _summarise_by_degree(c_gnn_robust, np.std)
    max_gnn_robust = _summarise_by_degree(c_gnn_robust, np.max)
    avg_bayes_gnn_robust = _summarise_by_degree(c_bayes_gnn_robust, np.mean)
    avg_bayes_robust_when_both = _summarise_by_degree(c_bayes_robust_when_both, np.mean)
    avg_gnn_robust_when_both = _summarise_by_degree(c_gnn_robust_when_both, np.mean)

    # Order summary statistics by degree; -1 marks degrees without samples.
    max_deg = max(c_degree_total.keys(), default=-1)  # -1 -> nothing to print when n_iter == 0
    ordered_avg_bayes_robust = _order_by_degree(avg_bayes_robust, max_deg)
    ordered_med_bayes_robust = _order_by_degree(med_bayes_robust, max_deg)
    ordered_std_bayes_robust = _order_by_degree(std_bayes_robust, max_deg)
    ordered_max_bayes_robust = _order_by_degree(max_bayes_robust, max_deg)
    ordered_avg_gnn_robust = _order_by_degree(avg_gnn_robust, max_deg)
    ordered_med_gnn_robust = _order_by_degree(med_gnn_robust, max_deg)
    ordered_std_gnn_robust = _order_by_degree(std_gnn_robust, max_deg)
    ordered_max_gnn_robust = _order_by_degree(max_gnn_robust, max_deg)
    ordered_bayes_gnn_robust = _order_by_degree(avg_bayes_gnn_robust, max_deg)
    ordered_bayes_robust_when_both = _order_by_degree(avg_bayes_robust_when_both, max_deg)
    ordered_gnn_robust_when_both = _order_by_degree(avg_gnn_robust_when_both, max_deg)

    print("Prediction Statistics:")
    print(f"Count BC: {c_acc_bayes}; GNN: {c_acc_gnn}")
    print(f"Count Structure BC: {c_acc_bayes_structure}; Feature BC: {c_acc_bayes_feature}")
    print(f"Count BC and GNN: {c_acc_bayes_gnn} ")
    print(f"Count BC not GNN: {c_acc_bayes_not_gnn}; "
        f"GNN not BC: {c_acc_gnn_not_bayes}")
    print("Robustness Statistics:")
    print(f"BC more robust than GNN: {c_bayes_higher_robust}")
    print(f"BC & GNN equal robustness: {c_bayes_gnn_equal_robust}")
    print(f"BC less robust than GNN: {c_gnn_higher_robust}")

    for deg in range(max_deg+1):
        print(f"Degree {deg}: <BC robust>: {ordered_avg_bayes_robust[deg]:.2f}; <GNN robust>: "
            f"{ordered_avg_gnn_robust[deg]:.2f};")

    for deg in range(max_deg+1):
        print(f"Degree {deg}: Median(BC robust): {ordered_med_bayes_robust[deg]:.2f}; Median(GNN robust): "
            f"{ordered_med_gnn_robust[deg]:.2f};")

    for deg in range(max_deg+1):
        print(f"Degree {deg}: Max(BC robust): {ordered_max_bayes_robust[deg]:.2f}; Max(GNN robust): "
            f"{ordered_max_gnn_robust[deg]:.2f};")

    for deg in range(max_deg+1):
        print(f"Degree {deg}: Std(BC robust): {ordered_std_bayes_robust[deg]:.2f}; Std(GNN robust): "
            f"{ordered_std_gnn_robust[deg]:.2f};")

    for deg in range(max_deg+1):
        print(f"Degree {deg}: <GNN wrt BC robust>: {ordered_bayes_gnn_robust[deg]:.2f}/"
            f"{ordered_bayes_robust_when_both[deg]:.2f}. <GNN in wrt BC setting>: "
            f"{ordered_gnn_robust_when_both[deg]:.2f}")

## GCN

In [201]:
# Model hyper-parameters; read by perform_experiment via the global `model_params`.
model_params = dict(
    label="GCN",
    model="DenseGCN", #GCN or DenseGCN
    n_filters=64,
    dropout=0.5
)
# Training hyper-parameters; read by perform_experiment via the global `train_params`.
train_params = dict(
    loss_type="CE",
    lr=1e-2,
    weight_decay=1e-3,
    patience=300,
    max_epochs=1000,
    use_selftrain = False, 
    use_advtrain = False,
)
attack = "LocalDICEUndirected"
attack_params = dict()

# Other
split_params = {
    "strategy": "normal", # or "custom"
    "p_trn": 1,
    "p_tst": 0, # "normal" uses 1 - p_trn, only for custom split strategy
    "p_selftrn": 0 # Refers to unlabeled data, which is not test data, 
                    # only for custom split strategy
}
verbosity_params = dict(
    display_steps = 1001
)   
# Device
# Use the selected GPU when CUDA is available, otherwise fall back to the CPU.
# (Previously the fallback branch was a no-op comparison, leaving `device` as the integer 0.)
cuda_index = 0
if torch.cuda.is_available():
    device = torch.device(f"cuda:{cuda_index}")
    logging.info(f"Currently on gpu device {device}")
else:
    device = torch.device("cpu")
    logging.warning("CUDA is not available, set device to 'cpu'")
attack_params["data_device"] = device

INFO:root:Currently on gpu device cuda:0


In [202]:
# Settings for this run; kept as notebook-level names.
seed = 1
n = 1000       # size passed to csbm.sample / index of the inspected node
n_iter = 1000  # number of conditionally sampled nodes to evaluate
avg_intra_degree = 1.5 * 2 # intra_edges_per_node * 2
avg_inter_degree = 0.5 * 2
perform_experiment(
    n=n,
    seed=seed,
    n_iter=n_iter,
    avg_intra_degree=avg_intra_degree,
    avg_inter_degree=avg_inter_degree,
)

INFO:root:
Epoch    0: loss_train: 0.69339, loss_val: 0.69347, acc_train: 0.48800, acc_val: 0.49000
INFO:root:
Epoch  250: loss_train: 0.49040, loss_val: 0.59152, acc_train: 0.76800, acc_val: 0.69400


Prediction Statistics:
Count BC: 845; GNN: 674
Count Structure BC: 842; Feature BC: 597
Count BC and GNN: 619 
Count BC not GNN: 226; GNN not BC: 55
Robustness Statistics:
BC more robust than GNN: 135
BC & GNN equal robustness: 57
BC less robust than GNN: 427
Degree 0: <BC robust>: 0.00; <GNN robust>: 1.83;
Degree 1: <BC robust>: 0.61; <GNN robust>: 6.56;
Degree 2: <BC robust>: 1.26; <GNN robust>: 10.17;
Degree 3: <BC robust>: 1.59; <GNN robust>: 9.32;
Degree 4: <BC robust>: 2.17; <GNN robust>: 10.95;
Degree 5: <BC robust>: 2.69; <GNN robust>: 12.38;
Degree 6: <BC robust>: 3.01; <GNN robust>: 11.49;
Degree 7: <BC robust>: 3.58; <GNN robust>: 10.70;
Degree 8: <BC robust>: 4.04; <GNN robust>: 8.15;
Degree 9: <BC robust>: 4.60; <GNN robust>: 15.90;
Degree 10: <BC robust>: 5.00; <GNN robust>: 17.00;
Degree 11: <BC robust>: 7.50; <GNN robust>: 28.50;
Degree 0: Median(BC robust): 0.00; Median(GNN robust): 0.50;
Degree 1: Median(BC robust): 1.00; Median(GNN robust): 3.00;
Degree 2: Median(BC 

## APPNP

In [203]:
# Model hyper-parameters; read by perform_experiment via the global `model_params`.
model_params = dict(
    label="APPNP",
    model="APPNP",
    n_hidden=64,
    dropout=0.,
    K=10,
    alpha=0.1
)
# Training hyper-parameters; read by perform_experiment via the global `train_params`.
train_params = dict(
    loss_type="CE",
    lr=1e-2,
    weight_decay=1e-3,
    patience=300,
    max_epochs=1000,
    use_selftrain = False, 
    use_advtrain = False,
)
# Created here as well so that this cell does not depend on an earlier cell having run.
attack_params = dict()

# Other
split_params = {
    "strategy": "normal", # or "custom"
    "p_trn": 1,
    "p_tst": 0, # "normal" uses 1 - p_trn, only for custom split strategy
    "p_selftrn": 0 # Refers to unlabeled data, which is not test data, 
                    # only for custom split strategy
}
verbosity_params = dict(
    display_steps = 1001
)   
# Device
# Use the selected GPU when CUDA is available, otherwise fall back to the CPU.
# (Previously the fallback branch was a no-op comparison, leaving `device` as the integer 0.)
cuda_index = 0
if torch.cuda.is_available():
    device = torch.device(f"cuda:{cuda_index}")
    logging.info(f"Currently on gpu device {device}")
else:
    device = torch.device("cpu")
    logging.warning("CUDA is not available, set device to 'cpu'")
attack_params["data_device"] = device

INFO:root:Currently on gpu device cuda:0


In [204]:
# Settings for this run; kept as notebook-level names.
seed = 1
n = 1000       # size passed to csbm.sample / index of the inspected node
n_iter = 1000  # number of conditionally sampled nodes to evaluate
avg_intra_degree = 1.5 * 2 # intra_edges_per_node * 2
avg_inter_degree = 0.5 * 2
perform_experiment(
    n=n,
    seed=seed,
    n_iter=n_iter,
    avg_intra_degree=avg_intra_degree,
    avg_inter_degree=avg_inter_degree,
)

INFO:root:
Epoch    0: loss_train: 0.69477, loss_val: 0.69559, acc_train: 0.47800, acc_val: 0.47800
INFO:root:
Epoch  999: loss_train: 0.48493, loss_val: 0.60393, acc_train: 0.80600, acc_val: 0.69400


Prediction Statistics:
Count BC: 867; GNN: 669
Count Structure BC: 864; Feature BC: 581
Count BC and GNN: 615 
Count BC not GNN: 252; GNN not BC: 54
Robustness Statistics:
BC more robust than GNN: 110
BC & GNN equal robustness: 48
BC less robust than GNN: 457
Degree 0: <BC robust>: 0.00; <GNN robust>: 10.89;
Degree 1: <BC robust>: 0.61; <GNN robust>: 7.07;
Degree 2: <BC robust>: 1.19; <GNN robust>: 10.73;
Degree 3: <BC robust>: 1.57; <GNN robust>: 10.74;
Degree 4: <BC robust>: 2.07; <GNN robust>: 14.31;
Degree 5: <BC robust>: 2.58; <GNN robust>: 12.22;
Degree 6: <BC robust>: 2.86; <GNN robust>: 12.29;
Degree 7: <BC robust>: 3.43; <GNN robust>: 19.03;
Degree 8: <BC robust>: 3.94; <GNN robust>: 15.76;
Degree 9: <BC robust>: 4.23; <GNN robust>: 18.57;
Degree 10: <BC robust>: 5.00; <GNN robust>: 28.20;
Degree 11: <BC robust>: 3.00; <GNN robust>: -1.00;
Degree 12: <BC robust>: 8.00; <GNN robust>: 4.50;
Degree 0: Median(BC robust): 0.00; Median(GNN robust): 5.00;
Degree 1: Median(BC robust):

In [164]:
# Plot
# NOTE(review): this cell relies on hidden kernel state. Neither `plt` nor `ordered_c`
# is defined in any cell visible in this notebook, and the execution count is out of
# sequence, so it presumably fails with NameError on a fresh kernel. The displayed
# output (a Counter) also does not match this code - TODO confirm and fix or remove.
fig, axs = plt.subplots(1, 1)
# Draws `ordered_c` against its positional index as a line with circle markers.
axs.plot(range(len(ordered_c)), ordered_c, 'o-')
#axs.set_xlim(left=0, right=15)

Counter({3: 205, 4: 183, 5: 180, 2: 149, 6: 103, 1: 69, 7: 55, 8: 28, 0: 11, 9: 11, 10: 3, 11: 2, 12: 1})


In [90]:
# NOTE(review): leftover debugging expression. In the visible cells `A_` is only assigned
# inside perform_experiment, so this presumably depends on stale kernel state - TODO remove.
# Sums column `n` of `A_`.
np.sum(A_[:,n])

0