In [1]:
import sys
sys.path.append('../src')

import pickle

import torch
import torch.nn.functional as F
import random

from cf_explainer.gcn_conv import GCNConv
from torch.nn import Linear, Sequential, BatchNorm1d, ReLU
import torch_geometric
from torch_geometric.explain import Explainer
from cf_explainer import C2Explainer
from cf_explainer.utils import seed_everything

import numpy as np

import networkx as nx
from pyvis.network import Network

from tqdm.auto import tqdm

import pickle

'''Config parameters'''
use_cuda_if_available = True
device = torch.device('cuda' if torch.cuda.is_available() and use_cuda_if_available else 'cpu')

%env CUBLAS_WORKSPACE_CONFIG=:4096:8

print("PyTorch version:", torch.__version__)
print("PyTorch device:", device)

env: CUBLAS_WORKSPACE_CONFIG=:4096:8
PyTorch version: 2.0.1
PyTorch device: cuda


In [2]:
def results(num_perturbs, prop_perturbs):
    """Print and return summary statistics over the successful explanations.

    Args:
        num_perturbs: One perturbation count per successful explanation.
        prop_perturbs: One perturbation proportion per successful explanation.

    Returns:
        A ``(size, prop)`` tuple. ``size`` is the sum of ``num_perturbs``
        divided by twice the number of entries; ``prop`` is the arithmetic
        mean of ``prop_perturbs``. Both are the string ``"N/A"`` when
        ``num_perturbs`` is empty.
    """
    print("######")
    num_success = len(num_perturbs)
    if num_success == 0:
        # Nothing to average over.
        size = prop = "N/A"
    else:
        # NOTE(review): the factor 2 presumably compensates for undirected
        # edges being counted once per direction -- confirm.
        size = sum(num_perturbs) / (2 * num_success)
        prop = sum(prop_perturbs) / len(prop_perturbs)
    print(f"size: {size}, num_success: {num_success}, prop_perturbs: {prop}")
    print("finished")
    return size, prop


def explain(model, data, explainer, seed=42, max_perturbs=20):
    """Run a counterfactual explainer over every validation/test node.

    Each node selected by ``data.test_mask | data.val_mask`` is explained
    individually. An explanation is counted as a success when it carries a
    ``perturbs`` attribute whose value is below ``max_perturbs``.

    Args:
        model: Node-classification model handed to the PyG ``Explainer``.
        data: Graph object providing ``x``, ``edge_index``, ``test_mask``
            and ``val_mask``.
        explainer: Explanation algorithm instance wrapped by ``Explainer``.
        seed: Seed forwarded to ``seed_everything`` (deterministic mode on).
        max_perturbs: Exclusive upper bound on ``explanation.perturbs`` for
            an explanation to be counted. Defaults to the previously
            hard-coded value 20.

    Returns:
        A list holding one row:
        ``[num_nodes_explained, size, num_success, prop]`` where ``size``
        and ``prop`` come from ``results`` (``"N/A"`` when nothing succeeded).
    """
    seed_everything(seed, deterministic=True)

    # Masks are moved to the CPU before being handed to NumPy.
    condition = (data.test_mask.cpu() | data.val_mask.cpu())
    df_cf = np.where(condition)[0].tolist()

    print(len(df_cf))

    # Separate local name so the `explainer` argument is not shadowed.
    wrapped_explainer = Explainer(
        model=model,
        algorithm=explainer,
        explanation_type='model',
        node_mask_type=None,
        edge_mask_type='object',
        model_config=dict(
            mode='multiclass_classification',
            task_level='node',
            return_type='raw',
        ),
    )

    num_perturbs = []
    prop_perturbs = []

    for i in tqdm(df_cf):
        explanation = wrapped_explainer(data.x, data.edge_index, index=i)

        # NOTE(review): a missing `perturbs` attribute presumably means no
        # counterfactual was found for this node -- confirm in C2Explainer.
        if hasattr(explanation, "perturbs") and explanation.perturbs < max_perturbs:
            num_perturbs.append(explanation.perturbs)
            prop_perturbs.append(explanation.prop_perturbs)

    size, prop = results(num_perturbs, prop_perturbs)

    # `results` returns the string "N/A" when nothing succeeded, so `1 - prop`
    # would raise TypeError; an empty node set would divide by zero.
    fidelity = len(num_perturbs) / len(df_cf) if df_cf else "N/A"
    similarity = 1 - prop if num_perturbs else "N/A"
    print(f"Fedility: {fidelity}, Num_perturbs: {size}, Similarity: {similarity}")

    return [[len(df_cf), size, len(num_perturbs), prop]]

# GCN + BAShapes

In [3]:
class GCN(torch.nn.Module):
    """Three graph-convolution layers followed by a linear output layer.

    Attribute names (``conv1``, ``conv2``, ``conv3``, ``lin``) are part of
    the checkpoint format: the state dict loaded after construction is keyed
    by them, so they must not be renamed.

    Args:
        nhid: Width of the first two convolution layers.
        nout: Width of the third convolution layer (input of ``lin``).
        dropout: Dropout probability applied after each of the first two
            activations (only while ``self.training`` is true).
        nfeat: Number of input node features. Defaults to 10, the value
            that used to be hard-coded.
        nclass: Number of output classes. Defaults to 4, the value that
            used to be hard-coded.
    """

    def __init__(self, nhid, nout, dropout, nfeat=10, nclass=4):
        super().__init__()
        # NOTE(review): normalize=False is passed through to the project's
        # GCNConv; its exact effect is defined there -- confirm.
        self.conv1 = GCNConv(nfeat, nhid, normalize=False)
        self.conv2 = GCNConv(nhid, nhid, normalize=False)
        self.conv3 = GCNConv(nhid, nout, normalize=False)
        self.lin = Linear(nout, nclass)
        self.dropout = dropout

    def forward(self, x, edge_index, edge_weight=None):
        """Return the output of ``lin`` for every node (no softmax applied)."""
        # First two layers share the same conv -> ReLU -> dropout pattern.
        for conv in (self.conv1, self.conv2):
            x = conv(x, edge_index, edge_weight).relu()
            x = F.dropout(x, p=self.dropout, training=self.training)
        # The last convolution feeds the linear head without an activation.
        x = self.conv3(x, edge_index, edge_weight)
        return self.lin(x)

# The layer sizes must match the tensors stored in the checkpoint, otherwise
# load_state_dict raises on the shape mismatch.
model = GCN(nhid=100, nout=20, dropout=0).to(device)

# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# from a GPU still loads when the notebook falls back to the CPU.
model.load_state_dict(
    torch.load("../models/GCN_BAShapes_sd.pt", map_location=device, weights_only=True)
)

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load dataset pickles that come from a trusted source.
with open("../data/BAShapes.pickle", "rb") as f:
    data = pickle.load(f)

data

  return self.fget.__get__(instance, owner)()


Data(x=[700, 10], edge_index=[2, 3958], y=[700], expl_mask=[700], edge_label=[3958], train_mask=[700], val_mask=[700], test_mask=[700])

In [4]:
%%time
# Explainer settings for this run, one keyword per line for easier diffing.
explainer = C2Explainer(
    epochs=1000,
    lr=0.1,
    silent_mode=True,
    undirected=True,
    subgraph_mode=True,
)

# Evaluate on the model/data currently bound in the kernel.
result1 = explain(model, data, explainer, seed=42)

140


  0%|          | 0/140 [00:00<?, ?it/s]

######
size: 1.8452380952380953, num_success: 84, prop_perturbs: 0.04193354770541191
finished
Fedility: 0.6, Num_perturbs: 1.8452380952380953, Similarity: 0.958066463470459
CPU times: user 6min 36s, sys: 925 ms, total: 6min 37s
Wall time: 6min 38s


# GCN + TreeCycle

In [5]:
class GCN(torch.nn.Module):
    """Three graph-convolution layers followed by a linear output layer.

    Attribute names (``conv1``, ``conv2``, ``conv3``, ``lin``) are part of
    the checkpoint format: the state dict loaded after construction is keyed
    by them, so they must not be renamed.

    Args:
        nhid: Width of the first two convolution layers.
        nout: Width of the third convolution layer (input of ``lin``).
        dropout: Dropout probability applied after each of the first two
            activations (only while ``self.training`` is true).
        nfeat: Number of input node features. Defaults to 10, the value
            that used to be hard-coded.
        nclass: Number of output classes. Defaults to 2, the value that
            used to be hard-coded.
    """

    def __init__(self, nhid, nout, dropout, nfeat=10, nclass=2):
        super().__init__()
        # NOTE(review): normalize=False is passed through to the project's
        # GCNConv; its exact effect is defined there -- confirm.
        self.conv1 = GCNConv(nfeat, nhid, normalize=False)
        self.conv2 = GCNConv(nhid, nhid, normalize=False)
        self.conv3 = GCNConv(nhid, nout, normalize=False)
        self.lin = Linear(nout, nclass)
        self.dropout = dropout

    def forward(self, x, edge_index, edge_weight=None):
        """Return the output of ``lin`` for every node (no softmax applied)."""
        # First two layers share the same conv -> ReLU -> dropout pattern.
        for conv in (self.conv1, self.conv2):
            x = conv(x, edge_index, edge_weight).relu()
            x = F.dropout(x, p=self.dropout, training=self.training)
        # The last convolution feeds the linear head without an activation.
        x = self.conv3(x, edge_index, edge_weight)
        return self.lin(x)
    
# The layer sizes must match the tensors stored in the checkpoint, otherwise
# load_state_dict raises on the shape mismatch.
model = GCN(nhid=100, nout=20, dropout=0).to(device)

# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# from a GPU still loads when the notebook falls back to the CPU.
model.load_state_dict(
    torch.load("../models/GCN_TreeCycle_sd.pt", map_location=device, weights_only=True)
)

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load dataset pickles that come from a trusted source.
with open("../data/TreeCycle.pickle", "rb") as f:
    data = pickle.load(f)

data

Data(x=[871, 10], edge_index=[2, 1940], y=[871], train_mask=[871], val_mask=[871], test_mask=[871])

In [6]:
%%time
# Explainer settings for this run, one keyword per line for easier diffing.
explainer = C2Explainer(
    epochs=1000,
    lr=0.1,
    silent_mode=True,
    undirected=True,
    subgraph_mode=True,
)

# Evaluate on the model/data currently bound in the kernel.
result2 = explain(model, data, explainer, seed=42)

174


  0%|          | 0/174 [00:00<?, ?it/s]

######
size: 1.2777777777777777, num_success: 126, prop_perturbs: 0.10564383864402771
finished
Fedility: 0.7241379310344828, Num_perturbs: 1.2777777777777777, Similarity: 0.8943561315536499
CPU times: user 7min 55s, sys: 1 s, total: 7min 56s
Wall time: 8min 8s


# GCN + TreeGrid

In [7]:
class GCN(torch.nn.Module):
    """Three graph-convolution layers followed by a linear output layer.

    Attribute names (``conv1``, ``conv2``, ``conv3``, ``lin``) are part of
    the checkpoint format: the state dict loaded after construction is keyed
    by them, so they must not be renamed.

    Args:
        nhid: Width of the first two convolution layers.
        nout: Width of the third convolution layer (input of ``lin``).
        dropout: Dropout probability applied after each of the first two
            activations (only while ``self.training`` is true).
        nfeat: Number of input node features. Defaults to 10, the value
            that used to be hard-coded.
        nclass: Number of output classes. Defaults to 2, the value that
            used to be hard-coded.
    """

    def __init__(self, nhid, nout, dropout, nfeat=10, nclass=2):
        super().__init__()
        # NOTE(review): normalize=False is passed through to the project's
        # GCNConv; its exact effect is defined there -- confirm.
        self.conv1 = GCNConv(nfeat, nhid, normalize=False)
        self.conv2 = GCNConv(nhid, nhid, normalize=False)
        self.conv3 = GCNConv(nhid, nout, normalize=False)
        self.lin = Linear(nout, nclass)
        self.dropout = dropout

    def forward(self, x, edge_index, edge_weight=None):
        """Return the output of ``lin`` for every node (no softmax applied)."""
        # First two layers share the same conv -> ReLU -> dropout pattern.
        for conv in (self.conv1, self.conv2):
            x = conv(x, edge_index, edge_weight).relu()
            x = F.dropout(x, p=self.dropout, training=self.training)
        # The last convolution feeds the linear head without an activation.
        x = self.conv3(x, edge_index, edge_weight)
        return self.lin(x)
    
# The layer sizes must match the tensors stored in the checkpoint, otherwise
# load_state_dict raises on the shape mismatch.
model = GCN(nhid=100, nout=20, dropout=0).to(device)

# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# from a GPU still loads when the notebook falls back to the CPU.
model.load_state_dict(
    torch.load("../models/GCN_TreeGrid_sd.pt", map_location=device, weights_only=True)
)

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load dataset pickles that come from a trusted source.
with open("../data/TreeGrid.pickle", "rb") as f:
    data = pickle.load(f)

data

Data(x=[1231, 10], edge_index=[2, 3410], y=[1231], train_mask=[1231], val_mask=[1231], test_mask=[1231])

In [8]:
%%time
# Explainer settings for this run, one keyword per line for easier diffing.
explainer = C2Explainer(
    epochs=1000,
    lr=0.1,
    silent_mode=True,
    undirected=True,
    subgraph_mode=True,
)

# Evaluate on the model/data currently bound in the kernel.
result3 = explain(model, data, explainer, seed=42)

246


  0%|          | 0/246 [00:00<?, ?it/s]

######
size: 1.4619883040935673, num_success: 171, prop_perturbs: 0.0688139796257019
finished
Fedility: 0.6951219512195121, Num_perturbs: 1.4619883040935673, Similarity: 0.9311860203742981
CPU times: user 11min 14s, sys: 1.45 s, total: 11min 16s
Wall time: 11min 33s


# GCN + LoanDecision

In [9]:
class GCN(torch.nn.Module):
    """Three graph-convolution layers followed by a two-layer linear head.

    Attribute names (``conv1``, ``conv2``, ``conv3``, ``lin1``, ``lin2``)
    are part of the checkpoint format: the state dict loaded after
    construction is keyed by them, so they must not be renamed.

    Args:
        nhid: Width of the first two convolution layers.
        nout: Width of the third convolution layer and of ``lin1``.
        dropout: Dropout probability applied after each ReLU (only while
            ``self.training`` is true).
        nfeat: Number of input node features. Defaults to 2, the value
            that used to be hard-coded.
        nclass: Number of output classes. Defaults to 2, the value that
            used to be hard-coded.
    """

    def __init__(self, nhid, nout, dropout, nfeat=2, nclass=2):
        super().__init__()
        # NOTE(review): normalize=True is passed through to the project's
        # GCNConv; its exact effect is defined there -- confirm.
        self.conv1 = GCNConv(nfeat, nhid, normalize=True)
        self.conv2 = GCNConv(nhid, nhid, normalize=True)
        self.conv3 = GCNConv(nhid, nout, normalize=True)
        self.lin1 = Linear(nout, nout)
        self.lin2 = Linear(nout, nclass)
        self.dropout = dropout

    def _activate(self, x):
        """Apply ReLU followed by dropout, the pattern shared by all hidden layers."""
        return F.dropout(x.relu(), p=self.dropout, training=self.training)

    def forward(self, x, edge_index, edge_weight=None):
        """Return the output of ``lin2`` for every node (no softmax applied)."""
        x = self._activate(self.conv1(x, edge_index, edge_weight))
        x = self._activate(self.conv2(x, edge_index, edge_weight))
        # The third convolution goes straight into lin1 without an activation.
        x = self.conv3(x, edge_index, edge_weight)
        x = self._activate(self.lin1(x))
        return self.lin2(x)
    
# The layer sizes must match the tensors stored in the checkpoint, otherwise
# load_state_dict raises on the shape mismatch.
model = GCN(nhid=100, nout=20, dropout=0).to(device)

# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# from a GPU still loads when the notebook falls back to the CPU.
model.load_state_dict(
    torch.load("../models/GCN_LoanDecision_sd.pt", map_location=device, weights_only=True)
)

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load dataset pickles that come from a trusted source.
with open("../data/LoanDecision.pickle", "rb") as f:
    data = pickle.load(f)

data

Data(edge_index=[2, 3950], num_nodes=1000, x=[1000, 2], y=[1000], train_mask=[1000], val_mask=[1000], test_mask=[1000])

In [10]:
%%time
# Explainer settings for this run, one keyword per line for easier diffing.
explainer = C2Explainer(
    epochs=1000,
    lr=0.1,
    silent_mode=True,
    undirected=True,
    subgraph_mode=True,
)

# Evaluate on the model/data currently bound in the kernel.
result4 = explain(model, data, explainer, seed=42)

200


  0%|          | 0/200 [00:00<?, ?it/s]

######
size: 2.1641791044776117, num_success: 67, prop_perturbs: 0.019265322014689445
finished
Fedility: 0.335, Num_perturbs: 2.1641791044776117, Similarity: 0.9807347059249878
CPU times: user 18min 45s, sys: 1.27 s, total: 18min 47s
Wall time: 18min 46s
