In [1]:
import sys
sys.path.append('../src')

import pickle

import torch
import torch.nn.functional as F
import random

from cf_explainer.gcn_conv import GCNConv
from torch.nn import Linear, Sequential, BatchNorm1d, ReLU
import torch_geometric
from torch_geometric.explain import Explainer
from torch_geometric.nn import SAGEConv, GATConv, GINConv, GIN
from cf_explainer import C2Explainer
from cf_explainer.utils import seed_everything

import numpy as np

import networkx as nx
from pyvis.network import Network

from tqdm.auto import tqdm

import pickle

'''Config parameters'''
use_cuda_if_available = True
device = torch.device('cuda' if torch.cuda.is_available() and use_cuda_if_available else 'cpu')

# seed_everything(42, deterministic=True)
# if error when setting use_deterministic_algorithms(True)
# try this:
%env CUBLAS_WORKSPACE_CONFIG=:4096:8

print("PyTorch version:", torch.__version__)
print("PyTorch device:", device)

env: CUBLAS_WORKSPACE_CONFIG=:4096:8
PyTorch version: 2.0.1
PyTorch device: cuda


In [2]:
def results(num_perturbs, prop_perturbs):
    """Print and return the mean explanation size and mean perturbation proportion.

    Args:
        num_perturbs: One perturbation count per successful explanation.
        prop_perturbs: One perturbation proportion per successful explanation.

    Returns:
        tuple: ``(size, prop)`` where ``size`` is ``sum(num_perturbs)`` divided
        by ``2 * len(num_perturbs)`` and ``prop`` is the mean of
        ``prop_perturbs``. When a list is empty the corresponding value is
        ``float("nan")`` so callers can keep doing arithmetic on it (for
        example ``1 - prop``) instead of hitting a ``TypeError`` on a string
        placeholder.
    """
    nan = float("nan")

    print("######")
    # The factor 2 presumably converts directed edge entries into undirected
    # edges (each edge counted in both directions) -- TODO confirm.
    size = sum(num_perturbs) / (2 * len(num_perturbs)) if len(num_perturbs) != 0 else nan
    prop = sum(prop_perturbs) / len(prop_perturbs) if len(prop_perturbs) != 0 else nan

    # Keep the human-readable "N/A" marker in the log line only; the returned
    # values stay numeric.
    size_txt = size if len(num_perturbs) != 0 else "N/A"
    prop_txt = prop if len(prop_perturbs) != 0 else "N/A"
    print(f"size: {size_txt}, num_success: {len(num_perturbs)}, prop_perturbs: {prop_txt}")
    print("finished")
    return size, prop


def explaine(model, data, explainer, seed=42):
    """Explain every validation/test node and summarise the successful runs.

    Args:
        model: Node-classification model handed to ``Explainer``.
        data: Graph object providing ``x``, ``edge_index``, ``val_mask`` and
            ``test_mask``.
        explainer: Explanation algorithm instance wrapped by ``Explainer``.
        seed: Seed passed to ``seed_everything`` before anything else runs.

    Returns:
        list: A single row ``[[n_nodes, size, n_success, prop]]`` where
        ``size`` and ``prop`` come from ``results``.
    """
    seed_everything(seed, deterministic=True)

    condition = (data.test_mask.cpu() | data.val_mask.cpu())
    df_cf = np.where(condition)[0].tolist()

    print(len(df_cf))

    # Bind the wrapper to its own name instead of overwriting the
    # ``explainer`` argument.
    wrapped_explainer = Explainer(
        model=model,
        algorithm=explainer,
        explanation_type='model',
        node_mask_type=None,
        edge_mask_type='object',
        model_config=dict(
            mode='multiclass_classification',
            task_level='node',
            return_type='raw',
        ),
    )

    # Explanations at or above this perturbation count are not counted as
    # successes (hard-coded cutoff from the original experiment).
    perturb_cutoff = 20
    num_perturbs = []
    prop_perturbs = []

    for i in tqdm(df_cf):
        explanation = wrapped_explainer(data.x, data.edge_index, index=i)

        # Only explanations carrying a ``perturbs`` attribute are considered;
        # presumably the algorithm sets it when a counterfactual is found.
        if hasattr(explanation, "perturbs") and explanation.perturbs < perturb_cutoff:
            num_perturbs.append(explanation.perturbs)
            prop_perturbs.append(explanation.prop_perturbs)

    size, prop = results(num_perturbs, prop_perturbs)

    # Guard the two summary ratios: an empty node selection would divide by
    # zero, and with no successes ``prop`` is only a placeholder.
    fidelity = len(num_perturbs) / len(df_cf) if df_cf else "N/A"
    similarity = 1 - prop if num_perturbs else prop
    # Label spelling is kept as-is so new logs match previously recorded ones.
    print(f"Fedility: {fidelity}, Num_perturbs: {size}, Similarity: {similarity}")
    return [[len(df_cf), size, len(num_perturbs), prop]]

# LoanDecision

In [3]:
class GCN(torch.nn.Module):
    """Three ``GCNConv`` layers followed by a two-layer linear head.

    The first convolution takes 2 input features per node and the last linear
    layer emits 2 values per node.

    Args:
        nhid: Width of the first two convolution layers.
        nout: Output width of the third convolution and of ``lin1``.
        dropout: Dropout probability applied after each ReLU.
    """

    def __init__(self, nhid, nout, dropout):
        super().__init__()
        # Attribute names and creation order are kept so saved state dicts
        # continue to load.
        self.conv1 = GCNConv(2, nhid, normalize=True)
        self.conv2 = GCNConv(nhid, nhid, normalize=True)
        self.conv3 = GCNConv(nhid, nout, normalize=True)
        self.lin1 = Linear(nout, nout)
        self.lin2 = Linear(nout, 2)
        self.dropout = dropout

    def _relu_dropout(self, h):
        """Apply ReLU, then dropout (active only in training mode)."""
        return F.dropout(h.relu(), p=self.dropout, training=self.training)

    def forward(self, x, edge_index, edge_weight=None):
        """Return the raw per-node outputs of the final linear layer."""
        h = x
        for conv in (self.conv1, self.conv2):
            h = self._relu_dropout(conv(h, edge_index, edge_weight))
        # The third convolution feeds lin1 directly, with no activation between.
        h = self.conv3(h, edge_index, edge_weight)
        h = self._relu_dropout(self.lin1(h))
        return self.lin2(h)
    
# Build the network and restore its trained weights.
model = GCN(nhid=100, nout=20, dropout=0).to(device)
# map_location places the checkpoint tensors on the active device, so a
# state dict saved on a GPU can still be loaded on a CPU-only machine.
model.load_state_dict(
    torch.load("../models/GCN_LoanDecision_sd.pt", map_location=device, weights_only=True)
)

# NOTE: pickle.load can execute arbitrary code; only open trusted files.
with open("../data/LoanDecision.pickle", "rb") as f:
    data = pickle.load(f)

# Show the dataset summary as the cell output.
data

  return self.fget.__get__(instance, owner)()


Data(edge_index=[2, 3950], num_nodes=1000, x=[1000, 2], y=[1000], train_mask=[1000], val_mask=[1000], test_mask=[1000])

In [4]:
def explain(model, data, explainer, seed=42):
    """Explain validation/test nodes with gender = 1 and summarise the successes.

    Args:
        model: Node-classification model handed to ``Explainer``.
        data: Graph object providing ``x``, ``edge_index``, ``val_mask`` and
            ``test_mask``; column 1 of ``x`` is read as the gender flag.
        explainer: Explanation algorithm instance wrapped by ``Explainer``.
        seed: Seed passed to ``seed_everything`` before anything else runs.

    Returns:
        list: A single row ``[[n_nodes, size, n_success, prop]]`` where
        ``size`` and ``prop`` come from ``results``.
    """
    seed_everything(seed, deterministic=True)

    # only gender = 1
    eval_mask = data.test_mask.cpu() | data.val_mask.cpu()
    gender_flag = data.x[:, 1].to(bool).cpu()
    condition = eval_mask & gender_flag
    df_cf = np.where(condition)[0].tolist()

    print(len(df_cf))

    # Bind the wrapper to its own name instead of overwriting the
    # ``explainer`` argument.
    wrapped_explainer = Explainer(
        model=model,
        algorithm=explainer,
        explanation_type='model',
        node_mask_type=None,
        edge_mask_type='object',
        model_config=dict(
            mode='multiclass_classification',
            task_level='node',
            return_type='raw',
        ),
    )

    # Explanations at or above this perturbation count are not counted as
    # successes (hard-coded cutoff from the original experiment).
    perturb_cutoff = 20
    num_perturbs = []
    prop_perturbs = []

    for i in tqdm(df_cf):
        explanation = wrapped_explainer(data.x, data.edge_index, index=i)

        # Only explanations carrying a ``perturbs`` attribute are considered;
        # presumably the algorithm sets it when a counterfactual is found.
        if hasattr(explanation, "perturbs") and explanation.perturbs < perturb_cutoff:
            num_perturbs.append(explanation.perturbs)
            prop_perturbs.append(explanation.prop_perturbs)

    size, prop = results(num_perturbs, prop_perturbs)

    # Guard the two summary ratios: an empty node selection would divide by
    # zero, and with no successes ``prop`` is only a placeholder.
    fidelity = len(num_perturbs) / len(df_cf) if df_cf else "N/A"
    similarity = 1 - prop if num_perturbs else prop
    # Label spelling is kept as-is so new logs match previously recorded ones.
    print(f"Fedility: {fidelity}, Num_perturbs: {size}, Similarity: {similarity}")
    return [[len(df_cf), size, len(num_perturbs), prop]]

# Configure the C2Explainer algorithm, then evaluate it with the `explain`
# helper currently in scope.
explainer = C2Explainer(
    epochs=1000,
    lr=0.001,
    silent_mode=True,
    undirected=True,
    AR_mode=True,
)
result4 = explain(model, data, explainer, seed=42)

118


  0%|          | 0/118 [00:00<?, ?it/s]

######
size: 1.4516129032258065, num_success: 93, prop_perturbs: 0.007129633333534002
finished
Fedility: 0.788135593220339, Num_perturbs: 1.4516129032258065, Similarity: 0.9928703904151917


In [5]:
def explain(model, data, explainer, seed=42):
    """Explain validation/test nodes with gender = 0 and summarise the successes.

    Args:
        model: Node-classification model handed to ``Explainer``.
        data: Graph object providing ``x``, ``edge_index``, ``val_mask`` and
            ``test_mask``; column 1 of ``x`` is read as the gender flag.
        explainer: Explanation algorithm instance wrapped by ``Explainer``.
        seed: Seed passed to ``seed_everything`` before anything else runs.

    Returns:
        list: A single row ``[[n_nodes, size, n_success, prop]]`` where
        ``size`` and ``prop`` come from ``results``.
    """
    seed_everything(seed, deterministic=True)

    # only gender = 0
    eval_mask = data.test_mask.cpu() | data.val_mask.cpu()
    gender_flag = data.x[:, 1].to(bool).cpu()
    condition = eval_mask & ~gender_flag
    df_cf = np.where(condition)[0].tolist()

    print(len(df_cf))

    # Bind the wrapper to its own name instead of overwriting the
    # ``explainer`` argument.
    wrapped_explainer = Explainer(
        model=model,
        algorithm=explainer,
        explanation_type='model',
        node_mask_type=None,
        edge_mask_type='object',
        model_config=dict(
            mode='multiclass_classification',
            task_level='node',
            return_type='raw',
        ),
    )

    # Explanations at or above this perturbation count are not counted as
    # successes (hard-coded cutoff from the original experiment).
    perturb_cutoff = 20
    num_perturbs = []
    prop_perturbs = []

    for i in tqdm(df_cf):
        explanation = wrapped_explainer(data.x, data.edge_index, index=i)

        # Only explanations carrying a ``perturbs`` attribute are considered;
        # presumably the algorithm sets it when a counterfactual is found.
        if hasattr(explanation, "perturbs") and explanation.perturbs < perturb_cutoff:
            num_perturbs.append(explanation.perturbs)
            prop_perturbs.append(explanation.prop_perturbs)

    size, prop = results(num_perturbs, prop_perturbs)

    # Guard the two summary ratios: an empty node selection would divide by
    # zero, and with no successes ``prop`` is only a placeholder.
    fidelity = len(num_perturbs) / len(df_cf) if df_cf else "N/A"
    similarity = 1 - prop if num_perturbs else prop
    # Label spelling is kept as-is so new logs match previously recorded ones.
    print(f"Fedility: {fidelity}, Num_perturbs: {size}, Similarity: {similarity}")
    return [[len(df_cf), size, len(num_perturbs), prop]]

# Configure the C2Explainer algorithm, then evaluate it with the `explain`
# helper currently in scope.
explainer = C2Explainer(
    epochs=1000,
    lr=0.001,
    silent_mode=True,
    undirected=True,
    AR_mode=True,
)
result4 = explain(model, data, explainer, seed=42)

82


  0%|          | 0/82 [00:00<?, ?it/s]

######
size: 1.7846153846153847, num_success: 65, prop_perturbs: 0.012691770680248737
finished
Fedility: 0.7926829268292683, Num_perturbs: 1.7846153846153847, Similarity: 0.9873082041740417


# Biased model

In [6]:
def explain(model, data, explainer, seed=42):
    """Explain validation/test nodes with gender = 1 and summarise the successes.

    Args:
        model: Node-classification model handed to ``Explainer``.
        data: Graph object providing ``x``, ``edge_index``, ``val_mask`` and
            ``test_mask``; column 1 of ``x`` is read as the gender flag.
        explainer: Explanation algorithm instance wrapped by ``Explainer``.
        seed: Seed passed to ``seed_everything`` before anything else runs.

    Returns:
        list: A single row ``[[n_nodes, size, n_success, prop]]`` where
        ``size`` and ``prop`` come from ``results``.
    """
    seed_everything(seed, deterministic=True)

    # only gender = 1
    eval_mask = data.test_mask.cpu() | data.val_mask.cpu()
    gender_flag = data.x[:, 1].to(bool).cpu()
    condition = eval_mask & gender_flag
    df_cf = np.where(condition)[0].tolist()

    print(len(df_cf))

    # Bind the wrapper to its own name instead of overwriting the
    # ``explainer`` argument.
    wrapped_explainer = Explainer(
        model=model,
        algorithm=explainer,
        explanation_type='model',
        node_mask_type=None,
        edge_mask_type='object',
        model_config=dict(
            mode='multiclass_classification',
            task_level='node',
            return_type='raw',
        ),
    )

    # Explanations at or above this perturbation count are not counted as
    # successes (hard-coded cutoff from the original experiment).
    perturb_cutoff = 20
    num_perturbs = []
    prop_perturbs = []

    for i in tqdm(df_cf):
        explanation = wrapped_explainer(data.x, data.edge_index, index=i)

        # Only explanations carrying a ``perturbs`` attribute are considered;
        # presumably the algorithm sets it when a counterfactual is found.
        if hasattr(explanation, "perturbs") and explanation.perturbs < perturb_cutoff:
            num_perturbs.append(explanation.perturbs)
            prop_perturbs.append(explanation.prop_perturbs)

    size, prop = results(num_perturbs, prop_perturbs)

    # Guard the two summary ratios: an empty node selection would divide by
    # zero, and with no successes ``prop`` is only a placeholder.
    fidelity = len(num_perturbs) / len(df_cf) if df_cf else "N/A"
    similarity = 1 - prop if num_perturbs else prop
    # Label spelling is kept as-is so new logs match previously recorded ones.
    print(f"Fedility: {fidelity}, Num_perturbs: {size}, Similarity: {similarity}")
    return [[len(df_cf), size, len(num_perturbs), prop]]

# Rebuild the network and load the biased checkpoint in place of the earlier model.
model = GCN(nhid=100, nout=20, dropout=0).to(device)
# map_location places the checkpoint tensors on the active device, so a
# state dict saved on a GPU can still be loaded on a CPU-only machine.
model.load_state_dict(
    torch.load("../models/Biased_GCN_LoanDecision_sd.pt", map_location=device, weights_only=True)
)

# Configure the C2Explainer algorithm, then evaluate it with the `explain`
# helper currently in scope.
explainer = C2Explainer(epochs=1000, lr=0.001, silent_mode=True, undirected=True, AR_mode=True)
result4 = explain(model, data, explainer, seed=42)

118


  0%|          | 0/118 [00:00<?, ?it/s]

######
size: 1.6395348837209303, num_success: 86, prop_perturbs: 0.007888738997280598
finished
Fedility: 0.7288135593220338, Num_perturbs: 1.6395348837209303, Similarity: 0.9921112656593323


In [7]:
def explain(model, data, explainer, seed=42):
    """Explain validation/test nodes with gender = 0 and summarise the successes.

    Args:
        model: Node-classification model handed to ``Explainer``.
        data: Graph object providing ``x``, ``edge_index``, ``val_mask`` and
            ``test_mask``; column 1 of ``x`` is read as the gender flag.
        explainer: Explanation algorithm instance wrapped by ``Explainer``.
        seed: Seed passed to ``seed_everything`` before anything else runs.

    Returns:
        list: A single row ``[[n_nodes, size, n_success, prop]]`` where
        ``size`` and ``prop`` come from ``results``.
    """
    seed_everything(seed, deterministic=True)

    # only gender = 0
    eval_mask = data.test_mask.cpu() | data.val_mask.cpu()
    gender_flag = data.x[:, 1].to(bool).cpu()
    condition = eval_mask & ~gender_flag
    df_cf = np.where(condition)[0].tolist()

    print(len(df_cf))

    # Bind the wrapper to its own name instead of overwriting the
    # ``explainer`` argument.
    wrapped_explainer = Explainer(
        model=model,
        algorithm=explainer,
        explanation_type='model',
        node_mask_type=None,
        edge_mask_type='object',
        model_config=dict(
            mode='multiclass_classification',
            task_level='node',
            return_type='raw',
        ),
    )

    # Explanations at or above this perturbation count are not counted as
    # successes (hard-coded cutoff from the original experiment).
    perturb_cutoff = 20
    num_perturbs = []
    prop_perturbs = []

    for i in tqdm(df_cf):
        explanation = wrapped_explainer(data.x, data.edge_index, index=i)

        # Only explanations carrying a ``perturbs`` attribute are considered;
        # presumably the algorithm sets it when a counterfactual is found.
        if hasattr(explanation, "perturbs") and explanation.perturbs < perturb_cutoff:
            num_perturbs.append(explanation.perturbs)
            prop_perturbs.append(explanation.prop_perturbs)

    size, prop = results(num_perturbs, prop_perturbs)

    # Guard the two summary ratios: an empty node selection would divide by
    # zero, and with no successes ``prop`` is only a placeholder.
    fidelity = len(num_perturbs) / len(df_cf) if df_cf else "N/A"
    similarity = 1 - prop if num_perturbs else prop
    # Label spelling is kept as-is so new logs match previously recorded ones.
    print(f"Fedility: {fidelity}, Num_perturbs: {size}, Similarity: {similarity}")
    return [[len(df_cf), size, len(num_perturbs), prop]]

# Rebuild the network and load the biased checkpoint so this cell does not
# depend on which model an earlier cell left in `model`.
model = GCN(nhid=100, nout=20, dropout=0).to(device)
# map_location places the checkpoint tensors on the active device, so a
# state dict saved on a GPU can still be loaded on a CPU-only machine.
model.load_state_dict(
    torch.load("../models/Biased_GCN_LoanDecision_sd.pt", map_location=device, weights_only=True)
)

# Configure the C2Explainer algorithm, then evaluate it with the `explain`
# helper currently in scope.
explainer = C2Explainer(epochs=1000, lr=0.001, silent_mode=True, undirected=True, AR_mode=True)
result4 = explain(model, data, explainer, seed=42)

82


  0%|          | 0/82 [00:00<?, ?it/s]

######
size: 2.74468085106383, num_success: 47, prop_perturbs: 0.016860580071806908
finished
Fedility: 0.573170731707317, Num_perturbs: 2.74468085106383, Similarity: 0.9831393957138062
