In [50]:
import sys
sys.path.append('../src')

import pickle

import torch
import torch.nn.functional as F
import random

from cf_explainer.gcn_conv import GCNConv
from torch.nn import Linear, Sequential, BatchNorm1d, ReLU
import torch_geometric
from torch_geometric.explain import Explainer
from torch_geometric.nn import SAGEConv, GATConv, GINConv, GIN
from cf_explainer import C2Explainer
from cf_explainer.utils import seed_everything

import numpy as np

import networkx as nx
from pyvis.network import Network

from tqdm.auto import tqdm

import pickle

'''Config parameters'''
# Set to False to force CPU execution even when a CUDA device is present.
use_cuda_if_available = True
# CUDA is selected only when it is both available and enabled by the flag above.
# NOTE(review): `device` is printed below but not referenced by any other visible cell —
# confirm whether the model/data are expected to be moved to it.
device = torch.device('cuda' if torch.cuda.is_available() and use_cuda_if_available else 'cpu')

# Global seeding is left disabled here; the `explaine` helpers seed themselves on each call.
# seed_everything(42, deterministic=True)
# If enabling deterministic algorithms (use_deterministic_algorithms(True)) raises an error,
# setting this cuBLAS workspace variable is the suggested workaround:
%env CUBLAS_WORKSPACE_CONFIG=:4096:8

# Record the runtime environment alongside the notebook outputs.
print("PyTorch version:", torch.__version__)
print("PyTorch device:", device)

env: CUBLAS_WORKSPACE_CONFIG=:4096:8
PyTorch version: 2.0.1
PyTorch device: cuda


In [51]:
def results(num_perturbs, prop_perturbs):
    """Print and return the aggregate statistics of the successful explanations.

    Args:
        num_perturbs: Perturbation counts, one entry per successful explanation.
        prop_perturbs: Perturbation proportions, one entry per successful explanation.

    Returns:
        A ``(size, prop)`` tuple. ``size`` is the sum of ``num_perturbs`` divided by
        twice the number of entries and ``prop`` is the mean of ``prop_perturbs``.
        Both are the string ``"N/A"`` when ``num_perturbs`` is empty.
    """
    print("######")
    n_success = len(num_perturbs)
    if n_success == 0:
        # Nothing to average: report placeholders instead of dividing by zero.
        size = prop = "N/A"
    else:
        size = sum(num_perturbs) / (2 * n_success)
        prop = sum(prop_perturbs) / len(prop_perturbs)
    print(f"size: {size}, num_success: {n_success}, prop_perturbs: {prop}")
    print("finished")
    return size, prop


def explaine(model, data, explainer, seed=42, max_perturbs=20):
    """Explain every validation/test node and summarise how many explanations succeeded.

    Each node selected by ``data.test_mask | data.val_mask`` is explained through a
    ``torch_geometric.explain.Explainer`` wrapped around ``explainer``. An explanation
    counts as a success when it has a ``perturbs`` attribute strictly below
    ``max_perturbs``.

    Args:
        model: Model handed to ``Explainer``; configured here as a node-level
            multiclass classifier with raw outputs.
        data: Graph object providing ``x``, ``edge_index``, ``test_mask`` and ``val_mask``.
        explainer: Explanation algorithm instance passed to ``Explainer`` as ``algorithm``.
        seed: Seed forwarded to ``seed_everything`` (called with ``deterministic=True``).
        max_perturbs: Exclusive upper bound on ``explanation.perturbs`` for a success.
            Defaults to 20, the value that used to be hard-coded.

    Returns:
        A one-element list ``[[num_nodes, size, num_success, prop]]`` where ``size`` and
        ``prop`` come from ``results`` (the string ``"N/A"`` when nothing succeeded).
    """
    seed_everything(seed, deterministic=True)

    # Candidate nodes: everything in the validation or test split.
    condition = (data.test_mask.cpu() | data.val_mask.cpu())
    df_cf = np.where(condition)[0].tolist()

    print(len(df_cf))

    # Keep the wrapper under its own name rather than rebinding the `explainer` argument.
    wrapped_explainer = Explainer(
        model=model,
        algorithm=explainer,
        explanation_type='model',
        node_mask_type=None,
        edge_mask_type='object',
        model_config=dict(
            mode='multiclass_classification',
            task_level='node',
            return_type='raw',
        ),
    )

    num_perturbs = []
    prop_perturbs = []

    for i in tqdm(df_cf):
        explanation = wrapped_explainer(data.x, data.edge_index, index=i)

        # Explanations without a `perturbs` attribute are skipped — presumably the
        # algorithm only sets it when a counterfactual was found (TODO confirm).
        if hasattr(explanation, "perturbs") and explanation.perturbs < max_perturbs:
            num_perturbs.append(explanation.perturbs)
            prop_perturbs.append(explanation.prop_perturbs)

    size, prop = results(num_perturbs, prop_perturbs)

    # `results` yields "N/A" strings when nothing succeeded, so `1 - prop` would raise
    # TypeError; an empty node selection would likewise divide by zero.
    fidelity = len(num_perturbs) / len(df_cf) if df_cf else "N/A"
    similarity = 1 - prop if num_perturbs else "N/A"

    print(f"Fedility: {fidelity}, Num_perturbs: {size}, Similarity: {similarity}")
    return [[len(df_cf), size, len(num_perturbs), prop]]

# ARExplainer + GCN + LoanDecision

In [52]:
class GCN(torch.nn.Module):
    """Three graph-convolution layers followed by a two-layer linear head.

    The input feature width (2) and the output width (2) are fixed in the constructor.

    Args:
        nhid: Output width of the first two convolutions.
        nout: Output width of the third convolution and of the first linear layer.
        dropout: Dropout probability applied after each ReLU while training.
    """

    def __init__(self, nhid, nout, dropout):
        super().__init__()
        # Layers are created in the same order as before so parameter registration
        # and random initialisation are unaffected.
        self.conv1 = GCNConv(2, nhid, normalize=True)
        self.conv2 = GCNConv(nhid, nhid, normalize=True)
        self.conv3 = GCNConv(nhid, nout, normalize=True)
        self.lin1 = Linear(nout, nout)
        self.lin2 = Linear(nout, 2)
        self.dropout = dropout

    def _activate(self, h):
        """Apply ReLU followed by dropout (dropout is active only in training mode)."""
        return F.dropout(h.relu(), p=self.dropout, training=self.training)

    def forward(self, x, edge_index, edge_weight=None):
        """Return the output of the final linear layer for the given features and graph."""
        h = x
        # The first two convolutions are each followed by ReLU + dropout.
        for conv in (self.conv1, self.conv2):
            h = self._activate(conv(h, edge_index, edge_weight))
        # The third convolution feeds the linear head directly, with no activation between.
        h = self.conv3(h, edge_index, edge_weight)
        h = self._activate(self.lin1(h))
        return self.lin2(h)
    
# Load the saved GCN checkpoint from disk.
# NOTE(review): presumably the file holds a fully pickled module, which is why the GCN class
# is defined in this cell before loading — confirm. Unpickling can execute arbitrary code,
# so only load checkpoints from a trusted source.
model = torch.load("../models/GCN_LoanDecision.pt")
model

# Load the pickled LoanDecision graph. The same trust caveat applies to pickle.load.
with open("../data/LoanDecision.pickle", "rb") as f:
	data = pickle.load(f)
    
data

Data(edge_index=[2, 3950], num_nodes=1000, x=[1000, 2], y=[1000], train_mask=[1000], val_mask=[1000], test_mask=[1000])

In [53]:
def explaine(model, data, explainer, seed=42, max_perturbs=20):
    """Explain the validation/test nodes of one feature group and summarise the successes.

    Only nodes in ``data.test_mask | data.val_mask`` whose second feature column
    (``data.x[:, 1]``) is truthy are explained. Each one goes through a
    ``torch_geometric.explain.Explainer`` wrapped around ``explainer``; an explanation
    counts as a success when it has a ``perturbs`` attribute strictly below
    ``max_perturbs``.

    Args:
        model: Model handed to ``Explainer``; configured here as a node-level
            multiclass classifier with raw outputs.
        data: Graph object providing ``x``, ``edge_index``, ``test_mask`` and ``val_mask``.
        explainer: Explanation algorithm instance passed to ``Explainer`` as ``algorithm``.
        seed: Seed forwarded to ``seed_everything`` (called with ``deterministic=True``).
        max_perturbs: Exclusive upper bound on ``explanation.perturbs`` for a success.
            Defaults to 20, the value that used to be hard-coded.

    Returns:
        A one-element list ``[[num_nodes, size, num_success, prop]]`` where ``size`` and
        ``prop`` come from ``results`` (the string ``"N/A"`` when nothing succeeded).
    """
    seed_everything(seed, deterministic=True)

    # Candidate nodes: validation/test split, restricted to gender = 1
    # (second feature column truthy).
    condition = ((data.test_mask.cpu() | data.val_mask.cpu()) & data.x[:,1].to(bool).cpu())
    df_cf = np.where(condition)[0].tolist()

    print(len(df_cf))

    # Keep the wrapper under its own name rather than rebinding the `explainer` argument.
    wrapped_explainer = Explainer(
        model=model,
        algorithm=explainer,
        explanation_type='model',
        node_mask_type=None,
        edge_mask_type='object',
        model_config=dict(
            mode='multiclass_classification',
            task_level='node',
            return_type='raw',
        ),
    )

    num_perturbs = []
    prop_perturbs = []

    for i in tqdm(df_cf):
        explanation = wrapped_explainer(data.x, data.edge_index, index=i)

        # Explanations without a `perturbs` attribute are skipped — presumably the
        # algorithm only sets it when a counterfactual was found (TODO confirm).
        if hasattr(explanation, "perturbs") and explanation.perturbs < max_perturbs:
            num_perturbs.append(explanation.perturbs)
            prop_perturbs.append(explanation.prop_perturbs)

    size, prop = results(num_perturbs, prop_perturbs)

    # `results` yields "N/A" strings when nothing succeeded, so `1 - prop` would raise
    # TypeError; an empty node selection would likewise divide by zero.
    fidelity = len(num_perturbs) / len(df_cf) if df_cf else "N/A"
    similarity = 1 - prop if num_perturbs else "N/A"

    print(f"Fedility: {fidelity}, Num_perturbs: {size}, Similarity: {similarity}")
    return [[len(df_cf), size, len(num_perturbs), prop]]

# Build the explanation algorithm and evaluate it with the `explaine` defined in this cell.
# NOTE(review): `ARExplainer` is not imported in the visible import cell (only `C2Explainer`
# is), so this presumably raises NameError on a fresh kernel — confirm the intended class.
explainer = ARExplainer(epochs=1000, lr=0.001, undirected=True, AR_mode=True)
# NOTE(review): every evaluation cell in this notebook assigns to `result4`, so each run
# overwrites the previous summary.
result4 = explaine(model, data, explainer, seed=42)

118


  0%|          | 0/118 [00:00<?, ?it/s]

num_cfs:2, min_perturbs:130.0, prop_perturbs:0.04542278125882149, cf_label 1
num_cfs:201, min_perturbs:368.0, prop_perturbs:0.1588946431875229, cf_label 0
num_cfs:686, min_perturbs:2.0, prop_perturbs:0.001287001301534474, cf_label 0
num_cfs:804, min_perturbs:16.0, prop_perturbs:0.009378663264214993, cf_label 0
num_cfs:571, min_perturbs:2.0, prop_perturbs:0.002583979396149516, cf_label 0
num_cfs:691, min_perturbs:4.0, prop_perturbs:0.007518797181546688, cf_label 1
num_cfs:532, min_perturbs:4.0, prop_perturbs:0.0034246575087308884, cf_label 1
num_cfs:49, min_perturbs:2.0, prop_perturbs:0.0020964359864592552, cf_label 1
num_cfs:23, min_perturbs:26.0, prop_perturbs:0.018465910106897354, cf_label 1
num_cfs:699, min_perturbs:2.0, prop_perturbs:0.0030120480805635452, cf_label 0
num_cfs:65, min_perturbs:2.0, prop_perturbs:0.001251564477570355, cf_label 1
num_cfs:294, min_perturbs:4.0, prop_perturbs:0.00716845877468586, cf_label 1
num_cfs:164, min_perturbs:2.0, prop_perturbs:0.00208333344198763

In [54]:
def explaine(model, data, explainer, seed=42, max_perturbs=20):
    """Explain the validation/test nodes of one feature group and summarise the successes.

    Only nodes in ``data.test_mask | data.val_mask`` whose second feature column
    (``data.x[:, 1]``) is falsy are explained. Each one goes through a
    ``torch_geometric.explain.Explainer`` wrapped around ``explainer``; an explanation
    counts as a success when it has a ``perturbs`` attribute strictly below
    ``max_perturbs``.

    Args:
        model: Model handed to ``Explainer``; configured here as a node-level
            multiclass classifier with raw outputs.
        data: Graph object providing ``x``, ``edge_index``, ``test_mask`` and ``val_mask``.
        explainer: Explanation algorithm instance passed to ``Explainer`` as ``algorithm``.
        seed: Seed forwarded to ``seed_everything`` (called with ``deterministic=True``).
        max_perturbs: Exclusive upper bound on ``explanation.perturbs`` for a success.
            Defaults to 20, the value that used to be hard-coded.

    Returns:
        A one-element list ``[[num_nodes, size, num_success, prop]]`` where ``size`` and
        ``prop`` come from ``results`` (the string ``"N/A"`` when nothing succeeded).
    """
    seed_everything(seed, deterministic=True)

    # Candidate nodes: validation/test split, restricted to gender = 0
    # (the `~` keeps rows whose second feature column is falsy).
    condition = ((data.test_mask.cpu() | data.val_mask.cpu()) & ~data.x[:,1].to(bool).cpu())
    df_cf = np.where(condition)[0].tolist()

    print(len(df_cf))

    # Keep the wrapper under its own name rather than rebinding the `explainer` argument.
    wrapped_explainer = Explainer(
        model=model,
        algorithm=explainer,
        explanation_type='model',
        node_mask_type=None,
        edge_mask_type='object',
        model_config=dict(
            mode='multiclass_classification',
            task_level='node',
            return_type='raw',
        ),
    )

    num_perturbs = []
    prop_perturbs = []

    for i in tqdm(df_cf):
        explanation = wrapped_explainer(data.x, data.edge_index, index=i)

        # Explanations without a `perturbs` attribute are skipped — presumably the
        # algorithm only sets it when a counterfactual was found (TODO confirm).
        if hasattr(explanation, "perturbs") and explanation.perturbs < max_perturbs:
            num_perturbs.append(explanation.perturbs)
            prop_perturbs.append(explanation.prop_perturbs)

    size, prop = results(num_perturbs, prop_perturbs)

    # `results` yields "N/A" strings when nothing succeeded, so `1 - prop` would raise
    # TypeError; an empty node selection would likewise divide by zero.
    fidelity = len(num_perturbs) / len(df_cf) if df_cf else "N/A"
    similarity = 1 - prop if num_perturbs else "N/A"

    print(f"Fedility: {fidelity}, Num_perturbs: {size}, Similarity: {similarity}")
    return [[len(df_cf), size, len(num_perturbs), prop]]

# Build the explanation algorithm and evaluate it with the `explaine` defined in this cell.
# NOTE(review): `ARExplainer` is not imported in the visible import cell (only `C2Explainer`
# is), so this presumably raises NameError on a fresh kernel — confirm the intended class.
explainer = ARExplainer(epochs=1000, lr=0.001, undirected=True, AR_mode=True)
# NOTE(review): every evaluation cell in this notebook assigns to `result4`, so each run
# overwrites the previous summary.
result4 = explaine(model, data, explainer, seed=42)

82


  0%|          | 0/82 [00:00<?, ?it/s]

num_cfs:22, min_perturbs:312.0, prop_perturbs:0.2145804762840271, cf_label 0
num_cfs:790, min_perturbs:2.0, prop_perturbs:0.002061855746433139, cf_label 0
num_cfs:649, min_perturbs:2.0, prop_perturbs:0.0012180268531665206, cf_label 0
num_cfs:528, min_perturbs:2.0, prop_perturbs:0.00119331746827811, cf_label 0
num_cfs:27, min_perturbs:92.0, prop_perturbs:0.33576643466949463, cf_label 1
num_cfs:704, min_perturbs:2.0, prop_perturbs:0.001240694778971374, cf_label 0
num_cfs:76, min_perturbs:6.0, prop_perturbs:0.005217391531914473, cf_label 1
num_cfs:28, min_perturbs:6.0, prop_perturbs:0.014018691144883633, cf_label 1
num_cfs:47, min_perturbs:6.0, prop_perturbs:0.005464480724185705, cf_label 1
num_cfs:497, min_perturbs:2.0, prop_perturbs:0.0049019609577953815, cf_label 0
num_cfs:513, min_perturbs:2.0, prop_perturbs:0.006802720949053764, cf_label 0
num_cfs:179, min_perturbs:2.0, prop_perturbs:0.0032573288772255182, cf_label 1
num_cfs:959, min_perturbs:2.0, prop_perturbs:0.004166666883975267, 

# Biased model

In [47]:
def explaine(model, data, explainer, seed=42, max_perturbs=20):
    """Explain the validation/test nodes of one feature group and summarise the successes.

    Only nodes in ``data.test_mask | data.val_mask`` whose second feature column
    (``data.x[:, 1]``) is truthy are explained. Each one goes through a
    ``torch_geometric.explain.Explainer`` wrapped around ``explainer``; an explanation
    counts as a success when it has a ``perturbs`` attribute strictly below
    ``max_perturbs``.

    Args:
        model: Model handed to ``Explainer``; configured here as a node-level
            multiclass classifier with raw outputs.
        data: Graph object providing ``x``, ``edge_index``, ``test_mask`` and ``val_mask``.
        explainer: Explanation algorithm instance passed to ``Explainer`` as ``algorithm``.
        seed: Seed forwarded to ``seed_everything`` (called with ``deterministic=True``).
        max_perturbs: Exclusive upper bound on ``explanation.perturbs`` for a success.
            Defaults to 20, the value that used to be hard-coded.

    Returns:
        A one-element list ``[[num_nodes, size, num_success, prop]]`` where ``size`` and
        ``prop`` come from ``results`` (the string ``"N/A"`` when nothing succeeded).
    """
    seed_everything(seed, deterministic=True)

    # Candidate nodes: validation/test split, restricted to gender = 1
    # (second feature column truthy).
    condition = ((data.test_mask.cpu() | data.val_mask.cpu()) & data.x[:,1].to(bool).cpu())
    df_cf = np.where(condition)[0].tolist()

    print(len(df_cf))

    # Keep the wrapper under its own name rather than rebinding the `explainer` argument.
    wrapped_explainer = Explainer(
        model=model,
        algorithm=explainer,
        explanation_type='model',
        node_mask_type=None,
        edge_mask_type='object',
        model_config=dict(
            mode='multiclass_classification',
            task_level='node',
            return_type='raw',
        ),
    )

    num_perturbs = []
    prop_perturbs = []

    for i in tqdm(df_cf):
        explanation = wrapped_explainer(data.x, data.edge_index, index=i)

        # Explanations without a `perturbs` attribute are skipped — presumably the
        # algorithm only sets it when a counterfactual was found (TODO confirm).
        if hasattr(explanation, "perturbs") and explanation.perturbs < max_perturbs:
            num_perturbs.append(explanation.perturbs)
            prop_perturbs.append(explanation.prop_perturbs)

    size, prop = results(num_perturbs, prop_perturbs)

    # `results` yields "N/A" strings when nothing succeeded, so `1 - prop` would raise
    # TypeError; an empty node selection would likewise divide by zero.
    fidelity = len(num_perturbs) / len(df_cf) if df_cf else "N/A"
    similarity = 1 - prop if num_perturbs else "N/A"

    print(f"Fedility: {fidelity}, Num_perturbs: {size}, Similarity: {similarity}")
    return [[len(df_cf), size, len(num_perturbs), prop]]

# Rebind `model` to the biased checkpoint; the previously loaded model is no longer reachable
# under this name. Unpickling can execute arbitrary code — load trusted files only.
model = torch.load("../models/Biased_GCN_LoanDecision.pt")
model

# Build the explanation algorithm and evaluate it with the `explaine` defined in this cell.
# NOTE(review): `ARExplainer` is not imported in the visible import cell (only `C2Explainer`
# is), so this presumably raises NameError on a fresh kernel — confirm the intended class.
explainer = ARExplainer(epochs=1000, lr=0.001, undirected=True, AR_mode=True)
# NOTE(review): every evaluation cell in this notebook assigns to `result4`, so each run
# overwrites the previous summary.
result4 = explaine(model, data, explainer, seed=42)

118


  0%|          | 0/118 [00:00<?, ?it/s]

num_cfs:895, min_perturbs:4.0, prop_perturbs:0.0014803848462179303, cf_label 0
num_cfs:225, min_perturbs:2.0, prop_perturbs:0.0008635578560642898, cf_label 1
num_cfs:375, min_perturbs:10.0, prop_perturbs:0.006435006391257048, cf_label 1
num_cfs:9, min_perturbs:76.0, prop_perturbs:0.044548649340867996, cf_label 1
num_cfs:429, min_perturbs:26.0, prop_perturbs:0.03359173238277435, cf_label 0
num_cfs:126, min_perturbs:4.0, prop_perturbs:0.007518797181546688, cf_label 1
num_cfs:477, min_perturbs:6.0, prop_perturbs:0.0051369862630963326, cf_label 1
num_cfs:117, min_perturbs:2.0, prop_perturbs:0.0020964359864592552, cf_label 1
num_cfs:306, min_perturbs:2.0, prop_perturbs:0.0025125627871602774, cf_label 1
num_cfs:740, min_perturbs:2.0, prop_perturbs:0.0014204545877873898, cf_label 0
num_cfs:152, min_perturbs:22.0, prop_perturbs:0.03313253074884415, cf_label 1
num_cfs:279, min_perturbs:2.0, prop_perturbs:0.00358422938734293, cf_label 1
num_cfs:357, min_perturbs:2.0, prop_perturbs:0.003584229387

In [48]:
def explaine(model, data, explainer, seed=42, max_perturbs=20):
    """Explain the validation/test nodes of one feature group and summarise the successes.

    Only nodes in ``data.test_mask | data.val_mask`` whose second feature column
    (``data.x[:, 1]``) is falsy are explained. Each one goes through a
    ``torch_geometric.explain.Explainer`` wrapped around ``explainer``; an explanation
    counts as a success when it has a ``perturbs`` attribute strictly below
    ``max_perturbs``.

    Args:
        model: Model handed to ``Explainer``; configured here as a node-level
            multiclass classifier with raw outputs.
        data: Graph object providing ``x``, ``edge_index``, ``test_mask`` and ``val_mask``.
        explainer: Explanation algorithm instance passed to ``Explainer`` as ``algorithm``.
        seed: Seed forwarded to ``seed_everything`` (called with ``deterministic=True``).
        max_perturbs: Exclusive upper bound on ``explanation.perturbs`` for a success.
            Defaults to 20, the value that used to be hard-coded.

    Returns:
        A one-element list ``[[num_nodes, size, num_success, prop]]`` where ``size`` and
        ``prop`` come from ``results`` (the string ``"N/A"`` when nothing succeeded).
    """
    seed_everything(seed, deterministic=True)

    # Candidate nodes: validation/test split, restricted to gender = 0
    # (the `~` keeps rows whose second feature column is falsy).
    condition = ((data.test_mask.cpu() | data.val_mask.cpu()) & ~data.x[:,1].to(bool).cpu())
    df_cf = np.where(condition)[0].tolist()

    print(len(df_cf))

    # Keep the wrapper under its own name rather than rebinding the `explainer` argument.
    wrapped_explainer = Explainer(
        model=model,
        algorithm=explainer,
        explanation_type='model',
        node_mask_type=None,
        edge_mask_type='object',
        model_config=dict(
            mode='multiclass_classification',
            task_level='node',
            return_type='raw',
        ),
    )

    num_perturbs = []
    prop_perturbs = []

    for i in tqdm(df_cf):
        explanation = wrapped_explainer(data.x, data.edge_index, index=i)

        # Explanations without a `perturbs` attribute are skipped — presumably the
        # algorithm only sets it when a counterfactual was found (TODO confirm).
        if hasattr(explanation, "perturbs") and explanation.perturbs < max_perturbs:
            num_perturbs.append(explanation.perturbs)
            prop_perturbs.append(explanation.prop_perturbs)

    size, prop = results(num_perturbs, prop_perturbs)

    # `results` yields "N/A" strings when nothing succeeded, so `1 - prop` would raise
    # TypeError; an empty node selection would likewise divide by zero.
    fidelity = len(num_perturbs) / len(df_cf) if df_cf else "N/A"
    similarity = 1 - prop if num_perturbs else "N/A"

    print(f"Fedility: {fidelity}, Num_perturbs: {size}, Similarity: {similarity}")
    return [[len(df_cf), size, len(num_perturbs), prop]]

# Load the biased checkpoint. NOTE(review): the preceding cell loads this same path, so the
# reload looks redundant when cells run in order — confirm. Unpickling can execute arbitrary
# code — load trusted files only.
model = torch.load("../models/Biased_GCN_LoanDecision.pt")
model

# Build the explanation algorithm and evaluate it with the `explaine` defined in this cell.
# NOTE(review): `ARExplainer` is not imported in the visible import cell (only `C2Explainer`
# is), so this presumably raises NameError on a fresh kernel — confirm the intended class.
explainer = ARExplainer(epochs=1000, lr=0.001, undirected=True, AR_mode=True)
# NOTE(review): every evaluation cell in this notebook assigns to `result4`, so each run
# overwrites the previous summary.
result4 = explaine(model, data, explainer, seed=42)

82


  0%|          | 0/82 [00:00<?, ?it/s]

num_cfs:608, min_perturbs:12.0, prop_perturbs:0.008253094740211964, cf_label 0
num_cfs:401, min_perturbs:4.0, prop_perturbs:0.004123711492866278, cf_label 1
num_cfs:629, min_perturbs:66.0, prop_perturbs:0.04019488766789436, cf_label 0
num_cfs:122, min_perturbs:4.0, prop_perturbs:0.014598540030419827, cf_label 1
num_cfs:585, min_perturbs:2.0, prop_perturbs:0.001240694778971374, cf_label 0
num_cfs:51, min_perturbs:20.0, prop_perturbs:0.04672896862030029, cf_label 1
num_cfs:188, min_perturbs:8.0, prop_perturbs:0.019607843831181526, cf_label 1
num_cfs:334, min_perturbs:34.0, prop_perturbs:0.11564625799655914, cf_label 0
num_cfs:693, min_perturbs:2.0, prop_perturbs:0.004166666883975267, cf_label 1
num_cfs:57, min_perturbs:10.0, prop_perturbs:0.006468305364251137, cf_label 1
num_cfs:262, min_perturbs:2.0, prop_perturbs:0.00283286115154624, cf_label 1
num_cfs:403, min_perturbs:2.0, prop_perturbs:0.0020920501556247473, cf_label 1
num_cfs:123, min_perturbs:2.0, prop_perturbs:0.00113122176844626

In [5]:
# 200
# Loading widget...
# ######
# size: 1.1451612903225807, num_success: 186, prop_perturbs: 0.006411443930119276
# finished
# Fedility: 0.93, Num_perturbs: 1.1451612903225807, Similarity: 0.9935885667800903
# CPU times: user 43min 4s, sys: 1.27 s, total: 43min 5s
# Wall time: 42min 41s