#### Metrics for three interpretability techniques: LIME, ANCHOR, CIU

The metrics implemented below include identity, separability, stability, similarity, trust score, and fidelity.

In [22]:
import os
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import lime.lime_tabular
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from scipy.sparse import csr_matrix
import numpy as np
import time
import warnings

warnings.filterwarnings("ignore")

In [23]:
# Load every dataset folder and keep a small, reproducible sample of rows.
# Each folder is expected to contain attribute_names.csv,
# categorical_indicator.csv, X.csv and y.csv.
datasets_folder = "datasets"
SAMPLES_PER_DATASET = 100
seed_value = 42

folder_names = []
attribute_names_list = []
categorical_indicator_list = []
X_list = []
y_list = []

# os.listdir returns entries in arbitrary order; sorting keeps the dataset
# numbering used later stable between runs and machines.
for folder_name in sorted(os.listdir(datasets_folder)):
    folder_path = os.path.join(datasets_folder, folder_name)
    if not os.path.isdir(folder_path):
        continue

    attribute_names_df = pd.read_csv(os.path.join(folder_path, "attribute_names.csv"))
    categorical_indicator_df = pd.read_csv(os.path.join(folder_path, "categorical_indicator.csv"))
    X_df = pd.read_csv(os.path.join(folder_path, "X.csv"))
    y_df = pd.read_csv(os.path.join(folder_path, "y.csv"))

    # Seed before *any* random draw; previously the per-class picks of the
    # first dataset were drawn before the seed was set.
    np.random.seed(seed_value)

    # One guaranteed row per class so every class is represented.
    class_column = y_df.iloc[:, 0]
    per_class_picks = []
    for cls in class_column.dropna().unique():
        cls_indices = class_column[class_column == cls].index
        per_class_picks.append(np.random.choice(cls_indices, 1)[0])
    sampled_indices = np.array(per_class_picks, dtype=y_df.index.dtype)

    # Top up to the target size from rows not already chosen, so no row is
    # duplicated, and never ask for more rows than the dataset has.
    needed_samples = SAMPLES_PER_DATASET - len(sampled_indices)
    if needed_samples > 0:
        remaining_indices = y_df.index.difference(sampled_indices)
        n_additional = min(needed_samples, len(remaining_indices))
        if n_additional > 0:
            additional_indices = np.random.choice(remaining_indices, n_additional, replace=False)
            sampled_indices = np.concatenate([sampled_indices, additional_indices])

    # NOTE(review): assumes X.csv and y.csv are row-aligned and share the
    # default index - confirm against how the CSVs were exported.
    X_list.append(X_df.loc[sampled_indices])
    y_list.append(y_df.loc[sampled_indices])

    folder_names.append(folder_name)
    attribute_names_list.append(attribute_names_df)
    categorical_indicator_list.append(categorical_indicator_df)


In [24]:
def convert_to_numeric_and_impute(X_list, y_list):
    """Return numeric, gap-free copies of the feature and target frames.

    Parameters
    ----------
    X_list : list of pandas.DataFrame
        Feature frames. They are copied, not modified in place.
    y_list : list of pandas.DataFrame
        Single-column target frames.

    Returns
    -------
    (list of DataFrame, list of DataFrame)
        Processed feature frames and target frames, in the input order.

    Feature columns:
      * columns whose present values all parse as numbers are converted to
        numeric and missing entries are replaced by the column mean;
      * columns holding at least one non-numeric value are treated as
        categorical: missing entries become the category 'Missing' and the
        values are label-encoded;
      * columns with no values at all are filled with 0.

    Target frames with a non-numeric (or boolean) first column are
    label-encoded and the first column is renamed to 'class'; numeric targets
    are returned unchanged.
    """
    imputer = SimpleImputer(strategy='mean')
    label_encoder = LabelEncoder()

    def process_X_dataframe(df):
        # Work on a copy so re-running the cell does not re-process
        # already-processed frames.
        df = df.copy()
        for column in df.columns:
            if len(df) and isinstance(df[column].iloc[0], csr_matrix):
                df[column] = df[column].apply(lambda x: x.toarray()[0,0] if x.shape[1] == 1 else x.toarray())

            numeric = pd.to_numeric(df[column], errors='coerce')
            # Values that exist but are not numbers. Coercing them would
            # silently turn a categorical column into NaN (then zeros), which
            # made the label-encoding branch unreachable before.
            unparsable = numeric.isna() & df[column].notna()

            if unparsable.any():
                categories = df[column].fillna('Missing').astype(str)
                df[column] = label_encoder.fit_transform(categories)
            elif numeric.notna().any():
                df[column] = imputer.fit_transform(numeric.to_frame()).ravel()
            else:
                df[column] = numeric.fillna(0)
        return df

    def process_y_dataframe(df):
        target_dtype = df.dtypes.iloc[0]
        is_plain_numeric = (
            pd.api.types.is_numeric_dtype(target_dtype)
            and not pd.api.types.is_bool_dtype(target_dtype)
        )
        if is_plain_numeric:
            return df

        df_encoded = df.apply(lambda x: label_encoder.fit_transform(x))
        return df_encoded.rename(columns={df_encoded.columns[0]: 'class'})

    X_list = [process_X_dataframe(df) for df in X_list]
    y_list = [process_y_dataframe(df) for df in y_list]

    return X_list, y_list

# Rebind X_list / y_list to the processed frames returned by the helper above.
X_list, y_list = convert_to_numeric_and_impute(X_list, y_list)

In [25]:

def calc_fidelity(instance, black_box_model, lime_explanation, num_samples=100):
    """Agreement between the black-box model and LIME's linear weights near `instance`.

    Parameters
    ----------
    instance : 1-D array-like
        The explained data row.
    black_box_model : object
        Must provide `predict_proba`.
    lime_explanation : object
        Must provide `as_map()` returning {label: [(feature_index, weight), ...]}.
    num_samples : int
        Number of perturbed neighbours to generate.

    Returns
    -------
    float
        `accuracy_score` between the black-box class predictions and the
        thresholded linear score on the perturbed samples.

    Raises
    ------
    KeyError
        If the explanation holds no weights for the class the black box
        predicts for the first perturbed sample.
    """
    num_features = len(instance)
    perturbed_samples = np.asarray(
        generate_perturbed_samples(instance, num_features, num_samples)
    )
    bb_predictions = black_box_model.predict_proba(perturbed_samples)

    class_idx = int(np.argmax(bb_predictions[0]))

    # as_map() gives (feature_index, weight) pairs. as_list() gives textual
    # feature descriptions, which cannot be used to index a sample.
    weights_by_label = lime_explanation.as_map()
    if class_idx not in weights_by_label:
        raise KeyError(
            f"explanation has no weights for class {class_idx}; "
            "explain the instance with that label included"
        )

    # The weighted sum is computed inline so this function does not depend on
    # which of the same-named approximate_lime_predictions definitions in
    # this notebook is currently in effect (their signatures differ).
    lime_predictions = np.zeros(len(perturbed_samples))
    for feature_index, weight in weights_by_label[class_idx]:
        lime_predictions += perturbed_samples[:, feature_index] * weight

    # NOTE(review): the 0.5 threshold yields labels in {0, 1}, so this
    # comparison is only meaningful for binary problems - confirm intent.
    fidelity = accuracy_score(np.argmax(bb_predictions, axis=1), (lime_predictions > 0.5).astype(int))
    return fidelity


def calc_identity(exp1, exp2):
    """Compare two runs of explanations position by position.

    Returns a tuple `(percent_different, n_identical, n_total)` where
    `percent_different` is the share (0-100) of positions whose explanations
    are not exactly equal under `np.array_equal`.

    This function is defined again, unchanged, further down in this cell.
    """
    n_pairs = len(exp1)
    same_flags = np.array(
        [np.array_equal(exp1[k], exp2[k]) for k in range(n_pairs)]
    )
    n_total = same_flags.shape[0]
    n_identical = np.sum(same_flags)
    fraction_different = (n_total - n_identical) / n_total
    return fraction_different * 100, n_identical, n_total

def calc_separability(exp):
    """Count ordered pairs of distinct rows in `exp` with identical explanations.

    Returns `(n_equal_pairs, n_rows, n_rows**2, score)` where `score` is
    `100 * n_equal_pairs / n_rows**2`. Each unordered pair is counted twice
    because both (i, j) and (j, i) are visited.

    This function is defined again, unchanged, further down in this cell.
    """
    n_rows = exp.shape[0]
    n_equal_pairs = sum(
        1
        for a in range(n_rows)
        for b in range(n_rows)
        if a != b and np.array_equal(exp[a], exp[b])
    )
    n_all_pairs = n_rows**2
    return n_equal_pairs, n_rows, n_all_pairs, 100 * n_equal_pairs / n_all_pairs

def calc_stability(exp, labels):
    """Count instances whose explanation falls in a different cluster than its label.

    The explanations are clustered with k-means (one cluster per distinct
    label). The centroids are initialised at the mean explanation of each
    label, so cluster `k` corresponds to the k-th sorted label value and the
    cluster ids can be compared with the labels directly.

    Parameters
    ----------
    exp : array-like, shape (n_instances, n_values)
        One explanation vector per instance.
    labels : array-like, shape (n_instances,)
        Label of each instance.

    Returns
    -------
    (int, int)
        Number of mismatching instances and the total number of instances.
        If more than half mismatch, the complement is reported (kept from the
        original implementation).

    This function is defined again further down in this cell.
    """
    # `import sklearn` alone does not guarantee the `cluster` submodule is
    # loaded, so import it explicitly.
    from sklearn.cluster import KMeans

    exp = np.asarray(exp)
    labels = np.asarray(labels).ravel()
    total = labels.shape[0]

    # Map labels to 0..k-1 so they are comparable with k-means cluster ids
    # even when the raw labels are not zero-based integers.
    label_values, label_ids = np.unique(labels, return_inverse=True)
    label_ids = np.asarray(label_ids).ravel()
    n_clusters = label_values.shape[0]

    # vstack keeps a 2-D (n_clusters, n_values) array even for one class.
    init = np.vstack([exp[label_ids == k].mean(axis=0) for k in range(n_clusters)])

    # The per-label means were previously computed but never passed to KMeans,
    # leaving cluster numbering arbitrary. n_init=1 because the start is fixed.
    ct = KMeans(n_clusters=n_clusters, init=init, n_init=1, random_state=42)
    ct.fit(exp)

    # Count mismatches; summing |label - cluster| over-counts with > 2 classes.
    error = int(np.sum(label_ids != ct.labels_))
    if error/total > 0.5:
        error = total-error
    return error, total


def normalize_test(X_train, X_test):
    """Min-max scale `X_test` column by column using ranges learned from `X_train`.

    A separate `MinMaxScaler` is fitted on each training column and applied to
    the matching test column. Columns of `X_test` that are not in `X_train`
    are left as they are. `X_test` itself is not modified; a scaled copy is
    returned.
    """
    scaled_test = X_test.copy()
    for col_name in X_train.columns:
        column_scaler = sklearn.preprocessing.MinMaxScaler()
        train_column = X_train[col_name].values.reshape(-1, 1)
        column_scaler.fit(train_column)
        test_column = X_test[col_name].values.reshape(-1, 1)
        scaled_test[col_name] = column_scaler.transform(test_column)
    return scaled_test

def calc_similarity(exp, X_test_norm):
    """Smallest mean pairwise distance between explanations within a DBSCAN group.

    The first 400 rows of `X_test_norm` are clustered with DBSCAN. For each
    distinct cluster label, the Euclidean pairwise distances between the
    explanations of its members are averaged, and the minimum of those
    averages is returned.

    NOTE(review): `exp` is indexed as `exp[rows, :, 1]`, so it must be 3-D;
    presumably the last axis holds (feature, weight) pairs and index 1 is the
    weight - confirm against the caller.
    """
    # eps / min_samples and the 400-row cap are hard-coded.
    # NOTE(review): `sklearn.cluster` is not explicitly imported in the
    # visible import cell - confirm the submodule is loaded.
    dbscan = sklearn.cluster.DBSCAN(eps=0.5, min_samples=10)
    dbscan.fit(X_test_norm[:400])
    labels = dbscan.labels_
    mean_dist = []
    # NOTE(review): every distinct label is treated as a group here; per the
    # scikit-learn docs DBSCAN labels noise points -1, so noise would be
    # averaged like a cluster - confirm this is intended.
    for i in np.unique(labels):
        # np.where returns a 1-tuple, which adds a leading axis of length 1 to
        # the selection; squeeze() removes it. A group with a single member is
        # squeezed down to 1-D as well.
        mean_dist.append(np.mean(sklearn.metrics.pairwise_distances(exp[np.where(labels == i), :, 1].squeeze(), metric='euclidean')))
    return np.min(mean_dist)

def permute(x, x_dash):
    """Randomly swap entries between two equal-length vectors.

    One uniform draw is made per position. Where the draw exceeds 0.5 each
    vector keeps its own value; otherwise the two values trade places.
    Returns the two resulting sequences as Python lists; the inputs are not
    modified.
    """
    first = x.copy()
    second = x_dash.copy()
    draws = np.random.random(first.shape[0])

    mixed_first = []
    mixed_second = []
    for pos in range(len(first)):
        if draws[pos] > 0.5:
            mixed_first.append(first[pos])
            mixed_second.append(second[pos])
        else:
            mixed_first.append(second[pos])
            mixed_second.append(first[pos])
    return mixed_first, mixed_second

def calc_trust_score(test_x, exp, m, feat_list, model=None, top_k=6):
    """Mean recall of the explanation's top features against permutation-based scores.

    For every test instance, each feature gets a score: the average (over `m`
    random partner instances) distance between the model's predicted
    probabilities on two hybrid instances built around that feature. The
    `top_k` highest-scoring features are the reference set; recall is the
    share of them that appear among the first `top_k` features of the
    explanation.

    Parameters
    ----------
    test_x : indexable of 1-D arrays
        Test instances; each must support `.copy()`.
    exp : indexable of 2-D arrays
        `exp[i][:, 0]` must hold feature indices ordered by importance.
    m : int
        Number of random partner instances per test instance.
    feat_list : sequence
        Feature list; only its length is used.
    model : object, optional
        Classifier providing `predict_proba`. When omitted, the notebook-level
        variable `model` is used, as the original implementation did.
    top_k : int, optional
        Number of top features compared (default 6). Clamped to the number of
        features so small datasets do not break `np.argpartition`.

    Returns
    -------
    float
        Mean recall over all test instances.
    """
    if model is None:
        # Backward-compatible fallback to the global the original relied on.
        try:
            model = globals()["model"]
        except KeyError:
            raise NameError(
                "calc_trust_score needs a fitted `model` (pass it explicitly)"
            ) from None

    n_features = len(feat_list)
    top_k = min(top_k, n_features)
    if top_k <= 0:
        raise ValueError("calc_trust_score needs at least one feature and top_k >= 1")

    total_recalls = []
    for i in range(len(test_x)):
        feat_score = np.zeros(n_features)
        for _ in range(m):
            x = test_x[i].copy()
            x_dash = test_x[np.random.randint(0, len(test_x))].copy()
            x_perm, x_dash_perm = permute(x, x_dash)
            for j in range(n_features):
                # NOTE(review): z and z_dash differ in more than feature j
                # (z_dash starts from x_dash_perm, not x_perm) - confirm this
                # matches the intended definition of the metric.
                z = np.concatenate((x_perm[:j+1], x_dash_perm[j+1:]))
                z_dash = np.concatenate((x_dash_perm[:j], x_perm[j:]))
                p_z = model.predict_proba(z.reshape(1,-1))
                p_z_dash = model.predict_proba(z_dash.reshape(1,-1))
                feat_score[j] = feat_score[j] + np.linalg.norm(p_z-p_z_dash)
        feat_score = feat_score/m
        gold_feat_fs = np.argpartition(feat_score, -top_k)[-top_k:]
        recall = len(set(exp[i][:top_k, 0]).intersection(set(gold_feat_fs)))/top_k
        total_recalls.append(recall)
    return np.mean(total_recalls)


def approximate_lime_predictions(lime_weights, perturbed_samples):
    """Weighted sum of selected features for every sample.

    `lime_weights` is an iterable of `(feature_index, weight)` pairs; each
    sample's score is the sum of `sample[feature_index] * weight` over those
    pairs. Returns a 1-D float array with one score per sample.

    A later definition with the same name (taking an extra `num_classes`
    argument) appears further down in this cell and replaces this one once
    the cell has run.
    """
    scores = np.zeros(len(perturbed_samples))
    for row, sample in enumerate(perturbed_samples):
        scores[row] = sum(sample[idx] * coef for idx, coef in lime_weights)
    return scores


def apply_lime_model_to_test_set(X_test, explanation, num_classes):
    """Predict a class for every test row from a single explanation's linear weights.

    Parameters
    ----------
    X_test : pandas.DataFrame or 2-D array-like
        Numeric test data; features are addressed by column position.
    explanation : object
        Must expose `local_exp`, a mapping
        {class_index: [(feature_index, weight), ...]}.
    num_classes : int
        Number of class columns to score.

    Returns
    -------
    numpy.ndarray of int
        For each row, the index of the class with the highest linear score.
        Classes without weights in `local_exp` keep a score of 0, matching the
        three-argument approximate_lime_predictions in this notebook.
    """
    lime_weights = explanation.local_exp

    # Address features by position on a plain array. Indexing a row Series
    # with an integer depends on pandas' deprecated positional fallback when
    # the columns are named.
    values = np.asarray(X_test, dtype=float)
    lime_model_predictions = np.zeros((values.shape[0], num_classes))

    for class_idx in range(num_classes):
        # The explanation may not hold weights for every class; skip those
        # instead of raising KeyError.
        if class_idx not in lime_weights:
            continue
        class_weights = dict(lime_weights[class_idx])
        for feature, weight in class_weights.items():
            lime_model_predictions[:, class_idx] += values[:, feature] * weight

    return np.argmax(lime_model_predictions, axis=1)

def generate_perturbed_samples(instance, num_features, num_samples=100, scale=0.01):
    """Create noisy copies of `instance` by adding Gaussian noise to its features.

    Parameters
    ----------
    instance : 1-D array-like
        The data row to perturb. It is not modified.
    num_features : int
        Number of leading features that receive noise.
    num_samples : int
        Number of perturbed copies to generate.
    scale : float
        Standard deviation of the zero-mean noise (default 0.01, the value
        that was previously hard-coded).

    Returns
    -------
    numpy.ndarray, shape (num_samples, len(instance)), dtype float
    """
    # Work in float. With an integer-typed instance the old element-wise
    # `+=` truncated the noise back to the original integer, so the
    # "perturbed" samples were exact copies.
    base = np.array(instance, dtype=float)
    perturbed_samples = np.tile(base, (num_samples, 1))
    noise = np.random.normal(0, scale, size=(num_samples, num_features))
    perturbed_samples[:, :num_features] += noise
    return perturbed_samples

def approximate_lime_predictions(lime_weights, perturbed_samples, num_classes):
    """Per-class linear scores for every sample.

    `lime_weights` maps a class index to `(feature_index, weight)` pairs. For
    each sample and each class index below `num_classes` that is present in
    the mapping, the score is the sum of `sample[feature_index] * weight`.
    Classes absent from the mapping keep a score of 0. Returns a float array
    of shape `(len(perturbed_samples), num_classes)`.

    This redefines the two-argument function of the same name that appears
    earlier in this cell.
    """
    scores = np.zeros((len(perturbed_samples), num_classes))
    for row, sample in enumerate(perturbed_samples):
        for label in range(num_classes):
            if label not in lime_weights:
                continue
            per_feature = dict(lime_weights[label])
            scores[row, label] = sum(
                sample[feat] * coef for feat, coef in per_feature.items()
            )
    return scores

def calc_lime_model_accuracy(X_test, y_test, model, lime_explainer, num_classes, num_samples=100):
    """Average accuracy of LIME's linear weights on perturbed copies of each test row.

    For every row of `X_test` an explanation is generated, `num_samples`
    perturbed copies of the row are scored with the explanation's weights,
    and the resulting class predictions are compared with the row's true
    label.

    Parameters
    ----------
    X_test : pandas.DataFrame
    y_test : pandas.Series
        True labels, aligned with `X_test` by position.
    model : object
        Classifier providing `predict_proba`.
    lime_explainer : object
        Must provide `explain_instance`.
    num_classes : int
        Number of class columns to score.
    num_samples : int
        Perturbed copies per row.

    Returns
    -------
    float
        Mean accuracy over the rows that could be evaluated; NaN if none could.
    """
    n_features = len(X_test.columns)
    lime_accuracies = []
    for index, instance in enumerate(X_test.values):
        try:
            # NOTE(review): explain_instance is called with its default
            # labels, so local_exp may hold weights for a single class only -
            # confirm whether every class should be explained.
            explanation = lime_explainer.explain_instance(instance, model.predict_proba, num_features=n_features)
            perturbed_samples = generate_perturbed_samples(instance, n_features, num_samples)
            lime_weights = explanation.local_exp
            lime_predictions = approximate_lime_predictions(lime_weights, perturbed_samples, num_classes)
            predicted_classes = np.argmax(lime_predictions, axis=1)
            true_labels = y_test.iloc[index:index+1].repeat(num_samples)
            lime_accuracies.append(accuracy_score(true_labels, predicted_classes))
        except Exception as e:
            # Best effort: report the failure and keep going. NaN (not None)
            # marks the gap so np.nanmean can skip it; None produced an
            # object array that nanmean cannot average.
            print(f"Error in LIME model accuracy calculation for instance {index}: {e}")
            lime_accuracies.append(np.nan)
    return np.nanmean(np.array(lime_accuracies, dtype=float))


def calc_identity(exp1, exp2):
    """Compare two runs of explanations position by position.

    Returns a tuple `(percent_different, n_identical, n_total)` where
    `percent_different` is the share (0-100) of positions whose explanations
    are not exactly equal under `np.array_equal`.

    This repeats the definition of the same name given earlier in this cell.
    """
    n_pairs = len(exp1)
    same_flags = np.array(
        [np.array_equal(exp1[k], exp2[k]) for k in range(n_pairs)]
    )
    n_total = same_flags.shape[0]
    n_identical = np.sum(same_flags)
    fraction_different = (n_total - n_identical) / n_total
    return fraction_different * 100, n_identical, n_total

def calc_separability(exp):
    """Count ordered pairs of distinct rows in `exp` with identical explanations.

    Returns `(n_equal_pairs, n_rows, n_rows**2, score)` where `score` is
    `100 * n_equal_pairs / n_rows**2`. Each unordered pair is counted twice
    because both (i, j) and (j, i) are visited.

    This repeats the definition of the same name given earlier in this cell.
    """
    n_rows = exp.shape[0]
    n_equal_pairs = sum(
        1
        for a in range(n_rows)
        for b in range(n_rows)
        if a != b and np.array_equal(exp[a], exp[b])
    )
    n_all_pairs = n_rows**2
    return n_equal_pairs, n_rows, n_all_pairs, 100 * n_equal_pairs / n_all_pairs

def calc_stability(exp, labels):
    """Count instances whose explanation falls in a different cluster than its label.

    The explanations are clustered with k-means (one cluster per distinct
    label). The centroids are initialised at the mean explanation of each
    label, so cluster `k` corresponds to the k-th sorted label value and the
    cluster ids can be compared with the labels directly.

    Parameters
    ----------
    exp : array-like, shape (n_instances, n_values)
        One explanation vector per instance.
    labels : array-like, shape (n_instances,)
        Label of each instance.

    Returns
    -------
    (int, int)
        Number of mismatching instances and the total number of instances.
        If more than half mismatch, the complement is reported (kept from the
        original implementation).

    This redefines the function of the same name given earlier in this cell.
    """
    # `import sklearn` alone does not guarantee the `cluster` submodule is
    # loaded, so import it explicitly.
    from sklearn.cluster import KMeans

    exp = np.asarray(exp)
    labels = np.asarray(labels).ravel()
    total = labels.shape[0]

    # Map labels to 0..k-1 so they are comparable with k-means cluster ids
    # even when the raw labels are not zero-based integers.
    label_values, label_ids = np.unique(labels, return_inverse=True)
    label_ids = np.asarray(label_ids).ravel()
    n_clusters = label_values.shape[0]

    # vstack keeps a 2-D (n_clusters, n_values) array even for one class,
    # where squeeze() collapsed it to 1-D.
    init = np.vstack([exp[label_ids == k].mean(axis=0) for k in range(n_clusters)])

    # With an explicit init array only one initialisation is meaningful, so
    # n_init is 1 rather than 10.
    ct = KMeans(n_clusters=n_clusters, random_state=1, n_init=1, init=init)
    ct.fit(exp)

    # Count mismatches; summing |label - cluster| over-counts with > 2 classes.
    error = int(np.sum(label_ids != ct.labels_))
    if error/total > 0.5:
        error = total-error
    return error, total

In [None]:
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import lime.lime_tabular
import pandas as pd
import tqdm
import metrics_rules

# Run the LIME evaluation on every loaded dataset and collect one result row
# per dataset.
RESULT_COLUMNS = ["Dataset", "Fidelity", "Identity", "Separability", "Speed"]
interp_records = []
df_interp = pd.DataFrame(columns=RESULT_COLUMNS)

for i in range(len(X_list)):
    print(f"Dataset {i}")
    X, y = X_list[i], y_list[i].squeeze()

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=555)
    # Seeded so the fitted model is the same on every run.
    model = RandomForestClassifier(random_state=555)
    model.fit(X_train, y_train)

    # `num_classes` was used below without ever being defined in this
    # notebook. It is derived from the same labels passed as class_names.
    # NOTE(review): confirm this matches the intended class count when a
    # class is missing from the training split.
    num_classes = len(np.unique(y))

    lime_explainer = lime.lime_tabular.LimeTabularExplainer(
        training_data=X_train.values,
        feature_names=X_train.columns.tolist(),
        class_names=np.unique(y).tolist(),
        discretize_continuous=True
    )

    # Define the function to create an explanation for one instance
    def exp_fn(i):
        instance = X_test.iloc[i].values
        return lime_explainer.explain_instance(instance, model.predict_proba, num_features=len(X_train.columns))

    # Define the function to apply the explanation function to a block of instances
    def exp_fn_blk(xtest):
        exp1 = []
        for i in tqdm.tqdm(range(len(xtest))):
            exp = exp_fn(i)
            exp1.append(exp.as_map()[exp.available_labels()[0]])
        return np.array(exp1)

    # Two independent explanation passes over the test set; the elapsed time
    # covers both of them.
    start_time = time.time()
    exp1 = exp_fn_blk(X_test.values)
    exp2 = exp_fn_blk(X_test.values)
    end_time = time.time()
    speed = end_time - start_time

    lime_accuracy = calc_lime_model_accuracy(X_test, y_test, model, lime_explainer, num_classes)

    # NOTE(review): only the first explanation of each pass (exp1[0],
    # exp2[0]) is handed to the metrics_rules helpers - confirm that is what
    # those helpers expect rather than the full arrays.
    interp_records.append({
        "Dataset": i,
        "Fidelity": lime_accuracy,
        "Identity": metrics_rules.calc_identity_rules(exp1[0], exp2[0]),
        "Separability": metrics_rules.calc_separability_rules(exp1[0]),
        "Speed": speed
    })
    # DataFrame.append was removed in pandas 2.0. Rebuilding the frame from
    # the record list each iteration keeps partial results available if the
    # run is interrupted.
    df_interp = pd.DataFrame(interp_records, columns=RESULT_COLUMNS)

print(df_interp)


Dataset 0


100%|███████████████████████████████████████████| 20/20 [00:55<00:00,  2.77s/it]
100%|███████████████████████████████████████████| 20/20 [00:55<00:00,  2.76s/it]


Dataset 1


100%|███████████████████████████████████████████| 20/20 [01:29<00:00,  4.46s/it]
100%|███████████████████████████████████████████| 20/20 [01:27<00:00,  4.40s/it]


Dataset 2


100%|███████████████████████████████████████████| 20/20 [00:00<00:00, 48.62it/s]
100%|███████████████████████████████████████████| 20/20 [00:00<00:00, 47.72it/s]


Dataset 3


100%|███████████████████████████████████████████| 20/20 [01:23<00:00,  4.15s/it]
100%|███████████████████████████████████████████| 20/20 [01:22<00:00,  4.15s/it]


Dataset 4


100%|███████████████████████████████████████████| 20/20 [01:55<00:00,  5.78s/it]
100%|███████████████████████████████████████████| 20/20 [01:55<00:00,  5.76s/it]


Dataset 5


100%|███████████████████████████████████████████| 20/20 [00:33<00:00,  1.65s/it]
100%|███████████████████████████████████████████| 20/20 [00:33<00:00,  1.66s/it]


Dataset 6


100%|███████████████████████████████████████████| 20/20 [02:47<00:00,  8.38s/it]
 90%|██████████████████████████████████████▋    | 18/20 [02:32<00:17,  8.62s/it]

In [None]:
# Show the collected per-dataset results (rendered as the cell's output).
df_interp

In [None]:
# Write the results table to 'records_lime.csv' in the current working directory.
df_interp.to_csv('records_lime.csv')